@ls-stack/agent-eval 0.60.4 → 0.61.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-gg10KvzS.mjs → app-Dm_9ZTVa.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-CM_zUhl_.css +1 -0
- package/dist/apps/web/dist/assets/{index-CM6MDNqo.js → index-CwSehYad.js} +76 -76
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-OLZIjQpx.mjs → cli-CPBIcMP-.mjs} +4 -4
- package/dist/index.d.mts +61 -52
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-Bu9yfdUS.mjs → runExecution-D-CnSRYy.mjs} +17 -1
- package/dist/{runOrchestration-mpgZmEZ6.mjs → runOrchestration-Basvyp4u.mjs} +1 -1
- package/dist/{runner-C4Y0lWb1.mjs → runner-B6UT1K7L.mjs} +1 -1
- package/dist/{runner-SxtKn-Xh.mjs → runner-DwNb5TCb.mjs} +2 -2
- package/dist/{src-Cy3OxoZW.mjs → src-SixIk0b7.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +76 -432
- package/dist/apps/web/dist/assets/index-CqWfzcFb.css +0 -1
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-CM6MDNqo.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-CqWfzcFb.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-CwSehYad.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-CM_zUhl_.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
package/dist/caseChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
|
|
1
|
+
import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
|
|
2
2
|
//#region ../runner/src/caseChild.ts
|
|
3
3
|
let fatalErrorReported = false;
|
|
4
4
|
let disconnectExpected = false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-mpgZmEZ6.mjs";
|
|
1
|
+
import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-Basvyp4u.mjs";
|
|
3
3
|
import { parseEnv } from "node:util";
|
|
4
4
|
import { resultify } from "t-result";
|
|
5
5
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
@@ -2243,8 +2243,8 @@ async function commandApp(args) {
|
|
|
2243
2243
|
const { serve } = await import("@hono/node-server");
|
|
2244
2244
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2245
2245
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2246
|
-
const appModule = await import("./app-gg10KvzS.mjs");
|
|
2247
|
-
const runnerModule = await import("./runner-C4Y0lWb1.mjs");
|
|
2246
|
+
const appModule = await import("./app-Dm_9ZTVa.mjs");
|
|
2247
|
+
const runnerModule = await import("./runner-B6UT1K7L.mjs");
|
|
2248
2248
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2249
2249
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2250
2250
|
await runnerModule.initRunner({ loadEnv: args.loadEnv });
|
package/dist/index.d.mts
CHANGED
|
@@ -2214,8 +2214,8 @@ declare const traceSpanSchema$1: z.ZodObject<{
|
|
|
2214
2214
|
status: z.ZodEnum<{
|
|
2215
2215
|
error: "error";
|
|
2216
2216
|
running: "running";
|
|
2217
|
-
ok: "ok";
|
|
2218
2217
|
cancelled: "cancelled";
|
|
2218
|
+
ok: "ok";
|
|
2219
2219
|
}>;
|
|
2220
2220
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2221
2221
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -2466,10 +2466,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2466
2466
|
caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2467
2467
|
lastRunStatus: z.ZodNullable<z.ZodEnum<{
|
|
2468
2468
|
error: "error";
|
|
2469
|
-
running: "running";
|
|
2470
|
-
cancelled: "cancelled";
|
|
2471
2469
|
pass: "pass";
|
|
2472
2470
|
fail: "fail";
|
|
2471
|
+
running: "running";
|
|
2472
|
+
cancelled: "cancelled";
|
|
2473
2473
|
unscored: "unscored";
|
|
2474
2474
|
}>>;
|
|
2475
2475
|
stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
@@ -2558,8 +2558,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2558
2558
|
}>;
|
|
2559
2559
|
label: z.ZodOptional<z.ZodString>;
|
|
2560
2560
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2561
|
-
error: "error";
|
|
2562
2561
|
success: "success";
|
|
2562
|
+
error: "error";
|
|
2563
2563
|
accent: "accent";
|
|
2564
2564
|
accentDim: "accentDim";
|
|
2565
2565
|
warning: "warning";
|
|
@@ -2582,8 +2582,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2582
2582
|
}>;
|
|
2583
2583
|
label: z.ZodOptional<z.ZodString>;
|
|
2584
2584
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2585
|
-
error: "error";
|
|
2586
2585
|
success: "success";
|
|
2586
|
+
error: "error";
|
|
2587
2587
|
accent: "accent";
|
|
2588
2588
|
accentDim: "accentDim";
|
|
2589
2589
|
warning: "warning";
|
|
@@ -2715,10 +2715,10 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2715
2715
|
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2716
2716
|
status: z.ZodEnum<{
|
|
2717
2717
|
error: "error";
|
|
2718
|
-
running: "running";
|
|
2719
|
-
cancelled: "cancelled";
|
|
2720
2718
|
pass: "pass";
|
|
2721
2719
|
fail: "fail";
|
|
2720
|
+
running: "running";
|
|
2721
|
+
cancelled: "cancelled";
|
|
2722
2722
|
pending: "pending";
|
|
2723
2723
|
}>;
|
|
2724
2724
|
durationMs: z.ZodNullable<z.ZodNumber>;
|
|
@@ -2857,8 +2857,8 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2857
2857
|
status: z.ZodEnum<{
|
|
2858
2858
|
error: "error";
|
|
2859
2859
|
running: "running";
|
|
2860
|
-
ok: "ok";
|
|
2861
2860
|
cancelled: "cancelled";
|
|
2861
|
+
ok: "ok";
|
|
2862
2862
|
}>;
|
|
2863
2863
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2864
2864
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -2920,10 +2920,10 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2920
2920
|
namespace: z.ZodString;
|
|
2921
2921
|
key: z.ZodString;
|
|
2922
2922
|
status: z.ZodEnum<{
|
|
2923
|
-
bypass: "bypass";
|
|
2924
|
-
refresh: "refresh";
|
|
2925
2923
|
hit: "hit";
|
|
2926
2924
|
miss: "miss";
|
|
2925
|
+
refresh: "refresh";
|
|
2926
|
+
bypass: "bypass";
|
|
2927
2927
|
}>;
|
|
2928
2928
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
2929
2929
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -2942,10 +2942,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2942
2942
|
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2943
2943
|
status: z.ZodEnum<{
|
|
2944
2944
|
error: "error";
|
|
2945
|
-
running: "running";
|
|
2946
|
-
cancelled: "cancelled";
|
|
2947
2945
|
pass: "pass";
|
|
2948
2946
|
fail: "fail";
|
|
2947
|
+
running: "running";
|
|
2948
|
+
cancelled: "cancelled";
|
|
2949
2949
|
pending: "pending";
|
|
2950
2950
|
}>;
|
|
2951
2951
|
input: z.ZodUnknown;
|
|
@@ -2960,8 +2960,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2960
2960
|
status: z.ZodEnum<{
|
|
2961
2961
|
error: "error";
|
|
2962
2962
|
running: "running";
|
|
2963
|
-
ok: "ok";
|
|
2964
2963
|
cancelled: "cancelled";
|
|
2964
|
+
ok: "ok";
|
|
2965
2965
|
}>;
|
|
2966
2966
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2967
2967
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -3029,8 +3029,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3029
3029
|
status: z.ZodEnum<{
|
|
3030
3030
|
error: "error";
|
|
3031
3031
|
running: "running";
|
|
3032
|
-
ok: "ok";
|
|
3033
3032
|
cancelled: "cancelled";
|
|
3033
|
+
ok: "ok";
|
|
3034
3034
|
}>;
|
|
3035
3035
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
3036
3036
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -3092,10 +3092,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3092
3092
|
namespace: z.ZodString;
|
|
3093
3093
|
key: z.ZodString;
|
|
3094
3094
|
status: z.ZodEnum<{
|
|
3095
|
-
bypass: "bypass";
|
|
3096
|
-
refresh: "refresh";
|
|
3097
3095
|
hit: "hit";
|
|
3098
3096
|
miss: "miss";
|
|
3097
|
+
refresh: "refresh";
|
|
3098
|
+
bypass: "bypass";
|
|
3099
3099
|
}>;
|
|
3100
3100
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
3101
3101
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -3213,10 +3213,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3213
3213
|
namespace: z.ZodString;
|
|
3214
3214
|
key: z.ZodString;
|
|
3215
3215
|
status: z.ZodEnum<{
|
|
3216
|
-
bypass: "bypass";
|
|
3217
|
-
refresh: "refresh";
|
|
3218
3216
|
hit: "hit";
|
|
3219
3217
|
miss: "miss";
|
|
3218
|
+
refresh: "refresh";
|
|
3219
|
+
bypass: "bypass";
|
|
3220
3220
|
}>;
|
|
3221
3221
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
3222
3222
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -3283,8 +3283,8 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
|
|
|
3283
3283
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
3284
3284
|
*/
|
|
3285
3285
|
declare const evalChartColorSchema: z.ZodEnum<{
|
|
3286
|
-
error: "error";
|
|
3287
3286
|
success: "success";
|
|
3287
|
+
error: "error";
|
|
3288
3288
|
accent: "accent";
|
|
3289
3289
|
accentDim: "accentDim";
|
|
3290
3290
|
warning: "warning";
|
|
@@ -3312,8 +3312,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3312
3312
|
}>;
|
|
3313
3313
|
label: z.ZodOptional<z.ZodString>;
|
|
3314
3314
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3315
|
-
error: "error";
|
|
3316
3315
|
success: "success";
|
|
3316
|
+
error: "error";
|
|
3317
3317
|
accent: "accent";
|
|
3318
3318
|
accentDim: "accentDim";
|
|
3319
3319
|
warning: "warning";
|
|
@@ -3336,8 +3336,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3336
3336
|
}>;
|
|
3337
3337
|
label: z.ZodOptional<z.ZodString>;
|
|
3338
3338
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3339
|
-
error: "error";
|
|
3340
3339
|
success: "success";
|
|
3340
|
+
error: "error";
|
|
3341
3341
|
accent: "accent";
|
|
3342
3342
|
accentDim: "accentDim";
|
|
3343
3343
|
warning: "warning";
|
|
@@ -3395,8 +3395,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3395
3395
|
}>;
|
|
3396
3396
|
label: z.ZodOptional<z.ZodString>;
|
|
3397
3397
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3398
|
-
error: "error";
|
|
3399
3398
|
success: "success";
|
|
3399
|
+
error: "error";
|
|
3400
3400
|
accent: "accent";
|
|
3401
3401
|
accentDim: "accentDim";
|
|
3402
3402
|
warning: "warning";
|
|
@@ -3419,8 +3419,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3419
3419
|
}>;
|
|
3420
3420
|
label: z.ZodOptional<z.ZodString>;
|
|
3421
3421
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3422
|
-
error: "error";
|
|
3423
3422
|
success: "success";
|
|
3423
|
+
error: "error";
|
|
3424
3424
|
accent: "accent";
|
|
3425
3425
|
accentDim: "accentDim";
|
|
3426
3426
|
warning: "warning";
|
|
@@ -3485,8 +3485,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3485
3485
|
}>;
|
|
3486
3486
|
label: z.ZodOptional<z.ZodString>;
|
|
3487
3487
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3488
|
-
error: "error";
|
|
3489
3488
|
success: "success";
|
|
3489
|
+
error: "error";
|
|
3490
3490
|
accent: "accent";
|
|
3491
3491
|
accentDim: "accentDim";
|
|
3492
3492
|
warning: "warning";
|
|
@@ -3509,8 +3509,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3509
3509
|
}>;
|
|
3510
3510
|
label: z.ZodOptional<z.ZodString>;
|
|
3511
3511
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3512
|
-
error: "error";
|
|
3513
3512
|
success: "success";
|
|
3513
|
+
error: "error";
|
|
3514
3514
|
accent: "accent";
|
|
3515
3515
|
accentDim: "accentDim";
|
|
3516
3516
|
warning: "warning";
|
|
@@ -3589,9 +3589,9 @@ declare const runManifestSchema$1: z.ZodObject<{
|
|
|
3589
3589
|
median: "median";
|
|
3590
3590
|
}>>>;
|
|
3591
3591
|
cacheMode: z.ZodOptional<z.ZodEnum<{
|
|
3592
|
-
use: "use";
|
|
3593
|
-
bypass: "bypass";
|
|
3594
3592
|
refresh: "refresh";
|
|
3593
|
+
bypass: "bypass";
|
|
3594
|
+
use: "use";
|
|
3595
3595
|
}>>;
|
|
3596
3596
|
}, z.core.$strip>;
|
|
3597
3597
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -4049,6 +4049,7 @@ declare const apiCallsConfigSchema: z.ZodObject<{
|
|
|
4049
4049
|
attributes: z.ZodOptional<z.ZodObject<{
|
|
4050
4050
|
method: z.ZodOptional<z.ZodString>;
|
|
4051
4051
|
url: z.ZodOptional<z.ZodString>;
|
|
4052
|
+
routeAlias: z.ZodOptional<z.ZodString>;
|
|
4052
4053
|
statusCode: z.ZodOptional<z.ZodString>;
|
|
4053
4054
|
request: z.ZodOptional<z.ZodString>;
|
|
4054
4055
|
response: z.ZodOptional<z.ZodString>;
|
|
@@ -4116,6 +4117,7 @@ type ResolvedApiCallsConfig = {
|
|
|
4116
4117
|
attributes: {
|
|
4117
4118
|
method: string;
|
|
4118
4119
|
url: string;
|
|
4120
|
+
routeAlias: string;
|
|
4119
4121
|
statusCode: string;
|
|
4120
4122
|
request: string;
|
|
4121
4123
|
response: string;
|
|
@@ -4302,6 +4304,7 @@ type AgentEvalsConfig$1 = {
|
|
|
4302
4304
|
* kinds: ['api', 'http.client', 'undici.request'],
|
|
4303
4305
|
* attributes: {
|
|
4304
4306
|
* statusCode: 'http.status_code',
|
|
4307
|
+
* routeAlias: 'http.route',
|
|
4305
4308
|
* },
|
|
4306
4309
|
* metrics: [
|
|
4307
4310
|
* { label: 'Retries', path: 'retryCount', format: 'number' },
|
|
@@ -4533,6 +4536,11 @@ type ApiCallEntry = {
|
|
|
4533
4536
|
status: EvalTraceSpan$1['status'];
|
|
4534
4537
|
method: string | null;
|
|
4535
4538
|
url: string | null;
|
|
4539
|
+
/**
|
|
4540
|
+
* Dynamic route alias read from the API span, such as `/v3/tabs/:id`.
|
|
4541
|
+
* The original `url` stays available for request details.
|
|
4542
|
+
*/
|
|
4543
|
+
routeAlias: string | null;
|
|
4536
4544
|
statusCode: number | null; /** Elapsed API call duration in milliseconds. */
|
|
4537
4545
|
durationMs: number | null;
|
|
4538
4546
|
request: unknown;
|
|
@@ -4567,9 +4575,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4567
4575
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4568
4576
|
*/
|
|
4569
4577
|
declare const cacheModeSchema: z.ZodEnum<{
|
|
4570
|
-
use: "use";
|
|
4571
|
-
bypass: "bypass";
|
|
4572
4578
|
refresh: "refresh";
|
|
4579
|
+
bypass: "bypass";
|
|
4580
|
+
use: "use";
|
|
4573
4581
|
}>;
|
|
4574
4582
|
/** Mode controlling how cached spans behave during a run. */
|
|
4575
4583
|
type CacheMode = z.infer<typeof cacheModeSchema>;
|
|
@@ -4583,17 +4591,17 @@ declare const spanCacheOptionsSchema: z.ZodObject<{
|
|
|
4583
4591
|
type SpanCacheOptions = z.infer<typeof spanCacheOptionsSchema>;
|
|
4584
4592
|
/** Category of operation stored in the eval cache. */
|
|
4585
4593
|
declare const cacheOperationTypeSchema: z.ZodEnum<{
|
|
4586
|
-
span: "span";
|
|
4587
4594
|
value: "value";
|
|
4595
|
+
span: "span";
|
|
4588
4596
|
}>;
|
|
4589
4597
|
/** Category of operation stored in the eval cache. */
|
|
4590
4598
|
type CacheOperationType = z.infer<typeof cacheOperationTypeSchema>;
|
|
4591
4599
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4592
4600
|
declare const cacheStatusSchema: z.ZodEnum<{
|
|
4593
|
-
bypass: "bypass";
|
|
4594
|
-
refresh: "refresh";
|
|
4595
4601
|
hit: "hit";
|
|
4596
4602
|
miss: "miss";
|
|
4603
|
+
refresh: "refresh";
|
|
4604
|
+
bypass: "bypass";
|
|
4597
4605
|
}>;
|
|
4598
4606
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4599
4607
|
type CacheStatus = z.infer<typeof cacheStatusSchema>;
|
|
@@ -4610,10 +4618,10 @@ declare const traceCacheRefSchema: z.ZodObject<{
|
|
|
4610
4618
|
namespace: z.ZodString;
|
|
4611
4619
|
key: z.ZodString;
|
|
4612
4620
|
status: z.ZodEnum<{
|
|
4613
|
-
bypass: "bypass";
|
|
4614
|
-
refresh: "refresh";
|
|
4615
4621
|
hit: "hit";
|
|
4616
4622
|
miss: "miss";
|
|
4623
|
+
refresh: "refresh";
|
|
4624
|
+
bypass: "bypass";
|
|
4617
4625
|
}>;
|
|
4618
4626
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
4619
4627
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -4721,8 +4729,8 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4721
4729
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4722
4730
|
error: "error";
|
|
4723
4731
|
running: "running";
|
|
4724
|
-
ok: "ok";
|
|
4725
4732
|
cancelled: "cancelled";
|
|
4733
|
+
ok: "ok";
|
|
4726
4734
|
}>>;
|
|
4727
4735
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4728
4736
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -4809,8 +4817,8 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4809
4817
|
key: z.ZodString;
|
|
4810
4818
|
namespace: z.ZodString;
|
|
4811
4819
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
4812
|
-
span: "span";
|
|
4813
4820
|
value: "value";
|
|
4821
|
+
span: "span";
|
|
4814
4822
|
}>>;
|
|
4815
4823
|
operationName: z.ZodOptional<z.ZodString>;
|
|
4816
4824
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -4822,8 +4830,8 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4822
4830
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4823
4831
|
error: "error";
|
|
4824
4832
|
running: "running";
|
|
4825
|
-
ok: "ok";
|
|
4826
4833
|
cancelled: "cancelled";
|
|
4834
|
+
ok: "ok";
|
|
4827
4835
|
}>>;
|
|
4828
4836
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4829
4837
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -4916,8 +4924,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4916
4924
|
key: z.ZodString;
|
|
4917
4925
|
namespace: z.ZodString;
|
|
4918
4926
|
operationType: z.ZodEnum<{
|
|
4919
|
-
span: "span";
|
|
4920
4927
|
value: "value";
|
|
4928
|
+
span: "span";
|
|
4921
4929
|
}>;
|
|
4922
4930
|
operationName: z.ZodString;
|
|
4923
4931
|
storedAt: z.ZodString;
|
|
@@ -4927,8 +4935,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4927
4935
|
key: z.ZodString;
|
|
4928
4936
|
namespace: z.ZodString;
|
|
4929
4937
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
4930
|
-
span: "span";
|
|
4931
4938
|
value: "value";
|
|
4939
|
+
span: "span";
|
|
4932
4940
|
}>>;
|
|
4933
4941
|
operationName: z.ZodOptional<z.ZodString>;
|
|
4934
4942
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -4940,8 +4948,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4940
4948
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4941
4949
|
error: "error";
|
|
4942
4950
|
running: "running";
|
|
4943
|
-
ok: "ok";
|
|
4944
4951
|
cancelled: "cancelled";
|
|
4952
|
+
ok: "ok";
|
|
4945
4953
|
}>>;
|
|
4946
4954
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4947
4955
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5034,8 +5042,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5034
5042
|
key: z.ZodString;
|
|
5035
5043
|
namespace: z.ZodString;
|
|
5036
5044
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5037
|
-
span: "span";
|
|
5038
5045
|
value: "value";
|
|
5046
|
+
span: "span";
|
|
5039
5047
|
}>>;
|
|
5040
5048
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5041
5049
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5047,8 +5055,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5047
5055
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5048
5056
|
error: "error";
|
|
5049
5057
|
running: "running";
|
|
5050
|
-
ok: "ok";
|
|
5051
5058
|
cancelled: "cancelled";
|
|
5059
|
+
ok: "ok";
|
|
5052
5060
|
}>>;
|
|
5053
5061
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5054
5062
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5132,8 +5140,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5132
5140
|
key: z.ZodString;
|
|
5133
5141
|
namespace: z.ZodString;
|
|
5134
5142
|
operationType: z.ZodEnum<{
|
|
5135
|
-
span: "span";
|
|
5136
5143
|
value: "value";
|
|
5144
|
+
span: "span";
|
|
5137
5145
|
}>;
|
|
5138
5146
|
operationName: z.ZodString;
|
|
5139
5147
|
storedAt: z.ZodString;
|
|
@@ -5143,8 +5151,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5143
5151
|
key: z.ZodString;
|
|
5144
5152
|
namespace: z.ZodString;
|
|
5145
5153
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5146
|
-
span: "span";
|
|
5147
5154
|
value: "value";
|
|
5155
|
+
span: "span";
|
|
5148
5156
|
}>>;
|
|
5149
5157
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5150
5158
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5156,8 +5164,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5156
5164
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5157
5165
|
error: "error";
|
|
5158
5166
|
running: "running";
|
|
5159
|
-
ok: "ok";
|
|
5160
5167
|
cancelled: "cancelled";
|
|
5168
|
+
ok: "ok";
|
|
5161
5169
|
}>>;
|
|
5162
5170
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5163
5171
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5250,8 +5258,8 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5250
5258
|
key: z.ZodString;
|
|
5251
5259
|
namespace: z.ZodString;
|
|
5252
5260
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5253
|
-
span: "span";
|
|
5254
5261
|
value: "value";
|
|
5262
|
+
span: "span";
|
|
5255
5263
|
}>>;
|
|
5256
5264
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5257
5265
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5263,8 +5271,8 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5263
5271
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5264
5272
|
error: "error";
|
|
5265
5273
|
running: "running";
|
|
5266
|
-
ok: "ok";
|
|
5267
5274
|
cancelled: "cancelled";
|
|
5275
|
+
ok: "ok";
|
|
5268
5276
|
}>>;
|
|
5269
5277
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5270
5278
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5356,8 +5364,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5356
5364
|
key: z.ZodString;
|
|
5357
5365
|
namespace: z.ZodString;
|
|
5358
5366
|
operationType: z.ZodEnum<{
|
|
5359
|
-
span: "span";
|
|
5360
5367
|
value: "value";
|
|
5368
|
+
span: "span";
|
|
5361
5369
|
}>;
|
|
5362
5370
|
operationName: z.ZodString;
|
|
5363
5371
|
storedAt: z.ZodString;
|
|
@@ -5367,8 +5375,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5367
5375
|
key: z.ZodString;
|
|
5368
5376
|
namespace: z.ZodString;
|
|
5369
5377
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5370
|
-
span: "span";
|
|
5371
5378
|
value: "value";
|
|
5379
|
+
span: "span";
|
|
5372
5380
|
}>>;
|
|
5373
5381
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5374
5382
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5380,8 +5388,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5380
5388
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5381
5389
|
error: "error";
|
|
5382
5390
|
running: "running";
|
|
5383
|
-
ok: "ok";
|
|
5384
5391
|
cancelled: "cancelled";
|
|
5392
|
+
ok: "ok";
|
|
5385
5393
|
}>>;
|
|
5386
5394
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5387
5395
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5579,9 +5587,9 @@ declare const createRunRequestSchema$1: z.ZodObject<{
|
|
|
5579
5587
|
temporary: z.ZodOptional<z.ZodBoolean>;
|
|
5580
5588
|
cache: z.ZodOptional<z.ZodObject<{
|
|
5581
5589
|
mode: z.ZodDefault<z.ZodEnum<{
|
|
5582
|
-
use: "use";
|
|
5583
|
-
bypass: "bypass";
|
|
5584
5590
|
refresh: "refresh";
|
|
5591
|
+
bypass: "bypass";
|
|
5592
|
+
use: "use";
|
|
5585
5593
|
}>>;
|
|
5586
5594
|
}, z.core.$strip>>;
|
|
5587
5595
|
manualInputs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
@@ -6682,6 +6690,7 @@ type ResolvedApiCallsConfig$1 = {
|
|
|
6682
6690
|
attributes: {
|
|
6683
6691
|
method: string;
|
|
6684
6692
|
url: string;
|
|
6693
|
+
routeAlias: string;
|
|
6685
6694
|
statusCode: string;
|
|
6686
6695
|
request: string;
|
|
6687
6696
|
response: string;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-OLZIjQpx.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-Cy3OxoZW.mjs";
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CPBIcMP-.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-SixIk0b7.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-mpgZmEZ6.mjs";
|
|
1
|
+
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-Basvyp4u.mjs";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -1214,6 +1214,7 @@ const apiCallsConfigSchema = z.object({
|
|
|
1214
1214
|
attributes: z.object({
|
|
1215
1215
|
method: z.string().optional(),
|
|
1216
1216
|
url: z.string().optional(),
|
|
1217
|
+
routeAlias: z.string().optional(),
|
|
1217
1218
|
statusCode: z.string().optional(),
|
|
1218
1219
|
request: z.string().optional(),
|
|
1219
1220
|
response: z.string().optional(),
|
|
@@ -1278,6 +1279,7 @@ const DEFAULT_API_CALLS_CONFIG = {
|
|
|
1278
1279
|
attributes: {
|
|
1279
1280
|
method: "method",
|
|
1280
1281
|
url: "url",
|
|
1282
|
+
routeAlias: "routeAlias",
|
|
1281
1283
|
statusCode: "statusCode",
|
|
1282
1284
|
request: "request",
|
|
1283
1285
|
response: "response",
|
|
@@ -2494,6 +2496,17 @@ function pickError(span) {
|
|
|
2494
2496
|
if (span.errors && span.errors.length > 0) return span.errors[0] ?? null;
|
|
2495
2497
|
return null;
|
|
2496
2498
|
}
|
|
2499
|
+
function stripSearchAndHash(value) {
|
|
2500
|
+
const endIndex = [value.indexOf("?"), value.indexOf("#")].filter((index) => index !== -1).toSorted((a, b) => a - b)[0];
|
|
2501
|
+
return endIndex === void 0 ? value : value.slice(0, endIndex);
|
|
2502
|
+
}
|
|
2503
|
+
function normalizeRouteAlias(routeAlias) {
|
|
2504
|
+
if (routeAlias === null) return null;
|
|
2505
|
+
const trimmed = routeAlias.trim();
|
|
2506
|
+
if (trimmed.length === 0) return null;
|
|
2507
|
+
const withoutSearch = stripSearchAndHash(trimmed);
|
|
2508
|
+
return withoutSearch.startsWith("/") ? withoutSearch : `/${withoutSearch}`;
|
|
2509
|
+
}
|
|
2497
2510
|
/**
|
|
2498
2511
|
* Filter `spans` down to API calls and project each one to the structured
|
|
2499
2512
|
* shape consumed by the API calls tab.
|
|
@@ -2525,13 +2538,16 @@ function extractApiCalls(spans, config) {
|
|
|
2525
2538
|
placements: metric.placements
|
|
2526
2539
|
});
|
|
2527
2540
|
}
|
|
2541
|
+
const url = readString$1(attrs, config.attributes.url);
|
|
2542
|
+
const routeAlias = normalizeRouteAlias(readString$1(attrs, config.attributes.routeAlias));
|
|
2528
2543
|
result.push({
|
|
2529
2544
|
id: span.id,
|
|
2530
2545
|
name: span.name,
|
|
2531
2546
|
kind: span.kind,
|
|
2532
2547
|
status: span.status,
|
|
2533
2548
|
method: readString$1(attrs, config.attributes.method),
|
|
2534
|
-
url: readString$1(attrs, config.attributes.url),
|
|
2549
|
+
url,
|
|
2550
|
+
routeAlias,
|
|
2535
2551
|
statusCode: readNumber$1(attrs, config.attributes.statusCode),
|
|
2536
2552
|
durationMs: readNumber$1(attrs, config.attributes.durationMs) ?? computeDurationMs(span),
|
|
2537
2553
|
request: getNestedAttribute(attrs, config.attributes.request),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
|
|
1
|
+
import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
|
|
2
2
|
import { Result, resultify } from "t-result";
|
|
3
3
|
import { readFile, readdir, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-DwNb5TCb.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-OLZIjQpx.mjs";
|
|
2
|
-
import "./src-Cy3OxoZW.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-CPBIcMP-.mjs";
|
|
2
|
+
import "./src-SixIk0b7.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance({ loadEnv = true } = {}) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
-
import "./cli-OLZIjQpx.mjs";
|
|
1
|
+
import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
+
import "./cli-CPBIcMP-.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.60.4",
|
|
3
|
+
"version": "0.61.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -32,9 +32,9 @@
|
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^24.7.2",
|
|
34
34
|
"typescript": "^5.9.2",
|
|
35
|
-
"@agent-evals/
|
|
35
|
+
"@agent-evals/runner": "0.0.1",
|
|
36
36
|
"@agent-evals/sdk": "0.0.1",
|
|
37
|
-
"@agent-evals/
|
|
37
|
+
"@agent-evals/shared": "0.0.1"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
40
|
"build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
|