@ls-stack/agent-eval 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-CmwmcUgG.mjs → app-Cw79dJDr.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-AUDD3rNB.js +118 -0
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/{cli-DumvanQI.mjs → cli-D3QNOcPN.mjs} +3 -3
- package/dist/index.d.mts +102 -56
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-zYAcAPtS.mjs → runOrchestration-CimthgI7.mjs} +248 -90
- package/dist/{runner-Dy_PECaf.mjs → runner-4yNYRvmF.mjs} +2 -2
- package/dist/{runner-BcwyX9CO.mjs → runner-B-SYzW8w.mjs} +1 -1
- package/dist/src-CcXfWT4M.mjs +3 -0
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +36 -17
- package/dist/apps/web/dist/assets/index-EXO08yya.js +0 -118
- package/dist/src-BoAJb4wC.mjs +0 -3
|
@@ -49,9 +49,46 @@ function repoFile(path, mimeType) {
|
|
|
49
49
|
//#region ../sdk/src/runtime.ts
|
|
50
50
|
const scopeStorage = new AsyncLocalStorage();
|
|
51
51
|
const runtimeScopeStorage = new AsyncLocalStorage();
|
|
52
|
+
const evalClockStorage = new AsyncLocalStorage();
|
|
52
53
|
let activeEvalScopeCount = 0;
|
|
53
54
|
let activeEvalRuntimeScopeCount = 0;
|
|
54
55
|
let consoleCaptureEnabled = true;
|
|
56
|
+
const defaultEvalStartTimeMs = Date.parse("2026-04-10T00:00:00.000Z");
|
|
57
|
+
const realDate = globalThis.__agentEvalsRealDate ?? Date;
|
|
58
|
+
globalThis.__agentEvalsRealDate = realDate;
|
|
59
|
+
/**
 * Normalize the single-argument form of the `Date` constructor.
 *
 * Strings, numbers, and instances of the real `Date` are handed back
 * untouched; any other value is coerced with `Number()`.
 */
function toDateConstructorArg(value) {
	const kind = typeof value;
	const passThrough = kind === "string" || kind === "number" || value instanceof realDate;
	return passThrough ? value : Number(value);
}
|
|
63
|
+
/** Coerce one numeric `Date` component to a number; numbers are returned as-is. */
function toDateNumberArg(value) {
	if (typeof value === "number") return value;
	return Number(value);
}
|
|
66
|
+
/**
 * Build a real `Date` from an explicit constructor argument list.
 *
 * - no arguments: `new realDate()`
 * - one argument: normalized through `toDateConstructorArg`
 * - two or more: treated as date components; the third component defaults
 *   to 1 and the fourth through seventh default to 0 when `undefined`.
 */
function constructDateFromArgs(args) {
	switch (args.length) {
		case 0: return new realDate();
		case 1: return new realDate(toDateConstructorArg(args[0]));
	}
	// Coerce the two leading components first so the coercion order matches
	// the positional order of the arguments.
	const first = toDateNumberArg(args[0]);
	const second = toDateNumberArg(args[1]);
	const fallbacks = [1, 0, 0, 0, 0];
	const trailing = fallbacks.map((fallback, offset) => {
		const raw = args[offset + 2];
		return raw === void 0 ? fallback : toDateNumberArg(raw);
	});
	return new realDate(first, second, ...trailing);
}
|
|
71
|
+
/**
 * Proxy around the real `Date` constructor that routes "current time" reads
 * through the eval clock.
 *
 * - Calling `Date()` as a function returns the string form of the eval-clock
 *   time when `getEvalClockNowMs()` yields a value, otherwise defers to the
 *   real `Date`.
 * - `new Date()` with no arguments yields the eval-clock time when one is
 *   available; any call with arguments is built by `constructDateFromArgs`.
 * - `Date.now` resolves to `getEvalDateNow`.
 * - Every other property is forwarded to the real `Date`.
 */
const evalDate = new Proxy(realDate, {
	apply(target, thisArg, argArray_) {
		const nowMs = getEvalClockNowMs();
		if (nowMs !== null) return new target(nowMs).toString();
		return target.call(thisArg);
	},
	construct(target, argArray, newTarget_) {
		const nowMs = getEvalClockNowMs();
		if (argArray.length === 0 && nowMs !== null) return new target(nowMs);
		return constructDateFromArgs(Array.from(argArray));
	},
	get(target, property) {
		if (property === "now") return getEvalDateNow;
		// Forward everything else to the real constructor. This covers `parse`,
		// `UTC`, `prototype`, `name`, and `length`, and also keeps inherited
		// members such as `bind`, `call`, and `toString` reachable instead of
		// reading as `undefined` (which made `String(Date)` throw).
		return Reflect.get(target, property);
	}
});
|
|
91
|
+
globalThis.Date = evalDate;
|
|
55
92
|
const maxLogMessageLength = 2e4;
|
|
56
93
|
const maxLogStringLength = 1e4;
|
|
57
94
|
const maxLogArrayLength = 100;
|
|
@@ -79,6 +116,82 @@ var EvalAssertionError = class extends Error {
|
|
|
79
116
|
this.name = "EvalAssertionError";
|
|
80
117
|
}
|
|
81
118
|
};
|
|
119
|
+
/**
 * Compute the eval-clock time, in epoch milliseconds, for a clock state.
 *
 * The result is `startMs`, plus the real time elapsed since `realStartMs`
 * (treated as zero when the state is frozen), plus `offsetMs`.
 */
function getEvalClockStateNowMs(state) {
	let elapsedMs = 0;
	if (!state.frozen) elapsedMs = realDate.now() - state.realStartMs;
	return state.startMs + elapsedMs + state.offsetMs;
}
|
|
123
|
+
/**
 * Return the eval-clock time for the clock state in the current async
 * context, or `null` when there is no state or its `shifted` flag is not
 * exactly `true`.
 */
function getEvalClockNowMs() {
	const clockState = evalClockStorage.getStore();
	const isShifted = clockState?.shifted === true;
	return isShifted ? getEvalClockStateNowMs(clockState) : null;
}
|
|
128
|
+
/** `Date.now` replacement: the eval-clock time when available, otherwise the real clock. */
function getEvalDateNow() {
	const shiftedNowMs = getEvalClockNowMs();
	if (shiftedNowMs !== null && shiftedNowMs !== void 0) return shiftedNowMs;
	return realDate.now();
}
|
|
131
|
+
/** Read the current time from the real `Date`, bypassing the eval Date shim. */
function getRealDateNowMs() {
	const hostNowMs = realDate.now();
	return hostNowMs;
}
|
|
135
|
+
/**
 * Return the eval-clock time for a stored clock state, or `null` when the
 * state is not shifted.
 */
function getEvalClockStateTimeMs(state) {
	return state.shifted ? getEvalClockStateNowMs(state) : null;
}
|
|
140
|
+
/**
 * Return the start time recorded on the active eval clock state as a real
 * `Date`.
 *
 * For `startTime: 'now'`, that value is the real time captured when the eval
 * clock state was created.
 *
 * Throws when no eval clock state is present in the current async context.
 */
function getEvalStartTime() {
	const clockState = evalClockStorage.getStore();
	if (clockState !== void 0) return new realDate(clockState.startMs);
	throw new Error("getEvalStartTime() must be called inside an active eval");
}
|
|
151
|
+
/**
 * Resolve an eval `startTime` option to epoch milliseconds.
 *
 * `undefined` yields the default eval start time and `"now"` yields the real
 * current time. A real `Date` contributes its `getTime()`, a number is used
 * directly, and anything else goes through the real `Date.parse`.
 *
 * Throws when the resolved value is not a finite number.
 */
function resolveEvalStartTimeMs(startTime) {
	if (startTime === void 0) return defaultEvalStartTimeMs;
	if (startTime === "now") return realDate.now();
	let ms;
	if (startTime instanceof realDate) ms = startTime.getTime();
	else if (typeof startTime === "number") ms = startTime;
	else ms = realDate.parse(startTime);
	if (!Number.isFinite(ms)) throw new Error(`Invalid eval startTime "${String(startTime)}". Use a Date, timestamp, ISO date string, or "now".`);
	return ms;
}
|
|
158
|
+
/**
 * Create a fresh eval clock state.
 *
 * The real clock is sampled once and used both as `realStartMs` and, for
 * `startTime === "now"`, as `startMs`. The state counts as shifted unless
 * `startTime` is `"now"` and `freezeTime` is falsy.
 */
function createEvalClockState(startTime, freezeTime) {
	const realNowMs = realDate.now();
	const usesRealNow = startTime === "now";
	const startMs = usesRealNow ? realNowMs : resolveEvalStartTimeMs(startTime);
	return {
		startMs,
		realStartMs: realNowMs,
		offsetMs: 0,
		frozen: freezeTime,
		shifted: !usesRealNow || freezeTime
	};
}
|
|
168
|
+
/**
 * Run `fn` inside a new eval clock state created from `startTime`.
 *
 * The clock is frozen only when `options.freezeTime` is exactly `true`.
 * Resolves with whatever `fn` returns.
 */
async function runWithEvalClock(startTime, fn, options = {}) {
	const clockState = createEvalClockState(startTime, options.freezeTime === true);
	return await evalClockStorage.run(clockState, fn);
}
|
|
172
|
+
/**
 * Map a time unit name (singular or plural) to its length in milliseconds.
 *
 * Throws for any unit other than millisecond(s), second(s), minute(s),
 * hour(s), or day(s).
 */
function getEvalTimeUnitMs(unit) {
	switch (unit) {
		case "millisecond":
		case "milliseconds":
			return 1;
		case "second":
		case "seconds":
			return 1e3;
		case "minute":
		case "minutes":
			return 6e4;
		case "hour":
		case "hours":
			return 36e5;
		case "day":
		case "days":
			return 864e5;
		default:
			throw new Error(`Unsupported eval time unit "${unit}"`);
	}
}
|
|
180
|
+
/**
 * Move the active eval's shifted Date clock by `amount` units and return the
 * resulting time as a real `Date`.
 *
 * Throws when called outside an active eval, when the eval clock is not
 * shifted (an eval with `startTime: 'now'` and no `freezeTime: true`), when
 * `amount` is not a finite number, or when `unit` is unsupported.
 */
function advanceEvalTime(unit, amount) {
	const clockState = evalClockStorage.getStore();
	if (clockState === void 0) throw new Error("advanceEvalTime() must be called inside an active eval");
	if (!clockState.shifted) throw new Error("advanceEvalTime() requires a shifted eval clock. Remove startTime: \"now\" or set freezeTime: true to use it.");
	if (!Number.isFinite(amount)) throw new Error("advanceEvalTime() amount must be a finite number");
	const deltaMs = getEvalTimeUnitMs(unit) * amount;
	clockState.offsetMs += deltaMs;
	const advancedMs = getEvalClockStateNowMs(clockState);
	return new realDate(advancedMs);
}
|
|
82
195
|
/** Return the current eval scope for the active async context, if any. */
|
|
83
196
|
function getCurrentScope() {
|
|
84
197
|
if (activeEvalScopeCount === 0) return void 0;
|
|
@@ -349,7 +462,9 @@ async function runInExistingEvalScope(scope, runtimeScope, fn) {
|
|
|
349
462
|
activeEvalScopeCount++;
|
|
350
463
|
try {
|
|
351
464
|
return await scopeStorage.run(scope, async () => {
|
|
352
|
-
return await
|
|
465
|
+
return await evalClockStorage.run(scope.evalClockState, async () => {
|
|
466
|
+
return await runInEvalRuntimeScope(runtimeScope, fn);
|
|
467
|
+
});
|
|
353
468
|
});
|
|
354
469
|
} finally {
|
|
355
470
|
activeEvalScopeCount--;
|
|
@@ -362,6 +477,8 @@ async function runInExistingEvalScope(scope, runtimeScope, fn) {
|
|
|
362
477
|
async function runInEvalScope(caseId, fn, options = {}) {
|
|
363
478
|
const scope = {
|
|
364
479
|
caseId,
|
|
480
|
+
startTime: options.startTime,
|
|
481
|
+
evalClockState: createEvalClockState(options.startTime, options.freezeTime === true),
|
|
365
482
|
idPrefix: options.idPrefix,
|
|
366
483
|
nextEvalIdCounter: 0,
|
|
367
484
|
input: options.input,
|
|
@@ -1493,9 +1610,12 @@ function mergeSpanAttribute(span, key, patch) {
|
|
|
1493
1610
|
...patch
|
|
1494
1611
|
} });
|
|
1495
1612
|
}
|
|
1496
|
-
function
|
|
1613
|
+
/**
 * Add `elapsedMs` milliseconds to a timestamp and return the result as an
 * ISO 8601 string.
 */
function addElapsedMsToTimestamp(isoTimestamp, elapsedMs) {
	const baseMs = new Date(isoTimestamp).getTime();
	const shifted = new Date(baseMs + elapsedMs);
	return shifted.toISOString();
}
|
|
1616
|
+
/**
 * Finalize a span whose body did not throw.
 *
 * The status becomes `"error"` when `hasSpanError(span)` is truthy, otherwise
 * `"ok"`. `endedAt` is the span's `startedAt` plus the real time elapsed
 * since `realStartedAt`.
 */
function finishSpanWithoutThrownError(span, realStartedAt) {
	if (hasSpanError(span)) span.status = "error";
	else span.status = "ok";
	const startedAt = span.startedAt;
	const elapsedMs = getRealDateNowMs() - realStartedAt;
	span.endedAt = addElapsedMsToTimestamp(startedAt, elapsedMs);
}
|
|
1500
1620
|
function createSpanHandle(span) {
|
|
1501
1621
|
return {
|
|
@@ -1737,9 +1857,11 @@ async function traceSpanInternal(info, fn) {
|
|
|
1737
1857
|
const scope = getCurrentScope();
|
|
1738
1858
|
if (!scope) return await fn(noopActiveSpan());
|
|
1739
1859
|
const id = generateSpanId();
|
|
1860
|
+
const parentId = scope.activeSpanStack.at(-1)?.id ?? null;
|
|
1861
|
+
const realStartedAt = getRealDateNowMs();
|
|
1740
1862
|
const spanRecord = {
|
|
1741
1863
|
id,
|
|
1742
|
-
parentId
|
|
1864
|
+
parentId,
|
|
1743
1865
|
caseId: scope.caseId,
|
|
1744
1866
|
kind: info.kind,
|
|
1745
1867
|
name: info.name,
|
|
@@ -1779,7 +1901,7 @@ async function traceSpanInternal(info, fn) {
|
|
|
1779
1901
|
const recording = deserializeCacheRecording(hit.recording);
|
|
1780
1902
|
replayRecording(scope, spanRecord, recording, { generateSpanId });
|
|
1781
1903
|
spanRecord.status = recording.finalStatus ?? (hasSpanError(spanRecord) ? "error" : "ok");
|
|
1782
|
-
spanRecord.endedAt = (
|
|
1904
|
+
spanRecord.endedAt = addElapsedMsToTimestamp(spanRecord.startedAt, getRealDateNowMs() - realStartedAt);
|
|
1783
1905
|
return recording.returnValue;
|
|
1784
1906
|
}
|
|
1785
1907
|
mergeSpanAttributes(spanRecord, { "cache.status": "miss" });
|
|
@@ -1798,7 +1920,7 @@ async function traceSpanInternal(info, fn) {
|
|
|
1798
1920
|
scope.recordingStack.pop();
|
|
1799
1921
|
}
|
|
1800
1922
|
appendSubSpanOps(scope, frame);
|
|
1801
|
-
finishSpanWithoutThrownError(spanRecord);
|
|
1923
|
+
finishSpanWithoutThrownError(spanRecord, realStartedAt);
|
|
1802
1924
|
if (ctx.mode !== "bypass") {
|
|
1803
1925
|
const recording = {
|
|
1804
1926
|
returnValue: bodyResult,
|
|
@@ -1832,11 +1954,11 @@ async function traceSpanInternal(info, fn) {
|
|
|
1832
1954
|
return bodyResult;
|
|
1833
1955
|
}
|
|
1834
1956
|
const result = await fn(activeSpan);
|
|
1835
|
-
finishSpanWithoutThrownError(spanRecord);
|
|
1957
|
+
finishSpanWithoutThrownError(spanRecord, realStartedAt);
|
|
1836
1958
|
return result;
|
|
1837
1959
|
} catch (error) {
|
|
1838
1960
|
spanRecord.status = "error";
|
|
1839
|
-
spanRecord.endedAt = (
|
|
1961
|
+
spanRecord.endedAt = addElapsedMsToTimestamp(spanRecord.startedAt, getRealDateNowMs() - realStartedAt);
|
|
1840
1962
|
spanRecord.error = normalizeTraceError(error);
|
|
1841
1963
|
throw error;
|
|
1842
1964
|
} finally {
|
|
@@ -2468,7 +2590,8 @@ const caseRowSchema = z.object({
|
|
|
2468
2590
|
"error",
|
|
2469
2591
|
"cancelled"
|
|
2470
2592
|
]),
|
|
2471
|
-
|
|
2593
|
+
/** Elapsed case execution duration in milliseconds, or null before completion. */
|
|
2594
|
+
durationMs: z.number().nullable(),
|
|
2472
2595
|
costUsd: z.number().nullable().optional(),
|
|
2473
2596
|
columns: z.record(z.string(), cellValueSchema),
|
|
2474
2597
|
/** Winning trial index for the persisted case result. */
|
|
@@ -2586,7 +2709,7 @@ const defaultConfigKeySchema = z.enum([
|
|
|
2586
2709
|
"cachedInputTokens",
|
|
2587
2710
|
"cacheCreationInputTokens",
|
|
2588
2711
|
"reasoningTokens",
|
|
2589
|
-
"
|
|
2712
|
+
"llmDurationMs"
|
|
2590
2713
|
]);
|
|
2591
2714
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
2592
2715
|
const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfigKeySchema)]);
|
|
@@ -2663,7 +2786,7 @@ const apiCallMetricSchema = z.object({
|
|
|
2663
2786
|
});
|
|
2664
2787
|
/**
|
|
2665
2788
|
* Schema for one model/provider pricing entry used to derive LLM-call costs
|
|
2666
|
-
* from token counts
|
|
2789
|
+
* from token counts.
|
|
2667
2790
|
*/
|
|
2668
2791
|
const llmCallPricingSchema = z.object({
|
|
2669
2792
|
/** Exact model name read from the configured `attributes.model` path. */
|
|
@@ -2682,6 +2805,8 @@ const llmCallPricingSchema = z.object({
|
|
|
2682
2805
|
cachedInputUsdPerMillion: z.number().nonnegative().optional(),
|
|
2683
2806
|
/** USD per one million prompt-cache write tokens. */
|
|
2684
2807
|
cacheCreationInputUsdPerMillion: z.number().nonnegative().optional(),
|
|
2808
|
+
/** USD per one million one-hour prompt-cache write tokens. */
|
|
2809
|
+
cacheCreationInput1hUsdPerMillion: z.number().nonnegative().optional(),
|
|
2685
2810
|
/** USD per one million reasoning tokens when reported separately. */
|
|
2686
2811
|
reasoningUsdPerMillion: z.number().nonnegative().optional()
|
|
2687
2812
|
});
|
|
@@ -2692,12 +2817,9 @@ const llmCallsConfigSchema = z.object({
|
|
|
2692
2817
|
/**
|
|
2693
2818
|
* Attribute paths used to extract structured per-call fields. Each entry is
|
|
2694
2819
|
* a dot-path inside `span.attributes`. Missing paths fall back to the
|
|
2695
|
-
* built-in defaults (e.g. `usage.inputTokens
|
|
2696
|
-
*
|
|
2697
|
-
*
|
|
2698
|
-
* `reasoningCost`) feed the cost breakdown table in the expanded row when
|
|
2699
|
-
* spans provide explicit USD cost overrides. Prefer `pricing` for deriving
|
|
2700
|
-
* costs from token counts globally.
|
|
2820
|
+
* built-in defaults (e.g. `usage.inputTokens`). Derived fields such as
|
|
2821
|
+
* total tokens, tokens/sec, duration, and USD costs are intentionally not
|
|
2822
|
+
* configurable as attribute paths.
|
|
2701
2823
|
*/
|
|
2702
2824
|
attributes: z.object({
|
|
2703
2825
|
model: z.string().optional(),
|
|
@@ -2706,15 +2828,9 @@ const llmCallsConfigSchema = z.object({
|
|
|
2706
2828
|
outputTokens: z.string().optional(),
|
|
2707
2829
|
cachedInputTokens: z.string().optional(),
|
|
2708
2830
|
cacheCreationInputTokens: z.string().optional(),
|
|
2831
|
+
cacheCreationInput1hTokens: z.string().optional(),
|
|
2709
2832
|
reasoningTokens: z.string().optional(),
|
|
2710
|
-
|
|
2711
|
-
tokensPerSecond: z.string().optional(),
|
|
2712
|
-
cost: z.string().optional(),
|
|
2713
|
-
inputCost: z.string().optional(),
|
|
2714
|
-
outputCost: z.string().optional(),
|
|
2715
|
-
cachedInputCost: z.string().optional(),
|
|
2716
|
-
cacheCreationInputCost: z.string().optional(),
|
|
2717
|
-
reasoningCost: z.string().optional(),
|
|
2833
|
+
latencyMs: z.string().optional(),
|
|
2718
2834
|
steps: z.string().optional(),
|
|
2719
2835
|
finishReason: z.string().optional(),
|
|
2720
2836
|
input: z.string().optional(),
|
|
@@ -2723,9 +2839,8 @@ const llmCallsConfigSchema = z.object({
|
|
|
2723
2839
|
toolCalls: z.string().optional()
|
|
2724
2840
|
}).optional(),
|
|
2725
2841
|
/**
|
|
2726
|
-
* Model/provider pricing registry used to calculate
|
|
2727
|
-
*
|
|
2728
|
-
* etc.) take precedence over derived prices.
|
|
2842
|
+
* Model/provider pricing registry used to calculate LLM-call costs from
|
|
2843
|
+
* token counts. Built-in LLM cost fields are only derived from this registry.
|
|
2729
2844
|
*/
|
|
2730
2845
|
pricing: z.array(llmCallPricingSchema).optional(),
|
|
2731
2846
|
/** Custom user-defined metrics surfaced on each LLM call. */
|
|
@@ -2773,15 +2888,9 @@ const DEFAULT_LLM_CALLS_CONFIG = {
|
|
|
2773
2888
|
outputTokens: "usage.outputTokens",
|
|
2774
2889
|
cachedInputTokens: "usage.cachedInputTokens",
|
|
2775
2890
|
cacheCreationInputTokens: "usage.cacheCreationInputTokens",
|
|
2891
|
+
cacheCreationInput1hTokens: "usage.cacheCreationInput1hTokens",
|
|
2776
2892
|
reasoningTokens: "usage.reasoningTokens",
|
|
2777
|
-
|
|
2778
|
-
tokensPerSecond: "tokensPerSecond",
|
|
2779
|
-
cost: "costUsd",
|
|
2780
|
-
inputCost: "cost.inputUsd",
|
|
2781
|
-
outputCost: "cost.outputUsd",
|
|
2782
|
-
cachedInputCost: "cost.cachedInputUsd",
|
|
2783
|
-
cacheCreationInputCost: "cost.cacheCreationInputUsd",
|
|
2784
|
-
reasoningCost: "cost.reasoningUsd",
|
|
2893
|
+
latencyMs: "latencyMs",
|
|
2785
2894
|
steps: "steps",
|
|
2786
2895
|
finishReason: "finishReason",
|
|
2787
2896
|
input: "input",
|
|
@@ -2823,8 +2932,8 @@ const DEFAULT_API_CALLS_CONFIG = {
|
|
|
2823
2932
|
* attribute path.
|
|
2824
2933
|
* - Missing `metrics[].format` defaults to `'string'`.
|
|
2825
2934
|
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
2826
|
-
* - Missing `pricing` defaults to an empty registry;
|
|
2827
|
-
*
|
|
2935
|
+
* - Missing `pricing` defaults to an empty registry; built-in costs are only
|
|
2936
|
+
* derived from configured pricing and token counts.
|
|
2828
2937
|
*/
|
|
2829
2938
|
function resolveLlmCallsConfig(input) {
|
|
2830
2939
|
return {
|
|
@@ -2848,6 +2957,7 @@ function resolveLlmCallsConfig(input) {
|
|
|
2848
2957
|
outputUsdPerMillion: p.outputUsdPerMillion,
|
|
2849
2958
|
cachedInputUsdPerMillion: p.cachedInputUsdPerMillion,
|
|
2850
2959
|
cacheCreationInputUsdPerMillion: p.cacheCreationInputUsdPerMillion,
|
|
2960
|
+
cacheCreationInput1hUsdPerMillion: p.cacheCreationInput1hUsdPerMillion,
|
|
2851
2961
|
reasoningUsdPerMillion: p.reasoningUsdPerMillion
|
|
2852
2962
|
}))
|
|
2853
2963
|
};
|
|
@@ -3037,8 +3147,8 @@ function deriveScopedSummaryFromCases(params) {
|
|
|
3037
3147
|
else if (caseRow.status === "cancelled") cancelledCases += 1;
|
|
3038
3148
|
else if (caseRow.status === "running") runningCases += 1;
|
|
3039
3149
|
else pendingCases += 1;
|
|
3040
|
-
if (caseRow.
|
|
3041
|
-
totalDurationMs += caseRow.
|
|
3150
|
+
if (caseRow.durationMs !== null) {
|
|
3151
|
+
totalDurationMs += caseRow.durationMs;
|
|
3042
3152
|
hasDuration = true;
|
|
3043
3153
|
}
|
|
3044
3154
|
}
|
|
@@ -3132,6 +3242,21 @@ function computeTokenCost(tokens, usdPerMillion) {
|
|
|
3132
3242
|
if (usdPerMillion === void 0) return null;
|
|
3133
3243
|
return tokens / 1e6 * usdPerMillion;
|
|
3134
3244
|
}
|
|
3245
|
+
/**
 * Compute the USD cost of prompt-cache write tokens.
 *
 * `cacheCreationInputTokens` is the total write count. When
 * `cacheCreationInput1hTokens` is given, that many tokens (capped at the
 * total) are priced at `oneHourUsdPerMillion` and the remainder at
 * `usdPerMillion`. Returns `null` when the total is `null` or when either
 * partial cost comes back as `null`.
 */
function computeCacheCreationInputCost({ cacheCreationInputTokens, cacheCreationInput1hTokens, usdPerMillion, oneHourUsdPerMillion }) {
	const totalWriteTokens = cacheCreationInputTokens;
	if (totalWriteTokens === null) return null;
	if (totalWriteTokens === 0) return 0;
	if (cacheCreationInput1hTokens === null) return computeTokenCost(totalWriteTokens, usdPerMillion);
	const oneHourTokens = Math.min(cacheCreationInput1hTokens, totalWriteTokens);
	const shortLivedTokens = totalWriteTokens - oneHourTokens;
	const shortLivedCost = computeTokenCost(shortLivedTokens, usdPerMillion);
	const oneHourCost = computeTokenCost(oneHourTokens, oneHourUsdPerMillion);
	if (shortLivedCost !== null && oneHourCost !== null) return shortLivedCost + oneHourCost;
	return null;
}
|
|
3255
|
+
/**
 * Return the input tokens left after subtracting cache-read and cache-write
 * tokens, floored at zero. Missing (`null`/`undefined`) cache counts are
 * treated as zero; a `null` input count yields `null`.
 */
function computeBaseInputTokens({ inputTokens, cachedInputTokens, cacheCreationInputTokens }) {
	if (inputTokens === null) return null;
	const cacheReadTokens = cachedInputTokens ?? 0;
	const cacheWriteTokens = cacheCreationInputTokens ?? 0;
	const remaining = inputTokens - (cacheReadTokens + cacheWriteTokens);
	return Math.max(remaining, 0);
}
|
|
3135
3260
|
function pickPricingEntry({ pricing, model, provider }) {
|
|
3136
3261
|
if (model === null) return null;
|
|
3137
3262
|
let fallback = null;
|
|
@@ -3145,7 +3270,7 @@ function pickPricingEntry({ pricing, model, provider }) {
|
|
|
3145
3270
|
}
|
|
3146
3271
|
return fallback;
|
|
3147
3272
|
}
|
|
3148
|
-
function
|
|
3273
|
+
function computeTotalCost({ inputTokens, inputCostUsd, outputTokens, outputCostUsd, cachedInputTokens, cachedInputCostUsd, cacheCreationInputTokens, cacheCreationInputCostUsd, reasoningTokens, reasoningCostUsd }) {
|
|
3149
3274
|
const parts = [
|
|
3150
3275
|
{
|
|
3151
3276
|
tokens: inputTokens,
|
|
@@ -3182,7 +3307,7 @@ function computeFallbackTotalCost({ inputTokens, inputCostUsd, outputTokens, out
|
|
|
3182
3307
|
if (hasCost) return total;
|
|
3183
3308
|
return hasReportedTokens ? 0 : null;
|
|
3184
3309
|
}
|
|
3185
|
-
function
|
|
3310
|
+
function computeDurationMs$1(span) {
|
|
3186
3311
|
if (span.endedAt === null) return null;
|
|
3187
3312
|
const started = Date.parse(span.startedAt);
|
|
3188
3313
|
const ended = Date.parse(span.endedAt);
|
|
@@ -3190,10 +3315,16 @@ function computeLatencyMs$1(span) {
|
|
|
3190
3315
|
const delta = ended - started;
|
|
3191
3316
|
return delta >= 0 ? delta : null;
|
|
3192
3317
|
}
|
|
3193
|
-
function computeTotalTokens({
|
|
3194
|
-
if (
|
|
3195
|
-
|
|
3196
|
-
|
|
3318
|
+
/**
 * Sum input and output token counts, treating a missing side as zero.
 * Returns `null` only when both counts are `null`.
 */
function computeTotalTokens({ input, output }) {
	const hasAnyCount = input !== null || output !== null;
	if (!hasAnyCount) return null;
	return (input ?? 0) + (output ?? 0);
}
|
|
3322
|
+
/**
 * Derive output tokens per second for one call.
 *
 * The generation window is `durationMs`, minus `latencyMs` when a latency is
 * given. Returns `null` when output tokens or duration are `null` or when the
 * window is not positive, and `0` when no output tokens were produced.
 */
function computeTokensPerSecond({ outputTokens, durationMs, latencyMs }) {
	if (outputTokens === null || durationMs === null) return null;
	if (outputTokens === 0) return 0;
	let generationMs = durationMs;
	if (latencyMs !== null) generationMs = durationMs - latencyMs;
	if (generationMs <= 0) return null;
	const generationSeconds = generationMs / 1e3;
	return outputTokens / generationSeconds;
}
|
|
3198
3329
|
function readSteps(attributes, path) {
|
|
3199
3330
|
const raw = getNestedAttribute(attributes, path);
|
|
@@ -3201,10 +3332,6 @@ function readSteps(attributes, path) {
|
|
|
3201
3332
|
stepCount: raw.length,
|
|
3202
3333
|
stepDetails: raw
|
|
3203
3334
|
};
|
|
3204
|
-
if (typeof raw === "number" && Number.isFinite(raw)) return {
|
|
3205
|
-
stepCount: raw,
|
|
3206
|
-
stepDetails: null
|
|
3207
|
-
};
|
|
3208
3335
|
return {
|
|
3209
3336
|
stepCount: null,
|
|
3210
3337
|
stepDetails: null
|
|
@@ -3226,16 +3353,22 @@ function pickError$1(span) {
|
|
|
3226
3353
|
* shape consumed by the LLM calls tab.
|
|
3227
3354
|
*
|
|
3228
3355
|
* Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
|
|
3229
|
-
* (`model`, token counts,
|
|
3356
|
+
* (`model`, token counts, latency, etc.) are read via
|
|
3230
3357
|
* `getNestedAttribute` from the configured paths, with safe coercion to
|
|
3231
|
-
* `string | null` / `number | null`.
|
|
3232
|
-
*
|
|
3233
|
-
* `
|
|
3234
|
-
*
|
|
3235
|
-
*
|
|
3236
|
-
*
|
|
3237
|
-
*
|
|
3238
|
-
*
|
|
3358
|
+
* `string | null` / `number | null`. `latencyMs` is an explicit
|
|
3359
|
+
* time-to-first-token attribute; full span elapsed time is reported separately
|
|
3360
|
+
* as `durationMs`. Built-in USD costs are derived only from configured model
|
|
3361
|
+
* pricing and token counts. `totalTokens` is always derived from input +
|
|
3362
|
+
* output tokens. Cached input and cache creation tokens are reported
|
|
3363
|
+
* separately because they are subsets of input/output usage. The main cache
|
|
3364
|
+
* creation token field is treated as the total write count; optional one-hour
|
|
3365
|
+
* cache creation tokens only split that total for cost calculation. Base input
|
|
3366
|
+
* cost uses input minus cache read/write tokens so cached tokens are not
|
|
3367
|
+
* charged twice. Cache read/write costs still contribute to the total USD cost
|
|
3368
|
+
* at their configured rates. The `steps` attribute path may resolve to an array
|
|
3369
|
+
* of per-step detail objects, with `stepCount` derived from the array length.
|
|
3370
|
+
* `durationMs` and `tokensPerSecond` are `null` while the span is still
|
|
3371
|
+
* running. User-defined `metrics` whose path resolves to
|
|
3239
3372
|
* `undefined` are dropped, but `null`, `0`, and `false` are preserved as
|
|
3240
3373
|
* legitimate values worth displaying. Original span order is preserved so the
|
|
3241
3374
|
* LLM calls tab matches the ordering in the Trace tab.
|
|
@@ -3252,19 +3385,30 @@ function extractLlmCalls(spans, config) {
|
|
|
3252
3385
|
const outputTokens = readNumber$2(attrs, config.attributes.outputTokens);
|
|
3253
3386
|
const cachedInputTokens = readNumber$2(attrs, config.attributes.cachedInputTokens);
|
|
3254
3387
|
const cacheCreationInputTokens = readNumber$2(attrs, config.attributes.cacheCreationInputTokens);
|
|
3388
|
+
const cacheCreationInput1hTokens = readNumber$2(attrs, config.attributes.cacheCreationInput1hTokens);
|
|
3255
3389
|
const reasoningTokens = readNumber$2(attrs, config.attributes.reasoningTokens);
|
|
3256
|
-
const
|
|
3390
|
+
const latencyMs = readNumber$2(attrs, config.attributes.latencyMs);
|
|
3391
|
+
const durationMs = computeDurationMs$1(span);
|
|
3257
3392
|
const pricing = pickPricingEntry({
|
|
3258
3393
|
pricing: config.pricing,
|
|
3259
3394
|
model,
|
|
3260
3395
|
provider
|
|
3261
3396
|
});
|
|
3262
|
-
const inputCostUsd =
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
const
|
|
3397
|
+
const inputCostUsd = computeTokenCost(computeBaseInputTokens({
|
|
3398
|
+
inputTokens,
|
|
3399
|
+
cachedInputTokens,
|
|
3400
|
+
cacheCreationInputTokens
|
|
3401
|
+
}), pricing?.inputUsdPerMillion);
|
|
3402
|
+
const outputCostUsd = computeTokenCost(outputTokens, pricing?.outputUsdPerMillion);
|
|
3403
|
+
const cachedInputCostUsd = computeTokenCost(cachedInputTokens, pricing?.cachedInputUsdPerMillion);
|
|
3404
|
+
const cacheCreationInputCostUsd = computeCacheCreationInputCost({
|
|
3405
|
+
cacheCreationInputTokens,
|
|
3406
|
+
cacheCreationInput1hTokens,
|
|
3407
|
+
usdPerMillion: pricing?.cacheCreationInputUsdPerMillion,
|
|
3408
|
+
oneHourUsdPerMillion: pricing?.cacheCreationInput1hUsdPerMillion
|
|
3409
|
+
});
|
|
3410
|
+
const reasoningCostUsd = computeTokenCost(reasoningTokens, pricing?.reasoningUsdPerMillion);
|
|
3411
|
+
const costUsd = computeTotalCost({
|
|
3268
3412
|
inputTokens,
|
|
3269
3413
|
inputCostUsd,
|
|
3270
3414
|
outputTokens,
|
|
@@ -3302,13 +3446,15 @@ function extractLlmCalls(spans, config) {
|
|
|
3302
3446
|
cacheCreationInputTokens,
|
|
3303
3447
|
reasoningTokens,
|
|
3304
3448
|
totalTokens: computeTotalTokens({
|
|
3305
|
-
declared: declaredTotalTokens,
|
|
3306
3449
|
input: inputTokens,
|
|
3307
|
-
output: outputTokens
|
|
3308
|
-
|
|
3309
|
-
|
|
3450
|
+
output: outputTokens
|
|
3451
|
+
}),
|
|
3452
|
+
latencyMs,
|
|
3453
|
+
tokensPerSecond: computeTokensPerSecond({
|
|
3454
|
+
outputTokens,
|
|
3455
|
+
durationMs,
|
|
3456
|
+
latencyMs
|
|
3310
3457
|
}),
|
|
3311
|
-
tokensPerSecond: readNumber$2(attrs, config.attributes.tokensPerSecond),
|
|
3312
3458
|
costUsd,
|
|
3313
3459
|
inputCostUsd,
|
|
3314
3460
|
outputCostUsd,
|
|
@@ -3317,7 +3463,7 @@ function extractLlmCalls(spans, config) {
|
|
|
3317
3463
|
reasoningCostUsd,
|
|
3318
3464
|
...readSteps(attrs, config.attributes.steps),
|
|
3319
3465
|
finishReason: readString$2(attrs, config.attributes.finishReason),
|
|
3320
|
-
|
|
3466
|
+
durationMs,
|
|
3321
3467
|
input: getNestedAttribute(attrs, config.attributes.input),
|
|
3322
3468
|
output: getNestedAttribute(attrs, config.attributes.output),
|
|
3323
3469
|
reasoning: getNestedAttribute(attrs, config.attributes.reasoning),
|
|
@@ -3342,7 +3488,7 @@ function readString$1(attributes, path) {
|
|
|
3342
3488
|
const raw = getNestedAttribute(attributes, path);
|
|
3343
3489
|
return typeof raw === "string" && raw.length > 0 ? raw : null;
|
|
3344
3490
|
}
|
|
3345
|
-
function
|
|
3491
|
+
function computeDurationMs(span) {
|
|
3346
3492
|
if (span.endedAt === null) return null;
|
|
3347
3493
|
const started = Date.parse(span.startedAt);
|
|
3348
3494
|
const ended = Date.parse(span.endedAt);
|
|
@@ -3367,10 +3513,10 @@ function pickError(span) {
|
|
|
3367
3513
|
*
|
|
3368
3514
|
* Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
|
|
3369
3515
|
* (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
|
|
3370
|
-
* the configured paths. `durationMs` takes precedence
|
|
3371
|
-
* fallback to the span start/end timestamps. User-defined `metrics`
|
|
3372
|
-
* resolves to `undefined` are dropped, but `null`, `0`, and `false`
|
|
3373
|
-
* preserved as legitimate values worth displaying. Original span order is
|
|
3516
|
+
* the configured paths. An explicit `durationMs` attribute takes precedence,
|
|
3517
|
+
* with a fallback to the span start/end timestamps. User-defined `metrics`
|
|
3518
|
+
* whose path resolves to `undefined` are dropped, but `null`, `0`, and `false`
|
|
3519
|
+
* are preserved as legitimate values worth displaying. Original span order is
|
|
3374
3520
|
* preserved so the API calls tab matches the ordering in the Trace tab.
|
|
3375
3521
|
*/
|
|
3376
3522
|
function extractApiCalls(spans, config) {
|
|
@@ -3400,7 +3546,7 @@ function extractApiCalls(spans, config) {
|
|
|
3400
3546
|
method: readString$1(attrs, config.attributes.method),
|
|
3401
3547
|
url: readString$1(attrs, config.attributes.url),
|
|
3402
3548
|
statusCode: readNumber$1(attrs, config.attributes.statusCode),
|
|
3403
|
-
|
|
3549
|
+
durationMs: readNumber$1(attrs, config.attributes.durationMs) ?? computeDurationMs(span),
|
|
3404
3550
|
request: getNestedAttribute(attrs, config.attributes.request),
|
|
3405
3551
|
response: getNestedAttribute(attrs, config.attributes.response),
|
|
3406
3552
|
requestBody: getNestedAttribute(attrs, config.attributes.requestBody),
|
|
@@ -3798,7 +3944,7 @@ async function writeCacheFile(cacheDir, cacheFile) {
|
|
|
3798
3944
|
await mkdir(cacheDir, { recursive: true });
|
|
3799
3945
|
const filePath = ownerPath(cacheDir, cacheFile.owner);
|
|
3800
3946
|
const tmpPath = `${filePath}.${process.pid.toString()}.tmp`;
|
|
3801
|
-
await writeFile(tmpPath, JSON.stringify(cacheFile));
|
|
3947
|
+
await writeFile(tmpPath, JSON.stringify(cacheFile, null, 2));
|
|
3802
3948
|
await rename(tmpPath, filePath);
|
|
3803
3949
|
}
|
|
3804
3950
|
async function readDebugKeyFile(debugDir, owner) {
|
|
@@ -4237,7 +4383,8 @@ const DEFAULT_CONFIG_KEYS = [
|
|
|
4237
4383
|
"totalTokens",
|
|
4238
4384
|
"cachedInputTokens",
|
|
4239
4385
|
"cacheCreationInputTokens",
|
|
4240
|
-
"
|
|
4386
|
+
"reasoningTokens",
|
|
4387
|
+
"llmDurationMs"
|
|
4241
4388
|
];
|
|
4242
4389
|
const tokenNumberFormat = { notation: "compact" };
|
|
4243
4390
|
const countNumberFormat = {
|
|
@@ -4303,8 +4450,8 @@ const DEFAULT_COLUMNS = {
|
|
|
4303
4450
|
numberFormat: tokenNumberFormat,
|
|
4304
4451
|
align: "right"
|
|
4305
4452
|
},
|
|
4306
|
-
|
|
4307
|
-
label: "LLM
|
|
4453
|
+
llmDurationMs: {
|
|
4454
|
+
label: "LLM Duration",
|
|
4308
4455
|
format: "duration",
|
|
4309
4456
|
align: "right"
|
|
4310
4457
|
}
|
|
@@ -4509,8 +4656,14 @@ function addDefaultOutputs(params) {
|
|
|
4509
4656
|
});
|
|
4510
4657
|
assignIfMissing({
|
|
4511
4658
|
outputs: params.outputs,
|
|
4512
|
-
key: "
|
|
4513
|
-
value: sumNullable(calls.map((call) => call.
|
|
4659
|
+
key: "reasoningTokens",
|
|
4660
|
+
value: sumNullable(calls.map((call) => call.reasoningTokens)),
|
|
4661
|
+
activeKeys
|
|
4662
|
+
});
|
|
4663
|
+
assignIfMissing({
|
|
4664
|
+
outputs: params.outputs,
|
|
4665
|
+
key: "llmDurationMs",
|
|
4666
|
+
value: sumNullable(calls.map((call) => call.durationMs)),
|
|
4514
4667
|
activeKeys
|
|
4515
4668
|
});
|
|
4516
4669
|
}
|
|
@@ -5204,7 +5357,9 @@ async function runCase(params) {
|
|
|
5204
5357
|
mode: cacheMode,
|
|
5205
5358
|
evalId,
|
|
5206
5359
|
codeFingerprint
|
|
5207
|
-
} : void 0
|
|
5360
|
+
} : void 0,
|
|
5361
|
+
startTime: evalDef.startTime,
|
|
5362
|
+
freezeTime: evalDef.freezeTime
|
|
5208
5363
|
});
|
|
5209
5364
|
const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
|
|
5210
5365
|
const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
|
|
@@ -5245,6 +5400,7 @@ async function runCase(params) {
|
|
|
5245
5400
|
}
|
|
5246
5401
|
const scoreResults = /* @__PURE__ */ new Map();
|
|
5247
5402
|
const scoringTraces = {};
|
|
5403
|
+
const scoreStartTime = getEvalClockStateTimeMs(scope.evalClockState) ?? evalDef.startTime;
|
|
5248
5404
|
if (!nonAssertError && scope.assertionFailures.length === 0 && evalDef.scores) for (const [key, def] of Object.entries(evalDef.scores)) {
|
|
5249
5405
|
const { compute, passThreshold, label } = normalizeScoreDef(def);
|
|
5250
5406
|
const scoreRun = await runInEvalScope(evalCase.id, async () => {
|
|
@@ -5264,7 +5420,9 @@ async function runCase(params) {
|
|
|
5264
5420
|
mode: cacheMode,
|
|
5265
5421
|
evalId: `${evalId}__score__${key}`,
|
|
5266
5422
|
codeFingerprint
|
|
5267
|
-
} : void 0
|
|
5423
|
+
} : void 0,
|
|
5424
|
+
startTime: scoreStartTime,
|
|
5425
|
+
freezeTime: evalDef.freezeTime
|
|
5268
5426
|
});
|
|
5269
5427
|
const { trace, traceDisplay } = resolveTracePresentation(scoreRun.scope.spans, globalTraceDisplay, evalDef.traceDisplay);
|
|
5270
5428
|
scope.logs.push(...scoreRun.scope.logs.map((entry) => ({
|
|
@@ -5356,7 +5514,7 @@ async function runCase(params) {
|
|
|
5356
5514
|
caseDetail,
|
|
5357
5515
|
caseRowUpdate: {
|
|
5358
5516
|
status,
|
|
5359
|
-
|
|
5517
|
+
durationMs: Date.now() - startTime,
|
|
5360
5518
|
columns
|
|
5361
5519
|
}
|
|
5362
5520
|
};
|
|
@@ -5577,7 +5735,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
|
|
|
5577
5735
|
await runInEvalRuntimeScope("cases", async () => {
|
|
5578
5736
|
await entry.use(async (evalDef) => {
|
|
5579
5737
|
const cases = filterEvalCases(resolveRunnableEvalCases({
|
|
5580
|
-
cases: typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [],
|
|
5738
|
+
cases: await runWithEvalClock(evalDef.startTime, async () => typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [], { freezeTime: evalDef.freezeTime }),
|
|
5581
5739
|
evalId: evalMeta.id
|
|
5582
5740
|
}), request.target.evalIds, request.target.caseIds, evalMeta.id);
|
|
5583
5741
|
runState.summary.totalCases += cases.length;
|
|
@@ -5647,7 +5805,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
|
|
|
5647
5805
|
caseId: evalCase.id,
|
|
5648
5806
|
evalId: evalMeta.id,
|
|
5649
5807
|
status: caseRowUpdate.status ?? "pending",
|
|
5650
|
-
|
|
5808
|
+
durationMs: caseRowUpdate.durationMs ?? null,
|
|
5651
5809
|
columns: caseRowUpdate.columns ?? {},
|
|
5652
5810
|
trial
|
|
5653
5811
|
}
|
|
@@ -5780,4 +5938,4 @@ function toLastRunStatus(status) {
|
|
|
5780
5938
|
return status === "pending" ? null : status;
|
|
5781
5939
|
}
|
|
5782
5940
|
//#endregion
|
|
5783
|
-
export { removeDefaultConfigSchema as $, columnKindSchema as $t, extractApiCalls as A,
|
|
5941
|
+
export { removeDefaultConfigSchema as $, columnKindSchema as $t, extractApiCalls as A, runInEvalScope as An, cacheFileSchema as At, DEFAULT_API_CALLS_CONFIG as B, traceAttributeDisplayFormatSchema as Bt, validateCharts as C, getEvalCaseInput as Cn, evalChartTooltipExtraSchema as Ct, sseEnvelopeSchema as D, mergeEvalOutput as Dn, cacheDebugKeyFileSchema as Dt, updateManualScoreRequestSchema as E, isInEvalScope as En, cacheDebugKeyEntrySchema as Et, deriveScopedSummaryFromCases as F, repoFile as Fn, cacheRecordingSchema as Ft, apiCallMetricSchema as G, traceDisplayInputConfigSchema as Gt, agentEvalsConfigSchema as H, traceAttributeDisplayPlacementSchema as Ht, deriveStatusFromCaseRows as I, defineEval as In, cacheStatusSchema as It, llmCallMetricFormatSchema as J, traceSpanSchema as Jt, apiCallsConfigSchema as K, traceSpanErrorSchema as Kt, deriveStatusFromChildStatuses as L, getEvalRegistry as Ln, serializedCacheSpanSchema as Lt, getNestedAttribute as M, setEvalOutput as Mn, cacheModeSchema as Mt, getEvalTitle as N, setScopeCacheContext as Nn, cacheOperationTypeSchema as Nt, extractCacheEntries as O, nextEvalId as On, cacheEntrySchema as Ot, getEvalDisplayStatus as P, startEvalBackgroundJob as Pn, cacheRecordingOpSchema as Pt, llmCallsConfigSchema as Q, columnFormatSchema as Qt, runManifestSchema as R, spanCacheOptionsSchema as Rt, normalizeScoreDef as S, getCurrentScope as Sn, evalChartMetricSchema as St, createRunRequestSchema as T, incrementEvalOutput as Tn, evalChartsConfigSchema as Tt, apiCallMetricFormatSchema as U, traceAttributeDisplaySchema as Ut, DEFAULT_LLM_CALLS_CONFIG as V, traceAttributeDisplayInputSchema as Vt, apiCallMetricPlacementSchema as W, traceDisplayConfigSchema as Wt, llmCallMetricSchema as X, cellValueSchema as Xt, llmCallMetricPlacementSchema as Y, traceSpanWarningSchema as Yt, llmCallPricingSchema as Z, columnDefSchema as Zt, loadEvalModule as _, advanceEvalTime as _n, evalChartAggregateSchema as _t, loadPersistedRunSnapshot as a, z$1 as 
an, caseDetailSchema as at, loadConfig as b, evalAssert as bn, evalChartColorSchema as bt, persistCaseDetail as c, evalSpan as cn, evalStatAggregateSchema as ct, recomputePersistedCaseStatus as d, hashCacheKeySync as dn, evalSummarySchema as dt, fileRefSchema as en, resolveApiCallsConfig as et, runTouchesEval as f, deserializeCacheRecording as fn, runLogEntrySchema as ft, setLatestRunInfoMap as g, EvalAssertionError as gn, scoreTraceSchema as gt, getTargetEvalIds as h, serializeCacheValue as hn, runLogPhaseSchema as ht, getLatestRunInfos as i, runArtifactRefSchema as in, assertionFailureSchema as it, extractLlmCalls as j, runInExistingEvalScope as jn, cacheListItemSchema as jt, extractCacheHits as k, runInEvalRuntimeScope as kn, cacheEntryWithDebugKeySchema as kt, persistRunState as l, evalTracer as ln, evalStatItemSchema as lt, buildEvalSummary as m, serializeCacheRecording as mn, runLogLocationSchema as mt, generateRunId as n, numberDisplayOptionsSchema as nn, runLogsConfigSchema as nt, loadPersistedRunSnapshots as o, buildTraceTree as on, caseRowSchema as ot, resolveArtifactPath as p, deserializeCacheValue as pn, runLogLevelSchema as pt, defaultConfigKeySchema as q, traceSpanKindSchema as qt, getLastRunStatuses as r, repoFileRefSchema as rn, trialSelectionModeSchema as rt, nextShortIdFromSnapshots as s, captureEvalSpanError as sn, evalFreshnessStatusSchema as st, executeRun as t, jsonCellSchema as tn, resolveLlmCallsConfig as tt, recomputeEvalStatusesInRuns as u, hashCacheKey as un, evalStatsConfigSchema as ut, parseEvalMetas as v, appendToEvalOutput as vn, evalChartAxisSchema as vt, createFsCacheStore as w, getEvalStartTime as wn, evalChartTypeSchema as wt, buildDeclaredColumnDefs as x, evalLog as xn, evalChartConfigSchema as xt, resolveEvalDefaultConfig as y, configureEvalRunLogs as yn, evalChartBuiltinMetricSchema as yt, runSummarySchema as z, traceCacheRefSchema as zt };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-
|
|
2
|
-
import "./src-
|
|
1
|
+
import { n as createRunner } from "./cli-D3QNOcPN.mjs";
|
|
2
|
+
import "./src-CcXfWT4M.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|