@ls-stack/agent-eval 0.34.0 → 0.35.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DOE-crd0.mjs → app-BlNzXWDM.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-CwyTEhGB.js +140 -0
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CG66f0GN.mjs → cli-Dg3abrOv.mjs} +3 -3
- package/dist/index.d.mts +97 -90
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-CYhn4DJ0.mjs → runOrchestration-V1TxX8es.mjs} +19 -25
- package/dist/{runner-j9b6g0h0.mjs → runner-BCs5rzej.mjs} +2 -2
- package/dist/{runner-D5i7HLcE.mjs → runner-znY6PY1M.mjs} +1 -1
- package/dist/src-DBypR4TV.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +6 -5
- package/dist/apps/web/dist/assets/index-ol64metU.js +0 -140
- package/dist/src-B0b1vgk0.mjs +0 -3
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as 
apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as 
traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-CYhn4DJ0.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CG66f0GN.mjs";
|
|
3
|
-
import "./src-B0b1vgk0.mjs";
|
|
1
|
+
import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as 
apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as 
traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-V1TxX8es.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Dg3abrOv.mjs";
|
|
3
|
+
import "./src-DBypR4TV.mjs";
|
|
4
4
|
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, 
llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-CYhn4DJ0.mjs";
|
|
1
|
+
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-V1TxX8es.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|
|
@@ -913,13 +913,16 @@ async function cloneCacheValue(value, options = void 0) {
|
|
|
913
913
|
return deserializeCacheValue(await serializeCacheValue(value, options));
|
|
914
914
|
}
|
|
915
915
|
function normalizeCacheSerializationOptions(options) {
|
|
916
|
-
return {
|
|
916
|
+
return {
|
|
917
|
+
compress: options?.compress !== false,
|
|
918
|
+
preserveUndefined: options?.preserveUndefined === true
|
|
919
|
+
};
|
|
917
920
|
}
|
|
918
921
|
async function serializeJsonSafeValue(value, refs, depth, config) {
|
|
919
922
|
if (value === void 0) return config.preserveUndefined ? jsonSafeValue("Undefined") : void 0;
|
|
920
923
|
if (typeof value === "bigint") return jsonSafeValue("BigInt", value.toString());
|
|
921
924
|
if (typeof value === "number") return serializeNumber(value);
|
|
922
|
-
if (typeof value === "string") return serializeString(value, depth);
|
|
925
|
+
if (typeof value === "string") return serializeString(value, depth, config);
|
|
923
926
|
if (value instanceof Date) return jsonSafeValue("Date", value.toISOString());
|
|
924
927
|
if (value instanceof Map) return serializeMap(value, refs, depth, config);
|
|
925
928
|
if (value instanceof Set) return serializeSet(value, refs, depth, config);
|
|
@@ -959,7 +962,7 @@ async function serializeJsonSafeValue(value, refs, depth, config) {
|
|
|
959
962
|
if (serializedItem !== void 0) items.push(serializedItem);
|
|
960
963
|
}
|
|
961
964
|
refs.delete(value);
|
|
962
|
-
return compressNestedJsonValue(items, depth) ?? items;
|
|
965
|
+
return compressNestedJsonValue(items, depth, config) ?? items;
|
|
963
966
|
}
|
|
964
967
|
const entries = [];
|
|
965
968
|
for (const [key, entryValue] of Object.entries(value)) {
|
|
@@ -968,7 +971,7 @@ async function serializeJsonSafeValue(value, refs, depth, config) {
|
|
|
968
971
|
}
|
|
969
972
|
refs.delete(value);
|
|
970
973
|
const serialized = hasSerializationMarkerKey(value) ? jsonSafeValue("Object", entries) : Object.fromEntries(entries);
|
|
971
|
-
return compressNestedJsonValue(serialized, depth) ?? serialized;
|
|
974
|
+
return compressNestedJsonValue(serialized, depth, config) ?? serialized;
|
|
972
975
|
}
|
|
973
976
|
function serializeNumber(value) {
|
|
974
977
|
if (Number.isNaN(value)) return jsonSafeValue("Number", "NaN");
|
|
@@ -977,8 +980,9 @@ function serializeNumber(value) {
|
|
|
977
980
|
if (Object.is(value, -0)) return jsonSafeValue("Number", "-0");
|
|
978
981
|
return value;
|
|
979
982
|
}
|
|
980
|
-
function serializeString(value, depth) {
|
|
983
|
+
function serializeString(value, depth, config) {
|
|
981
984
|
if (depth === 0) return value;
|
|
985
|
+
if (!config.compress) return value;
|
|
982
986
|
return compressNestedStringValue(value) ?? value;
|
|
983
987
|
}
|
|
984
988
|
function isDenseNumberArray(value) {
|
|
@@ -1018,8 +1022,9 @@ function compressNestedStringValue(value) {
|
|
|
1018
1022
|
};
|
|
1019
1023
|
return compressionIsWorthIt(serialized, rawSize) ? serialized : void 0;
|
|
1020
1024
|
}
|
|
1021
|
-
function compressNestedJsonValue(value, depth) {
|
|
1025
|
+
function compressNestedJsonValue(value, depth, config) {
|
|
1022
1026
|
if (depth === 0) return void 0;
|
|
1027
|
+
if (!config.compress) return void 0;
|
|
1023
1028
|
const raw = JSON.stringify(value);
|
|
1024
1029
|
const rawSize = Buffer$1.byteLength(raw);
|
|
1025
1030
|
if (rawSize < compressedJsonMinBytes) return void 0;
|
|
@@ -5070,27 +5075,16 @@ function buildDeclaredColumnDefs(overrides, scores, manualScores) {
|
|
|
5070
5075
|
}
|
|
5071
5076
|
/**
|
|
5072
5077
|
* Coerce an arbitrary runtime value into a serializable `CellValue`.
|
|
5073
|
-
*
|
|
5074
|
-
*
|
|
5078
|
+
* Runtime values use the SDK's tagged serializer so saved run artifacts keep
|
|
5079
|
+
* structured data instead of storing JSON strings. Native binary/file root
|
|
5080
|
+
* values are handled before this helper.
|
|
5075
5081
|
*/
|
|
5076
|
-
function toCellValue(value) {
|
|
5077
|
-
if (value === null) return null;
|
|
5078
|
-
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
|
|
5079
|
-
if (value === void 0) return void 0;
|
|
5082
|
+
async function toCellValue(value) {
|
|
5080
5083
|
const fileRef = fileRefSchema.safeParse(value);
|
|
5081
5084
|
if (fileRef.success) return fileRef.data;
|
|
5082
|
-
|
|
5083
|
-
|
|
5084
|
-
|
|
5085
|
-
}
|
|
5086
|
-
if (value instanceof Date) return value.toISOString();
|
|
5087
|
-
return JSON.stringify(value);
|
|
5088
|
-
}
|
|
5089
|
-
function isPlainJsonContainer(value) {
|
|
5090
|
-
if (Array.isArray(value)) return true;
|
|
5091
|
-
if (typeof value !== "object" || value === null) return false;
|
|
5092
|
-
const prototype = Object.getPrototypeOf(value);
|
|
5093
|
-
return prototype === Object.prototype || prototype === null;
|
|
5085
|
+
const serialized = await serializeCacheValue(value, { compress: false });
|
|
5086
|
+
const parsed = jsonCellSchema.safeParse(serialized);
|
|
5087
|
+
if (parsed.success) return parsed.data;
|
|
5094
5088
|
}
|
|
5095
5089
|
function inferKindFromFormat(format) {
|
|
5096
5090
|
if (format === "boolean") return "boolean";
|
|
@@ -6723,7 +6717,7 @@ async function runCase(params) {
|
|
|
6723
6717
|
outputKey: key,
|
|
6724
6718
|
trial,
|
|
6725
6719
|
value
|
|
6726
|
-
}) : toCellValue(value);
|
|
6720
|
+
}) : await toCellValue(value);
|
|
6727
6721
|
if (cell !== void 0) columns[key] = cell;
|
|
6728
6722
|
}
|
|
6729
6723
|
for (const key of Object.keys(evalDef.manualScores ?? {})) columns[key] = null;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-CG66f0GN.mjs";
|
|
2
|
-
import "./src-B0b1vgk0.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-Dg3abrOv.mjs";
|
|
2
|
+
import "./src-DBypR4TV.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-j9b6g0h0.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-BCs5rzej.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.34.0",
|
|
3
|
+
"version": "0.35.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -32,8 +32,8 @@
|
|
|
32
32
|
"@types/node": "^24.7.2",
|
|
33
33
|
"typescript": "^5.9.2",
|
|
34
34
|
"@agent-evals/runner": "0.0.1",
|
|
35
|
-
"@agent-evals/
|
|
36
|
-
"@agent-evals/
|
|
35
|
+
"@agent-evals/sdk": "0.0.1",
|
|
36
|
+
"@agent-evals/shared": "0.0.1"
|
|
37
37
|
},
|
|
38
38
|
"scripts": {
|
|
39
39
|
"build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
|
|
@@ -298,11 +298,12 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
|
|
|
298
298
|
## Outputs, columns, trace display
|
|
299
299
|
|
|
300
300
|
- `setEvalOutput(key, value)` writes reviewable data for the case. Values are
|
|
301
|
-
|
|
302
|
-
`Blob`/`File`
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
301
|
+
stored as received: primitives, objects/arrays, explicit file refs, and
|
|
302
|
+
native `Blob`/`File` values. `columns.format` only controls visualization.
|
|
303
|
+
Non-JSON runtime values such as `Date`, `Map`, `Set`, `BigInt`, typed arrays,
|
|
304
|
+
and class instances use the tagged value serializer instead of a string
|
|
305
|
+
fallback. Native `Blob`/`File` values are copied to run artifacts because
|
|
306
|
+
saved run files are JSON. Inside `execute`, prefer the context
|
|
306
307
|
`setOutput(key, value)` helper when writing schema-backed outputs; it is
|
|
307
308
|
typed from the eval's outputs generic. Keep `setEvalOutput` for shared
|
|
308
309
|
workflow code that does not receive the execute context.
|