@ls-stack/agent-eval 0.16.1 → 0.17.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as caseRowSchema, $t as appendToEvalOutput, A as getEvalTitle, At as traceDisplayConfigSchema, B as apiCallMetricFormatSchema, Bt as fileRefSchema, C as createRunRequestSchema, Ct as serializedCacheSpanSchema, D as extractApiCalls, Dt as traceAttributeDisplayInputSchema, E as extractCacheHits, Et as traceAttributeDisplayFormatSchema, F as runManifestSchema, Ft as traceSpanWarningSchema, G as llmCallMetricPlacementSchema, Gt as z, H as apiCallMetricSchema, Ht as numberDisplayOptionsSchema, I as runSummarySchema, It as cellValueSchema, J as resolveApiCallsConfig, Jt as evalSpan, K as llmCallMetricSchema, Kt as buildTraceTree, L as DEFAULT_API_CALLS_CONFIG, Lt as columnDefSchema, M as deriveScopedSummaryFromCases, Mt as traceSpanErrorSchema, N as deriveStatusFromCaseRows, Nt as traceSpanKindSchema, O as extractLlmCalls, Ot as traceAttributeDisplayPlacementSchema, P as deriveStatusFromChildStatuses, Pt as traceSpanSchema, Q as caseDetailSchema, Qt as EvalAssertionError, R as DEFAULT_LLM_CALLS_CONFIG, Rt as columnFormatSchema, St as cacheStatusSchema, T as sseEnvelopeSchema, Tt as traceCacheRefSchema, U as apiCallsConfigSchema, Ut as repoFileRefSchema, V as apiCallMetricPlacementSchema, Vt as jsonCellSchema, W as llmCallMetricFormatSchema, Wt as runArtifactRefSchema, X as trialSelectionModeSchema, Xt as hashCacheKey, Y as resolveLlmCallsConfig, Yt as evalTracer, Z as assertionFailureSchema, Zt as hashCacheKeySync, _t as cacheListItemSchema, an as mergeEvalOutput, at as scoreTraceSchema, bt as cacheRecordingOpSchema, cn as runInEvalScope, ct as evalChartBuiltinMetricSchema, dn as setScopeCacheContext, dt as evalChartMetricSchema, en as evalAssert, et as evalFreshnessStatusSchema, fn as startEvalBackgroundJob, ft as evalChartTooltipExtraSchema, gt as cacheFileSchema, hn as getEvalRegistry, ht as cacheEntrySchema, in as isInEvalScope, it as evalSummarySchema, j as getEvalDisplayStatus, jt as traceDisplayInputConfigSchema, k as getNestedAttribute, kt as 
traceAttributeDisplaySchema, ln as runInExistingEvalScope, lt as evalChartColorSchema, mn as defineEval, mt as evalChartsConfigSchema, nn as getEvalCaseInput, nt as evalStatItemSchema, on as nextEvalId, ot as evalChartAggregateSchema, pn as repoFile, pt as evalChartTypeSchema, q as llmCallsConfigSchema, qt as captureEvalSpanError, rn as incrementEvalOutput, rt as evalStatsConfigSchema, sn as runInEvalRuntimeScope, st as evalChartAxisSchema, tn as getCurrentScope, tt as evalStatAggregateSchema, un as setEvalOutput, ut as evalChartConfigSchema, vt as cacheModeSchema, w as updateManualScoreRequestSchema, wt as spanCacheOptionsSchema, xt as cacheRecordingSchema, yt as cacheOperationTypeSchema, z as agentEvalsConfigSchema, zt as columnKindSchema } from "./runOrchestration-BDyNrRQT.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-BmrtjQj_.mjs";
3
- import "./src-CEAJYN_X.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, 
traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as caseDetailSchema, $t as buildTraceTree, A as getEvalTitle, At as serializedCacheSpanSchema, B as apiCallMetricFormatSchema, Bt as traceSpanKindSchema, C as createRunRequestSchema, Cn as setEvalOutput, Ct as cacheFileSchema, D as extractApiCalls, Dn as defineEval, Dt as cacheRecordingOpSchema, E as extractCacheHits, En as repoFile, Et as cacheOperationTypeSchema, F as runManifestSchema, Ft as traceAttributeDisplayPlacementSchema, G as llmCallMetricPlacementSchema, Gt as columnFormatSchema, H as apiCallMetricSchema, Ht as traceSpanWarningSchema, I as runSummarySchema, It as traceAttributeDisplaySchema, J as resolveApiCallsConfig, Jt as jsonCellSchema, K as llmCallMetricSchema, Kt as columnKindSchema, L as DEFAULT_API_CALLS_CONFIG, Lt as traceDisplayConfigSchema, M as deriveScopedSummaryFromCases, Mt as traceCacheRefSchema, N as deriveStatusFromCaseRows, Nt as traceAttributeDisplayFormatSchema, O as extractLlmCalls, On as getEvalRegistry, Ot as cacheRecordingSchema, P as deriveStatusFromChildStatuses, Pt as traceAttributeDisplayInputSchema, Q as assertionFailureSchema, Qt as z, R as DEFAULT_LLM_CALLS_CONFIG, Rt as traceDisplayInputConfigSchema, Sn as runInExistingEvalScope, St as cacheEntryWithDebugKeySchema, T as sseEnvelopeSchema, Tn as startEvalBackgroundJob, Tt as cacheModeSchema, U as apiCallsConfigSchema, Ut as cellValueSchema, V as apiCallMetricPlacementSchema, Vt as traceSpanSchema, W as llmCallMetricFormatSchema, Wt as columnDefSchema, X as runLogsConfigSchema, Xt as repoFileRefSchema, Y as resolveLlmCallsConfig, Yt as numberDisplayOptionsSchema, Z as trialSelectionModeSchema, Zt as runArtifactRefSchema, _n as isInEvalScope, _t as evalChartTypeSchema, an as deserializeCacheRecording, at as evalSummarySchema, bn as runInEvalRuntimeScope, bt as cacheDebugKeyFileSchema, cn as serializeCacheValue, ct as runLogPhaseSchema, dt as evalChartAxisSchema, en as captureEvalSpanError, et as caseRowSchema, fn as evalAssert, ft as 
evalChartBuiltinMetricSchema, gn as incrementEvalOutput, gt as evalChartTooltipExtraSchema, hn as getEvalCaseInput, ht as evalChartMetricSchema, in as hashCacheKeySync, it as evalStatsConfigSchema, j as getEvalDisplayStatus, jt as spanCacheOptionsSchema, k as getNestedAttribute, kt as cacheStatusSchema, ln as EvalAssertionError, lt as scoreTraceSchema, mn as getCurrentScope, mt as evalChartConfigSchema, nn as evalTracer, nt as evalStatAggregateSchema, on as deserializeCacheValue, ot as runLogEntrySchema, pn as evalLog, pt as evalChartColorSchema, q as llmCallsConfigSchema, qt as fileRefSchema, rn as hashCacheKey, rt as evalStatItemSchema, sn as serializeCacheRecording, st as runLogLevelSchema, tn as evalSpan, tt as evalFreshnessStatusSchema, un as appendToEvalOutput, ut as evalChartAggregateSchema, vn as mergeEvalOutput, vt as evalChartsConfigSchema, w as updateManualScoreRequestSchema, wn as setScopeCacheContext, wt as cacheListItemSchema, xn as runInEvalScope, xt as cacheEntrySchema, yn as nextEvalId, yt as cacheDebugKeyEntrySchema, z as agentEvalsConfigSchema, zt as traceSpanErrorSchema } from "./runOrchestration-D2okEB3I.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-CULTt3Xp.mjs";
3
+ import "./src-CNf3xwVw.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, 
setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { C as createRunRequestSchema, F as runManifestSchema, I as runSummarySchema, Lt as columnDefSchema, S as createFsCacheStore, mt as evalChartsConfigSchema, rt as evalStatsConfigSchema, t as executeRun, v as parseEvalMetas, y as loadConfig } from "./runOrchestration-BDyNrRQT.mjs";
1
+ import { C as createRunRequestSchema, F as runManifestSchema, I as runSummarySchema, S as createFsCacheStore, Wt as columnDefSchema, dn as configureEvalRunLogs, it as evalStatsConfigSchema, t as executeRun, v as parseEvalMetas, vt as evalChartsConfigSchema, y as loadConfig } from "./runOrchestration-D2okEB3I.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -82,6 +82,7 @@ async function main() {
82
82
  const context = await readContext(process.argv[2]);
83
83
  process.chdir(context.workspaceRoot);
84
84
  const config = await loadConfig();
85
+ configureEvalRunLogs({ captureConsole: config.runLogs?.captureConsole !== false });
85
86
  const cacheStore = createFsCacheStore({
86
87
  workspaceRoot: context.workspaceRoot,
87
88
  dir: config.cache?.dir,