@ls-stack/agent-eval 0.28.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as llmCallMetricFormatSchema, $t as traceAttributeDisplayPlacementSchema, A as extractCacheHits, An as advanceEvalTime, At as evalChartBuiltinMetricSchema, B as runManifestSchema, Bn as mergeEvalOutput, Bt as cacheEntryWithDebugKeySchema, Cn as deserializeCacheRecording, Ct as runLogEntrySchema, D as updateManualScoreRequestSchema, Dn as repoFile, Dt as scoreTraceSchema, E as createRunRequestSchema, En as serializeCacheValue, Et as runLogPhaseSchema, F as getEvalTitle, Fn as getCurrentScope, Ft as evalChartTypeSchema, G as apiCallMetricFormatSchema, Gn as setEvalOutput, Gt as cacheRecordingOpSchema, H as DEFAULT_API_CALLS_CONFIG, Hn as runInEvalRuntimeScope, Ht as cacheListItemSchema, I as getEvalDisplayStatus, In as getEvalCaseInput, It as evalChartsConfigSchema, J as apiCallsConfigSchema, Jn as defineEval, Jt as serializedCacheSpanSchema, K as apiCallMetricPlacementSchema, Kn as setScopeCacheContext, Kt as cacheRecordingSchema, L as deriveScopedSummaryFromCases, Ln as getEvalStartTime, Lt as cacheDebugKeyEntrySchema, M as extractLlmCalls, Mt as evalChartConfigSchema, N as applyDerivedCallAttributes, Nn as evalAssert, Nt as evalChartMetricSchema, O as sseEnvelopeSchema, On as evalExpect, Ot as evalChartAggregateSchema, P as getNestedAttribute, Pn as evalLog, Pt as evalChartTooltipExtraSchema, Q as evalDeriveConfigSchema, Qt as traceAttributeDisplayInputSchema, R as deriveStatusFromCaseRows, Rn as incrementEvalOutput, Rt as cacheDebugKeyFileSchema, Sn as hashCacheKeySync, St as evalSummarySchema, Tn as serializeCacheRecording, Tt as runLogLocationSchema, U as DEFAULT_LLM_CALLS_CONFIG, Un as runInEvalScope, Ut as cacheModeSchema, V as runSummarySchema, Vn as nextEvalId, Vt as cacheFileSchema, W as agentEvalsConfigSchema, Wn as runInExistingEvalScope, Wt as cacheOperationTypeSchema, X as evalColumnOverrideSchema, Xt as traceCacheRefSchema, Y as defaultConfigKeySchema, Yn as getEvalRegistry, Yt as spanCacheOptionsSchema, Z as evalColumnsSchema, Zt as traceAttributeDisplayFormatSchema, _n as buildTraceTree, _t as discoveryIssueSchema, an as traceSpanSchema, at as removeDefaultConfigSchema, bn as evalTracer, bt as evalStatItemSchema, cn as columnDefSchema, ct as runLogsConfigSchema, dn as fileRefSchema, dt as buildEvalKey, en as traceAttributeDisplaySchema, et as llmCallMetricPlacementSchema, fn as jsonCellSchema, ft as getCaseRowCaseKey, gn as z, gt as caseRowSchema, hn as runArtifactRefSchema, ht as caseDetailSchema, in as traceSpanKindSchema, it as llmCallsConfigSchema, j as extractApiCalls, jn as appendToEvalOutput, jt as evalChartColorSchema, k as extractCacheEntries, kn as EvalAssertionError, kt as evalChartAxisSchema, ln as columnFormatSchema, lt as trialSelectionModeSchema, mn as repoFileRefSchema, mt as assertionFailureSchema, nn as traceDisplayInputConfigSchema, nt as llmCallPricingRateSchema, on as traceSpanWarningSchema, ot as resolveApiCallsConfig, pn as numberDisplayOptionsSchema, pt as getCaseRowEvalKey, q as apiCallMetricSchema, qn as startEvalBackgroundJob, qt as cacheStatusSchema, rn as traceSpanErrorSchema, rt as llmCallPricingSchema, sn as cellValueSchema, st as resolveLlmCallsConfig, tn as traceDisplayConfigSchema, tt as llmCallMetricSchema, un as columnKindSchema, ut as buildCaseKey, vn as captureEvalSpanError, vt as evalFreshnessStatusSchema, wn as deserializeCacheValue, wt as runLogLevelSchema, xn as hashCacheKey, xt as evalStatsConfigSchema, yn as evalSpan, yt as evalStatAggregateSchema, z as deriveStatusFromChildStatuses, zn as isInEvalScope, zt as cacheEntrySchema } from "./runOrchestration-ClWYWPen.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-BQwRbqsL.mjs";
3
- import "./src-CuirVcPY.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as defaultConfigKeySchema, $n as incrementEvalOutput, $t as cacheEntryWithDebugKeySchema, A as createRunRequestSchema, An as buildTraceTree, At as runLogPhaseSchema, B as getEvalDisplayStatus, Bn as repoFile, Bt as manualInputTextFieldSchema, Cn as columnKindSchema, Ct as evalStatAggregateSchema, Dn as repoFileRefSchema, Dt as runLogEntrySchema, En as numberDisplayOptionsSchema, Et as evalSummarySchema, F as extractApiCalls, Fn as hashCacheKeySync, Ft as manualInputJsonFieldSchema, G as runSummarySchema, Gn as advanceEvalTime, Gt as evalChartConfigSchema, H as deriveStatusFromCaseRows, Hn as readManualInputFile, Ht as evalChartAxisSchema, I as extractLlmCalls, In as deserializeCacheRecording, It as manualInputMultilineFieldSchema, J as agentEvalsConfigSchema, Jn as evalAssert, Jt as evalChartTypeSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as appendToEvalOutput, Kt as evalChartMetricSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheValue, Lt as manualInputNumberFieldSchema, M as sseEnvelopeSchema, Mn as evalSpan, Mt as manualInputBooleanFieldSchema, N as extractCacheEntries, Nn as evalTracer, Nt as manualInputDescriptorSchema, O as configReloadStateSchema, On as runArtifactRefSchema, Ot as runLogLevelSchema, P as extractCacheHits, Pn as hashCacheKey, Pt as manualInputFieldDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalStartTime, Qt as cacheEntrySchema, R as getNestedAttribute, Rn as serializeCacheRecording, Rt as manualInputSelectFieldSchema, Sn as columnFormatSchema, St as evalFreshnessStatusSchema, Tn as jsonCellSchema, Tt as evalStatsConfigSchema, U as deriveStatusFromChildStatuses, Un as evalExpect, Ut as evalChartBuiltinMetricSchema, V as deriveScopedSummaryFromCases, Vn as manualInputFileValueSchema, Vt as evalChartAggregateSchema, W as runManifestSchema, Wn as EvalAssertionError, Wt as evalChartColorSchema, X as apiCallMetricPlacementSchema, Xn as getCurrentScope, Xt as cacheDebugKeyEntrySchema, Y as apiCallMetricFormatSchema, Yn as evalLog, Yt as evalChartsConfigSchema, Z as apiCallMetricSchema, Zn as getEvalCaseInput, Zt as cacheDebugKeyFileSchema, _n as traceSpanKindSchema, _t as getCaseRowEvalKey, an as cacheRecordingSchema, ar as runInExistingEvalScope, at as llmCallMetricSchema, bn as cellValueSchema, bt as caseRowSchema, cn as spanCacheOptionsSchema, cr as startEvalBackgroundJob, ct as llmCallsConfigSchema, dn as traceAttributeDisplayInputSchema, dt as resolveLlmCallsConfig, en as cacheFileSchema, er as isInEvalScope, et as evalColumnOverrideSchema, fn as traceAttributeDisplayPlacementSchema, ft as runLogsConfigSchema, gn as traceSpanErrorSchema, gt as getCaseRowCaseKey, hn as traceDisplayInputConfigSchema, ht as buildEvalKey, in as cacheRecordingOpSchema, ir as runInEvalScope, it as llmCallMetricPlacementSchema, j as updateManualScoreRequestSchema, jn as captureEvalSpanError, jt as scoreTraceSchema, k as configReloadStatusSchema, kn as z, kt as runLogLocationSchema, ln as traceCacheRefSchema, lr as defineEval, lt as removeDefaultConfigSchema, mn as traceDisplayConfigSchema, mt as buildCaseKey, nn as cacheModeSchema, nr as nextEvalId, nt as evalDeriveConfigSchema, on as cacheStatusSchema, or as setEvalOutput, ot as llmCallPricingRateSchema, pn as traceAttributeDisplaySchema, pt as trialSelectionModeSchema, q as DEFAULT_LLM_CALLS_CONFIG, qt as evalChartTooltipExtraSchema, rn as cacheOperationTypeSchema, rr as runInEvalRuntimeScope, rt as llmCallMetricFormatSchema, sn as serializedCacheSpanSchema, sr as setScopeCacheContext, st as llmCallPricingSchema, tn as cacheListItemSchema, tr as mergeEvalOutput, tt as evalColumnsSchema, un as traceAttributeDisplayFormatSchema, ur as getEvalRegistry, ut as resolveApiCallsConfig, vn as traceSpanSchema, vt as assertionFailureSchema, wn as fileRefSchema, wt as evalStatItemSchema, xn as columnDefSchema, xt as discoveryIssueSchema, yn as traceSpanWarningSchema, yt as caseDetailSchema, z as getEvalTitle, zn as serializeCacheValue, zt as manualInputSelectOptionSchema } from "./runOrchestration-CO3Vf0cQ.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CiFOqMwS.mjs";
3
+ import "./src-BiPLv9ya.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { B as runManifestSchema, E as createRunRequestSchema, It as evalChartsConfigSchema, Mn as configureEvalRunLogs, T as createFsCacheStore, V as runSummarySchema, cn as columnDefSchema, dt as buildEvalKey, r as getTargetEvals$1, t as executeRun, x as loadConfig, xt as evalStatsConfigSchema, y as parseEvalDiscovery } from "./runOrchestration-ClWYWPen.mjs";
1
+ import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, G as runSummarySchema, Nt as manualInputDescriptorSchema, Tt as evalStatsConfigSchema, W as runManifestSchema, Yt as evalChartsConfigSchema, ht as buildEvalKey, qn as configureEvalRunLogs, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery, xn as columnDefSchema } from "./runOrchestration-CO3Vf0cQ.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -15,7 +15,9 @@ const evalMetaSchema = z.object({
15
15
  columnDefs: z.array(columnDefSchema),
16
16
  caseCount: z.number().nullable(),
17
17
  stats: evalStatsConfigSchema.optional(),
18
- charts: evalChartsConfigSchema.optional()
18
+ charts: evalChartsConfigSchema.optional(),
19
+ manualInputDescriptor: manualInputDescriptorSchema.optional(),
20
+ requiresManualInput: z.boolean().optional()
19
21
  });
20
22
  const runChildContextSchema = z.object({
21
23
  request: createRunRequestSchema,