@ls-stack/agent-eval 0.27.1 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as llmCallsConfigSchema, $t as traceSpanKindSchema, A as extractApiCalls, An as getCurrentScope, At as evalChartTypeSchema, B as runSummarySchema, Bn as setEvalOutput, Bt as cacheRecordingOpSchema, Cn as evalExpect, Ct as evalChartAggregateSchema, D as sseEnvelopeSchema, Dt as evalChartConfigSchema, E as updateManualScoreRequestSchema, En as appendToEvalOutput, Et as evalChartColorSchema, F as getEvalDisplayStatus, Fn as mergeEvalOutput, Ft as cacheEntryWithDebugKeySchema, G as apiCallMetricPlacementSchema, Gt as traceCacheRefSchema, H as DEFAULT_LLM_CALLS_CONFIG, Hn as startEvalBackgroundJob, Ht as cacheStatusSchema, I as deriveScopedSummaryFromCases, In as nextEvalId, It as cacheFileSchema, J as defaultConfigKeySchema, Jt as traceAttributeDisplayPlacementSchema, K as apiCallMetricSchema, Kt as traceAttributeDisplayFormatSchema, L as deriveStatusFromCaseRows, Ln as runInEvalRuntimeScope, Lt as cacheListItemSchema, M as applyDerivedCallAttributes, Mn as getEvalStartTime, Mt as cacheDebugKeyEntrySchema, N as getNestedAttribute, Nn as incrementEvalOutput, Nt as cacheDebugKeyFileSchema, O as extractCacheEntries, On as evalAssert, Ot as evalChartMetricSchema, P as getEvalTitle, Pn as isInEvalScope, Pt as cacheEntrySchema, Q as llmCallPricingSchema, Qt as traceSpanErrorSchema, R as deriveStatusFromChildStatuses, Rn as runInEvalScope, Rt as cacheModeSchema, Sn as repoFile, St as scoreTraceSchema, T as createRunRequestSchema, Tn as advanceEvalTime, Tt as evalChartBuiltinMetricSchema, U as agentEvalsConfigSchema, Un as defineEval, Ut as serializedCacheSpanSchema, V as DEFAULT_API_CALLS_CONFIG, Vn as setScopeCacheContext, Vt as cacheRecordingSchema, W as apiCallMetricFormatSchema, Wn as getEvalRegistry, Wt as spanCacheOptionsSchema, X as llmCallMetricPlacementSchema, Xt as traceDisplayConfigSchema, Y as llmCallMetricFormatSchema, Yt as traceAttributeDisplaySchema, Z as llmCallMetricSchema, Zt as traceDisplayInputConfigSchema, _n as hashCacheKeySync, _t as evalSummarySchema, an as columnKindSchema, at as buildCaseKey, bn as serializeCacheRecording, bt as runLogLocationSchema, cn as numberDisplayOptionsSchema, ct as getCaseRowEvalKey, dn as z, dt as caseRowSchema, en as traceSpanSchema, et as removeDefaultConfigSchema, fn as buildTraceTree, ft as discoveryIssueSchema, gn as hashCacheKey, gt as evalStatsConfigSchema, hn as evalTracer, ht as evalStatItemSchema, in as columnFormatSchema, it as trialSelectionModeSchema, j as extractLlmCalls, jn as getEvalCaseInput, jt as evalChartsConfigSchema, k as extractCacheHits, kn as evalLog, kt as evalChartTooltipExtraSchema, ln as repoFileRefSchema, lt as assertionFailureSchema, mn as evalSpan, mt as evalStatAggregateSchema, nn as cellValueSchema, nt as resolveLlmCallsConfig, on as fileRefSchema, ot as buildEvalKey, pn as captureEvalSpanError, pt as evalFreshnessStatusSchema, q as apiCallsConfigSchema, qt as traceAttributeDisplayInputSchema, rn as columnDefSchema, rt as runLogsConfigSchema, sn as jsonCellSchema, st as getCaseRowCaseKey, tn as traceSpanWarningSchema, tt as resolveApiCallsConfig, un as runArtifactRefSchema, ut as caseDetailSchema, vn as deserializeCacheRecording, vt as runLogEntrySchema, wn as EvalAssertionError, wt as evalChartAxisSchema, xn as serializeCacheValue, xt as runLogPhaseSchema, yn as deserializeCacheValue, yt as runLogLevelSchema, z as runManifestSchema, zn as runInExistingEvalScope, zt as cacheOperationTypeSchema } from "./runOrchestration-FEvBwwJI.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-Clf8xUFa.mjs";
3
- import "./src-BBwT7_cy.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as defaultConfigKeySchema, $n as incrementEvalOutput, $t as cacheEntryWithDebugKeySchema, A as createRunRequestSchema, An as buildTraceTree, At as runLogPhaseSchema, B as getEvalDisplayStatus, Bn as repoFile, Bt as manualInputTextFieldSchema, Cn as columnKindSchema, Ct as evalStatAggregateSchema, Dn as repoFileRefSchema, Dt as runLogEntrySchema, En as numberDisplayOptionsSchema, Et as evalSummarySchema, F as extractApiCalls, Fn as hashCacheKeySync, Ft as manualInputJsonFieldSchema, G as runSummarySchema, Gn as advanceEvalTime, Gt as evalChartConfigSchema, H as deriveStatusFromCaseRows, Hn as readManualInputFile, Ht as evalChartAxisSchema, I as extractLlmCalls, In as deserializeCacheRecording, It as manualInputMultilineFieldSchema, J as agentEvalsConfigSchema, Jn as evalAssert, Jt as evalChartTypeSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as appendToEvalOutput, Kt as evalChartMetricSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheValue, Lt as manualInputNumberFieldSchema, M as sseEnvelopeSchema, Mn as evalSpan, Mt as manualInputBooleanFieldSchema, N as extractCacheEntries, Nn as evalTracer, Nt as manualInputDescriptorSchema, O as configReloadStateSchema, On as runArtifactRefSchema, Ot as runLogLevelSchema, P as extractCacheHits, Pn as hashCacheKey, Pt as manualInputFieldDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalStartTime, Qt as cacheEntrySchema, R as getNestedAttribute, Rn as serializeCacheRecording, Rt as manualInputSelectFieldSchema, Sn as columnFormatSchema, St as evalFreshnessStatusSchema, Tn as jsonCellSchema, Tt as evalStatsConfigSchema, U as deriveStatusFromChildStatuses, Un as evalExpect, Ut as evalChartBuiltinMetricSchema, V as deriveScopedSummaryFromCases, Vn as manualInputFileValueSchema, Vt as evalChartAggregateSchema, W as runManifestSchema, Wn as EvalAssertionError, Wt as evalChartColorSchema, X as apiCallMetricPlacementSchema, Xn as getCurrentScope, Xt as cacheDebugKeyEntrySchema, Y as apiCallMetricFormatSchema, Yn as evalLog, Yt as evalChartsConfigSchema, Z as apiCallMetricSchema, Zn as getEvalCaseInput, Zt as cacheDebugKeyFileSchema, _n as traceSpanKindSchema, _t as getCaseRowEvalKey, an as cacheRecordingSchema, ar as runInExistingEvalScope, at as llmCallMetricSchema, bn as cellValueSchema, bt as caseRowSchema, cn as spanCacheOptionsSchema, cr as startEvalBackgroundJob, ct as llmCallsConfigSchema, dn as traceAttributeDisplayInputSchema, dt as resolveLlmCallsConfig, en as cacheFileSchema, er as isInEvalScope, et as evalColumnOverrideSchema, fn as traceAttributeDisplayPlacementSchema, ft as runLogsConfigSchema, gn as traceSpanErrorSchema, gt as getCaseRowCaseKey, hn as traceDisplayInputConfigSchema, ht as buildEvalKey, in as cacheRecordingOpSchema, ir as runInEvalScope, it as llmCallMetricPlacementSchema, j as updateManualScoreRequestSchema, jn as captureEvalSpanError, jt as scoreTraceSchema, k as configReloadStatusSchema, kn as z, kt as runLogLocationSchema, ln as traceCacheRefSchema, lr as defineEval, lt as removeDefaultConfigSchema, mn as traceDisplayConfigSchema, mt as buildCaseKey, nn as cacheModeSchema, nr as nextEvalId, nt as evalDeriveConfigSchema, on as cacheStatusSchema, or as setEvalOutput, ot as llmCallPricingRateSchema, pn as traceAttributeDisplaySchema, pt as trialSelectionModeSchema, q as DEFAULT_LLM_CALLS_CONFIG, qt as evalChartTooltipExtraSchema, rn as cacheOperationTypeSchema, rr as runInEvalRuntimeScope, rt as llmCallMetricFormatSchema, sn as serializedCacheSpanSchema, sr as setScopeCacheContext, st as llmCallPricingSchema, tn as cacheListItemSchema, tr as mergeEvalOutput, tt as evalColumnsSchema, un as traceAttributeDisplayFormatSchema, ur as getEvalRegistry, ut as resolveApiCallsConfig, vn as traceSpanSchema, vt as assertionFailureSchema, wn as fileRefSchema, wt as evalStatItemSchema, xn as columnDefSchema, xt as discoveryIssueSchema, yn as traceSpanWarningSchema, yt as caseDetailSchema, z as getEvalTitle, zn as serializeCacheValue, zt as manualInputSelectOptionSchema } from "./runOrchestration-CIARrLs6.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CIc_gBNM.mjs";
3
+ import "./src-CkWT1iSu.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { B as runSummarySchema, Dn as configureEvalRunLogs, T as createRunRequestSchema, b as loadConfig, gt as evalStatsConfigSchema, jt as evalChartsConfigSchema, ot as buildEvalKey, r as getTargetEvals$1, rn as columnDefSchema, t as executeRun, v as parseEvalDiscovery, w as createFsCacheStore, z as runManifestSchema } from "./runOrchestration-FEvBwwJI.mjs";
1
+ import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, G as runSummarySchema, Nt as manualInputDescriptorSchema, Tt as evalStatsConfigSchema, W as runManifestSchema, Yt as evalChartsConfigSchema, ht as buildEvalKey, qn as configureEvalRunLogs, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery, xn as columnDefSchema } from "./runOrchestration-CIARrLs6.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -15,7 +15,9 @@ const evalMetaSchema = z.object({
15
15
  columnDefs: z.array(columnDefSchema),
16
16
  caseCount: z.number().nullable(),
17
17
  stats: evalStatsConfigSchema.optional(),
18
- charts: evalChartsConfigSchema.optional()
18
+ charts: evalChartsConfigSchema.optional(),
19
+ manualInputDescriptor: manualInputDescriptorSchema.optional(),
20
+ requiresManualInput: z.boolean().optional()
19
21
  });
20
22
  const runChildContextSchema = z.object({
21
23
  request: createRunRequestSchema,