@ls-stack/agent-eval 0.37.0 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as apiCallMetricFormatSchema, $n as evalAssert, $t as evalChartTypeSchema, A as configReloadStateSchema, An as jsonCellSchema, At as evalStatsConfigSchema, B as simulateTokenAllocation, Bn as hashCacheKeySync, Bt as manualInputJsonFieldSchema, Cn as traceSpanSchema, Ct as assertionFailureSchema, Dn as columnFormatSchema, Dt as evalFreshnessStatusSchema, En as columnDefSchema, Et as discoveryIssueSchema, F as extractCacheEntries, Fn as buildTraceTree, Ft as runLogPhaseSchema, G as deriveScopedSummaryFromCases, Gn as repoFile, Gt as manualInputTextFieldSchema, H as getNestedAttribute, Hn as deserializeCacheValue, Ht as manualInputNumberFieldSchema, I as extractCacheHits, In as captureEvalSpanError, It as scoreTraceSchema, J as runManifestSchema, Jn as evalExpect, Jt as evalChartBuiltinMetricSchema, K as deriveStatusFromCaseRows, Kn as manualInputFileValueSchema, Kt as evalChartAggregateSchema, L as extractApiCalls, Ln as evalSpan, Lt as manualInputBooleanFieldSchema, M as createRunRequestSchema, Mn as repoFileRefSchema, Mt as runLogEntrySchema, N as updateManualScoreRequestSchema, Nn as runArtifactRefSchema, Nt as runLogLevelSchema, On as columnKindSchema, Ot as evalStatAggregateSchema, P as sseEnvelopeSchema, Pn as z, Pt as runLogLocationSchema, Q as agentEvalsConfigSchema, Qt as evalChartTooltipExtraSchema, R as extractLlmCalls, Rn as evalTracer, Rt as manualInputDescriptorSchema, Sn as traceSpanKindSchema, St as getCaseRowEvalKey, Tn as cellValueSchema, Tt as caseRowSchema, U as getEvalTitle, Un as serializeCacheRecording, Ut as manualInputSelectFieldSchema, V as applyDerivedCallAttributes, Vn as deserializeCacheRecording, Vt as manualInputMultilineFieldSchema, W as getEvalDisplayStatus, Wn as serializeCacheValue, Wt as manualInputSelectOptionSchema, X as DEFAULT_API_CALLS_CONFIG, Xn as advanceEvalTime, Xt as evalChartConfigSchema, Y as runSummarySchema, Yn as EvalAssertionError, Yt as evalChartColorSchema, Z as DEFAULT_LLM_CALLS_CONFIG, Zn as appendToEvalOutput, Zt as evalChartMetricSchema, _n as traceAttributeDisplayPlacementSchema, _t as runLogsConfigSchema, an as cacheFileSchema, ar as isInEvalScope, at as evalColumnsSchema, bn as traceDisplayInputConfigSchema, bt as buildEvalKey, cn as cacheOperationTypeSchema, cr as runInEvalRuntimeScope, ct as llmCallMetricFormatSchema, dn as cacheStatusSchema, dr as setEvalOutput, dt as llmCallPricingRateSchema, en as evalChartsConfigSchema, er as evalLog, et as apiCallMetricPlacementSchema, fn as serializedCacheSpanSchema, fr as setScopeCacheContext, ft as llmCallPricingSchema, gn as traceAttributeDisplayInputSchema, gt as resolveLlmCallsConfig, hn as traceAttributeDisplayFormatSchema, hr as getEvalRegistry, ht as resolveApiCallsConfig, in as cacheEntryWithDebugKeySchema, ir as incrementEvalOutput, it as evalColumnOverrideSchema, j as configReloadStatusSchema, jn as numberDisplayOptionsSchema, jt as evalSummarySchema, kn as fileRefSchema, kt as evalStatItemSchema, ln as cacheRecordingOpSchema, lr as runInEvalScope, lt as llmCallMetricPlacementSchema, mn as traceCacheRefSchema, mr as defineEval, mt as removeDefaultConfigSchema, nn as cacheDebugKeyFileSchema, nr as getEvalCaseInput, nt as apiCallsConfigSchema, on as cacheListItemSchema, or as mergeEvalOutput, ot as evalDeriveConfigSchema, pn as spanCacheOptionsSchema, pr as startEvalBackgroundJob, pt as llmCallsConfigSchema, q as deriveStatusFromChildStatuses, qn as readManualInputFile, qt as evalChartAxisSchema, rn as cacheEntrySchema, rr as getEvalStartTime, rt as defaultConfigKeySchema, sn as cacheModeSchema, sr as nextEvalId, st as llmCallCostCurrencySchema, tn as cacheDebugKeyEntrySchema, tr as getCurrentScope, tt as apiCallMetricSchema, un as cacheRecordingSchema, ur as runInExistingEvalScope, ut as llmCallMetricSchema, vn as traceAttributeDisplaySchema, vt as trialSelectionModeSchema, wn as traceSpanWarningSchema, wt as caseDetailSchema, xn as traceSpanErrorSchema, xt as getCaseRowCaseKey, yn as traceDisplayConfigSchema, yt as buildCaseKey, z as simulateLlmCallCost, zn as hashCacheKey, zt as manualInputFieldDescriptorSchema } from "./runOrchestration-C4o5TcIu.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CwGcJYWe.mjs";
3
- import "./src--13_4uDG.mjs";
1
+ import { $ as apiCallMetricFormatSchema, $n as evalAssert, $t as evalChartTypeSchema, A as configReloadStateSchema, An as jsonCellSchema, At as evalStatsConfigSchema, B as simulateTokenAllocation, Bn as hashCacheKeySync, Bt as manualInputJsonFieldSchema, Cn as traceSpanSchema, Ct as assertionFailureSchema, Dn as columnFormatSchema, Dt as evalFreshnessStatusSchema, En as columnDefSchema, Et as discoveryIssueSchema, F as extractCacheEntries, Fn as buildTraceTree, Ft as runLogPhaseSchema, G as deriveScopedSummaryFromCases, Gn as repoFile, Gt as manualInputTextFieldSchema, H as getNestedAttribute, Hn as deserializeCacheValue, Ht as manualInputNumberFieldSchema, I as extractCacheHits, In as captureEvalSpanError, It as scoreTraceSchema, J as runManifestSchema, Jn as evalExpect, Jt as evalChartBuiltinMetricSchema, K as deriveStatusFromCaseRows, Kn as manualInputFileValueSchema, Kt as evalChartAggregateSchema, L as extractApiCalls, Ln as evalSpan, Lt as manualInputBooleanFieldSchema, M as createRunRequestSchema, Mn as repoFileRefSchema, Mt as runLogEntrySchema, N as updateManualScoreRequestSchema, Nn as runArtifactRefSchema, Nt as runLogLevelSchema, On as columnKindSchema, Ot as evalStatAggregateSchema, P as sseEnvelopeSchema, Pn as z, Pt as runLogLocationSchema, Q as agentEvalsConfigSchema, Qt as evalChartTooltipExtraSchema, R as extractLlmCalls, Rn as evalTracer, Rt as manualInputDescriptorSchema, Sn as traceSpanKindSchema, St as getCaseRowEvalKey, Tn as cellValueSchema, Tt as caseRowSchema, U as getEvalTitle, Un as serializeCacheRecording, Ut as manualInputSelectFieldSchema, V as applyDerivedCallAttributes, Vn as deserializeCacheRecording, Vt as manualInputMultilineFieldSchema, W as getEvalDisplayStatus, Wn as serializeCacheValue, Wt as manualInputSelectOptionSchema, X as DEFAULT_API_CALLS_CONFIG, Xn as advanceEvalTime, Xt as evalChartConfigSchema, Y as runSummarySchema, Yn as EvalAssertionError, Yt as evalChartColorSchema, Z as DEFAULT_LLM_CALLS_CONFIG, Zn as appendToEvalOutput, Zt as evalChartMetricSchema, _n as traceAttributeDisplayPlacementSchema, _t as runLogsConfigSchema, an as cacheFileSchema, ar as isInEvalScope, at as evalColumnsSchema, bn as traceDisplayInputConfigSchema, bt as buildEvalKey, cn as cacheOperationTypeSchema, cr as runInEvalRuntimeScope, ct as llmCallMetricFormatSchema, dn as cacheStatusSchema, dr as setEvalOutput, dt as llmCallPricingRateSchema, en as evalChartsConfigSchema, er as evalLog, et as apiCallMetricPlacementSchema, fn as serializedCacheSpanSchema, fr as setScopeCacheContext, ft as llmCallPricingSchema, gn as traceAttributeDisplayInputSchema, gt as resolveLlmCallsConfig, hn as traceAttributeDisplayFormatSchema, hr as getEvalRegistry, ht as resolveApiCallsConfig, in as cacheEntryWithDebugKeySchema, ir as incrementEvalOutput, it as evalColumnOverrideSchema, j as configReloadStatusSchema, jn as numberDisplayOptionsSchema, jt as evalSummarySchema, kn as fileRefSchema, kt as evalStatItemSchema, ln as cacheRecordingOpSchema, lr as runInEvalScope, lt as llmCallMetricPlacementSchema, mn as traceCacheRefSchema, mr as defineEval, mt as removeDefaultConfigSchema, nn as cacheDebugKeyFileSchema, nr as getEvalCaseInput, nt as apiCallsConfigSchema, on as cacheListItemSchema, or as mergeEvalOutput, ot as evalDeriveConfigSchema, pn as spanCacheOptionsSchema, pr as startEvalBackgroundJob, pt as llmCallsConfigSchema, q as deriveStatusFromChildStatuses, qn as readManualInputFile, qt as evalChartAxisSchema, rn as cacheEntrySchema, rr as getEvalStartTime, rt as defaultConfigKeySchema, sn as cacheModeSchema, sr as nextEvalId, st as llmCallCostCurrencySchema, tn as cacheDebugKeyEntrySchema, tr as getCurrentScope, tt as apiCallMetricSchema, un as cacheRecordingSchema, ur as runInExistingEvalScope, ut as llmCallMetricSchema, vn as traceAttributeDisplaySchema, vt as trialSelectionModeSchema, wn as traceSpanWarningSchema, wt as caseDetailSchema, xn as traceSpanErrorSchema, xt as getCaseRowCaseKey, yn as traceDisplayConfigSchema, yt as buildCaseKey, z as simulateLlmCallCost, zn as hashCacheKey, zt as manualInputFieldDescriptorSchema } from "./runOrchestration-BhUFWvq9.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BUX6tr9J.mjs";
3
+ import "./src-BwJ5tod2.mjs";
4
4
  export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { At as evalStatsConfigSchema, C as parseEvalDiscovery, En as columnDefSchema, J as runManifestSchema, M as createRunRequestSchema, Qn as configureEvalRunLogs, Rt as manualInputDescriptorSchema, T as loadConfig, Y as runSummarySchema, bt as buildEvalKey, en as evalChartsConfigSchema, k as createFsCacheStore, p as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C4o5TcIu.mjs";
1
+ import { At as evalStatsConfigSchema, C as parseEvalDiscovery, En as columnDefSchema, J as runManifestSchema, M as createRunRequestSchema, Qn as configureEvalRunLogs, Rt as manualInputDescriptorSchema, T as loadConfig, Y as runSummarySchema, bt as buildEvalKey, en as evalChartsConfigSchema, k as createFsCacheStore, p as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BhUFWvq9.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -29,9 +29,41 @@ const runChildContextSchema = z.object({
29
29
  evals: z.array(evalMetaSchema).optional()
30
30
  });
31
31
  let activeContext;
32
+ let fatalErrorReported = false;
33
+ let disconnectExpected = false;
34
+ const pendingMessageSends = /* @__PURE__ */ new Set();
32
35
  function sendMessage(message) {
33
36
  if (process.send === void 0) return;
34
- process.send(message);
37
+ const sendPromise = new Promise((resolvePromise) => {
38
+ try {
39
+ process.send?.(message, (error) => {
40
+ if (error) {
41
+ console.error("Failed to send run child message:");
42
+ console.error(formatUnknownErrorDetails(error));
43
+ }
44
+ resolvePromise();
45
+ });
46
+ } catch (error) {
47
+ console.error("Failed to send run child message:");
48
+ console.error(formatUnknownErrorDetails(error));
49
+ resolvePromise();
50
+ }
51
+ });
52
+ pendingMessageSends.add(sendPromise);
53
+ sendPromise.finally(() => {
54
+ pendingMessageSends.delete(sendPromise);
55
+ });
56
+ }
57
+ async function flushMessageSends() {
58
+ while (pendingMessageSends.size > 0) await Promise.allSettled([...pendingMessageSends]);
59
+ }
60
+ function installFatalRunChildErrorHandlers() {
61
+ process.once("uncaughtException", (error) => {
62
+ reportFatalRunChildErrorAndExit(error);
63
+ });
64
+ process.once("unhandledRejection", (reason) => {
65
+ reportFatalRunChildErrorAndExit(toUnhandledRejectionError(reason));
66
+ });
35
67
  }
36
68
  function getSourceFingerprint(source) {
37
69
  return createHash("sha256").update(source).digest("hex");
@@ -92,6 +124,7 @@ async function readContext(contextPath) {
92
124
  }
93
125
  async function main() {
94
126
  process.on("disconnect", () => {
127
+ if (disconnectExpected) return;
95
128
  process.exit(1);
96
129
  });
97
130
  const context = await readContext(process.argv[2]);
@@ -155,8 +188,11 @@ async function main() {
155
188
  type: "done",
156
189
  evals: [...evals.values()]
157
190
  });
191
+ await flushMessageSends();
158
192
  }
159
193
  async function handleFatalRunChildError(error) {
194
+ if (fatalErrorReported) return;
195
+ fatalErrorReported = true;
160
196
  const message = formatUnknownErrorDetails(error);
161
197
  process.exitCode = 1;
162
198
  console.error(message);
@@ -187,15 +223,33 @@ async function handleFatalRunChildError(error) {
187
223
  payload: { message }
188
224
  }
189
225
  });
226
+ await flushMessageSends();
190
227
  }
191
228
  function formatUnknownErrorDetails(error) {
192
229
  if (error instanceof Error) return error.stack ?? error.message;
193
230
  if (typeof error === "string") return error;
194
231
  return String(error);
195
232
  }
233
+ function toUnhandledRejectionError(reason) {
234
+ if (reason instanceof Error) return reason;
235
+ return /* @__PURE__ */ new Error(`Unhandled rejection: ${formatUnknownErrorDetails(reason)}`);
236
+ }
237
+ async function reportFatalRunChildErrorAndExit(error) {
238
+ try {
239
+ await handleFatalRunChildError(error);
240
+ } catch (reportError) {
241
+ console.error("Failed to report fatal run child error:");
242
+ console.error(formatUnknownErrorDetails(reportError));
243
+ } finally {
244
+ process.exit(1);
245
+ }
246
+ }
247
+ installFatalRunChildErrorHandlers();
196
248
  await main().catch(async (error) => {
197
249
  await handleFatalRunChildError(error);
198
250
  });
251
+ await flushMessageSends();
252
+ disconnectExpected = true;
199
253
  process.disconnect();
200
254
  //#endregion
201
255
  export {};