@ls-stack/agent-eval 0.12.2 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-D3wtTfbu.mjs → app-Dg3qYVku.mjs} +3 -3
- package/dist/bin.mjs +1 -1
- package/dist/{cli-BEPaYHmX.mjs → cli-lOZdhO2D.mjs} +3 -3
- package/dist/index.d.mts +26 -3
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-DrgpaDaf.mjs → runOrchestration-H0pSUl3I.mjs} +38 -4
- package/dist/{runner-BHCokR_t.mjs → runner-BK1KX2SA.mjs} +1 -1
- package/dist/{runner-BVC9yBDu.mjs → runner-CmbmfBG2.mjs} +2 -2
- package/dist/src-Btb9RCYD.mjs +3 -0
- package/package.json +1 -1
- package/dist/src-BU6ZtVIB.mjs +0 -3
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { C as updateManualScoreRequestSchema, S as createRunRequestSchema } from "./runOrchestration-
|
|
2
|
-
import "./src-
|
|
3
|
-
import { t as getRunnerInstance } from "./runner-
|
|
1
|
+
import { C as updateManualScoreRequestSchema, S as createRunRequestSchema } from "./runOrchestration-H0pSUl3I.mjs";
|
|
2
|
+
import "./src-Btb9RCYD.mjs";
|
|
3
|
+
import { t as getRunnerInstance } from "./runner-CmbmfBG2.mjs";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { dirname, join, relative, resolve, sep } from "node:path";
|
|
6
6
|
import { z } from "zod/v4";
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as getEvalDisplayStatus, F as runSummarySchema, J as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus,
|
|
1
|
+
import { A as getEvalDisplayStatus, F as runSummarySchema, J as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, fn as getEvalRegistry, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as deriveScopedSummaryFromCases, k as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, q as resolveApiCallsConfig, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-H0pSUl3I.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join, relative, resolve } from "node:path";
|
|
@@ -1050,8 +1050,8 @@ async function commandApp(args) {
|
|
|
1050
1050
|
const { serve } = await import("@hono/node-server");
|
|
1051
1051
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
1052
1052
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
1053
|
-
const appModule = await import("./app-
|
|
1054
|
-
const runnerModule = await import("./runner-
|
|
1053
|
+
const appModule = await import("./app-Dg3qYVku.mjs");
|
|
1054
|
+
const runnerModule = await import("./runner-BK1KX2SA.mjs");
|
|
1055
1055
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
1056
1056
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
1057
1057
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -2576,6 +2576,14 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
2576
2576
|
* `key` is provided.
|
|
2577
2577
|
*/
|
|
2578
2578
|
traceDisplay?: TraceDisplayInputConfig;
|
|
2579
|
+
/**
|
|
2580
|
+
* Whether registered background jobs should finish before outputs, tracing,
|
|
2581
|
+
* and scores are finalized. Defaults to `true`.
|
|
2582
|
+
*
|
|
2583
|
+
* Set to `false` for evals that intentionally fire work that should not
|
|
2584
|
+
* delay case finalization; late mutations are not guaranteed to persist.
|
|
2585
|
+
*/
|
|
2586
|
+
waitForBackgroundJobs?: boolean;
|
|
2579
2587
|
execute: (ctx: EvalExecuteContext<TInput, TOutputs>) => Promise<void> | void;
|
|
2580
2588
|
deriveFromTracing?: (ctx: EvalDeriveContext<TInput>) => Partial<TOutputs> | Promise<Partial<TOutputs>>;
|
|
2581
2589
|
scores?: Record<string, EvalScoreDef<TInput, TOutputs>>;
|
|
@@ -2709,7 +2717,8 @@ type EvalCaseScope = {
|
|
|
2709
2717
|
* active span. Span-bound refs are appended to the owning span's
|
|
2710
2718
|
* `cache.refs` attribute instead.
|
|
2711
2719
|
*/
|
|
2712
|
-
caseCacheRefs: TraceCacheRef[];
|
|
2720
|
+
caseCacheRefs: TraceCacheRef[]; /** Background promises that should settle before the case scope finalizes. */
|
|
2721
|
+
pendingBackgroundJobs: Set<Promise<unknown>>;
|
|
2713
2722
|
};
|
|
2714
2723
|
/** Error thrown when an eval assertion fails during case execution. */
|
|
2715
2724
|
declare class EvalAssertionError extends Error {
|
|
@@ -2724,6 +2733,14 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
|
|
|
2724
2733
|
* behavior without importing or inspecting the full eval scope.
|
|
2725
2734
|
*/
|
|
2726
2735
|
declare function isInEvalScope(): boolean;
|
|
2736
|
+
/**
|
|
2737
|
+
* Register background work that should settle before eval finalization.
|
|
2738
|
+
*
|
|
2739
|
+
* The original promise is returned unchanged, and its fulfillment or rejection
|
|
2740
|
+
* behavior remains normal for callers. The eval runtime only waits for
|
|
2741
|
+
* settlement; it does not convert background rejections into case errors.
|
|
2742
|
+
*/
|
|
2743
|
+
declare function startEvalBackgroundJob<T>(promise: Promise<T>): Promise<T>;
|
|
2727
2744
|
/**
|
|
2728
2745
|
* Return the authored input for the current eval case.
|
|
2729
2746
|
*
|
|
@@ -2744,7 +2761,8 @@ declare function setScopeCacheContext(scope: EvalCaseScope, context: CacheScopeC
|
|
|
2744
2761
|
type RunInEvalScopeOptions = {
|
|
2745
2762
|
/** Authored input for the active eval case. */input?: unknown; /** Stable prefix used when generating scoped IDs with `nextEvalId()`. */
|
|
2746
2763
|
idPrefix?: string; /** Cache adapter + mode attached to the scope before `fn` runs. */
|
|
2747
|
-
cacheContext?: CacheScopeContext;
|
|
2764
|
+
cacheContext?: CacheScopeContext; /** Whether registered background jobs should settle before scope finalizes. */
|
|
2765
|
+
waitForBackgroundJobs?: boolean;
|
|
2748
2766
|
};
|
|
2749
2767
|
/**
|
|
2750
2768
|
* Execute a callback inside a fresh eval case scope and capture its outputs,
|
|
@@ -2934,6 +2952,11 @@ type TraceSpanInfoBase = {
|
|
|
2934
2952
|
kind: string;
|
|
2935
2953
|
name: string;
|
|
2936
2954
|
attributes?: Record<string, unknown>;
|
|
2955
|
+
/**
|
|
2956
|
+
* Whether this span should delay eval finalization when the returned promise
|
|
2957
|
+
* is not awaited by user code. Defaults to `true`.
|
|
2958
|
+
*/
|
|
2959
|
+
waitForBackgroundJob?: boolean;
|
|
2937
2960
|
};
|
|
2938
2961
|
/** Info accepted by `evalTracer.span(info, fn)` when creating an uncached span. */
|
|
2939
2962
|
type TraceSpanInfoUncached = TraceSpanInfoBase & {
|
|
@@ -3112,4 +3135,4 @@ declare function createRunner({
|
|
|
3112
3135
|
*/
|
|
3113
3136
|
declare function runCli(argv: string[]): Promise<void>;
|
|
3114
3137
|
//#endregion
|
|
3115
|
-
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
3138
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-
|
|
3
|
-
import "./src-
|
|
4
|
-
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as defineEval, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, fn as getEvalRegistry, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, kt as traceDisplayConfigSchema, ln as startEvalBackgroundJob, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as repoFile, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-H0pSUl3I.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-lOZdhO2D.mjs";
|
|
3
|
+
import "./src-Btb9RCYD.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { F as runSummarySchema, It as columnDefSchema, P as runManifestSchema, S as createRunRequestSchema, nt as evalStatsConfigSchema, pt as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-
|
|
1
|
+
import { F as runSummarySchema, It as columnDefSchema, P as runManifestSchema, S as createRunRequestSchema, nt as evalStatsConfigSchema, pt as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-H0pSUl3I.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { z } from "zod/v4";
|
|
@@ -68,6 +68,30 @@ function getCurrentScope() {
|
|
|
68
68
|
function isInEvalScope() {
|
|
69
69
|
return getCurrentScope() !== void 0;
|
|
70
70
|
}
|
|
71
|
+
function registerBackgroundJobInScope(scope, promise) {
|
|
72
|
+
const trackedPromise = promise.then(() => {
|
|
73
|
+
scope.pendingBackgroundJobs.delete(trackedPromise);
|
|
74
|
+
}, () => {
|
|
75
|
+
scope.pendingBackgroundJobs.delete(trackedPromise);
|
|
76
|
+
});
|
|
77
|
+
scope.pendingBackgroundJobs.add(trackedPromise);
|
|
78
|
+
return promise;
|
|
79
|
+
}
|
|
80
|
+
async function drainBackgroundJobs(scope) {
|
|
81
|
+
while (scope.pendingBackgroundJobs.size > 0) await Promise.allSettled([...scope.pendingBackgroundJobs]);
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Register background work that should settle before eval finalization.
|
|
85
|
+
*
|
|
86
|
+
* The original promise is returned unchanged, and its fulfillment or rejection
|
|
87
|
+
* behavior remains normal for callers. The eval runtime only waits for
|
|
88
|
+
* settlement; it does not convert background rejections into case errors.
|
|
89
|
+
*/
|
|
90
|
+
function startEvalBackgroundJob(promise) {
|
|
91
|
+
const scope = getCurrentScope();
|
|
92
|
+
if (!scope) return promise;
|
|
93
|
+
return registerBackgroundJobInScope(scope, promise);
|
|
94
|
+
}
|
|
71
95
|
function isObjectLike(value) {
|
|
72
96
|
return typeof value === "object" && value !== null;
|
|
73
97
|
}
|
|
@@ -117,18 +141,22 @@ async function runInEvalScope(caseId, fn, options = {}) {
|
|
|
117
141
|
recordingStack: [],
|
|
118
142
|
replayingDepth: 0,
|
|
119
143
|
cacheContext: options.cacheContext,
|
|
120
|
-
caseCacheRefs: []
|
|
144
|
+
caseCacheRefs: [],
|
|
145
|
+
pendingBackgroundJobs: /* @__PURE__ */ new Set()
|
|
121
146
|
};
|
|
122
147
|
activeEvalScopeCount++;
|
|
123
148
|
try {
|
|
124
149
|
return await scopeStorage.run(scope, async () => {
|
|
125
150
|
try {
|
|
151
|
+
const result = await fn();
|
|
152
|
+
if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
|
|
126
153
|
return {
|
|
127
|
-
result
|
|
154
|
+
result,
|
|
128
155
|
scope,
|
|
129
156
|
error: void 0
|
|
130
157
|
};
|
|
131
158
|
} catch (error) {
|
|
159
|
+
if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
|
|
132
160
|
return {
|
|
133
161
|
result: void 0,
|
|
134
162
|
scope,
|
|
@@ -2849,7 +2877,12 @@ function captureEvalSpanError(errorOrErrors, ...additionalErrorsOrOptions) {
|
|
|
2849
2877
|
appendSpanErrors(currentSpan, errors);
|
|
2850
2878
|
});
|
|
2851
2879
|
}
|
|
2852
|
-
|
|
2880
|
+
function traceSpan(info, fn) {
|
|
2881
|
+
const promise = traceSpanInternal(info, fn);
|
|
2882
|
+
if (!getCurrentScope() || info.waitForBackgroundJob === false) return promise;
|
|
2883
|
+
return startEvalBackgroundJob(promise);
|
|
2884
|
+
}
|
|
2885
|
+
async function traceSpanInternal(info, fn) {
|
|
2853
2886
|
const scope = getCurrentScope();
|
|
2854
2887
|
if (!scope) return await fn(noopActiveSpan());
|
|
2855
2888
|
const id = generateSpanId();
|
|
@@ -5432,6 +5465,7 @@ async function runCase(params) {
|
|
|
5432
5465
|
}, {
|
|
5433
5466
|
input: evalCase.input,
|
|
5434
5467
|
idPrefix: scopedIdPrefix,
|
|
5468
|
+
waitForBackgroundJobs: evalDef.waitForBackgroundJobs !== false,
|
|
5435
5469
|
cacheContext: cacheAdapter ? {
|
|
5436
5470
|
adapter: cacheAdapter,
|
|
5437
5471
|
mode: cacheMode,
|
|
@@ -5961,4 +5995,4 @@ function toLastRunStatus(status) {
|
|
|
5961
5995
|
return status === "pending" ? null : status;
|
|
5962
5996
|
}
|
|
5963
5997
|
//#endregion
|
|
5964
|
-
export { evalFreshnessStatusSchema as $, evalAssert as $t, getEvalDisplayStatus as A, traceDisplayInputConfigSchema as At, apiCallMetricPlacementSchema as B, jsonCellSchema as Bt, updateManualScoreRequestSchema as C, spanCacheOptionsSchema as Ct, extractLlmCalls as D, traceAttributeDisplayPlacementSchema as Dt, extractApiCalls as E, traceAttributeDisplayInputSchema as Et, runSummarySchema as F, cellValueSchema as Ft, llmCallMetricSchema as G, buildTraceTree as Gt, apiCallsConfigSchema as H, repoFileRefSchema as Ht, DEFAULT_API_CALLS_CONFIG as I, columnDefSchema as It, resolveLlmCallsConfig as J, evalTracer as Jt, llmCallsConfigSchema as K, captureEvalSpanError as Kt, DEFAULT_LLM_CALLS_CONFIG as L, columnFormatSchema as Lt, deriveStatusFromCaseRows as M, traceSpanKindSchema as Mt, deriveStatusFromChildStatuses as N, traceSpanSchema as Nt, getNestedAttribute as O, traceAttributeDisplaySchema as Ot, runManifestSchema as P, traceSpanWarningSchema as Pt, caseRowSchema as Q, appendToEvalOutput as Qt, agentEvalsConfigSchema as R, columnKindSchema as Rt, createRunRequestSchema as S, serializedCacheSpanSchema as St, extractCacheHits as T, traceAttributeDisplayFormatSchema as Tt, llmCallMetricFormatSchema as U, runArtifactRefSchema as Ut, apiCallMetricSchema as V, numberDisplayOptionsSchema as Vt, llmCallMetricPlacementSchema as W, z$1 as Wt, assertionFailureSchema as X, hashCacheKeySync as Xt, trialSelectionModeSchema as Y, hashCacheKey as Yt, caseDetailSchema as Z, EvalAssertionError as Zt, loadEvalModule as _, cacheModeSchema as _t, loadPersistedRunSnapshot as a, nextEvalId as an, evalChartAggregateSchema as at, normalizeScoreDef as b, cacheRecordingSchema as bt, persistCaseDetail as c, setScopeCacheContext as cn, evalChartColorSchema as ct, recomputePersistedCaseStatus as d,
|
|
5998
|
+
export { evalFreshnessStatusSchema as $, evalAssert as $t, getEvalDisplayStatus as A, traceDisplayInputConfigSchema as At, apiCallMetricPlacementSchema as B, jsonCellSchema as Bt, updateManualScoreRequestSchema as C, spanCacheOptionsSchema as Ct, extractLlmCalls as D, traceAttributeDisplayPlacementSchema as Dt, extractApiCalls as E, traceAttributeDisplayInputSchema as Et, runSummarySchema as F, cellValueSchema as Ft, llmCallMetricSchema as G, buildTraceTree as Gt, apiCallsConfigSchema as H, repoFileRefSchema as Ht, DEFAULT_API_CALLS_CONFIG as I, columnDefSchema as It, resolveLlmCallsConfig as J, evalTracer as Jt, llmCallsConfigSchema as K, captureEvalSpanError as Kt, DEFAULT_LLM_CALLS_CONFIG as L, columnFormatSchema as Lt, deriveStatusFromCaseRows as M, traceSpanKindSchema as Mt, deriveStatusFromChildStatuses as N, traceSpanSchema as Nt, getNestedAttribute as O, traceAttributeDisplaySchema as Ot, runManifestSchema as P, traceSpanWarningSchema as Pt, caseRowSchema as Q, appendToEvalOutput as Qt, agentEvalsConfigSchema as R, columnKindSchema as Rt, createRunRequestSchema as S, serializedCacheSpanSchema as St, extractCacheHits as T, traceAttributeDisplayFormatSchema as Tt, llmCallMetricFormatSchema as U, runArtifactRefSchema as Ut, apiCallMetricSchema as V, numberDisplayOptionsSchema as Vt, llmCallMetricPlacementSchema as W, z$1 as Wt, assertionFailureSchema as X, hashCacheKeySync as Xt, trialSelectionModeSchema as Y, hashCacheKey as Yt, caseDetailSchema as Z, EvalAssertionError as Zt, loadEvalModule as _, cacheModeSchema as _t, loadPersistedRunSnapshot as a, nextEvalId as an, evalChartAggregateSchema as at, normalizeScoreDef as b, cacheRecordingSchema as bt, persistCaseDetail as c, setScopeCacheContext as cn, evalChartColorSchema as ct, recomputePersistedCaseStatus as d, defineEval as dn, evalChartTooltipExtraSchema as dt, getCurrentScope as en, evalStatAggregateSchema as et, runTouchesEval as f, getEvalRegistry as fn, evalChartTypeSchema as ft, setLatestRunInfoMap as g, cacheListItemSchema as gt, getTargetEvalIds as h, cacheFileSchema as ht, getLatestRunInfos as i, mergeEvalOutput as in, scoreTraceSchema as it, deriveScopedSummaryFromCases as j, traceSpanErrorSchema as jt, getEvalTitle as k, traceDisplayConfigSchema as kt, persistRunState as l, startEvalBackgroundJob as ln, evalChartConfigSchema as lt, buildEvalSummary as m, cacheEntrySchema as mt, generateRunId as n, incrementEvalOutput as nn, evalStatsConfigSchema as nt, loadPersistedRunSnapshots as o, runInEvalScope as on, evalChartAxisSchema as ot, resolveArtifactPath as p, evalChartsConfigSchema as pt, resolveApiCallsConfig as q, evalSpan as qt, getLastRunStatuses as r, isInEvalScope as rn, evalSummarySchema as rt, nextShortIdFromSnapshots as s, setEvalOutput as sn, evalChartBuiltinMetricSchema as st, executeRun as t, getEvalCaseInput as tn, evalStatItemSchema as tt, recomputeEvalStatusesInRuns as u, repoFile as un, evalChartMetricSchema as ut, loadConfig as v, cacheOperationTypeSchema as vt, sseEnvelopeSchema as w, traceCacheRefSchema as wt, createFsCacheStore as x, cacheStatusSchema as xt, buildDeclaredColumnDefs as y, cacheRecordingOpSchema as yt, apiCallMetricFormatSchema as z, fileRefSchema as zt };
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-CmbmfBG2.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-
|
|
2
|
-
import "./src-
|
|
1
|
+
import { n as createRunner } from "./cli-lOZdhO2D.mjs";
|
|
2
|
+
import "./src-Btb9RCYD.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
package/package.json
CHANGED
package/dist/src-BU6ZtVIB.mjs
DELETED