@ls-stack/agent-eval 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -122,19 +122,13 @@ declare const cellValueSchema: z.ZodUnion<readonly [z.ZodType<string | number |
122
122
  type CellValue = z.infer<typeof cellValueSchema>;
123
123
  //#endregion
124
124
  //#region ../shared/src/schemas/trace.d.ts
125
- /** Schema for the semantic categories used to classify trace spans. */
126
- declare const traceSpanKindSchema: z.ZodEnum<{
127
- eval: "eval";
128
- agent: "agent";
129
- llm: "llm";
130
- tool: "tool";
131
- retrieval: "retrieval";
132
- scorer: "scorer";
133
- checkpoint: "checkpoint";
134
- custom: "custom";
135
- }>;
136
- /** Semantic category used to classify a trace span in the UI. */
137
- type TraceSpanKind = z.infer<typeof traceSpanKindSchema>;
125
+ /**
126
+ * Schema for span categories recorded in traces.
127
+ *
128
+ * The value is intentionally open-ended so external tracers can preserve their
129
+ * native span kinds instead of collapsing them into the built-in categories.
130
+ */
131
+ declare const traceSpanKindSchema: z.ZodString;
138
132
  /** Schema for the supported presentation formats of trace attributes. */
139
133
  declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
140
134
  string: "string";
@@ -301,21 +295,21 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
301
295
  }, z.core.$strip>;
302
296
  /** Trace display configuration authored by users in config or eval files. */
303
297
  type TraceDisplayInputConfig = z.infer<typeof traceDisplayInputConfigSchema>;
298
+ /** Schema for an error attached to a trace span. */
299
+ declare const traceSpanErrorSchema: z.ZodObject<{
300
+ name: z.ZodOptional<z.ZodString>;
301
+ message: z.ZodString;
302
+ stack: z.ZodOptional<z.ZodString>;
303
+ capturedAt: z.ZodOptional<z.ZodString>;
304
+ }, z.core.$catchall<z.ZodUnknown>>;
305
+ /** Error payload stored on a trace span. */
306
+ type EvalTraceSpanError = z.infer<typeof traceSpanErrorSchema>;
304
307
  /** Schema for a persisted trace span captured during case execution. */
305
308
  declare const traceSpanSchema: z.ZodObject<{
306
309
  id: z.ZodString;
307
310
  parentId: z.ZodNullable<z.ZodString>;
308
311
  caseId: z.ZodString;
309
- kind: z.ZodEnum<{
310
- eval: "eval";
311
- agent: "agent";
312
- llm: "llm";
313
- tool: "tool";
314
- retrieval: "retrieval";
315
- scorer: "scorer";
316
- checkpoint: "checkpoint";
317
- custom: "custom";
318
- }>;
312
+ kind: z.ZodString;
319
313
  name: z.ZodString;
320
314
  startedAt: z.ZodString;
321
315
  endedAt: z.ZodNullable<z.ZodString>;
@@ -330,7 +324,14 @@ declare const traceSpanSchema: z.ZodObject<{
330
324
  name: z.ZodOptional<z.ZodString>;
331
325
  message: z.ZodString;
332
326
  stack: z.ZodOptional<z.ZodString>;
333
- }, z.core.$strip>>;
327
+ capturedAt: z.ZodOptional<z.ZodString>;
328
+ }, z.core.$catchall<z.ZodUnknown>>>;
329
+ errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
330
+ name: z.ZodOptional<z.ZodString>;
331
+ message: z.ZodString;
332
+ stack: z.ZodOptional<z.ZodString>;
333
+ capturedAt: z.ZodOptional<z.ZodString>;
334
+ }, z.core.$catchall<z.ZodUnknown>>>>;
334
335
  }, z.core.$strip>;
335
336
  /** Persisted trace span shape stored for each eval case run. */
336
337
  type EvalTraceSpan = z.infer<typeof traceSpanSchema>;
@@ -650,16 +651,7 @@ declare const scoreTraceSchema: z.ZodObject<{
650
651
  id: z.ZodString;
651
652
  parentId: z.ZodNullable<z.ZodString>;
652
653
  caseId: z.ZodString;
653
- kind: z.ZodEnum<{
654
- custom: "custom";
655
- eval: "eval";
656
- agent: "agent";
657
- llm: "llm";
658
- tool: "tool";
659
- retrieval: "retrieval";
660
- scorer: "scorer";
661
- checkpoint: "checkpoint";
662
- }>;
654
+ kind: z.ZodString;
663
655
  name: z.ZodString;
664
656
  startedAt: z.ZodString;
665
657
  endedAt: z.ZodNullable<z.ZodString>;
@@ -674,7 +666,14 @@ declare const scoreTraceSchema: z.ZodObject<{
674
666
  name: z.ZodOptional<z.ZodString>;
675
667
  message: z.ZodString;
676
668
  stack: z.ZodOptional<z.ZodString>;
677
- }, z.core.$strip>>;
669
+ capturedAt: z.ZodOptional<z.ZodString>;
670
+ }, z.core.$catchall<z.ZodUnknown>>>;
671
+ errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
672
+ name: z.ZodOptional<z.ZodString>;
673
+ message: z.ZodString;
674
+ stack: z.ZodOptional<z.ZodString>;
675
+ capturedAt: z.ZodOptional<z.ZodString>;
676
+ }, z.core.$catchall<z.ZodUnknown>>>>;
678
677
  }, z.core.$strip>>;
679
678
  traceDisplay: z.ZodObject<{
680
679
  attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -724,16 +723,7 @@ declare const caseDetailSchema: z.ZodObject<{
724
723
  id: z.ZodString;
725
724
  parentId: z.ZodNullable<z.ZodString>;
726
725
  caseId: z.ZodString;
727
- kind: z.ZodEnum<{
728
- custom: "custom";
729
- eval: "eval";
730
- agent: "agent";
731
- llm: "llm";
732
- tool: "tool";
733
- retrieval: "retrieval";
734
- scorer: "scorer";
735
- checkpoint: "checkpoint";
736
- }>;
726
+ kind: z.ZodString;
737
727
  name: z.ZodString;
738
728
  startedAt: z.ZodString;
739
729
  endedAt: z.ZodNullable<z.ZodString>;
@@ -748,7 +738,14 @@ declare const caseDetailSchema: z.ZodObject<{
748
738
  name: z.ZodOptional<z.ZodString>;
749
739
  message: z.ZodString;
750
740
  stack: z.ZodOptional<z.ZodString>;
751
- }, z.core.$strip>>;
741
+ capturedAt: z.ZodOptional<z.ZodString>;
742
+ }, z.core.$catchall<z.ZodUnknown>>>;
743
+ errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
744
+ name: z.ZodOptional<z.ZodString>;
745
+ message: z.ZodString;
746
+ stack: z.ZodOptional<z.ZodString>;
747
+ capturedAt: z.ZodOptional<z.ZodString>;
748
+ }, z.core.$catchall<z.ZodUnknown>>>>;
752
749
  }, z.core.$strip>>;
753
750
  traceDisplay: z.ZodObject<{
754
751
  attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -783,16 +780,7 @@ declare const caseDetailSchema: z.ZodObject<{
783
780
  id: z.ZodString;
784
781
  parentId: z.ZodNullable<z.ZodString>;
785
782
  caseId: z.ZodString;
786
- kind: z.ZodEnum<{
787
- custom: "custom";
788
- eval: "eval";
789
- agent: "agent";
790
- llm: "llm";
791
- tool: "tool";
792
- retrieval: "retrieval";
793
- scorer: "scorer";
794
- checkpoint: "checkpoint";
795
- }>;
783
+ kind: z.ZodString;
796
784
  name: z.ZodString;
797
785
  startedAt: z.ZodString;
798
786
  endedAt: z.ZodNullable<z.ZodString>;
@@ -807,7 +795,14 @@ declare const caseDetailSchema: z.ZodObject<{
807
795
  name: z.ZodOptional<z.ZodString>;
808
796
  message: z.ZodString;
809
797
  stack: z.ZodOptional<z.ZodString>;
810
- }, z.core.$strip>>;
798
+ capturedAt: z.ZodOptional<z.ZodString>;
799
+ }, z.core.$catchall<z.ZodUnknown>>>;
800
+ errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
801
+ name: z.ZodOptional<z.ZodString>;
802
+ message: z.ZodString;
803
+ stack: z.ZodOptional<z.ZodString>;
804
+ capturedAt: z.ZodOptional<z.ZodString>;
805
+ }, z.core.$catchall<z.ZodUnknown>>>>;
811
806
  }, z.core.$strip>>;
812
807
  traceDisplay: z.ZodObject<{
813
808
  attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -1491,16 +1486,7 @@ declare const cacheListItemSchema: z.ZodObject<{
1491
1486
  key: z.ZodString;
1492
1487
  namespace: z.ZodString;
1493
1488
  spanName: z.ZodString;
1494
- spanKind: z.ZodEnum<{
1495
- eval: "eval";
1496
- agent: "agent";
1497
- llm: "llm";
1498
- tool: "tool";
1499
- retrieval: "retrieval";
1500
- scorer: "scorer";
1501
- checkpoint: "checkpoint";
1502
- custom: "custom";
1503
- }>;
1489
+ spanKind: z.ZodString;
1504
1490
  storedAt: z.ZodString;
1505
1491
  codeFingerprint: z.ZodString;
1506
1492
  sizeBytes: z.ZodNumber;
@@ -1509,15 +1495,12 @@ declare const cacheListItemSchema: z.ZodObject<{
1509
1495
  type CacheListItem = z.infer<typeof cacheListItemSchema>;
1510
1496
  /** Serialized nested span captured while recording a cached operation. */
1511
1497
  type SerializedCacheSpan = {
1512
- kind: TraceSpanKind;
1498
+ kind: string;
1513
1499
  name: string;
1514
1500
  attributes?: Record<string, unknown>;
1515
1501
  status: 'running' | 'ok' | 'error' | 'cancelled';
1516
- error?: {
1517
- name?: string;
1518
- message: string;
1519
- stack?: string;
1520
- };
1502
+ error?: EvalTraceSpanError;
1503
+ errors?: EvalTraceSpanError[];
1521
1504
  children: SerializedCacheSpan[];
1522
1505
  };
1523
1506
  /** Zod schema for `SerializedCacheSpan`, defined lazily for recursion. */
@@ -1550,6 +1533,24 @@ type CacheRecordingOp = z.infer<typeof cacheRecordingOpSchema>;
1550
1533
  declare const cacheRecordingSchema: z.ZodObject<{
1551
1534
  returnValue: z.ZodUnknown;
1552
1535
  finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
1536
+ finalStatus: z.ZodOptional<z.ZodEnum<{
1537
+ error: "error";
1538
+ running: "running";
1539
+ ok: "ok";
1540
+ cancelled: "cancelled";
1541
+ }>>;
1542
+ finalError: z.ZodOptional<z.ZodObject<{
1543
+ name: z.ZodOptional<z.ZodString>;
1544
+ message: z.ZodString;
1545
+ stack: z.ZodOptional<z.ZodString>;
1546
+ capturedAt: z.ZodOptional<z.ZodString>;
1547
+ }, z.core.$catchall<z.ZodUnknown>>>;
1548
+ finalErrors: z.ZodOptional<z.ZodArray<z.ZodObject<{
1549
+ name: z.ZodOptional<z.ZodString>;
1550
+ message: z.ZodString;
1551
+ stack: z.ZodOptional<z.ZodString>;
1552
+ capturedAt: z.ZodOptional<z.ZodString>;
1553
+ }, z.core.$catchall<z.ZodUnknown>>>>;
1553
1554
  ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1554
1555
  kind: z.ZodLiteral<"setOutput">;
1555
1556
  key: z.ZodString;
@@ -1575,21 +1576,30 @@ declare const cacheEntrySchema: z.ZodObject<{
1575
1576
  key: z.ZodString;
1576
1577
  namespace: z.ZodString;
1577
1578
  spanName: z.ZodString;
1578
- spanKind: z.ZodEnum<{
1579
- eval: "eval";
1580
- agent: "agent";
1581
- llm: "llm";
1582
- tool: "tool";
1583
- retrieval: "retrieval";
1584
- scorer: "scorer";
1585
- checkpoint: "checkpoint";
1586
- custom: "custom";
1587
- }>;
1579
+ spanKind: z.ZodString;
1588
1580
  storedAt: z.ZodString;
1589
1581
  codeFingerprint: z.ZodString;
1590
1582
  recording: z.ZodObject<{
1591
1583
  returnValue: z.ZodUnknown;
1592
1584
  finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
1585
+ finalStatus: z.ZodOptional<z.ZodEnum<{
1586
+ error: "error";
1587
+ running: "running";
1588
+ ok: "ok";
1589
+ cancelled: "cancelled";
1590
+ }>>;
1591
+ finalError: z.ZodOptional<z.ZodObject<{
1592
+ name: z.ZodOptional<z.ZodString>;
1593
+ message: z.ZodString;
1594
+ stack: z.ZodOptional<z.ZodString>;
1595
+ capturedAt: z.ZodOptional<z.ZodString>;
1596
+ }, z.core.$catchall<z.ZodUnknown>>>;
1597
+ finalErrors: z.ZodOptional<z.ZodArray<z.ZodObject<{
1598
+ name: z.ZodOptional<z.ZodString>;
1599
+ message: z.ZodString;
1600
+ stack: z.ZodOptional<z.ZodString>;
1601
+ capturedAt: z.ZodOptional<z.ZodString>;
1602
+ }, z.core.$catchall<z.ZodUnknown>>>>;
1593
1603
  ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1594
1604
  kind: z.ZodLiteral<"setOutput">;
1595
1605
  key: z.ZodString;
@@ -1619,21 +1629,30 @@ declare const cacheFileSchema: z.ZodObject<{
1619
1629
  key: z.ZodString;
1620
1630
  namespace: z.ZodString;
1621
1631
  spanName: z.ZodString;
1622
- spanKind: z.ZodEnum<{
1623
- eval: "eval";
1624
- agent: "agent";
1625
- llm: "llm";
1626
- tool: "tool";
1627
- retrieval: "retrieval";
1628
- scorer: "scorer";
1629
- checkpoint: "checkpoint";
1630
- custom: "custom";
1631
- }>;
1632
+ spanKind: z.ZodString;
1632
1633
  storedAt: z.ZodString;
1633
1634
  codeFingerprint: z.ZodString;
1634
1635
  recording: z.ZodObject<{
1635
1636
  returnValue: z.ZodUnknown;
1636
1637
  finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
1638
+ finalStatus: z.ZodOptional<z.ZodEnum<{
1639
+ error: "error";
1640
+ running: "running";
1641
+ ok: "ok";
1642
+ cancelled: "cancelled";
1643
+ }>>;
1644
+ finalError: z.ZodOptional<z.ZodObject<{
1645
+ name: z.ZodOptional<z.ZodString>;
1646
+ message: z.ZodString;
1647
+ stack: z.ZodOptional<z.ZodString>;
1648
+ capturedAt: z.ZodOptional<z.ZodString>;
1649
+ }, z.core.$catchall<z.ZodUnknown>>>;
1650
+ finalErrors: z.ZodOptional<z.ZodArray<z.ZodObject<{
1651
+ name: z.ZodOptional<z.ZodString>;
1652
+ message: z.ZodString;
1653
+ stack: z.ZodOptional<z.ZodString>;
1654
+ capturedAt: z.ZodOptional<z.ZodString>;
1655
+ }, z.core.$catchall<z.ZodUnknown>>>>;
1637
1656
  ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
1638
1657
  kind: z.ZodLiteral<"setOutput">;
1639
1658
  key: z.ZodString;
@@ -1703,7 +1722,7 @@ type EvalTraceTree = {
1703
1722
  spans: EvalTraceSpan[];
1704
1723
  rootSpans: EvalTraceSpan[];
1705
1724
  findSpan: (name: string) => EvalTraceSpan | undefined;
1706
- findSpansByKind: (kind: EvalTraceSpan['kind']) => EvalTraceSpan[];
1725
+ findSpansByKind: (kind: string) => EvalTraceSpan[];
1707
1726
  flattenDfs: () => EvalTraceSpan[];
1708
1727
  checkpoints: Map<string, unknown>;
1709
1728
  };
@@ -1963,7 +1982,7 @@ type TraceSpanTimestamp = Date | string;
1963
1982
  type TraceExternalSpanStartInfo = {
1964
1983
  /** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
1965
1984
  parentId?: string | null; /** Semantic category used by the trace UI. */
1966
- kind: EvalTraceSpan['kind']; /** Display name for the span. */
1985
+ kind: string; /** Display name for the span. */
1967
1986
  name: string; /** Span start time. Defaults to now. */
1968
1987
  startedAt?: TraceSpanTimestamp; /** Initial span attributes. Later updates merge into this object. */
1969
1988
  attributes?: Record<string, unknown>;
@@ -1984,7 +2003,7 @@ type TraceExternalSpanEndInfo = TraceExternalSpanUpdateInfo & {
1984
2003
  type TraceExternalSpanRecordInfo = {
1985
2004
  /** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
1986
2005
  parentId?: string | null; /** Semantic category used by the trace UI. */
1987
- kind: EvalTraceSpan['kind']; /** Display name for the span. */
2006
+ kind: string; /** Display name for the span. */
1988
2007
  name: string; /** Span start time. Defaults to now. */
1989
2008
  startedAt?: TraceSpanTimestamp; /** Span end time. Defaults to the start time. */
1990
2009
  endedAt?: TraceSpanTimestamp | null; /** Final span status. Defaults to `ok`. */
@@ -2007,8 +2026,15 @@ declare function recordExternalSpan(info: TraceExternalSpanRecordInfo): string;
2007
2026
  * Calls are no-ops when executed outside of `evalTracer.span(...)`.
2008
2027
  */
2009
2028
  declare const evalSpan: TraceActiveSpan;
2029
+ /**
2030
+ * Attach one or more recoverable errors to the active eval span.
2031
+ *
2032
+ * The active span is marked as `error` even if its callback later completes
2033
+ * without throwing. Calls outside `evalTracer.span(...)` are ignored.
2034
+ */
2035
+ declare function captureEvalSpanError(errorOrErrors: unknown, ...additionalErrors: readonly unknown[]): void;
2010
2036
  type TraceSpanInfoBase = {
2011
- kind: EvalTraceSpan['kind'];
2037
+ kind: string;
2012
2038
  name: string;
2013
2039
  attributes?: Record<string, unknown>;
2014
2040
  };
@@ -2117,7 +2143,8 @@ type EvalRunner = {
2117
2143
  cancelRun(id: string): void; /** Return full details for a single case in a run, when available. */
2118
2144
  getCaseDetail(runId: string, caseId: string): CaseDetail | undefined; /** Subscribe to streamed events for a specific run. */
2119
2145
  subscribe(runId: string, listener: (event: SseEnvelope) => void): () => void; /** Subscribe to discovery updates triggered by file changes or manual refresh. */
2120
- subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Resolve the workspace root backing this runner instance. */
2146
+ subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Stop background filesystem watchers owned by this runner instance. */
2147
+ close(): Promise<void>; /** Resolve the workspace root backing this runner instance. */
2121
2148
  getWorkspaceRoot(): string; /** Resolve a persisted artifact path when artifact storage is supported. */
2122
2149
  getArtifactPath(artifactId: string): string | undefined; /** Return summaries for every persisted cache entry in the workspace. */
2123
2150
  listCache(): Promise<CacheListItem[]>;
@@ -2175,4 +2202,4 @@ declare function createRunner({
2175
2202
  */
2176
2203
  declare function runCli(argv: string[]): Promise<void>;
2177
2204
  //#endregion
2178
- export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TraceSpanKind, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
2205
+ export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { $ as fileRefSchema, A as evalSummarySchema, B as evalChartsConfigSchema, C as assertionFailureSchema, D as evalStatAggregateSchema, E as evalFreshnessStatusSchema, F as evalChartColorSchema, G as traceDisplayConfigSchema, H as traceAttributeDisplayInputSchema, I as evalChartConfigSchema, J as traceSpanSchema, K as traceDisplayInputConfigSchema, L as evalChartMetricSchema, M as evalChartAggregateSchema, N as evalChartAxisSchema, O as evalStatItemSchema, P as evalChartBuiltinMetricSchema, Q as columnKindSchema, R as evalChartTooltipExtraSchema, S as spanCacheOptionsSchema, T as caseRowSchema, U as traceAttributeDisplayPlacementSchema, V as traceAttributeDisplayFormatSchema, W as traceAttributeDisplaySchema, X as columnDefSchema, Y as cellValueSchema, Z as columnFormatSchema, _ as cacheListItemSchema, _t as repoFile, a as sseEnvelopeSchema, at as evalSpan, b as cacheRecordingSchema, c as deriveScopedSummaryFromCases, ct as hashCacheKeySync, d as runManifestSchema, dt as getCurrentScope, et as jsonCellSchema, f as runSummarySchema, ft as incrementEvalOutput, g as cacheFileSchema, gt as setScopeCacheContext, h as cacheEntrySchema, ht as setEvalOutput, i as updateManualScoreRequestSchema, it as buildTraceTree, j as scoreTraceSchema, k as evalStatsConfigSchema, l as deriveStatusFromCaseRows, lt as EvalAssertionError, m as trialSelectionModeSchema, mt as runInEvalScope, n as createRunner, nt as repoFileRefSchema, o as getEvalTitle, ot as evalTracer, p as agentEvalsConfigSchema, pt as isInEvalScope, q as traceSpanKindSchema, r as createRunRequestSchema, rt as runArtifactRefSchema, s as getEvalDisplayStatus, st as hashCacheKey, t as runCli, tt as numberDisplayOptionsSchema, u as deriveStatusFromChildStatuses, ut as evalAssert, v as cacheModeSchema, vt as defineEval, w as caseDetailSchema, x as serializedCacheSpanSchema, y as cacheRecordingOpSchema, yt as getEvalRegistry, z as evalChartTypeSchema } from "./cli-Dw9et3_Z.mjs";
2
- import "./src-CXclO9ZI.mjs";
3
- export { EvalAssertionError, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
1
+ import { $ as columnKindSchema, A as evalSummarySchema, B as evalChartsConfigSchema, C as assertionFailureSchema, D as evalStatAggregateSchema, E as evalFreshnessStatusSchema, F as evalChartColorSchema, G as traceDisplayConfigSchema, H as traceAttributeDisplayInputSchema, I as evalChartConfigSchema, J as traceSpanKindSchema, K as traceDisplayInputConfigSchema, L as evalChartMetricSchema, M as evalChartAggregateSchema, N as evalChartAxisSchema, O as evalStatItemSchema, P as evalChartBuiltinMetricSchema, Q as columnFormatSchema, R as evalChartTooltipExtraSchema, S as spanCacheOptionsSchema, T as caseRowSchema, U as traceAttributeDisplayPlacementSchema, V as traceAttributeDisplayFormatSchema, W as traceAttributeDisplaySchema, X as cellValueSchema, Y as traceSpanSchema, Z as columnDefSchema, _ as cacheListItemSchema, _t as setEvalOutput, a as sseEnvelopeSchema, at as buildTraceTree, b as cacheRecordingSchema, bt as defineEval, c as deriveScopedSummaryFromCases, ct as evalTracer, d as runManifestSchema, dt as EvalAssertionError, et as fileRefSchema, f as runSummarySchema, ft as evalAssert, g as cacheFileSchema, gt as runInEvalScope, h as cacheEntrySchema, ht as isInEvalScope, i as updateManualScoreRequestSchema, it as runArtifactRefSchema, j as scoreTraceSchema, k as evalStatsConfigSchema, l as deriveStatusFromCaseRows, lt as hashCacheKey, m as trialSelectionModeSchema, mt as incrementEvalOutput, n as createRunner, nt as numberDisplayOptionsSchema, o as getEvalTitle, ot as captureEvalSpanError, p as agentEvalsConfigSchema, pt as getCurrentScope, q as traceSpanErrorSchema, r as createRunRequestSchema, rt as repoFileRefSchema, s as getEvalDisplayStatus, st as evalSpan, t as runCli, tt as jsonCellSchema, u as deriveStatusFromChildStatuses, ut as hashCacheKeySync, v as cacheModeSchema, vt as setScopeCacheContext, w as caseDetailSchema, x as serializedCacheSpanSchema, xt as getEvalRegistry, y as cacheRecordingOpSchema, yt as repoFile, z as evalChartTypeSchema } from "./cli-C5FL7C4G.mjs";
2
+ import "./src-gqm1z1Nu.mjs";
3
+ export { EvalAssertionError, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-kSiHsl91.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-K2bN8KRS.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-Dw9et3_Z.mjs";
2
- import "./src-CXclO9ZI.mjs";
1
+ import { n as createRunner } from "./cli-C5FL7C4G.mjs";
2
+ import "./src-gqm1z1Nu.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -0,0 +1,2 @@
1
+ import "./cli-C5FL7C4G.mjs";
2
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.3.0",
3
+ "version": "0.5.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"