@ls-stack/agent-eval 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DXYLqlWb.mjs → app-C5CJ1sX6.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-CBvHVkE7.js +109 -0
- package/dist/apps/web/dist/assets/index-Dd7I28ts.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-Dw9et3_Z.mjs → cli-C5FL7C4G.mjs} +190 -47
- package/dist/index.d.mts +126 -99
- package/dist/index.mjs +3 -3
- package/dist/{runner-CToL8eJs.mjs → runner-Cdlvk56X.mjs} +1 -1
- package/dist/{runner-kSiHsl91.mjs → runner-K2bN8KRS.mjs} +2 -2
- package/dist/src-gqm1z1Nu.mjs +2 -0
- package/package.json +1 -1
- package/dist/apps/web/dist/assets/index-Bq4Dz6AV.js +0 -109
- package/dist/apps/web/dist/assets/index-b2k20tzL.css +0 -1
- package/dist/src-CXclO9ZI.mjs +0 -2
package/dist/index.d.mts
CHANGED
|
@@ -122,19 +122,13 @@ declare const cellValueSchema: z.ZodUnion<readonly [z.ZodType<string | number |
|
|
|
122
122
|
type CellValue = z.infer<typeof cellValueSchema>;
|
|
123
123
|
//#endregion
|
|
124
124
|
//#region ../shared/src/schemas/trace.d.ts
|
|
125
|
-
/**
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
scorer: "scorer";
|
|
133
|
-
checkpoint: "checkpoint";
|
|
134
|
-
custom: "custom";
|
|
135
|
-
}>;
|
|
136
|
-
/** Semantic category used to classify a trace span in the UI. */
|
|
137
|
-
type TraceSpanKind = z.infer<typeof traceSpanKindSchema>;
|
|
125
|
+
/**
|
|
126
|
+
* Schema for span categories recorded in traces.
|
|
127
|
+
*
|
|
128
|
+
* The value is intentionally open-ended so external tracers can preserve their
|
|
129
|
+
* native span kinds instead of collapsing them into the built-in categories.
|
|
130
|
+
*/
|
|
131
|
+
declare const traceSpanKindSchema: z.ZodString;
|
|
138
132
|
/** Schema for the supported presentation formats of trace attributes. */
|
|
139
133
|
declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
|
|
140
134
|
string: "string";
|
|
@@ -301,21 +295,21 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
|
|
|
301
295
|
}, z.core.$strip>;
|
|
302
296
|
/** Trace display configuration authored by users in config or eval files. */
|
|
303
297
|
type TraceDisplayInputConfig = z.infer<typeof traceDisplayInputConfigSchema>;
|
|
298
|
+
/** Schema for an error attached to a trace span. */
|
|
299
|
+
declare const traceSpanErrorSchema: z.ZodObject<{
|
|
300
|
+
name: z.ZodOptional<z.ZodString>;
|
|
301
|
+
message: z.ZodString;
|
|
302
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
303
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
304
|
+
}, z.core.$catchall<z.ZodUnknown>>;
|
|
305
|
+
/** Error payload stored on a trace span. */
|
|
306
|
+
type EvalTraceSpanError = z.infer<typeof traceSpanErrorSchema>;
|
|
304
307
|
/** Schema for a persisted trace span captured during case execution. */
|
|
305
308
|
declare const traceSpanSchema: z.ZodObject<{
|
|
306
309
|
id: z.ZodString;
|
|
307
310
|
parentId: z.ZodNullable<z.ZodString>;
|
|
308
311
|
caseId: z.ZodString;
|
|
309
|
-
kind: z.
|
|
310
|
-
eval: "eval";
|
|
311
|
-
agent: "agent";
|
|
312
|
-
llm: "llm";
|
|
313
|
-
tool: "tool";
|
|
314
|
-
retrieval: "retrieval";
|
|
315
|
-
scorer: "scorer";
|
|
316
|
-
checkpoint: "checkpoint";
|
|
317
|
-
custom: "custom";
|
|
318
|
-
}>;
|
|
312
|
+
kind: z.ZodString;
|
|
319
313
|
name: z.ZodString;
|
|
320
314
|
startedAt: z.ZodString;
|
|
321
315
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -330,7 +324,14 @@ declare const traceSpanSchema: z.ZodObject<{
|
|
|
330
324
|
name: z.ZodOptional<z.ZodString>;
|
|
331
325
|
message: z.ZodString;
|
|
332
326
|
stack: z.ZodOptional<z.ZodString>;
|
|
333
|
-
|
|
327
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
328
|
+
}, z.core.$catchall<z.ZodUnknown>>>;
|
|
329
|
+
errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
330
|
+
name: z.ZodOptional<z.ZodString>;
|
|
331
|
+
message: z.ZodString;
|
|
332
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
333
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
334
|
+
}, z.core.$catchall<z.ZodUnknown>>>>;
|
|
334
335
|
}, z.core.$strip>;
|
|
335
336
|
/** Persisted trace span shape stored for each eval case run. */
|
|
336
337
|
type EvalTraceSpan = z.infer<typeof traceSpanSchema>;
|
|
@@ -650,16 +651,7 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
650
651
|
id: z.ZodString;
|
|
651
652
|
parentId: z.ZodNullable<z.ZodString>;
|
|
652
653
|
caseId: z.ZodString;
|
|
653
|
-
kind: z.
|
|
654
|
-
custom: "custom";
|
|
655
|
-
eval: "eval";
|
|
656
|
-
agent: "agent";
|
|
657
|
-
llm: "llm";
|
|
658
|
-
tool: "tool";
|
|
659
|
-
retrieval: "retrieval";
|
|
660
|
-
scorer: "scorer";
|
|
661
|
-
checkpoint: "checkpoint";
|
|
662
|
-
}>;
|
|
654
|
+
kind: z.ZodString;
|
|
663
655
|
name: z.ZodString;
|
|
664
656
|
startedAt: z.ZodString;
|
|
665
657
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -674,7 +666,14 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
674
666
|
name: z.ZodOptional<z.ZodString>;
|
|
675
667
|
message: z.ZodString;
|
|
676
668
|
stack: z.ZodOptional<z.ZodString>;
|
|
677
|
-
|
|
669
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
670
|
+
}, z.core.$catchall<z.ZodUnknown>>>;
|
|
671
|
+
errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
672
|
+
name: z.ZodOptional<z.ZodString>;
|
|
673
|
+
message: z.ZodString;
|
|
674
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
675
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
676
|
+
}, z.core.$catchall<z.ZodUnknown>>>>;
|
|
678
677
|
}, z.core.$strip>>;
|
|
679
678
|
traceDisplay: z.ZodObject<{
|
|
680
679
|
attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -724,16 +723,7 @@ declare const caseDetailSchema: z.ZodObject<{
|
|
|
724
723
|
id: z.ZodString;
|
|
725
724
|
parentId: z.ZodNullable<z.ZodString>;
|
|
726
725
|
caseId: z.ZodString;
|
|
727
|
-
kind: z.
|
|
728
|
-
custom: "custom";
|
|
729
|
-
eval: "eval";
|
|
730
|
-
agent: "agent";
|
|
731
|
-
llm: "llm";
|
|
732
|
-
tool: "tool";
|
|
733
|
-
retrieval: "retrieval";
|
|
734
|
-
scorer: "scorer";
|
|
735
|
-
checkpoint: "checkpoint";
|
|
736
|
-
}>;
|
|
726
|
+
kind: z.ZodString;
|
|
737
727
|
name: z.ZodString;
|
|
738
728
|
startedAt: z.ZodString;
|
|
739
729
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -748,7 +738,14 @@ declare const caseDetailSchema: z.ZodObject<{
|
|
|
748
738
|
name: z.ZodOptional<z.ZodString>;
|
|
749
739
|
message: z.ZodString;
|
|
750
740
|
stack: z.ZodOptional<z.ZodString>;
|
|
751
|
-
|
|
741
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
742
|
+
}, z.core.$catchall<z.ZodUnknown>>>;
|
|
743
|
+
errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
744
|
+
name: z.ZodOptional<z.ZodString>;
|
|
745
|
+
message: z.ZodString;
|
|
746
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
747
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
748
|
+
}, z.core.$catchall<z.ZodUnknown>>>>;
|
|
752
749
|
}, z.core.$strip>>;
|
|
753
750
|
traceDisplay: z.ZodObject<{
|
|
754
751
|
attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -783,16 +780,7 @@ declare const caseDetailSchema: z.ZodObject<{
|
|
|
783
780
|
id: z.ZodString;
|
|
784
781
|
parentId: z.ZodNullable<z.ZodString>;
|
|
785
782
|
caseId: z.ZodString;
|
|
786
|
-
kind: z.
|
|
787
|
-
custom: "custom";
|
|
788
|
-
eval: "eval";
|
|
789
|
-
agent: "agent";
|
|
790
|
-
llm: "llm";
|
|
791
|
-
tool: "tool";
|
|
792
|
-
retrieval: "retrieval";
|
|
793
|
-
scorer: "scorer";
|
|
794
|
-
checkpoint: "checkpoint";
|
|
795
|
-
}>;
|
|
783
|
+
kind: z.ZodString;
|
|
796
784
|
name: z.ZodString;
|
|
797
785
|
startedAt: z.ZodString;
|
|
798
786
|
endedAt: z.ZodNullable<z.ZodString>;
|
|
@@ -807,7 +795,14 @@ declare const caseDetailSchema: z.ZodObject<{
|
|
|
807
795
|
name: z.ZodOptional<z.ZodString>;
|
|
808
796
|
message: z.ZodString;
|
|
809
797
|
stack: z.ZodOptional<z.ZodString>;
|
|
810
|
-
|
|
798
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
799
|
+
}, z.core.$catchall<z.ZodUnknown>>>;
|
|
800
|
+
errors: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
801
|
+
name: z.ZodOptional<z.ZodString>;
|
|
802
|
+
message: z.ZodString;
|
|
803
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
804
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
805
|
+
}, z.core.$catchall<z.ZodUnknown>>>>;
|
|
811
806
|
}, z.core.$strip>>;
|
|
812
807
|
traceDisplay: z.ZodObject<{
|
|
813
808
|
attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -1491,16 +1486,7 @@ declare const cacheListItemSchema: z.ZodObject<{
|
|
|
1491
1486
|
key: z.ZodString;
|
|
1492
1487
|
namespace: z.ZodString;
|
|
1493
1488
|
spanName: z.ZodString;
|
|
1494
|
-
spanKind: z.
|
|
1495
|
-
eval: "eval";
|
|
1496
|
-
agent: "agent";
|
|
1497
|
-
llm: "llm";
|
|
1498
|
-
tool: "tool";
|
|
1499
|
-
retrieval: "retrieval";
|
|
1500
|
-
scorer: "scorer";
|
|
1501
|
-
checkpoint: "checkpoint";
|
|
1502
|
-
custom: "custom";
|
|
1503
|
-
}>;
|
|
1489
|
+
spanKind: z.ZodString;
|
|
1504
1490
|
storedAt: z.ZodString;
|
|
1505
1491
|
codeFingerprint: z.ZodString;
|
|
1506
1492
|
sizeBytes: z.ZodNumber;
|
|
@@ -1509,15 +1495,12 @@ declare const cacheListItemSchema: z.ZodObject<{
|
|
|
1509
1495
|
type CacheListItem = z.infer<typeof cacheListItemSchema>;
|
|
1510
1496
|
/** Serialized nested span captured while recording a cached operation. */
|
|
1511
1497
|
type SerializedCacheSpan = {
|
|
1512
|
-
kind:
|
|
1498
|
+
kind: string;
|
|
1513
1499
|
name: string;
|
|
1514
1500
|
attributes?: Record<string, unknown>;
|
|
1515
1501
|
status: 'running' | 'ok' | 'error' | 'cancelled';
|
|
1516
|
-
error?:
|
|
1517
|
-
|
|
1518
|
-
message: string;
|
|
1519
|
-
stack?: string;
|
|
1520
|
-
};
|
|
1502
|
+
error?: EvalTraceSpanError;
|
|
1503
|
+
errors?: EvalTraceSpanError[];
|
|
1521
1504
|
children: SerializedCacheSpan[];
|
|
1522
1505
|
};
|
|
1523
1506
|
/** Zod schema for `SerializedCacheSpan`, defined lazily for recursion. */
|
|
@@ -1550,6 +1533,24 @@ type CacheRecordingOp = z.infer<typeof cacheRecordingOpSchema>;
|
|
|
1550
1533
|
declare const cacheRecordingSchema: z.ZodObject<{
|
|
1551
1534
|
returnValue: z.ZodUnknown;
|
|
1552
1535
|
finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
1536
|
+
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
1537
|
+
error: "error";
|
|
1538
|
+
running: "running";
|
|
1539
|
+
ok: "ok";
|
|
1540
|
+
cancelled: "cancelled";
|
|
1541
|
+
}>>;
|
|
1542
|
+
finalError: z.ZodOptional<z.ZodObject<{
|
|
1543
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1544
|
+
message: z.ZodString;
|
|
1545
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
1546
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
1547
|
+
}, z.core.$catchall<z.ZodUnknown>>>;
|
|
1548
|
+
finalErrors: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1549
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1550
|
+
message: z.ZodString;
|
|
1551
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
1552
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
1553
|
+
}, z.core.$catchall<z.ZodUnknown>>>>;
|
|
1553
1554
|
ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
1554
1555
|
kind: z.ZodLiteral<"setOutput">;
|
|
1555
1556
|
key: z.ZodString;
|
|
@@ -1575,21 +1576,30 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
1575
1576
|
key: z.ZodString;
|
|
1576
1577
|
namespace: z.ZodString;
|
|
1577
1578
|
spanName: z.ZodString;
|
|
1578
|
-
spanKind: z.
|
|
1579
|
-
eval: "eval";
|
|
1580
|
-
agent: "agent";
|
|
1581
|
-
llm: "llm";
|
|
1582
|
-
tool: "tool";
|
|
1583
|
-
retrieval: "retrieval";
|
|
1584
|
-
scorer: "scorer";
|
|
1585
|
-
checkpoint: "checkpoint";
|
|
1586
|
-
custom: "custom";
|
|
1587
|
-
}>;
|
|
1579
|
+
spanKind: z.ZodString;
|
|
1588
1580
|
storedAt: z.ZodString;
|
|
1589
1581
|
codeFingerprint: z.ZodString;
|
|
1590
1582
|
recording: z.ZodObject<{
|
|
1591
1583
|
returnValue: z.ZodUnknown;
|
|
1592
1584
|
finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
1585
|
+
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
1586
|
+
error: "error";
|
|
1587
|
+
running: "running";
|
|
1588
|
+
ok: "ok";
|
|
1589
|
+
cancelled: "cancelled";
|
|
1590
|
+
}>>;
|
|
1591
|
+
finalError: z.ZodOptional<z.ZodObject<{
|
|
1592
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1593
|
+
message: z.ZodString;
|
|
1594
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
1595
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
1596
|
+
}, z.core.$catchall<z.ZodUnknown>>>;
|
|
1597
|
+
finalErrors: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1598
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1599
|
+
message: z.ZodString;
|
|
1600
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
1601
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
1602
|
+
}, z.core.$catchall<z.ZodUnknown>>>>;
|
|
1593
1603
|
ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
1594
1604
|
kind: z.ZodLiteral<"setOutput">;
|
|
1595
1605
|
key: z.ZodString;
|
|
@@ -1619,21 +1629,30 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
1619
1629
|
key: z.ZodString;
|
|
1620
1630
|
namespace: z.ZodString;
|
|
1621
1631
|
spanName: z.ZodString;
|
|
1622
|
-
spanKind: z.
|
|
1623
|
-
eval: "eval";
|
|
1624
|
-
agent: "agent";
|
|
1625
|
-
llm: "llm";
|
|
1626
|
-
tool: "tool";
|
|
1627
|
-
retrieval: "retrieval";
|
|
1628
|
-
scorer: "scorer";
|
|
1629
|
-
checkpoint: "checkpoint";
|
|
1630
|
-
custom: "custom";
|
|
1631
|
-
}>;
|
|
1632
|
+
spanKind: z.ZodString;
|
|
1632
1633
|
storedAt: z.ZodString;
|
|
1633
1634
|
codeFingerprint: z.ZodString;
|
|
1634
1635
|
recording: z.ZodObject<{
|
|
1635
1636
|
returnValue: z.ZodUnknown;
|
|
1636
1637
|
finalAttributes: z.ZodRecord<z.ZodString, z.ZodUnknown>;
|
|
1638
|
+
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
1639
|
+
error: "error";
|
|
1640
|
+
running: "running";
|
|
1641
|
+
ok: "ok";
|
|
1642
|
+
cancelled: "cancelled";
|
|
1643
|
+
}>>;
|
|
1644
|
+
finalError: z.ZodOptional<z.ZodObject<{
|
|
1645
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1646
|
+
message: z.ZodString;
|
|
1647
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
1648
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
1649
|
+
}, z.core.$catchall<z.ZodUnknown>>>;
|
|
1650
|
+
finalErrors: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1651
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1652
|
+
message: z.ZodString;
|
|
1653
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
1654
|
+
capturedAt: z.ZodOptional<z.ZodString>;
|
|
1655
|
+
}, z.core.$catchall<z.ZodUnknown>>>>;
|
|
1637
1656
|
ops: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
1638
1657
|
kind: z.ZodLiteral<"setOutput">;
|
|
1639
1658
|
key: z.ZodString;
|
|
@@ -1703,7 +1722,7 @@ type EvalTraceTree = {
|
|
|
1703
1722
|
spans: EvalTraceSpan[];
|
|
1704
1723
|
rootSpans: EvalTraceSpan[];
|
|
1705
1724
|
findSpan: (name: string) => EvalTraceSpan | undefined;
|
|
1706
|
-
findSpansByKind: (kind:
|
|
1725
|
+
findSpansByKind: (kind: string) => EvalTraceSpan[];
|
|
1707
1726
|
flattenDfs: () => EvalTraceSpan[];
|
|
1708
1727
|
checkpoints: Map<string, unknown>;
|
|
1709
1728
|
};
|
|
@@ -1963,7 +1982,7 @@ type TraceSpanTimestamp = Date | string;
|
|
|
1963
1982
|
type TraceExternalSpanStartInfo = {
|
|
1964
1983
|
/** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
|
|
1965
1984
|
parentId?: string | null; /** Semantic category used by the trace UI. */
|
|
1966
|
-
kind:
|
|
1985
|
+
kind: string; /** Display name for the span. */
|
|
1967
1986
|
name: string; /** Span start time. Defaults to now. */
|
|
1968
1987
|
startedAt?: TraceSpanTimestamp; /** Initial span attributes. Later updates merge into this object. */
|
|
1969
1988
|
attributes?: Record<string, unknown>;
|
|
@@ -1984,7 +2003,7 @@ type TraceExternalSpanEndInfo = TraceExternalSpanUpdateInfo & {
|
|
|
1984
2003
|
type TraceExternalSpanRecordInfo = {
|
|
1985
2004
|
/** Stable span id from the upstream tracer. Generated when omitted. */id?: string; /** Parent span id from the upstream tracer. Defaults to the active eval span. */
|
|
1986
2005
|
parentId?: string | null; /** Semantic category used by the trace UI. */
|
|
1987
|
-
kind:
|
|
2006
|
+
kind: string; /** Display name for the span. */
|
|
1988
2007
|
name: string; /** Span start time. Defaults to now. */
|
|
1989
2008
|
startedAt?: TraceSpanTimestamp; /** Span end time. Defaults to the start time. */
|
|
1990
2009
|
endedAt?: TraceSpanTimestamp | null; /** Final span status. Defaults to `ok`. */
|
|
@@ -2007,8 +2026,15 @@ declare function recordExternalSpan(info: TraceExternalSpanRecordInfo): string;
|
|
|
2007
2026
|
* Calls are no-ops when executed outside of `evalTracer.span(...)`.
|
|
2008
2027
|
*/
|
|
2009
2028
|
declare const evalSpan: TraceActiveSpan;
|
|
2029
|
+
/**
|
|
2030
|
+
* Attach one or more recoverable errors to the active eval span.
|
|
2031
|
+
*
|
|
2032
|
+
* The active span is marked as `error` even if its callback later completes
|
|
2033
|
+
* without throwing. Calls outside `evalTracer.span(...)` are ignored.
|
|
2034
|
+
*/
|
|
2035
|
+
declare function captureEvalSpanError(errorOrErrors: unknown, ...additionalErrors: readonly unknown[]): void;
|
|
2010
2036
|
type TraceSpanInfoBase = {
|
|
2011
|
-
kind:
|
|
2037
|
+
kind: string;
|
|
2012
2038
|
name: string;
|
|
2013
2039
|
attributes?: Record<string, unknown>;
|
|
2014
2040
|
};
|
|
@@ -2117,7 +2143,8 @@ type EvalRunner = {
|
|
|
2117
2143
|
cancelRun(id: string): void; /** Return full details for a single case in a run, when available. */
|
|
2118
2144
|
getCaseDetail(runId: string, caseId: string): CaseDetail | undefined; /** Subscribe to streamed events for a specific run. */
|
|
2119
2145
|
subscribe(runId: string, listener: (event: SseEnvelope) => void): () => void; /** Subscribe to discovery updates triggered by file changes or manual refresh. */
|
|
2120
|
-
subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /**
|
|
2146
|
+
subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Stop background filesystem watchers owned by this runner instance. */
|
|
2147
|
+
close(): Promise<void>; /** Resolve the workspace root backing this runner instance. */
|
|
2121
2148
|
getWorkspaceRoot(): string; /** Resolve a persisted artifact path when artifact storage is supported. */
|
|
2122
2149
|
getArtifactPath(artifactId: string): string | undefined; /** Return summaries for every persisted cache entry in the workspace. */
|
|
2123
2150
|
listCache(): Promise<CacheListItem[]>;
|
|
@@ -2175,4 +2202,4 @@ declare function createRunner({
|
|
|
2175
2202
|
*/
|
|
2176
2203
|
declare function runCli(argv: string[]): Promise<void>;
|
|
2177
2204
|
//#endregion
|
|
2178
|
-
export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type
|
|
2205
|
+
export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import "./src-
|
|
3
|
-
export { EvalAssertionError, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
|
|
1
|
+
import { $ as columnKindSchema, A as evalSummarySchema, B as evalChartsConfigSchema, C as assertionFailureSchema, D as evalStatAggregateSchema, E as evalFreshnessStatusSchema, F as evalChartColorSchema, G as traceDisplayConfigSchema, H as traceAttributeDisplayInputSchema, I as evalChartConfigSchema, J as traceSpanKindSchema, K as traceDisplayInputConfigSchema, L as evalChartMetricSchema, M as evalChartAggregateSchema, N as evalChartAxisSchema, O as evalStatItemSchema, P as evalChartBuiltinMetricSchema, Q as columnFormatSchema, R as evalChartTooltipExtraSchema, S as spanCacheOptionsSchema, T as caseRowSchema, U as traceAttributeDisplayPlacementSchema, V as traceAttributeDisplayFormatSchema, W as traceAttributeDisplaySchema, X as cellValueSchema, Y as traceSpanSchema, Z as columnDefSchema, _ as cacheListItemSchema, _t as setEvalOutput, a as sseEnvelopeSchema, at as buildTraceTree, b as cacheRecordingSchema, bt as defineEval, c as deriveScopedSummaryFromCases, ct as evalTracer, d as runManifestSchema, dt as EvalAssertionError, et as fileRefSchema, f as runSummarySchema, ft as evalAssert, g as cacheFileSchema, gt as runInEvalScope, h as cacheEntrySchema, ht as isInEvalScope, i as updateManualScoreRequestSchema, it as runArtifactRefSchema, j as scoreTraceSchema, k as evalStatsConfigSchema, l as deriveStatusFromCaseRows, lt as hashCacheKey, m as trialSelectionModeSchema, mt as incrementEvalOutput, n as createRunner, nt as numberDisplayOptionsSchema, o as getEvalTitle, ot as captureEvalSpanError, p as agentEvalsConfigSchema, pt as getCurrentScope, q as traceSpanErrorSchema, r as createRunRequestSchema, rt as repoFileRefSchema, s as getEvalDisplayStatus, st as evalSpan, t as runCli, tt as jsonCellSchema, u as deriveStatusFromChildStatuses, ut as hashCacheKeySync, v as cacheModeSchema, vt as setScopeCacheContext, w as caseDetailSchema, x as serializedCacheSpanSchema, xt as getEvalRegistry, y as cacheRecordingOpSchema, yt as repoFile, z as evalChartTypeSchema } from "./cli-C5FL7C4G.mjs";
|
|
2
|
+
import "./src-gqm1z1Nu.mjs";
|
|
3
|
+
export { EvalAssertionError, agentEvalsConfigSchema, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, trialSelectionModeSchema, updateManualScoreRequestSchema };
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-K2bN8KRS.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-
|
|
2
|
-
import "./src-
|
|
1
|
+
import { n as createRunner } from "./cli-C5FL7C4G.mjs";
|
|
2
|
+
import "./src-gqm1z1Nu.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|