braintrust 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +156 -0
- package/dev/dist/index.d.ts +156 -0
- package/dev/dist/index.js +16 -9
- package/dev/dist/index.mjs +16 -9
- package/dist/browser.d.mts +514 -0
- package/dist/browser.d.ts +514 -0
- package/dist/chunk-CDBUTZMH.js +1713 -0
- package/dist/chunk-NB5AEJPK.mjs +19 -0
- package/dist/chunk-VKR7HDRS.js +19 -0
- package/dist/chunk-WKBXJQ57.mjs +1713 -0
- package/dist/cli.js +23 -14
- package/dist/getMachineId-bsd-7YM2UMB4.js +37 -0
- package/dist/getMachineId-bsd-L7QQYES7.mjs +37 -0
- package/dist/getMachineId-darwin-QV3NVG7H.js +37 -0
- package/dist/getMachineId-darwin-YXDFFCXM.mjs +37 -0
- package/dist/getMachineId-linux-HKJ2YLJC.js +29 -0
- package/dist/getMachineId-linux-LWEEVKPU.mjs +29 -0
- package/dist/getMachineId-unsupported-EGJSIDYQ.mjs +20 -0
- package/dist/getMachineId-unsupported-TX34Q66M.js +20 -0
- package/dist/getMachineId-win-M5YW2KGK.js +39 -0
- package/dist/getMachineId-win-UDA4B6X2.mjs +39 -0
- package/dist/index.d.mts +905 -27
- package/dist/index.d.ts +905 -27
- package/dist/index.js +3664 -237
- package/dist/index.mjs +3460 -33
- package/package.json +7 -5
package/dev/dist/index.d.mts
CHANGED
|
@@ -1531,6 +1531,52 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
|
|
|
1531
1531
|
name: string;
|
|
1532
1532
|
content: string | null;
|
|
1533
1533
|
role: "function";
|
|
1534
|
+
}>, z.ZodObject<{
|
|
1535
|
+
content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
|
|
1536
|
+
text: z.ZodDefault<z.ZodString>;
|
|
1537
|
+
type: z.ZodLiteral<"text">;
|
|
1538
|
+
cache_control: z.ZodOptional<z.ZodObject<{
|
|
1539
|
+
type: z.ZodEnum<["ephemeral"]>;
|
|
1540
|
+
}, "strip", z.ZodTypeAny, {
|
|
1541
|
+
type: "ephemeral";
|
|
1542
|
+
}, {
|
|
1543
|
+
type: "ephemeral";
|
|
1544
|
+
}>>;
|
|
1545
|
+
}, "strip", z.ZodTypeAny, {
|
|
1546
|
+
type: "text";
|
|
1547
|
+
text: string;
|
|
1548
|
+
cache_control?: {
|
|
1549
|
+
type: "ephemeral";
|
|
1550
|
+
} | undefined;
|
|
1551
|
+
}, {
|
|
1552
|
+
type: "text";
|
|
1553
|
+
text?: string | undefined;
|
|
1554
|
+
cache_control?: {
|
|
1555
|
+
type: "ephemeral";
|
|
1556
|
+
} | undefined;
|
|
1557
|
+
}>, "many">]>;
|
|
1558
|
+
role: z.ZodLiteral<"developer">;
|
|
1559
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1560
|
+
}, "strip", z.ZodTypeAny, {
|
|
1561
|
+
content: string | {
|
|
1562
|
+
type: "text";
|
|
1563
|
+
text: string;
|
|
1564
|
+
cache_control?: {
|
|
1565
|
+
type: "ephemeral";
|
|
1566
|
+
} | undefined;
|
|
1567
|
+
}[];
|
|
1568
|
+
role: "developer";
|
|
1569
|
+
name?: string | undefined;
|
|
1570
|
+
}, {
|
|
1571
|
+
role: "developer";
|
|
1572
|
+
name?: string | undefined;
|
|
1573
|
+
content?: string | {
|
|
1574
|
+
type: "text";
|
|
1575
|
+
text?: string | undefined;
|
|
1576
|
+
cache_control?: {
|
|
1577
|
+
type: "ephemeral";
|
|
1578
|
+
} | undefined;
|
|
1579
|
+
}[] | undefined;
|
|
1534
1580
|
}>]>, z.ZodObject<{
|
|
1535
1581
|
role: z.ZodEnum<["model"]>;
|
|
1536
1582
|
content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
@@ -1608,6 +1654,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
|
|
|
1608
1654
|
name: string;
|
|
1609
1655
|
content: string | null;
|
|
1610
1656
|
role: "function";
|
|
1657
|
+
} | {
|
|
1658
|
+
content: string | {
|
|
1659
|
+
type: "text";
|
|
1660
|
+
text: string;
|
|
1661
|
+
cache_control?: {
|
|
1662
|
+
type: "ephemeral";
|
|
1663
|
+
} | undefined;
|
|
1664
|
+
}[];
|
|
1665
|
+
role: "developer";
|
|
1666
|
+
name?: string | undefined;
|
|
1611
1667
|
} | {
|
|
1612
1668
|
role: "model";
|
|
1613
1669
|
content?: string | null | undefined;
|
|
@@ -1679,6 +1735,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
|
|
|
1679
1735
|
name: string;
|
|
1680
1736
|
content: string | null;
|
|
1681
1737
|
role: "function";
|
|
1738
|
+
} | {
|
|
1739
|
+
role: "developer";
|
|
1740
|
+
name?: string | undefined;
|
|
1741
|
+
content?: string | {
|
|
1742
|
+
type: "text";
|
|
1743
|
+
text?: string | undefined;
|
|
1744
|
+
cache_control?: {
|
|
1745
|
+
type: "ephemeral";
|
|
1746
|
+
} | undefined;
|
|
1747
|
+
}[] | undefined;
|
|
1682
1748
|
} | {
|
|
1683
1749
|
role: "model";
|
|
1684
1750
|
content?: string | null | undefined;
|
|
@@ -2612,6 +2678,52 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
2612
2678
|
name: string;
|
|
2613
2679
|
content: string | null;
|
|
2614
2680
|
role: "function";
|
|
2681
|
+
}>, z.ZodObject<{
|
|
2682
|
+
content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
|
|
2683
|
+
text: z.ZodDefault<z.ZodString>;
|
|
2684
|
+
type: z.ZodLiteral<"text">;
|
|
2685
|
+
cache_control: z.ZodOptional<z.ZodObject<{
|
|
2686
|
+
type: z.ZodEnum<["ephemeral"]>;
|
|
2687
|
+
}, "strip", z.ZodTypeAny, {
|
|
2688
|
+
type: "ephemeral";
|
|
2689
|
+
}, {
|
|
2690
|
+
type: "ephemeral";
|
|
2691
|
+
}>>;
|
|
2692
|
+
}, "strip", z.ZodTypeAny, {
|
|
2693
|
+
type: "text";
|
|
2694
|
+
text: string;
|
|
2695
|
+
cache_control?: {
|
|
2696
|
+
type: "ephemeral";
|
|
2697
|
+
} | undefined;
|
|
2698
|
+
}, {
|
|
2699
|
+
type: "text";
|
|
2700
|
+
text?: string | undefined;
|
|
2701
|
+
cache_control?: {
|
|
2702
|
+
type: "ephemeral";
|
|
2703
|
+
} | undefined;
|
|
2704
|
+
}>, "many">]>;
|
|
2705
|
+
role: z.ZodLiteral<"developer">;
|
|
2706
|
+
name: z.ZodOptional<z.ZodString>;
|
|
2707
|
+
}, "strip", z.ZodTypeAny, {
|
|
2708
|
+
content: string | {
|
|
2709
|
+
type: "text";
|
|
2710
|
+
text: string;
|
|
2711
|
+
cache_control?: {
|
|
2712
|
+
type: "ephemeral";
|
|
2713
|
+
} | undefined;
|
|
2714
|
+
}[];
|
|
2715
|
+
role: "developer";
|
|
2716
|
+
name?: string | undefined;
|
|
2717
|
+
}, {
|
|
2718
|
+
role: "developer";
|
|
2719
|
+
name?: string | undefined;
|
|
2720
|
+
content?: string | {
|
|
2721
|
+
type: "text";
|
|
2722
|
+
text?: string | undefined;
|
|
2723
|
+
cache_control?: {
|
|
2724
|
+
type: "ephemeral";
|
|
2725
|
+
} | undefined;
|
|
2726
|
+
}[] | undefined;
|
|
2615
2727
|
}>]>, z.ZodObject<{
|
|
2616
2728
|
role: z.ZodEnum<["model"]>;
|
|
2617
2729
|
content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
@@ -2689,6 +2801,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
2689
2801
|
name: string;
|
|
2690
2802
|
content: string | null;
|
|
2691
2803
|
role: "function";
|
|
2804
|
+
} | {
|
|
2805
|
+
content: string | {
|
|
2806
|
+
type: "text";
|
|
2807
|
+
text: string;
|
|
2808
|
+
cache_control?: {
|
|
2809
|
+
type: "ephemeral";
|
|
2810
|
+
} | undefined;
|
|
2811
|
+
}[];
|
|
2812
|
+
role: "developer";
|
|
2813
|
+
name?: string | undefined;
|
|
2692
2814
|
} | {
|
|
2693
2815
|
role: "model";
|
|
2694
2816
|
content?: string | null | undefined;
|
|
@@ -2760,6 +2882,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
2760
2882
|
name: string;
|
|
2761
2883
|
content: string | null;
|
|
2762
2884
|
role: "function";
|
|
2885
|
+
} | {
|
|
2886
|
+
role: "developer";
|
|
2887
|
+
name?: string | undefined;
|
|
2888
|
+
content?: string | {
|
|
2889
|
+
type: "text";
|
|
2890
|
+
text?: string | undefined;
|
|
2891
|
+
cache_control?: {
|
|
2892
|
+
type: "ephemeral";
|
|
2893
|
+
} | undefined;
|
|
2894
|
+
}[] | undefined;
|
|
2763
2895
|
} | {
|
|
2764
2896
|
role: "model";
|
|
2765
2897
|
content?: string | null | undefined;
|
|
@@ -3447,6 +3579,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
3447
3579
|
name: string;
|
|
3448
3580
|
content: string | null;
|
|
3449
3581
|
role: "function";
|
|
3582
|
+
} | {
|
|
3583
|
+
content: string | {
|
|
3584
|
+
type: "text";
|
|
3585
|
+
text: string;
|
|
3586
|
+
cache_control?: {
|
|
3587
|
+
type: "ephemeral";
|
|
3588
|
+
} | undefined;
|
|
3589
|
+
}[];
|
|
3590
|
+
role: "developer";
|
|
3591
|
+
name?: string | undefined;
|
|
3450
3592
|
} | {
|
|
3451
3593
|
role: "model";
|
|
3452
3594
|
content?: string | null | undefined;
|
|
@@ -3646,6 +3788,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
3646
3788
|
name: string;
|
|
3647
3789
|
content: string | null;
|
|
3648
3790
|
role: "function";
|
|
3791
|
+
} | {
|
|
3792
|
+
role: "developer";
|
|
3793
|
+
name?: string | undefined;
|
|
3794
|
+
content?: string | {
|
|
3795
|
+
type: "text";
|
|
3796
|
+
text?: string | undefined;
|
|
3797
|
+
cache_control?: {
|
|
3798
|
+
type: "ephemeral";
|
|
3799
|
+
} | undefined;
|
|
3800
|
+
}[] | undefined;
|
|
3649
3801
|
} | {
|
|
3650
3802
|
role: "model";
|
|
3651
3803
|
content?: string | null | undefined;
|
|
@@ -3830,6 +3982,10 @@ interface EvalHooks<Expected, Metadata extends BaseMetadata, Parameters extends
|
|
|
3830
3982
|
* Report progress that will show up in the playground.
|
|
3831
3983
|
*/
|
|
3832
3984
|
reportProgress: (progress: TaskProgressEvent) => void;
|
|
3985
|
+
/**
|
|
3986
|
+
* The index of the current trial (0-based). This is useful when trialCount > 1.
|
|
3987
|
+
*/
|
|
3988
|
+
trialIndex: number;
|
|
3833
3989
|
}
|
|
3834
3990
|
type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
|
|
3835
3991
|
output: Output;
|
package/dev/dist/index.d.ts
CHANGED
|
@@ -1531,6 +1531,52 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
|
|
|
1531
1531
|
name: string;
|
|
1532
1532
|
content: string | null;
|
|
1533
1533
|
role: "function";
|
|
1534
|
+
}>, z.ZodObject<{
|
|
1535
|
+
content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
|
|
1536
|
+
text: z.ZodDefault<z.ZodString>;
|
|
1537
|
+
type: z.ZodLiteral<"text">;
|
|
1538
|
+
cache_control: z.ZodOptional<z.ZodObject<{
|
|
1539
|
+
type: z.ZodEnum<["ephemeral"]>;
|
|
1540
|
+
}, "strip", z.ZodTypeAny, {
|
|
1541
|
+
type: "ephemeral";
|
|
1542
|
+
}, {
|
|
1543
|
+
type: "ephemeral";
|
|
1544
|
+
}>>;
|
|
1545
|
+
}, "strip", z.ZodTypeAny, {
|
|
1546
|
+
type: "text";
|
|
1547
|
+
text: string;
|
|
1548
|
+
cache_control?: {
|
|
1549
|
+
type: "ephemeral";
|
|
1550
|
+
} | undefined;
|
|
1551
|
+
}, {
|
|
1552
|
+
type: "text";
|
|
1553
|
+
text?: string | undefined;
|
|
1554
|
+
cache_control?: {
|
|
1555
|
+
type: "ephemeral";
|
|
1556
|
+
} | undefined;
|
|
1557
|
+
}>, "many">]>;
|
|
1558
|
+
role: z.ZodLiteral<"developer">;
|
|
1559
|
+
name: z.ZodOptional<z.ZodString>;
|
|
1560
|
+
}, "strip", z.ZodTypeAny, {
|
|
1561
|
+
content: string | {
|
|
1562
|
+
type: "text";
|
|
1563
|
+
text: string;
|
|
1564
|
+
cache_control?: {
|
|
1565
|
+
type: "ephemeral";
|
|
1566
|
+
} | undefined;
|
|
1567
|
+
}[];
|
|
1568
|
+
role: "developer";
|
|
1569
|
+
name?: string | undefined;
|
|
1570
|
+
}, {
|
|
1571
|
+
role: "developer";
|
|
1572
|
+
name?: string | undefined;
|
|
1573
|
+
content?: string | {
|
|
1574
|
+
type: "text";
|
|
1575
|
+
text?: string | undefined;
|
|
1576
|
+
cache_control?: {
|
|
1577
|
+
type: "ephemeral";
|
|
1578
|
+
} | undefined;
|
|
1579
|
+
}[] | undefined;
|
|
1534
1580
|
}>]>, z.ZodObject<{
|
|
1535
1581
|
role: z.ZodEnum<["model"]>;
|
|
1536
1582
|
content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
@@ -1608,6 +1654,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
|
|
|
1608
1654
|
name: string;
|
|
1609
1655
|
content: string | null;
|
|
1610
1656
|
role: "function";
|
|
1657
|
+
} | {
|
|
1658
|
+
content: string | {
|
|
1659
|
+
type: "text";
|
|
1660
|
+
text: string;
|
|
1661
|
+
cache_control?: {
|
|
1662
|
+
type: "ephemeral";
|
|
1663
|
+
} | undefined;
|
|
1664
|
+
}[];
|
|
1665
|
+
role: "developer";
|
|
1666
|
+
name?: string | undefined;
|
|
1611
1667
|
} | {
|
|
1612
1668
|
role: "model";
|
|
1613
1669
|
content?: string | null | undefined;
|
|
@@ -1679,6 +1735,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
|
|
|
1679
1735
|
name: string;
|
|
1680
1736
|
content: string | null;
|
|
1681
1737
|
role: "function";
|
|
1738
|
+
} | {
|
|
1739
|
+
role: "developer";
|
|
1740
|
+
name?: string | undefined;
|
|
1741
|
+
content?: string | {
|
|
1742
|
+
type: "text";
|
|
1743
|
+
text?: string | undefined;
|
|
1744
|
+
cache_control?: {
|
|
1745
|
+
type: "ephemeral";
|
|
1746
|
+
} | undefined;
|
|
1747
|
+
}[] | undefined;
|
|
1682
1748
|
} | {
|
|
1683
1749
|
role: "model";
|
|
1684
1750
|
content?: string | null | undefined;
|
|
@@ -2612,6 +2678,52 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
2612
2678
|
name: string;
|
|
2613
2679
|
content: string | null;
|
|
2614
2680
|
role: "function";
|
|
2681
|
+
}>, z.ZodObject<{
|
|
2682
|
+
content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
|
|
2683
|
+
text: z.ZodDefault<z.ZodString>;
|
|
2684
|
+
type: z.ZodLiteral<"text">;
|
|
2685
|
+
cache_control: z.ZodOptional<z.ZodObject<{
|
|
2686
|
+
type: z.ZodEnum<["ephemeral"]>;
|
|
2687
|
+
}, "strip", z.ZodTypeAny, {
|
|
2688
|
+
type: "ephemeral";
|
|
2689
|
+
}, {
|
|
2690
|
+
type: "ephemeral";
|
|
2691
|
+
}>>;
|
|
2692
|
+
}, "strip", z.ZodTypeAny, {
|
|
2693
|
+
type: "text";
|
|
2694
|
+
text: string;
|
|
2695
|
+
cache_control?: {
|
|
2696
|
+
type: "ephemeral";
|
|
2697
|
+
} | undefined;
|
|
2698
|
+
}, {
|
|
2699
|
+
type: "text";
|
|
2700
|
+
text?: string | undefined;
|
|
2701
|
+
cache_control?: {
|
|
2702
|
+
type: "ephemeral";
|
|
2703
|
+
} | undefined;
|
|
2704
|
+
}>, "many">]>;
|
|
2705
|
+
role: z.ZodLiteral<"developer">;
|
|
2706
|
+
name: z.ZodOptional<z.ZodString>;
|
|
2707
|
+
}, "strip", z.ZodTypeAny, {
|
|
2708
|
+
content: string | {
|
|
2709
|
+
type: "text";
|
|
2710
|
+
text: string;
|
|
2711
|
+
cache_control?: {
|
|
2712
|
+
type: "ephemeral";
|
|
2713
|
+
} | undefined;
|
|
2714
|
+
}[];
|
|
2715
|
+
role: "developer";
|
|
2716
|
+
name?: string | undefined;
|
|
2717
|
+
}, {
|
|
2718
|
+
role: "developer";
|
|
2719
|
+
name?: string | undefined;
|
|
2720
|
+
content?: string | {
|
|
2721
|
+
type: "text";
|
|
2722
|
+
text?: string | undefined;
|
|
2723
|
+
cache_control?: {
|
|
2724
|
+
type: "ephemeral";
|
|
2725
|
+
} | undefined;
|
|
2726
|
+
}[] | undefined;
|
|
2615
2727
|
}>]>, z.ZodObject<{
|
|
2616
2728
|
role: z.ZodEnum<["model"]>;
|
|
2617
2729
|
content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
@@ -2689,6 +2801,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
2689
2801
|
name: string;
|
|
2690
2802
|
content: string | null;
|
|
2691
2803
|
role: "function";
|
|
2804
|
+
} | {
|
|
2805
|
+
content: string | {
|
|
2806
|
+
type: "text";
|
|
2807
|
+
text: string;
|
|
2808
|
+
cache_control?: {
|
|
2809
|
+
type: "ephemeral";
|
|
2810
|
+
} | undefined;
|
|
2811
|
+
}[];
|
|
2812
|
+
role: "developer";
|
|
2813
|
+
name?: string | undefined;
|
|
2692
2814
|
} | {
|
|
2693
2815
|
role: "model";
|
|
2694
2816
|
content?: string | null | undefined;
|
|
@@ -2760,6 +2882,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
2760
2882
|
name: string;
|
|
2761
2883
|
content: string | null;
|
|
2762
2884
|
role: "function";
|
|
2885
|
+
} | {
|
|
2886
|
+
role: "developer";
|
|
2887
|
+
name?: string | undefined;
|
|
2888
|
+
content?: string | {
|
|
2889
|
+
type: "text";
|
|
2890
|
+
text?: string | undefined;
|
|
2891
|
+
cache_control?: {
|
|
2892
|
+
type: "ephemeral";
|
|
2893
|
+
} | undefined;
|
|
2894
|
+
}[] | undefined;
|
|
2763
2895
|
} | {
|
|
2764
2896
|
role: "model";
|
|
2765
2897
|
content?: string | null | undefined;
|
|
@@ -3447,6 +3579,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
3447
3579
|
name: string;
|
|
3448
3580
|
content: string | null;
|
|
3449
3581
|
role: "function";
|
|
3582
|
+
} | {
|
|
3583
|
+
content: string | {
|
|
3584
|
+
type: "text";
|
|
3585
|
+
text: string;
|
|
3586
|
+
cache_control?: {
|
|
3587
|
+
type: "ephemeral";
|
|
3588
|
+
} | undefined;
|
|
3589
|
+
}[];
|
|
3590
|
+
role: "developer";
|
|
3591
|
+
name?: string | undefined;
|
|
3450
3592
|
} | {
|
|
3451
3593
|
role: "model";
|
|
3452
3594
|
content?: string | null | undefined;
|
|
@@ -3646,6 +3788,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
|
|
|
3646
3788
|
name: string;
|
|
3647
3789
|
content: string | null;
|
|
3648
3790
|
role: "function";
|
|
3791
|
+
} | {
|
|
3792
|
+
role: "developer";
|
|
3793
|
+
name?: string | undefined;
|
|
3794
|
+
content?: string | {
|
|
3795
|
+
type: "text";
|
|
3796
|
+
text?: string | undefined;
|
|
3797
|
+
cache_control?: {
|
|
3798
|
+
type: "ephemeral";
|
|
3799
|
+
} | undefined;
|
|
3800
|
+
}[] | undefined;
|
|
3649
3801
|
} | {
|
|
3650
3802
|
role: "model";
|
|
3651
3803
|
content?: string | null | undefined;
|
|
@@ -3830,6 +3982,10 @@ interface EvalHooks<Expected, Metadata extends BaseMetadata, Parameters extends
|
|
|
3830
3982
|
* Report progress that will show up in the playground.
|
|
3831
3983
|
*/
|
|
3832
3984
|
reportProgress: (progress: TaskProgressEvent) => void;
|
|
3985
|
+
/**
|
|
3986
|
+
* The index of the current trial (0-based). This is useful when trialCount > 1.
|
|
3987
|
+
*/
|
|
3988
|
+
trialIndex: number;
|
|
3833
3989
|
}
|
|
3834
3990
|
type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
|
|
3835
3991
|
output: Output;
|
package/dev/dist/index.js
CHANGED
|
@@ -100,7 +100,7 @@ async function getBaseBranchAncestor(remote = void 0) {
|
|
|
100
100
|
return void 0;
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
|
-
async function getPastNAncestors(n = 10, remote = void 0) {
|
|
103
|
+
async function getPastNAncestors(n = 1e3, remote = void 0) {
|
|
104
104
|
const git = await currentRepo();
|
|
105
105
|
if (git === null) {
|
|
106
106
|
return [];
|
|
@@ -117,7 +117,7 @@ async function getPastNAncestors(n = 10, remote = void 0) {
|
|
|
117
117
|
if (!ancestor) {
|
|
118
118
|
return [];
|
|
119
119
|
}
|
|
120
|
-
const commits = await git.log({ from: ancestor, to: "HEAD" });
|
|
120
|
+
const commits = await git.log({ from: ancestor, to: "HEAD", maxCount: n });
|
|
121
121
|
return commits.all.map((c) => c.hash);
|
|
122
122
|
}
|
|
123
123
|
async function attempt(fn) {
|
|
@@ -6151,14 +6151,20 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
6151
6151
|
} else {
|
|
6152
6152
|
data = dataResult;
|
|
6153
6153
|
}
|
|
6154
|
-
|
|
6155
|
-
(datum) => [...Array(_nullishCoalesce(evaluator.trialCount, () => ( 1))).keys()].map(() =>
|
|
6154
|
+
const dataWithTrials = data.filter((d) => filters.every((f) => evaluateFilter(d, f))).flatMap(
|
|
6155
|
+
(datum) => [...Array(_nullishCoalesce(evaluator.trialCount, () => ( 1))).keys()].map((trialIndex) => ({
|
|
6156
|
+
datum,
|
|
6157
|
+
trialIndex
|
|
6158
|
+
}))
|
|
6156
6159
|
);
|
|
6157
|
-
progressReporter.start(evaluator.evalName,
|
|
6160
|
+
progressReporter.start(evaluator.evalName, dataWithTrials.length);
|
|
6158
6161
|
const results = [];
|
|
6159
6162
|
const q = queue(
|
|
6160
6163
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
6161
|
-
async (
|
|
6164
|
+
async ({
|
|
6165
|
+
datum,
|
|
6166
|
+
trialIndex
|
|
6167
|
+
}) => {
|
|
6162
6168
|
const eventDataset = experiment ? experiment.dataset : Dataset.isDataset(evaluator.data) ? evaluator.data : void 0;
|
|
6163
6169
|
const baseEvent = {
|
|
6164
6170
|
name: "eval",
|
|
@@ -6207,7 +6213,8 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
6207
6213
|
name: evaluator.evalName,
|
|
6208
6214
|
object_type: "task"
|
|
6209
6215
|
})]);
|
|
6210
|
-
}
|
|
6216
|
+
},
|
|
6217
|
+
trialIndex
|
|
6211
6218
|
});
|
|
6212
6219
|
if (outputResult instanceof Promise) {
|
|
6213
6220
|
output = await outputResult;
|
|
@@ -6357,9 +6364,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
6357
6364
|
return await experiment.traced(callback, baseEvent);
|
|
6358
6365
|
}
|
|
6359
6366
|
},
|
|
6360
|
-
Math.max(_nullishCoalesce(evaluator.maxConcurrency, () => (
|
|
6367
|
+
Math.max(_nullishCoalesce(evaluator.maxConcurrency, () => ( dataWithTrials.length)), 1)
|
|
6361
6368
|
);
|
|
6362
|
-
q.push(
|
|
6369
|
+
q.push(dataWithTrials);
|
|
6363
6370
|
const cancel = async () => {
|
|
6364
6371
|
await new Promise((_, reject2) => {
|
|
6365
6372
|
if (evaluator.timeout) {
|
package/dev/dist/index.mjs
CHANGED
|
@@ -100,7 +100,7 @@ async function getBaseBranchAncestor(remote = void 0) {
|
|
|
100
100
|
return void 0;
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
|
-
async function getPastNAncestors(n = 10, remote = void 0) {
|
|
103
|
+
async function getPastNAncestors(n = 1e3, remote = void 0) {
|
|
104
104
|
const git = await currentRepo();
|
|
105
105
|
if (git === null) {
|
|
106
106
|
return [];
|
|
@@ -117,7 +117,7 @@ async function getPastNAncestors(n = 10, remote = void 0) {
|
|
|
117
117
|
if (!ancestor) {
|
|
118
118
|
return [];
|
|
119
119
|
}
|
|
120
|
-
const commits = await git.log({ from: ancestor, to: "HEAD" });
|
|
120
|
+
const commits = await git.log({ from: ancestor, to: "HEAD", maxCount: n });
|
|
121
121
|
return commits.all.map((c) => c.hash);
|
|
122
122
|
}
|
|
123
123
|
async function attempt(fn) {
|
|
@@ -6151,14 +6151,20 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
6151
6151
|
} else {
|
|
6152
6152
|
data = dataResult;
|
|
6153
6153
|
}
|
|
6154
|
-
|
|
6155
|
-
(datum) => [...Array(evaluator.trialCount ?? 1).keys()].map(() =>
|
|
6154
|
+
const dataWithTrials = data.filter((d) => filters.every((f) => evaluateFilter(d, f))).flatMap(
|
|
6155
|
+
(datum) => [...Array(evaluator.trialCount ?? 1).keys()].map((trialIndex) => ({
|
|
6156
|
+
datum,
|
|
6157
|
+
trialIndex
|
|
6158
|
+
}))
|
|
6156
6159
|
);
|
|
6157
|
-
progressReporter.start(evaluator.evalName,
|
|
6160
|
+
progressReporter.start(evaluator.evalName, dataWithTrials.length);
|
|
6158
6161
|
const results = [];
|
|
6159
6162
|
const q = queue(
|
|
6160
6163
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
6161
|
-
async (
|
|
6164
|
+
async ({
|
|
6165
|
+
datum,
|
|
6166
|
+
trialIndex
|
|
6167
|
+
}) => {
|
|
6162
6168
|
const eventDataset = experiment ? experiment.dataset : Dataset.isDataset(evaluator.data) ? evaluator.data : void 0;
|
|
6163
6169
|
const baseEvent = {
|
|
6164
6170
|
name: "eval",
|
|
@@ -6207,7 +6213,8 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
6207
6213
|
name: evaluator.evalName,
|
|
6208
6214
|
object_type: "task"
|
|
6209
6215
|
});
|
|
6210
|
-
}
|
|
6216
|
+
},
|
|
6217
|
+
trialIndex
|
|
6211
6218
|
});
|
|
6212
6219
|
if (outputResult instanceof Promise) {
|
|
6213
6220
|
output = await outputResult;
|
|
@@ -6357,9 +6364,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
6357
6364
|
return await experiment.traced(callback, baseEvent);
|
|
6358
6365
|
}
|
|
6359
6366
|
},
|
|
6360
|
-
Math.max(evaluator.maxConcurrency ??
|
|
6367
|
+
Math.max(evaluator.maxConcurrency ?? dataWithTrials.length, 1)
|
|
6361
6368
|
);
|
|
6362
|
-
q.push(
|
|
6369
|
+
q.push(dataWithTrials);
|
|
6363
6370
|
const cancel = async () => {
|
|
6364
6371
|
await new Promise((_, reject2) => {
|
|
6365
6372
|
if (evaluator.timeout) {
|