braintrust 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1531,6 +1531,52 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
1531
1531
  name: string;
1532
1532
  content: string | null;
1533
1533
  role: "function";
1534
+ }>, z.ZodObject<{
1535
+ content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
1536
+ text: z.ZodDefault<z.ZodString>;
1537
+ type: z.ZodLiteral<"text">;
1538
+ cache_control: z.ZodOptional<z.ZodObject<{
1539
+ type: z.ZodEnum<["ephemeral"]>;
1540
+ }, "strip", z.ZodTypeAny, {
1541
+ type: "ephemeral";
1542
+ }, {
1543
+ type: "ephemeral";
1544
+ }>>;
1545
+ }, "strip", z.ZodTypeAny, {
1546
+ type: "text";
1547
+ text: string;
1548
+ cache_control?: {
1549
+ type: "ephemeral";
1550
+ } | undefined;
1551
+ }, {
1552
+ type: "text";
1553
+ text?: string | undefined;
1554
+ cache_control?: {
1555
+ type: "ephemeral";
1556
+ } | undefined;
1557
+ }>, "many">]>;
1558
+ role: z.ZodLiteral<"developer">;
1559
+ name: z.ZodOptional<z.ZodString>;
1560
+ }, "strip", z.ZodTypeAny, {
1561
+ content: string | {
1562
+ type: "text";
1563
+ text: string;
1564
+ cache_control?: {
1565
+ type: "ephemeral";
1566
+ } | undefined;
1567
+ }[];
1568
+ role: "developer";
1569
+ name?: string | undefined;
1570
+ }, {
1571
+ role: "developer";
1572
+ name?: string | undefined;
1573
+ content?: string | {
1574
+ type: "text";
1575
+ text?: string | undefined;
1576
+ cache_control?: {
1577
+ type: "ephemeral";
1578
+ } | undefined;
1579
+ }[] | undefined;
1534
1580
  }>]>, z.ZodObject<{
1535
1581
  role: z.ZodEnum<["model"]>;
1536
1582
  content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
@@ -1608,6 +1654,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
1608
1654
  name: string;
1609
1655
  content: string | null;
1610
1656
  role: "function";
1657
+ } | {
1658
+ content: string | {
1659
+ type: "text";
1660
+ text: string;
1661
+ cache_control?: {
1662
+ type: "ephemeral";
1663
+ } | undefined;
1664
+ }[];
1665
+ role: "developer";
1666
+ name?: string | undefined;
1611
1667
  } | {
1612
1668
  role: "model";
1613
1669
  content?: string | null | undefined;
@@ -1679,6 +1735,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
1679
1735
  name: string;
1680
1736
  content: string | null;
1681
1737
  role: "function";
1738
+ } | {
1739
+ role: "developer";
1740
+ name?: string | undefined;
1741
+ content?: string | {
1742
+ type: "text";
1743
+ text?: string | undefined;
1744
+ cache_control?: {
1745
+ type: "ephemeral";
1746
+ } | undefined;
1747
+ }[] | undefined;
1682
1748
  } | {
1683
1749
  role: "model";
1684
1750
  content?: string | null | undefined;
@@ -2612,6 +2678,52 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
2612
2678
  name: string;
2613
2679
  content: string | null;
2614
2680
  role: "function";
2681
+ }>, z.ZodObject<{
2682
+ content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
2683
+ text: z.ZodDefault<z.ZodString>;
2684
+ type: z.ZodLiteral<"text">;
2685
+ cache_control: z.ZodOptional<z.ZodObject<{
2686
+ type: z.ZodEnum<["ephemeral"]>;
2687
+ }, "strip", z.ZodTypeAny, {
2688
+ type: "ephemeral";
2689
+ }, {
2690
+ type: "ephemeral";
2691
+ }>>;
2692
+ }, "strip", z.ZodTypeAny, {
2693
+ type: "text";
2694
+ text: string;
2695
+ cache_control?: {
2696
+ type: "ephemeral";
2697
+ } | undefined;
2698
+ }, {
2699
+ type: "text";
2700
+ text?: string | undefined;
2701
+ cache_control?: {
2702
+ type: "ephemeral";
2703
+ } | undefined;
2704
+ }>, "many">]>;
2705
+ role: z.ZodLiteral<"developer">;
2706
+ name: z.ZodOptional<z.ZodString>;
2707
+ }, "strip", z.ZodTypeAny, {
2708
+ content: string | {
2709
+ type: "text";
2710
+ text: string;
2711
+ cache_control?: {
2712
+ type: "ephemeral";
2713
+ } | undefined;
2714
+ }[];
2715
+ role: "developer";
2716
+ name?: string | undefined;
2717
+ }, {
2718
+ role: "developer";
2719
+ name?: string | undefined;
2720
+ content?: string | {
2721
+ type: "text";
2722
+ text?: string | undefined;
2723
+ cache_control?: {
2724
+ type: "ephemeral";
2725
+ } | undefined;
2726
+ }[] | undefined;
2615
2727
  }>]>, z.ZodObject<{
2616
2728
  role: z.ZodEnum<["model"]>;
2617
2729
  content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
@@ -2689,6 +2801,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
2689
2801
  name: string;
2690
2802
  content: string | null;
2691
2803
  role: "function";
2804
+ } | {
2805
+ content: string | {
2806
+ type: "text";
2807
+ text: string;
2808
+ cache_control?: {
2809
+ type: "ephemeral";
2810
+ } | undefined;
2811
+ }[];
2812
+ role: "developer";
2813
+ name?: string | undefined;
2692
2814
  } | {
2693
2815
  role: "model";
2694
2816
  content?: string | null | undefined;
@@ -2760,6 +2882,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
2760
2882
  name: string;
2761
2883
  content: string | null;
2762
2884
  role: "function";
2885
+ } | {
2886
+ role: "developer";
2887
+ name?: string | undefined;
2888
+ content?: string | {
2889
+ type: "text";
2890
+ text?: string | undefined;
2891
+ cache_control?: {
2892
+ type: "ephemeral";
2893
+ } | undefined;
2894
+ }[] | undefined;
2763
2895
  } | {
2764
2896
  role: "model";
2765
2897
  content?: string | null | undefined;
@@ -3447,6 +3579,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
3447
3579
  name: string;
3448
3580
  content: string | null;
3449
3581
  role: "function";
3582
+ } | {
3583
+ content: string | {
3584
+ type: "text";
3585
+ text: string;
3586
+ cache_control?: {
3587
+ type: "ephemeral";
3588
+ } | undefined;
3589
+ }[];
3590
+ role: "developer";
3591
+ name?: string | undefined;
3450
3592
  } | {
3451
3593
  role: "model";
3452
3594
  content?: string | null | undefined;
@@ -3646,6 +3788,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
3646
3788
  name: string;
3647
3789
  content: string | null;
3648
3790
  role: "function";
3791
+ } | {
3792
+ role: "developer";
3793
+ name?: string | undefined;
3794
+ content?: string | {
3795
+ type: "text";
3796
+ text?: string | undefined;
3797
+ cache_control?: {
3798
+ type: "ephemeral";
3799
+ } | undefined;
3800
+ }[] | undefined;
3649
3801
  } | {
3650
3802
  role: "model";
3651
3803
  content?: string | null | undefined;
@@ -3830,6 +3982,10 @@ interface EvalHooks<Expected, Metadata extends BaseMetadata, Parameters extends
3830
3982
  * Report progress that will show up in the playground.
3831
3983
  */
3832
3984
  reportProgress: (progress: TaskProgressEvent) => void;
3985
+ /**
3986
+ * The index of the current trial (0-based). This is useful when trialCount > 1.
3987
+ */
3988
+ trialIndex: number;
3833
3989
  }
3834
3990
  type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
3835
3991
  output: Output;
@@ -1531,6 +1531,52 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
1531
1531
  name: string;
1532
1532
  content: string | null;
1533
1533
  role: "function";
1534
+ }>, z.ZodObject<{
1535
+ content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
1536
+ text: z.ZodDefault<z.ZodString>;
1537
+ type: z.ZodLiteral<"text">;
1538
+ cache_control: z.ZodOptional<z.ZodObject<{
1539
+ type: z.ZodEnum<["ephemeral"]>;
1540
+ }, "strip", z.ZodTypeAny, {
1541
+ type: "ephemeral";
1542
+ }, {
1543
+ type: "ephemeral";
1544
+ }>>;
1545
+ }, "strip", z.ZodTypeAny, {
1546
+ type: "text";
1547
+ text: string;
1548
+ cache_control?: {
1549
+ type: "ephemeral";
1550
+ } | undefined;
1551
+ }, {
1552
+ type: "text";
1553
+ text?: string | undefined;
1554
+ cache_control?: {
1555
+ type: "ephemeral";
1556
+ } | undefined;
1557
+ }>, "many">]>;
1558
+ role: z.ZodLiteral<"developer">;
1559
+ name: z.ZodOptional<z.ZodString>;
1560
+ }, "strip", z.ZodTypeAny, {
1561
+ content: string | {
1562
+ type: "text";
1563
+ text: string;
1564
+ cache_control?: {
1565
+ type: "ephemeral";
1566
+ } | undefined;
1567
+ }[];
1568
+ role: "developer";
1569
+ name?: string | undefined;
1570
+ }, {
1571
+ role: "developer";
1572
+ name?: string | undefined;
1573
+ content?: string | {
1574
+ type: "text";
1575
+ text?: string | undefined;
1576
+ cache_control?: {
1577
+ type: "ephemeral";
1578
+ } | undefined;
1579
+ }[] | undefined;
1534
1580
  }>]>, z.ZodObject<{
1535
1581
  role: z.ZodEnum<["model"]>;
1536
1582
  content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
@@ -1608,6 +1654,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
1608
1654
  name: string;
1609
1655
  content: string | null;
1610
1656
  role: "function";
1657
+ } | {
1658
+ content: string | {
1659
+ type: "text";
1660
+ text: string;
1661
+ cache_control?: {
1662
+ type: "ephemeral";
1663
+ } | undefined;
1664
+ }[];
1665
+ role: "developer";
1666
+ name?: string | undefined;
1611
1667
  } | {
1612
1668
  role: "model";
1613
1669
  content?: string | null | undefined;
@@ -1679,6 +1735,16 @@ declare const promptDefinitionSchema: z.ZodIntersection<z.ZodUnion<[z.ZodObject<
1679
1735
  name: string;
1680
1736
  content: string | null;
1681
1737
  role: "function";
1738
+ } | {
1739
+ role: "developer";
1740
+ name?: string | undefined;
1741
+ content?: string | {
1742
+ type: "text";
1743
+ text?: string | undefined;
1744
+ cache_control?: {
1745
+ type: "ephemeral";
1746
+ } | undefined;
1747
+ }[] | undefined;
1682
1748
  } | {
1683
1749
  role: "model";
1684
1750
  content?: string | null | undefined;
@@ -2612,6 +2678,52 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
2612
2678
  name: string;
2613
2679
  content: string | null;
2614
2680
  role: "function";
2681
+ }>, z.ZodObject<{
2682
+ content: z.ZodUnion<[z.ZodDefault<z.ZodString>, z.ZodArray<z.ZodObject<{
2683
+ text: z.ZodDefault<z.ZodString>;
2684
+ type: z.ZodLiteral<"text">;
2685
+ cache_control: z.ZodOptional<z.ZodObject<{
2686
+ type: z.ZodEnum<["ephemeral"]>;
2687
+ }, "strip", z.ZodTypeAny, {
2688
+ type: "ephemeral";
2689
+ }, {
2690
+ type: "ephemeral";
2691
+ }>>;
2692
+ }, "strip", z.ZodTypeAny, {
2693
+ type: "text";
2694
+ text: string;
2695
+ cache_control?: {
2696
+ type: "ephemeral";
2697
+ } | undefined;
2698
+ }, {
2699
+ type: "text";
2700
+ text?: string | undefined;
2701
+ cache_control?: {
2702
+ type: "ephemeral";
2703
+ } | undefined;
2704
+ }>, "many">]>;
2705
+ role: z.ZodLiteral<"developer">;
2706
+ name: z.ZodOptional<z.ZodString>;
2707
+ }, "strip", z.ZodTypeAny, {
2708
+ content: string | {
2709
+ type: "text";
2710
+ text: string;
2711
+ cache_control?: {
2712
+ type: "ephemeral";
2713
+ } | undefined;
2714
+ }[];
2715
+ role: "developer";
2716
+ name?: string | undefined;
2717
+ }, {
2718
+ role: "developer";
2719
+ name?: string | undefined;
2720
+ content?: string | {
2721
+ type: "text";
2722
+ text?: string | undefined;
2723
+ cache_control?: {
2724
+ type: "ephemeral";
2725
+ } | undefined;
2726
+ }[] | undefined;
2615
2727
  }>]>, z.ZodObject<{
2616
2728
  role: z.ZodEnum<["model"]>;
2617
2729
  content: z.ZodOptional<z.ZodNullable<z.ZodString>>;
@@ -2689,6 +2801,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
2689
2801
  name: string;
2690
2802
  content: string | null;
2691
2803
  role: "function";
2804
+ } | {
2805
+ content: string | {
2806
+ type: "text";
2807
+ text: string;
2808
+ cache_control?: {
2809
+ type: "ephemeral";
2810
+ } | undefined;
2811
+ }[];
2812
+ role: "developer";
2813
+ name?: string | undefined;
2692
2814
  } | {
2693
2815
  role: "model";
2694
2816
  content?: string | null | undefined;
@@ -2760,6 +2882,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
2760
2882
  name: string;
2761
2883
  content: string | null;
2762
2884
  role: "function";
2885
+ } | {
2886
+ role: "developer";
2887
+ name?: string | undefined;
2888
+ content?: string | {
2889
+ type: "text";
2890
+ text?: string | undefined;
2891
+ cache_control?: {
2892
+ type: "ephemeral";
2893
+ } | undefined;
2894
+ }[] | undefined;
2763
2895
  } | {
2764
2896
  role: "model";
2765
2897
  content?: string | null | undefined;
@@ -3447,6 +3579,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
3447
3579
  name: string;
3448
3580
  content: string | null;
3449
3581
  role: "function";
3582
+ } | {
3583
+ content: string | {
3584
+ type: "text";
3585
+ text: string;
3586
+ cache_control?: {
3587
+ type: "ephemeral";
3588
+ } | undefined;
3589
+ }[];
3590
+ role: "developer";
3591
+ name?: string | undefined;
3450
3592
  } | {
3451
3593
  role: "model";
3452
3594
  content?: string | null | undefined;
@@ -3646,6 +3788,16 @@ declare const evalParametersSchema: z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodOb
3646
3788
  name: string;
3647
3789
  content: string | null;
3648
3790
  role: "function";
3791
+ } | {
3792
+ role: "developer";
3793
+ name?: string | undefined;
3794
+ content?: string | {
3795
+ type: "text";
3796
+ text?: string | undefined;
3797
+ cache_control?: {
3798
+ type: "ephemeral";
3799
+ } | undefined;
3800
+ }[] | undefined;
3649
3801
  } | {
3650
3802
  role: "model";
3651
3803
  content?: string | null | undefined;
@@ -3830,6 +3982,10 @@ interface EvalHooks<Expected, Metadata extends BaseMetadata, Parameters extends
3830
3982
  * Report progress that will show up in the playground.
3831
3983
  */
3832
3984
  reportProgress: (progress: TaskProgressEvent) => void;
3985
+ /**
3986
+ * The index of the current trial (0-based). This is useful when trialCount > 1.
3987
+ */
3988
+ trialIndex: number;
3833
3989
  }
3834
3990
  type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
3835
3991
  output: Output;
package/dev/dist/index.js CHANGED
@@ -100,7 +100,7 @@ async function getBaseBranchAncestor(remote = void 0) {
100
100
  return void 0;
101
101
  }
102
102
  }
103
- async function getPastNAncestors(n = 10, remote = void 0) {
103
+ async function getPastNAncestors(n = 1e3, remote = void 0) {
104
104
  const git = await currentRepo();
105
105
  if (git === null) {
106
106
  return [];
@@ -117,7 +117,7 @@ async function getPastNAncestors(n = 10, remote = void 0) {
117
117
  if (!ancestor) {
118
118
  return [];
119
119
  }
120
- const commits = await git.log({ from: ancestor, to: "HEAD" });
120
+ const commits = await git.log({ from: ancestor, to: "HEAD", maxCount: n });
121
121
  return commits.all.map((c) => c.hash);
122
122
  }
123
123
  async function attempt(fn) {
@@ -6151,14 +6151,20 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
6151
6151
  } else {
6152
6152
  data = dataResult;
6153
6153
  }
6154
- data = data.filter((d) => filters.every((f) => evaluateFilter(d, f))).flatMap(
6155
- (datum) => [...Array(_nullishCoalesce(evaluator.trialCount, () => ( 1))).keys()].map(() => datum)
6154
+ const dataWithTrials = data.filter((d) => filters.every((f) => evaluateFilter(d, f))).flatMap(
6155
+ (datum) => [...Array(_nullishCoalesce(evaluator.trialCount, () => ( 1))).keys()].map((trialIndex) => ({
6156
+ datum,
6157
+ trialIndex
6158
+ }))
6156
6159
  );
6157
- progressReporter.start(evaluator.evalName, data.length);
6160
+ progressReporter.start(evaluator.evalName, dataWithTrials.length);
6158
6161
  const results = [];
6159
6162
  const q = queue(
6160
6163
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
6161
- async (datum) => {
6164
+ async ({
6165
+ datum,
6166
+ trialIndex
6167
+ }) => {
6162
6168
  const eventDataset = experiment ? experiment.dataset : Dataset.isDataset(evaluator.data) ? evaluator.data : void 0;
6163
6169
  const baseEvent = {
6164
6170
  name: "eval",
@@ -6207,7 +6213,8 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
6207
6213
  name: evaluator.evalName,
6208
6214
  object_type: "task"
6209
6215
  })]);
6210
- }
6216
+ },
6217
+ trialIndex
6211
6218
  });
6212
6219
  if (outputResult instanceof Promise) {
6213
6220
  output = await outputResult;
@@ -6357,9 +6364,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
6357
6364
  return await experiment.traced(callback, baseEvent);
6358
6365
  }
6359
6366
  },
6360
- Math.max(_nullishCoalesce(evaluator.maxConcurrency, () => ( data.length)), 1)
6367
+ Math.max(_nullishCoalesce(evaluator.maxConcurrency, () => ( dataWithTrials.length)), 1)
6361
6368
  );
6362
- q.push(data);
6369
+ q.push(dataWithTrials);
6363
6370
  const cancel = async () => {
6364
6371
  await new Promise((_, reject2) => {
6365
6372
  if (evaluator.timeout) {
@@ -100,7 +100,7 @@ async function getBaseBranchAncestor(remote = void 0) {
100
100
  return void 0;
101
101
  }
102
102
  }
103
- async function getPastNAncestors(n = 10, remote = void 0) {
103
+ async function getPastNAncestors(n = 1e3, remote = void 0) {
104
104
  const git = await currentRepo();
105
105
  if (git === null) {
106
106
  return [];
@@ -117,7 +117,7 @@ async function getPastNAncestors(n = 10, remote = void 0) {
117
117
  if (!ancestor) {
118
118
  return [];
119
119
  }
120
- const commits = await git.log({ from: ancestor, to: "HEAD" });
120
+ const commits = await git.log({ from: ancestor, to: "HEAD", maxCount: n });
121
121
  return commits.all.map((c) => c.hash);
122
122
  }
123
123
  async function attempt(fn) {
@@ -6151,14 +6151,20 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
6151
6151
  } else {
6152
6152
  data = dataResult;
6153
6153
  }
6154
- data = data.filter((d) => filters.every((f) => evaluateFilter(d, f))).flatMap(
6155
- (datum) => [...Array(evaluator.trialCount ?? 1).keys()].map(() => datum)
6154
+ const dataWithTrials = data.filter((d) => filters.every((f) => evaluateFilter(d, f))).flatMap(
6155
+ (datum) => [...Array(evaluator.trialCount ?? 1).keys()].map((trialIndex) => ({
6156
+ datum,
6157
+ trialIndex
6158
+ }))
6156
6159
  );
6157
- progressReporter.start(evaluator.evalName, data.length);
6160
+ progressReporter.start(evaluator.evalName, dataWithTrials.length);
6158
6161
  const results = [];
6159
6162
  const q = queue(
6160
6163
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
6161
- async (datum) => {
6164
+ async ({
6165
+ datum,
6166
+ trialIndex
6167
+ }) => {
6162
6168
  const eventDataset = experiment ? experiment.dataset : Dataset.isDataset(evaluator.data) ? evaluator.data : void 0;
6163
6169
  const baseEvent = {
6164
6170
  name: "eval",
@@ -6207,7 +6213,8 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
6207
6213
  name: evaluator.evalName,
6208
6214
  object_type: "task"
6209
6215
  });
6210
- }
6216
+ },
6217
+ trialIndex
6211
6218
  });
6212
6219
  if (outputResult instanceof Promise) {
6213
6220
  output = await outputResult;
@@ -6357,9 +6364,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
6357
6364
  return await experiment.traced(callback, baseEvent);
6358
6365
  }
6359
6366
  },
6360
- Math.max(evaluator.maxConcurrency ?? data.length, 1)
6367
+ Math.max(evaluator.maxConcurrency ?? dataWithTrials.length, 1)
6361
6368
  );
6362
- q.push(data);
6369
+ q.push(dataWithTrials);
6363
6370
  const cancel = async () => {
6364
6371
  await new Promise((_, reject2) => {
6365
6372
  if (evaluator.timeout) {