braintrust 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.mjs CHANGED
@@ -110,11 +110,19 @@ function getIdGenerator() {
110
110
 
111
111
  // util/db_fields.ts
112
112
  var TRANSACTION_ID_FIELD = "_xact_id";
113
+ var OBJECT_DELETE_FIELD = "_object_delete";
113
114
  var IS_MERGE_FIELD = "_is_merge";
114
115
  var AUDIT_SOURCE_FIELD = "_audit_source";
115
116
  var AUDIT_METADATA_FIELD = "_audit_metadata";
116
117
  var VALID_SOURCES = ["app", "api", "external"];
117
- var PARENT_ID_FIELD = "_parent_id";
118
+ var OBJECT_ID_KEYS = [
119
+ "experiment_id",
120
+ "dataset_id",
121
+ "prompt_session_id",
122
+ "project_id",
123
+ "log_id",
124
+ "function_data"
125
+ ];
118
126
 
119
127
  // util/span_identifier_v3.ts
120
128
  import * as uuid3 from "uuid";
@@ -785,13 +793,6 @@ function mergeDictsWithPathsHelper({
785
793
  function mergeDicts(mergeInto, mergeFrom) {
786
794
  return mergeDictsWithPaths({ mergeInto, mergeFrom, mergePaths: [] });
787
795
  }
788
- function mapAt(m, k) {
789
- const ret = m.get(k);
790
- if (ret === void 0) {
791
- throw new Error(`Map does not contain key ${k}`);
792
- }
793
- return ret;
794
- }
795
796
  function recordFind(m, k) {
796
797
  return m[k];
797
798
  }
@@ -806,72 +807,8 @@ function getObjValueByPath(row, path) {
806
807
  return curr;
807
808
  }
808
809
 
809
- // util/graph_util.ts
810
- function depthFirstSearch(args) {
811
- const { graph, firstVisitF, lastVisitF } = args;
812
- for (const vs of graph.values()) {
813
- for (const v of vs.values()) {
814
- if (!graph.has(v)) {
815
- throw new Error(`Outgoing vertex ${v} must be a key in the graph`);
816
- }
817
- }
818
- }
819
- const firstVisitedVertices = /* @__PURE__ */ new Set();
820
- const visitationOrder = args.visitationOrder ?? [...graph.keys()];
821
- const events = visitationOrder.map((vertex) => ({ eventType: "first", vertex, extras: {} })).reverse();
822
- while (events.length) {
823
- const { eventType, vertex, extras } = events.pop();
824
- if (eventType === "last") {
825
- lastVisitF?.(vertex);
826
- continue;
827
- }
828
- if (firstVisitedVertices.has(vertex)) {
829
- continue;
830
- }
831
- firstVisitedVertices.add(vertex);
832
- firstVisitF?.(vertex, { parentVertex: extras.parentVertex });
833
- events.push({ eventType: "last", vertex, extras: {} });
834
- mapAt(graph, vertex).forEach((child) => {
835
- events.push({
836
- eventType: "first",
837
- vertex: child,
838
- extras: { parentVertex: vertex }
839
- });
840
- });
841
- }
842
- }
843
- function undirectedConnectedComponents(graph) {
844
- const directedGraph = new Map(
845
- [...graph.vertices].map((v) => [v, /* @__PURE__ */ new Set()])
846
- );
847
- for (const [i, j] of graph.edges) {
848
- mapAt(directedGraph, i).add(j);
849
- mapAt(directedGraph, j).add(i);
850
- }
851
- let labelCounter = 0;
852
- const vertexLabels = /* @__PURE__ */ new Map();
853
- const firstVisitF = (vertex, args) => {
854
- const label = args?.parentVertex !== void 0 ? mapAt(vertexLabels, args?.parentVertex) : labelCounter++;
855
- vertexLabels.set(vertex, label);
856
- };
857
- depthFirstSearch({ graph: directedGraph, firstVisitF });
858
- const output = Array.from({ length: labelCounter }).map(() => []);
859
- for (const [vertex, label] of vertexLabels.entries()) {
860
- output[label].push(vertex);
861
- }
862
- return output;
863
- }
864
- function topologicalSort(graph, visitationOrder) {
865
- const reverseOrdering = [];
866
- const lastVisitF = (vertex) => {
867
- reverseOrdering.push(vertex);
868
- };
869
- depthFirstSearch({ graph, lastVisitF, visitationOrder });
870
- return reverseOrdering.reverse();
871
- }
872
-
873
810
  // util/merge_row_batch.ts
874
- function generateMergedRowKey(row, useParentIdForId) {
811
+ function generateMergedRowKey(row) {
875
812
  return JSON.stringify(
876
813
  [
877
814
  "org_id",
@@ -880,7 +817,7 @@ function generateMergedRowKey(row, useParentIdForId) {
880
817
  "dataset_id",
881
818
  "prompt_session_id",
882
819
  "log_id",
883
- useParentIdForId ?? false ? PARENT_ID_FIELD : "id"
820
+ "id"
884
821
  ].map((k) => row[k])
885
822
  );
886
823
  }
@@ -934,96 +871,34 @@ function mergeRowBatch(rows) {
934
871
  rowGroups.set(key, row);
935
872
  }
936
873
  }
937
- const merged = [...rowGroups.values()];
938
- const rowToLabel = new Map(
939
- merged.map((r, i) => [generateMergedRowKey(r), i])
940
- );
941
- const graph = new Map(
942
- Array.from({ length: merged.length }).map((_, i) => [i, /* @__PURE__ */ new Set()])
943
- );
944
- merged.forEach((r, i) => {
945
- const parentId = r[PARENT_ID_FIELD];
946
- if (!parentId) {
947
- return;
948
- }
949
- const parentRowKey = generateMergedRowKey(
950
- r,
951
- true
952
- /* useParentIdForId */
953
- );
954
- const parentLabel = rowToLabel.get(parentRowKey);
955
- if (parentLabel !== void 0) {
956
- mapAt(graph, parentLabel).add(i);
957
- }
958
- });
959
- const connectedComponents = undirectedConnectedComponents({
960
- vertices: new Set(graph.keys()),
961
- edges: new Set(
962
- [...graph.entries()].flatMap(
963
- ([k, vs]) => [...vs].map((v) => {
964
- const ret = [k, v];
965
- return ret;
966
- })
967
- )
968
- )
969
- });
970
- const buckets = connectedComponents.map(
971
- (cc) => topologicalSort(
972
- graph,
973
- cc
974
- /* visitationOrder */
975
- )
976
- );
977
- return buckets.map((bucket) => bucket.map((i) => merged[i]));
874
+ return [...rowGroups.values()];
978
875
  }
979
876
  function batchItems(args) {
980
- let { items } = args;
877
+ const { items } = args;
981
878
  const batchMaxNumItems = args.batchMaxNumItems ?? Number.POSITIVE_INFINITY;
982
879
  const batchMaxNumBytes = args.batchMaxNumBytes ?? Number.POSITIVE_INFINITY;
880
+ const getByteSize = args.getByteSize;
983
881
  const output = [];
984
- let nextItems = [];
985
- let batchSet = [];
986
882
  let batch = [];
987
883
  let batchLen = 0;
988
884
  function addToBatch(item) {
989
885
  batch.push(item);
990
- batchLen += item.length;
886
+ batchLen += getByteSize(item);
991
887
  }
992
888
  function flushBatch() {
993
- batchSet.push(batch);
889
+ output.push(batch);
994
890
  batch = [];
995
891
  batchLen = 0;
996
892
  }
997
- while (items.length) {
998
- for (const bucket of items) {
999
- let i = 0;
1000
- for (const item of bucket) {
1001
- if (batch.length === 0 || item.length + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems) {
1002
- addToBatch(item);
1003
- } else if (i === 0) {
1004
- flushBatch();
1005
- addToBatch(item);
1006
- } else {
1007
- break;
1008
- }
1009
- ++i;
1010
- }
1011
- if (i < bucket.length) {
1012
- nextItems.push(bucket.slice(i));
1013
- }
1014
- if (batchLen >= batchMaxNumBytes || batch.length > batchMaxNumItems) {
1015
- flushBatch();
1016
- }
1017
- }
1018
- if (batch.length) {
893
+ for (const item of items) {
894
+ const itemSize = getByteSize(item);
895
+ if (batch.length > 0 && !(itemSize + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems)) {
1019
896
  flushBatch();
1020
897
  }
1021
- if (batchSet.length) {
1022
- output.push(batchSet);
1023
- batchSet = [];
1024
- }
1025
- items = nextItems;
1026
- nextItems = [];
898
+ addToBatch(item);
899
+ }
900
+ if (batch.length > 0) {
901
+ flushBatch();
1027
902
  }
1028
903
  return output;
1029
904
  }
@@ -1610,10 +1485,15 @@ var FunctionTypeEnum = z6.enum([
1610
1485
  "preprocessor",
1611
1486
  "facet",
1612
1487
  "classifier",
1613
- "tag"
1488
+ "tag",
1489
+ "parameters"
1614
1490
  ]);
1615
1491
  var NullableSavedFunctionId = z6.union([
1616
- z6.object({ type: z6.literal("function"), id: z6.string() }),
1492
+ z6.object({
1493
+ type: z6.literal("function"),
1494
+ id: z6.string(),
1495
+ version: z6.string().optional()
1496
+ }),
1617
1497
  z6.object({
1618
1498
  type: z6.literal("global"),
1619
1499
  name: z6.string(),
@@ -1621,6 +1501,67 @@ var NullableSavedFunctionId = z6.union([
1621
1501
  }),
1622
1502
  z6.null()
1623
1503
  ]);
1504
+ var TopicMapReport = z6.object({
1505
+ version: z6.literal(1),
1506
+ created_at: z6.string().optional(),
1507
+ settings: z6.object({
1508
+ algorithm: z6.enum(["hdbscan", "kmeans", "hierarchical"]),
1509
+ dimension_reduction: z6.enum(["umap", "pca", "none"]),
1510
+ vector_field: z6.string(),
1511
+ embedding_model: z6.string(),
1512
+ n_clusters: z6.union([z6.number(), z6.null()]).optional(),
1513
+ umap_dimensions: z6.union([z6.number(), z6.null()]).optional(),
1514
+ min_cluster_size: z6.union([z6.number(), z6.null()]).optional(),
1515
+ min_samples: z6.union([z6.number(), z6.null()]).optional()
1516
+ }),
1517
+ query_settings: z6.object({
1518
+ hierarchy_threshold: z6.union([z6.number(), z6.null()]),
1519
+ auto_naming: z6.boolean(),
1520
+ skip_cache: z6.boolean(),
1521
+ viz_mode: z6.enum(["bar", "scatter"]),
1522
+ naming_model: z6.string()
1523
+ }).partial(),
1524
+ clusters: z6.array(
1525
+ z6.object({
1526
+ cluster_id: z6.number(),
1527
+ parent_cluster_id: z6.union([z6.number(), z6.null()]).optional(),
1528
+ topic_id: z6.string(),
1529
+ count: z6.number(),
1530
+ sample_texts: z6.array(z6.string()),
1531
+ samples: z6.array(
1532
+ z6.object({
1533
+ id: z6.string(),
1534
+ text: z6.string(),
1535
+ root_span_id: z6.string(),
1536
+ span_id: z6.string()
1537
+ })
1538
+ ),
1539
+ name: z6.string().optional(),
1540
+ description: z6.string().optional(),
1541
+ keywords: z6.array(z6.string()).optional(),
1542
+ centroid: z6.array(z6.number()).optional(),
1543
+ parent_id: z6.union([z6.number(), z6.null()]).optional(),
1544
+ is_leaf: z6.boolean().optional(),
1545
+ depth: z6.number().optional()
1546
+ })
1547
+ ),
1548
+ embedding_points: z6.array(
1549
+ z6.object({
1550
+ x: z6.number(),
1551
+ y: z6.number(),
1552
+ cluster: z6.number(),
1553
+ text: z6.string().optional()
1554
+ })
1555
+ ).optional()
1556
+ });
1557
+ var TopicMapData = z6.object({
1558
+ type: z6.literal("topic_map"),
1559
+ source_facet: z6.string(),
1560
+ embedding_model: z6.string(),
1561
+ bundle_key: z6.string(),
1562
+ distance_threshold: z6.number().optional(),
1563
+ report: TopicMapReport.optional()
1564
+ });
1624
1565
  var BatchedFacetData = z6.object({
1625
1566
  type: z6.literal("batched_facet"),
1626
1567
  preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
@@ -1629,9 +1570,17 @@ var BatchedFacetData = z6.object({
1629
1570
  name: z6.string(),
1630
1571
  prompt: z6.string(),
1631
1572
  model: z6.string().optional(),
1573
+ embedding_model: z6.string().optional(),
1632
1574
  no_match_pattern: z6.string().optional()
1633
1575
  })
1634
- )
1576
+ ),
1577
+ topic_maps: z6.record(
1578
+ z6.object({
1579
+ function_name: z6.string(),
1580
+ topic_map_id: z6.string().optional(),
1581
+ topic_map_data: TopicMapData
1582
+ })
1583
+ ).optional()
1635
1584
  });
1636
1585
  var BraintrustModelParams = z6.object({
1637
1586
  use_cache: z6.boolean(),
@@ -1842,6 +1791,18 @@ var ObjectReferenceNullish = z6.union([
1842
1791
  }),
1843
1792
  z6.null()
1844
1793
  ]);
1794
+ var SavedFunctionId = z6.union([
1795
+ z6.object({
1796
+ type: z6.literal("function"),
1797
+ id: z6.string(),
1798
+ version: z6.string().optional()
1799
+ }),
1800
+ z6.object({
1801
+ type: z6.literal("global"),
1802
+ name: z6.string(),
1803
+ function_type: FunctionTypeEnum.optional().default("scorer")
1804
+ })
1805
+ ]);
1845
1806
  var DatasetEvent = z6.object({
1846
1807
  id: z6.string(),
1847
1808
  _xact_id: z6.string(),
@@ -1861,7 +1822,36 @@ var DatasetEvent = z6.object({
1861
1822
  is_root: z6.union([z6.boolean(), z6.null()]).optional(),
1862
1823
  origin: ObjectReferenceNullish.optional(),
1863
1824
  comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
1864
- audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
1825
+ audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
1826
+ facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
1827
+ classifications: z6.union([
1828
+ z6.record(
1829
+ z6.array(
1830
+ z6.object({
1831
+ id: z6.string(),
1832
+ label: z6.string().optional(),
1833
+ confidence: z6.union([z6.number(), z6.null()]).optional(),
1834
+ metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
1835
+ source: SavedFunctionId.and(
1836
+ z6.union([
1837
+ z6.object({
1838
+ type: z6.literal("function"),
1839
+ id: z6.string(),
1840
+ version: z6.string().optional()
1841
+ }),
1842
+ z6.object({
1843
+ type: z6.literal("global"),
1844
+ name: z6.string(),
1845
+ function_type: FunctionTypeEnum.optional().default("scorer")
1846
+ }),
1847
+ z6.null()
1848
+ ])
1849
+ ).optional()
1850
+ })
1851
+ )
1852
+ ),
1853
+ z6.null()
1854
+ ]).optional()
1865
1855
  });
1866
1856
  var EnvVar = z6.object({
1867
1857
  id: z6.string().uuid(),
@@ -1938,7 +1928,8 @@ var SpanType = z6.union([
1938
1928
  "automation",
1939
1929
  "facet",
1940
1930
  "preprocessor",
1941
- "classifier"
1931
+ "classifier",
1932
+ "review"
1942
1933
  ]),
1943
1934
  z6.null()
1944
1935
  ]);
@@ -1979,10 +1970,43 @@ var ExperimentEvent = z6.object({
1979
1970
  is_root: z6.union([z6.boolean(), z6.null()]).optional(),
1980
1971
  origin: ObjectReferenceNullish.optional(),
1981
1972
  comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
1982
- audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
1973
+ audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
1974
+ facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
1975
+ classifications: z6.union([
1976
+ z6.record(
1977
+ z6.array(
1978
+ z6.object({
1979
+ id: z6.string(),
1980
+ label: z6.string().optional(),
1981
+ confidence: z6.union([z6.number(), z6.null()]).optional(),
1982
+ metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
1983
+ source: SavedFunctionId.and(
1984
+ z6.union([
1985
+ z6.object({
1986
+ type: z6.literal("function"),
1987
+ id: z6.string(),
1988
+ version: z6.string().optional()
1989
+ }),
1990
+ z6.object({
1991
+ type: z6.literal("global"),
1992
+ name: z6.string(),
1993
+ function_type: FunctionTypeEnum.optional().default("scorer")
1994
+ }),
1995
+ z6.null()
1996
+ ])
1997
+ ).optional()
1998
+ })
1999
+ )
2000
+ ),
2001
+ z6.null()
2002
+ ]).optional()
1983
2003
  });
1984
2004
  var ExtendedSavedFunctionId = z6.union([
1985
- z6.object({ type: z6.literal("function"), id: z6.string() }),
2005
+ z6.object({
2006
+ type: z6.literal("function"),
2007
+ id: z6.string(),
2008
+ version: z6.string().optional()
2009
+ }),
1986
2010
  z6.object({
1987
2011
  type: z6.literal("global"),
1988
2012
  name: z6.string(),
@@ -1999,6 +2023,7 @@ var FacetData = z6.object({
1999
2023
  preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
2000
2024
  prompt: z6.string(),
2001
2025
  model: z6.string().optional(),
2026
+ embedding_model: z6.string().optional(),
2002
2027
  no_match_pattern: z6.string().optional()
2003
2028
  });
2004
2029
  var PromptBlockDataNullish = z6.union([
@@ -2088,14 +2113,6 @@ var PromptParserNullish = z6.union([
2088
2113
  }),
2089
2114
  z6.null()
2090
2115
  ]);
2091
- var SavedFunctionId = z6.union([
2092
- z6.object({ type: z6.literal("function"), id: z6.string() }),
2093
- z6.object({
2094
- type: z6.literal("global"),
2095
- name: z6.string(),
2096
- function_type: FunctionTypeEnum.optional().default("scorer")
2097
- })
2098
- ]);
2099
2116
  var PromptDataNullish = z6.union([
2100
2117
  z6.object({
2101
2118
  prompt: PromptBlockDataNullish,
@@ -2146,7 +2163,8 @@ var FunctionTypeEnumNullish = z6.union([
2146
2163
  "preprocessor",
2147
2164
  "facet",
2148
2165
  "classifier",
2149
- "tag"
2166
+ "tag",
2167
+ "parameters"
2150
2168
  ]),
2151
2169
  z6.null()
2152
2170
  ]);
@@ -2238,7 +2256,8 @@ var FunctionData = z6.union([
2238
2256
  type: z6.literal("remote_eval"),
2239
2257
  endpoint: z6.string(),
2240
2258
  eval_name: z6.string(),
2241
- parameters: z6.object({}).partial().passthrough()
2259
+ parameters: z6.object({}).partial().passthrough(),
2260
+ parameters_version: z6.union([z6.string(), z6.null()]).optional()
2242
2261
  }),
2243
2262
  z6.object({
2244
2263
  type: z6.literal("global"),
@@ -2247,7 +2266,18 @@ var FunctionData = z6.union([
2247
2266
  config: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional()
2248
2267
  }),
2249
2268
  FacetData,
2250
- BatchedFacetData
2269
+ BatchedFacetData,
2270
+ z6.object({
2271
+ type: z6.literal("parameters"),
2272
+ data: z6.object({}).partial().passthrough(),
2273
+ __schema: z6.object({
2274
+ type: z6.literal("object"),
2275
+ properties: z6.record(z6.object({}).partial().passthrough()),
2276
+ required: z6.array(z6.string()).optional(),
2277
+ additionalProperties: z6.boolean().optional()
2278
+ })
2279
+ }),
2280
+ TopicMapData.and(z6.unknown())
2251
2281
  ]);
2252
2282
  var Function = z6.object({
2253
2283
  id: z6.string().uuid(),
@@ -2277,7 +2307,13 @@ var Function = z6.object({
2277
2307
  z6.null()
2278
2308
  ]).optional()
2279
2309
  });
2280
- var FunctionFormat = z6.enum(["llm", "code", "global", "graph"]);
2310
+ var FunctionFormat = z6.enum([
2311
+ "llm",
2312
+ "code",
2313
+ "global",
2314
+ "graph",
2315
+ "topic_map"
2316
+ ]);
2281
2317
  var PromptData = z6.object({
2282
2318
  prompt: PromptBlockDataNullish,
2283
2319
  options: PromptOptionsNullish,
@@ -2360,13 +2396,14 @@ var FunctionObjectType = z6.enum([
2360
2396
  "custom_view",
2361
2397
  "preprocessor",
2362
2398
  "facet",
2363
- "classifier"
2399
+ "classifier",
2400
+ "parameters"
2364
2401
  ]);
2365
2402
  var FunctionOutputType = z6.enum([
2366
2403
  "completion",
2367
2404
  "score",
2368
2405
  "facet",
2369
- "tag",
2406
+ "classification",
2370
2407
  "any"
2371
2408
  ]);
2372
2409
  var GitMetadataSettings = z6.object({
@@ -2402,6 +2439,10 @@ var GroupScope = z6.object({
2402
2439
  idle_seconds: z6.number().optional()
2403
2440
  });
2404
2441
  var IfExists = z6.enum(["error", "ignore", "replace"]);
2442
+ var ImageRenderingMode = z6.union([
2443
+ z6.enum(["auto", "click_to_load", "blocked"]),
2444
+ z6.null()
2445
+ ]);
2405
2446
  var InvokeParent = z6.union([
2406
2447
  z6.object({
2407
2448
  object_type: z6.enum(["project_logs", "experiment", "playground_logs"]),
@@ -2494,7 +2535,8 @@ var Organization = z6.object({
2494
2535
  is_universal_api: z6.union([z6.boolean(), z6.null()]).optional(),
2495
2536
  proxy_url: z6.union([z6.string(), z6.null()]).optional(),
2496
2537
  realtime_url: z6.union([z6.string(), z6.null()]).optional(),
2497
- created: z6.union([z6.string(), z6.null()]).optional()
2538
+ created: z6.union([z6.string(), z6.null()]).optional(),
2539
+ image_rendering_mode: ImageRenderingMode.optional()
2498
2540
  });
2499
2541
  var ProjectSettings = z6.union([
2500
2542
  z6.object({
@@ -2635,7 +2677,36 @@ var ProjectLogsEvent = z6.object({
2635
2677
  origin: ObjectReferenceNullish.optional(),
2636
2678
  comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2637
2679
  audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2638
- _async_scoring_state: z6.unknown().optional()
2680
+ _async_scoring_state: z6.unknown().optional(),
2681
+ facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2682
+ classifications: z6.union([
2683
+ z6.record(
2684
+ z6.array(
2685
+ z6.object({
2686
+ id: z6.string(),
2687
+ label: z6.string().optional(),
2688
+ confidence: z6.union([z6.number(), z6.null()]).optional(),
2689
+ metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2690
+ source: SavedFunctionId.and(
2691
+ z6.union([
2692
+ z6.object({
2693
+ type: z6.literal("function"),
2694
+ id: z6.string(),
2695
+ version: z6.string().optional()
2696
+ }),
2697
+ z6.object({
2698
+ type: z6.literal("global"),
2699
+ name: z6.string(),
2700
+ function_type: FunctionTypeEnum.optional().default("scorer")
2701
+ }),
2702
+ z6.null()
2703
+ ])
2704
+ ).optional()
2705
+ })
2706
+ )
2707
+ ),
2708
+ z6.null()
2709
+ ]).optional()
2639
2710
  });
2640
2711
  var ProjectScoreType = z6.enum([
2641
2712
  "slider",
@@ -2937,12 +3008,15 @@ var View = z6.object({
2937
3008
  "datasets",
2938
3009
  "dataset",
2939
3010
  "prompts",
3011
+ "parameters",
2940
3012
  "tools",
2941
3013
  "scorers",
2942
3014
  "classifiers",
2943
3015
  "logs",
2944
3016
  "monitor",
2945
- "for_review"
3017
+ "for_review_project_log",
3018
+ "for_review_experiments",
3019
+ "for_review_datasets"
2946
3020
  ]),
2947
3021
  name: z6.string(),
2948
3022
  created: z6.union([z6.string(), z6.null()]).optional(),
@@ -3687,6 +3761,52 @@ var PromptCache = class {
3687
3761
  }
3688
3762
  };
3689
3763
 
3764
+ // src/prompt-cache/parameters-cache.ts
3765
+ function createCacheKey2(key) {
3766
+ if (key.id) {
3767
+ return `parameters:id:${key.id}`;
3768
+ }
3769
+ const prefix = key.projectId ?? key.projectName;
3770
+ if (!prefix) {
3771
+ throw new Error("Either projectId or projectName must be provided");
3772
+ }
3773
+ if (!key.slug) {
3774
+ throw new Error("Slug must be provided when not using ID");
3775
+ }
3776
+ return `parameters:${prefix}:${key.slug}:${key.version ?? "latest"}`;
3777
+ }
3778
+ var ParametersCache = class {
3779
+ memoryCache;
3780
+ diskCache;
3781
+ constructor(options) {
3782
+ this.memoryCache = options.memoryCache;
3783
+ this.diskCache = options.diskCache;
3784
+ }
3785
+ async get(key) {
3786
+ const cacheKey = createCacheKey2(key);
3787
+ const memoryParams = this.memoryCache.get(cacheKey);
3788
+ if (memoryParams !== void 0) {
3789
+ return memoryParams;
3790
+ }
3791
+ if (this.diskCache) {
3792
+ const diskParams = await this.diskCache.get(cacheKey);
3793
+ if (!diskParams) {
3794
+ return void 0;
3795
+ }
3796
+ this.memoryCache.set(cacheKey, diskParams);
3797
+ return diskParams;
3798
+ }
3799
+ return void 0;
3800
+ }
3801
+ async set(key, value) {
3802
+ const cacheKey = createCacheKey2(key);
3803
+ this.memoryCache.set(cacheKey, value);
3804
+ if (this.diskCache) {
3805
+ await this.diskCache.set(cacheKey, value);
3806
+ }
3807
+ }
3808
+ };
3809
+
3690
3810
  // src/span-cache.ts
3691
3811
  var activeCaches = /* @__PURE__ */ new Set();
3692
3812
  var exitHandlersRegistered = false;
@@ -3977,7 +4097,24 @@ var SpanCache = class {
3977
4097
  // src/logger.ts
3978
4098
  var BRAINTRUST_ATTACHMENT = BraintrustAttachmentReference.shape.type.value;
3979
4099
  var EXTERNAL_ATTACHMENT = ExternalAttachmentReference.shape.type.value;
4100
+ var LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow";
3980
4101
  var BRAINTRUST_PARAMS = Object.keys(BraintrustModelParams.shape);
4102
+ var DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024;
4103
+ var parametersRowSchema = z8.object({
4104
+ id: z8.string().uuid(),
4105
+ _xact_id: z8.string(),
4106
+ project_id: z8.string().uuid(),
4107
+ name: z8.string(),
4108
+ slug: z8.string(),
4109
+ description: z8.union([z8.string(), z8.null()]).optional(),
4110
+ function_type: z8.literal("parameters"),
4111
+ function_data: z8.object({
4112
+ type: z8.literal("parameters"),
4113
+ data: z8.record(z8.unknown()).optional(),
4114
+ __schema: z8.record(z8.unknown())
4115
+ }),
4116
+ metadata: z8.union([z8.object({}).partial().passthrough(), z8.null()]).optional()
4117
+ });
3981
4118
  var LoginInvalidOrgError = class extends Error {
3982
4119
  constructor(message) {
3983
4120
  super(message);
@@ -4154,6 +4291,17 @@ var BraintrustState = class _BraintrustState {
4154
4291
  max: Number(isomorph_default.getEnv("BRAINTRUST_PROMPT_CACHE_DISK_MAX")) ?? 1 << 20
4155
4292
  }) : void 0;
4156
4293
  this.promptCache = new PromptCache({ memoryCache, diskCache });
4294
+ const parametersMemoryCache = new LRUCache({
4295
+ max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_MEMORY_MAX")) ?? 1 << 10
4296
+ });
4297
+ const parametersDiskCache = canUseDiskCache() ? new DiskCache({
4298
+ cacheDir: isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DIR") ?? `${isomorph_default.getEnv("HOME") ?? isomorph_default.homedir()}/.braintrust/parameters_cache`,
4299
+ max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DISK_MAX")) ?? 1 << 20
4300
+ }) : void 0;
4301
+ this.parametersCache = new ParametersCache({
4302
+ memoryCache: parametersMemoryCache,
4303
+ diskCache: parametersDiskCache
4304
+ });
4157
4305
  this.spanCache = new SpanCache({ disabled: loginParams.disableSpanCache });
4158
4306
  }
4159
4307
  id;
@@ -4183,6 +4331,7 @@ var BraintrustState = class _BraintrustState {
4183
4331
  _apiConn = null;
4184
4332
  _proxyConn = null;
4185
4333
  promptCache;
4334
+ parametersCache;
4186
4335
  spanCache;
4187
4336
  _idGenerator = null;
4188
4337
  _contextManager = null;
@@ -5437,8 +5586,100 @@ function castLogger(logger, asyncFlush) {
5437
5586
  }
5438
5587
  return logger;
5439
5588
  }
5589
+ var logs3OverflowUploadSchema = z8.object({
5590
+ method: z8.enum(["PUT", "POST"]),
5591
+ signedUrl: z8.string().url(),
5592
+ headers: z8.record(z8.string()).optional(),
5593
+ fields: z8.record(z8.string()).optional(),
5594
+ key: z8.string().min(1)
5595
+ });
5440
5596
  function constructLogs3Data(items) {
5441
- return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
5597
+ return `{"rows": ${constructJsonArray(items.map((i) => i.str))}, "api_version": 2}`;
5598
+ }
5599
+ function constructLogs3OverflowRequest(key) {
5600
+ return {
5601
+ rows: {
5602
+ type: LOGS3_OVERFLOW_REFERENCE_TYPE,
5603
+ key
5604
+ },
5605
+ api_version: 2
5606
+ };
5607
+ }
5608
+ function pickLogs3OverflowObjectIds(row) {
5609
+ const objectIds = {};
5610
+ for (const key of OBJECT_ID_KEYS) {
5611
+ if (key in row) {
5612
+ objectIds[key] = row[key];
5613
+ }
5614
+ }
5615
+ return objectIds;
5616
+ }
5617
+ async function uploadLogs3OverflowPayload(upload, payload, fetchFn = fetch) {
5618
+ if (upload.method === "POST") {
5619
+ if (!upload.fields) {
5620
+ throw new Error("Missing logs3 overflow upload fields");
5621
+ }
5622
+ if (typeof FormData === "undefined" || typeof Blob === "undefined") {
5623
+ throw new Error("FormData is not available for logs3 overflow upload");
5624
+ }
5625
+ const form = new FormData();
5626
+ for (const [key, value] of Object.entries(upload.fields)) {
5627
+ form.append(key, value);
5628
+ }
5629
+ const contentType = upload.fields["Content-Type"] ?? "application/json";
5630
+ form.append("file", new Blob([payload], { type: contentType }));
5631
+ const headers2 = {};
5632
+ for (const [key, value] of Object.entries(upload.headers ?? {})) {
5633
+ if (key.toLowerCase() !== "content-type") {
5634
+ headers2[key] = value;
5635
+ }
5636
+ }
5637
+ const response2 = await fetchFn(upload.signedUrl, {
5638
+ method: "POST",
5639
+ headers: headers2,
5640
+ body: form
5641
+ });
5642
+ if (!response2.ok) {
5643
+ const responseText = await response2.text().catch(() => "");
5644
+ throw new Error(
5645
+ `Failed to upload logs3 overflow payload: ${response2.status} ${responseText}`
5646
+ );
5647
+ }
5648
+ return;
5649
+ }
5650
+ const headers = { ...upload.headers ?? {} };
5651
+ addAzureBlobHeaders(headers, upload.signedUrl);
5652
+ const response = await fetchFn(upload.signedUrl, {
5653
+ method: "PUT",
5654
+ headers,
5655
+ body: payload
5656
+ });
5657
+ if (!response.ok) {
5658
+ const responseText = await response.text().catch(() => "");
5659
+ throw new Error(
5660
+ `Failed to upload logs3 overflow payload: ${response.status} ${responseText}`
5661
+ );
5662
+ }
5663
+ }
5664
+ function stringifyWithOverflowMeta(item) {
5665
+ const str = JSON.stringify(item);
5666
+ const record = item;
5667
+ return {
5668
+ str,
5669
+ overflowMeta: {
5670
+ object_ids: pickLogs3OverflowObjectIds(record),
5671
+ is_delete: record[OBJECT_DELETE_FIELD] === true,
5672
+ input_row: {
5673
+ byte_size: utf8ByteLength(str)
5674
+ }
5675
+ }
5676
+ };
5677
+ }
5678
+ function utf8ByteLength(value) {
5679
+ if (typeof TextEncoder !== "undefined") {
5680
+ return new TextEncoder().encode(value).length;
5681
+ }
5682
+ return value.length;
5442
5683
  }
5443
5684
  function now() {
5444
5685
  return (/* @__PURE__ */ new Date()).getTime();
@@ -5464,10 +5705,9 @@ var TestBackgroundLogger = class {
5464
5705
  events.push(await event.get());
5465
5706
  }
5466
5707
  }
5467
- const batch = mergeRowBatch(events);
5468
- let flatBatch = batch.flat();
5708
+ let batch = mergeRowBatch(events);
5469
5709
  if (this.maskingFunction) {
5470
- flatBatch = flatBatch.map((item) => {
5710
+ batch = batch.map((item) => {
5471
5711
  const maskedItem = { ...item };
5472
5712
  for (const field of REDACTION_FIELDS) {
5473
5713
  if (item[field] !== void 0) {
@@ -5492,7 +5732,7 @@ var TestBackgroundLogger = class {
5492
5732
  return maskedItem;
5493
5733
  });
5494
5734
  }
5495
- return flatBatch;
5735
+ return batch;
5496
5736
  }
5497
5737
  };
5498
5738
  var BACKGROUND_LOGGER_BASE_SLEEP_TIME_S = 1;
@@ -5505,8 +5745,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5505
5745
  onFlushError;
5506
5746
  maskingFunction = null;
5507
5747
  syncFlush = false;
5508
- // 6 MB for the AWS lambda gateway (from our own testing).
5509
- maxRequestSize = 6 * 1024 * 1024;
5748
+ maxRequestSizeOverride = null;
5749
+ _maxRequestSizePromise = null;
5510
5750
  defaultBatchSize = 100;
5511
5751
  numTries = 3;
5512
5752
  queueDropExceedingMaxsize = DEFAULT_QUEUE_SIZE;
@@ -5534,7 +5774,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5534
5774
  }
5535
5775
  const maxRequestSizeEnv = Number(isomorph_default.getEnv("BRAINTRUST_MAX_REQUEST_SIZE"));
5536
5776
  if (!isNaN(maxRequestSizeEnv)) {
5537
- this.maxRequestSize = maxRequestSizeEnv;
5777
+ this.maxRequestSizeOverride = maxRequestSizeEnv;
5538
5778
  }
5539
5779
  const numTriesEnv = Number(isomorph_default.getEnv("BRAINTRUST_NUM_RETRIES"));
5540
5780
  if (!isNaN(numTriesEnv)) {
@@ -5596,6 +5836,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5596
5836
  }
5597
5837
  }
5598
5838
  }
5839
+ getMaxRequestSize() {
5840
+ if (!this._maxRequestSizePromise) {
5841
+ this._maxRequestSizePromise = (async () => {
5842
+ let serverLimit = null;
5843
+ try {
5844
+ const conn = await this.apiConn.get();
5845
+ const versionInfo = await conn.get_json("version");
5846
+ serverLimit = z8.object({ logs3_payload_max_bytes: z8.number().nullish() }).parse(versionInfo).logs3_payload_max_bytes ?? null;
5847
+ } catch (e) {
5848
+ console.warn("Failed to fetch version info for payload limit:", e);
5849
+ }
5850
+ const validServerLimit = serverLimit !== null && serverLimit > 0 ? serverLimit : null;
5851
+ const canUseOverflow = validServerLimit !== null;
5852
+ let maxRequestSize = DEFAULT_MAX_REQUEST_SIZE;
5853
+ if (this.maxRequestSizeOverride !== null) {
5854
+ maxRequestSize = validServerLimit !== null ? Math.min(this.maxRequestSizeOverride, validServerLimit) : this.maxRequestSizeOverride;
5855
+ } else if (validServerLimit !== null) {
5856
+ maxRequestSize = validServerLimit;
5857
+ }
5858
+ return { maxRequestSize, canUseOverflow };
5859
+ })();
5860
+ }
5861
+ return this._maxRequestSizePromise;
5862
+ }
5599
5863
  async flush() {
5600
5864
  if (this.syncFlush) {
5601
5865
  this.triggerActiveFlush();
@@ -5639,33 +5903,33 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5639
5903
  if (allItems.length === 0) {
5640
5904
  return;
5641
5905
  }
5642
- const allItemsStr = allItems.map(
5643
- (bucket) => bucket.map((item) => JSON.stringify(item))
5906
+ const allItemsWithMeta = allItems.map(
5907
+ (item) => stringifyWithOverflowMeta(item)
5644
5908
  );
5645
- const batchSets = batchItems({
5646
- items: allItemsStr,
5909
+ const maxRequestSizeResult = await this.getMaxRequestSize();
5910
+ const batches = batchItems({
5911
+ items: allItemsWithMeta,
5647
5912
  batchMaxNumItems: batchSize,
5648
- batchMaxNumBytes: this.maxRequestSize / 2
5913
+ batchMaxNumBytes: maxRequestSizeResult.maxRequestSize / 2,
5914
+ getByteSize: (item) => item.str.length
5649
5915
  });
5650
- for (const batchSet of batchSets) {
5651
- const postPromises = batchSet.map(
5652
- (batch) => (async () => {
5653
- try {
5654
- await this.submitLogsRequest(batch);
5655
- return { type: "success" };
5656
- } catch (e) {
5657
- return { type: "error", value: e };
5658
- }
5659
- })()
5916
+ const postPromises = batches.map(
5917
+ (batch) => (async () => {
5918
+ try {
5919
+ await this.submitLogsRequest(batch, maxRequestSizeResult);
5920
+ return { type: "success" };
5921
+ } catch (e) {
5922
+ return { type: "error", value: e };
5923
+ }
5924
+ })()
5925
+ );
5926
+ const results = await Promise.all(postPromises);
5927
+ const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
5928
+ if (failingResultErrors.length) {
5929
+ throw new AggregateError(
5930
+ failingResultErrors,
5931
+ `Encountered the following errors while logging:`
5660
5932
  );
5661
- const results = await Promise.all(postPromises);
5662
- const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
5663
- if (failingResultErrors.length) {
5664
- throw new AggregateError(
5665
- failingResultErrors,
5666
- `Encountered the following errors while logging:`
5667
- );
5668
- }
5669
5933
  }
5670
5934
  const attachmentErrors = [];
5671
5935
  for (const attachment of attachments) {
@@ -5695,32 +5959,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5695
5959
  items.forEach((item) => extractAttachments(item, attachments));
5696
5960
  let mergedItems = mergeRowBatch(items);
5697
5961
  if (this.maskingFunction) {
5698
- mergedItems = mergedItems.map(
5699
- (batch) => batch.map((item) => {
5700
- const maskedItem = { ...item };
5701
- for (const field of REDACTION_FIELDS) {
5702
- if (item[field] !== void 0) {
5703
- const maskedValue = applyMaskingToField(
5704
- this.maskingFunction,
5705
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
5706
- item[field],
5707
- field
5708
- );
5709
- if (maskedValue instanceof MaskingError) {
5710
- delete maskedItem[field];
5711
- if (maskedItem.error) {
5712
- maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
5713
- } else {
5714
- maskedItem.error = maskedValue.errorMsg;
5715
- }
5962
+ mergedItems = mergedItems.map((item) => {
5963
+ const maskedItem = { ...item };
5964
+ for (const field of REDACTION_FIELDS) {
5965
+ if (item[field] !== void 0) {
5966
+ const maskedValue = applyMaskingToField(
5967
+ this.maskingFunction,
5968
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
5969
+ item[field],
5970
+ field
5971
+ );
5972
+ if (maskedValue instanceof MaskingError) {
5973
+ delete maskedItem[field];
5974
+ if (maskedItem.error) {
5975
+ maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
5716
5976
  } else {
5717
- maskedItem[field] = maskedValue;
5977
+ maskedItem.error = maskedValue.errorMsg;
5718
5978
  }
5979
+ } else {
5980
+ maskedItem[field] = maskedValue;
5719
5981
  }
5720
5982
  }
5721
- return maskedItem;
5722
- })
5723
- );
5983
+ }
5984
+ return maskedItem;
5985
+ });
5724
5986
  }
5725
5987
  return [mergedItems, attachments];
5726
5988
  } catch (e) {
@@ -5747,20 +6009,73 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5747
6009
  }
5748
6010
  throw new Error("Impossible");
5749
6011
  }
5750
- async submitLogsRequest(items) {
6012
+ async requestLogs3OverflowUpload(conn, args) {
6013
+ let response;
6014
+ try {
6015
+ response = await conn.post_json("logs3/overflow", {
6016
+ content_type: "application/json",
6017
+ size_bytes: args.sizeBytes,
6018
+ rows: args.rows
6019
+ });
6020
+ } catch (error) {
6021
+ const errorStr = JSON.stringify(error);
6022
+ throw new Error(
6023
+ `Failed to request logs3 overflow upload URL: ${errorStr}`
6024
+ );
6025
+ }
6026
+ try {
6027
+ return logs3OverflowUploadSchema.parse(response);
6028
+ } catch (error) {
6029
+ if (error instanceof ZodError) {
6030
+ const errorStr = JSON.stringify(error.flatten());
6031
+ throw new Error(`Invalid response from API server: ${errorStr}`);
6032
+ }
6033
+ throw error;
6034
+ }
6035
+ }
6036
+ async _uploadLogs3OverflowPayload(conn, upload, payload) {
6037
+ await uploadLogs3OverflowPayload(upload, payload, conn.fetch.bind(conn));
6038
+ }
6039
+ async submitLogsRequest(items, {
6040
+ maxRequestSize,
6041
+ canUseOverflow
6042
+ }) {
5751
6043
  const conn = await this.apiConn.get();
5752
6044
  const dataStr = constructLogs3Data(items);
6045
+ const payloadBytes = utf8ByteLength(dataStr);
6046
+ const useOverflow = canUseOverflow && payloadBytes > maxRequestSize;
5753
6047
  if (this.allPublishPayloadsDir) {
5754
6048
  await _HTTPBackgroundLogger.writePayloadToDir({
5755
6049
  payloadDir: this.allPublishPayloadsDir,
5756
6050
  payload: dataStr
5757
6051
  });
5758
6052
  }
6053
+ let overflowUpload = null;
6054
+ const overflowRows = useOverflow ? items.map((item) => item.overflowMeta) : null;
5759
6055
  for (let i = 0; i < this.numTries; i++) {
5760
6056
  const startTime = now();
5761
6057
  let error = void 0;
5762
6058
  try {
5763
- await conn.post_json("logs3", dataStr);
6059
+ if (overflowRows) {
6060
+ if (!overflowUpload) {
6061
+ const currentUpload = await this.requestLogs3OverflowUpload(conn, {
6062
+ rows: overflowRows,
6063
+ sizeBytes: payloadBytes
6064
+ });
6065
+ await this._uploadLogs3OverflowPayload(
6066
+ conn,
6067
+ currentUpload,
6068
+ dataStr
6069
+ );
6070
+ overflowUpload = currentUpload;
6071
+ }
6072
+ await conn.post_json(
6073
+ "logs3",
6074
+ constructLogs3OverflowRequest(overflowUpload.key)
6075
+ );
6076
+ } else {
6077
+ await conn.post_json("logs3", dataStr);
6078
+ }
5764
6079
  } catch (e) {
5765
6080
  error = e;
5766
6081
  }
@@ -5776,7 +6091,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5776
6091
  return `${error}`;
5777
6092
  }
5778
6093
  })();
5779
- const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}.${retryingText}
6094
+ const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${payloadBytes}.${retryingText}
5780
6095
  Error: ${errorText}`;
5781
6096
  if (!isRetrying && this.failedPublishPayloadsDir) {
5782
6097
  await _HTTPBackgroundLogger.writePayloadToDir({
@@ -5830,7 +6145,7 @@ Error: ${errorText}`;
5830
6145
  try {
5831
6146
  const [allItems, allAttachments] = await this.unwrapLazyValues(wrappedItems);
5832
6147
  const dataStr = constructLogs3Data(
5833
- allItems.map((x) => JSON.stringify(x))
6148
+ allItems.map((x) => stringifyWithOverflowMeta(x))
5834
6149
  );
5835
6150
  const attachmentStr = JSON.stringify(
5836
6151
  allAttachments.map((a) => a.debugInfo())
@@ -6397,23 +6712,144 @@ async function loadPrompt({
6397
6712
  }
6398
6713
  return prompt;
6399
6714
  }
6400
- function setMaskingFunction(maskingFunction) {
6401
- _globalState.setMaskingFunction(maskingFunction);
6402
- }
6403
- async function login(options = {}) {
6404
- const { forceLogin = false } = options || {};
6405
- if (_globalState.loggedIn && !forceLogin) {
6406
- let checkUpdatedParam2 = function(varname, arg, orig) {
6407
- if (!isEmpty2(arg) && !isEmpty2(orig) && arg !== orig) {
6408
- throw new Error(
6409
- `Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
6410
- );
6411
- }
6412
- };
6413
- var checkUpdatedParam = checkUpdatedParam2;
6414
- checkUpdatedParam2("appUrl", options.appUrl, _globalState.appUrl);
6415
- checkUpdatedParam2(
6416
- "apiKey",
6715
+ async function loadParameters({
6716
+ projectName,
6717
+ projectId,
6718
+ slug,
6719
+ version,
6720
+ environment,
6721
+ id,
6722
+ appUrl,
6723
+ apiKey,
6724
+ orgName,
6725
+ fetch: fetch2,
6726
+ forceLogin,
6727
+ state: stateArg
6728
+ }) {
6729
+ if (version && environment) {
6730
+ throw new Error(
6731
+ "Cannot specify both 'version' and 'environment' parameters. Please use only one (remove the other)."
6732
+ );
6733
+ }
6734
+ if (id) {
6735
+ } else if (isEmpty2(projectName) && isEmpty2(projectId)) {
6736
+ throw new Error("Must specify either projectName or projectId");
6737
+ } else if (isEmpty2(slug)) {
6738
+ throw new Error("Must specify slug");
6739
+ }
6740
+ const state = stateArg ?? _globalState;
6741
+ let response;
6742
+ try {
6743
+ await state.login({
6744
+ orgName,
6745
+ apiKey,
6746
+ appUrl,
6747
+ fetch: fetch2,
6748
+ forceLogin
6749
+ });
6750
+ if (id) {
6751
+ response = await state.apiConn().get_json(`v1/function/${id}`, {
6752
+ ...version && { version },
6753
+ ...environment && { environment }
6754
+ });
6755
+ if (response) {
6756
+ response = { objects: [response] };
6757
+ }
6758
+ } else {
6759
+ response = await state.apiConn().get_json("v1/function", {
6760
+ project_name: projectName,
6761
+ project_id: projectId,
6762
+ slug,
6763
+ version,
6764
+ function_type: "parameters",
6765
+ ...environment && { environment }
6766
+ });
6767
+ }
6768
+ } catch (e) {
6769
+ if (environment || version) {
6770
+ throw new Error(`Parameters not found with specified parameters: ${e}`);
6771
+ }
6772
+ console.warn(
6773
+ "Failed to load parameters, attempting to fall back to cache:",
6774
+ e
6775
+ );
6776
+ let parameters2;
6777
+ if (id) {
6778
+ parameters2 = await state.parametersCache.get({ id });
6779
+ if (!parameters2) {
6780
+ throw new Error(
6781
+ `Parameters with id ${id} not found (not found on server or in local cache): ${e}`
6782
+ );
6783
+ }
6784
+ } else {
6785
+ parameters2 = await state.parametersCache.get({
6786
+ slug,
6787
+ projectId,
6788
+ projectName,
6789
+ version: version ?? "latest"
6790
+ });
6791
+ if (!parameters2) {
6792
+ throw new Error(
6793
+ `Parameters ${slug} (version ${version ?? "latest"}) not found in ${[
6794
+ projectName ?? projectId
6795
+ ]} (not found on server or in local cache): ${e}`
6796
+ );
6797
+ }
6798
+ }
6799
+ return parameters2;
6800
+ }
6801
+ if (!("objects" in response) || response.objects.length === 0) {
6802
+ if (id) {
6803
+ throw new Error(`Parameters with id ${id} not found.`);
6804
+ } else {
6805
+ throw new Error(
6806
+ `Parameters ${slug} not found in ${[projectName ?? projectId]}`
6807
+ );
6808
+ }
6809
+ } else if (response.objects.length > 1) {
6810
+ if (id) {
6811
+ throw new Error(
6812
+ `Multiple parameters found with id ${id}. This should never happen.`
6813
+ );
6814
+ } else {
6815
+ throw new Error(
6816
+ `Multiple parameters found with slug ${slug} in project ${projectName ?? projectId}. This should never happen.`
6817
+ );
6818
+ }
6819
+ }
6820
+ const metadata = parametersRowSchema.parse(response["objects"][0]);
6821
+ const parameters = new RemoteEvalParameters(metadata);
6822
+ try {
6823
+ if (id) {
6824
+ await state.parametersCache.set({ id }, parameters);
6825
+ } else if (slug) {
6826
+ await state.parametersCache.set(
6827
+ { slug, projectId, projectName, version: version ?? "latest" },
6828
+ parameters
6829
+ );
6830
+ }
6831
+ } catch (e) {
6832
+ console.warn("Failed to set parameters in cache:", e);
6833
+ }
6834
+ return parameters;
6835
+ }
6836
+ function setMaskingFunction(maskingFunction) {
6837
+ _globalState.setMaskingFunction(maskingFunction);
6838
+ }
6839
+ async function login(options = {}) {
6840
+ const { forceLogin = false } = options || {};
6841
+ if (_globalState.loggedIn && !forceLogin) {
6842
+ let checkUpdatedParam2 = function(varname, arg, orig) {
6843
+ if (!isEmpty2(arg) && !isEmpty2(orig) && arg !== orig) {
6844
+ throw new Error(
6845
+ `Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
6846
+ );
6847
+ }
6848
+ };
6849
+ var checkUpdatedParam = checkUpdatedParam2;
6850
+ checkUpdatedParam2("appUrl", options.appUrl, _globalState.appUrl);
6851
+ checkUpdatedParam2(
6852
+ "apiKey",
6417
6853
  options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
6418
6854
  _globalState.loginToken
6419
6855
  );
@@ -8467,6 +8903,55 @@ var Prompt2 = class _Prompt {
8467
8903
  );
8468
8904
  }
8469
8905
  };
8906
+ var RemoteEvalParameters = class {
8907
+ constructor(metadata) {
8908
+ this.metadata = metadata;
8909
+ }
8910
+ __braintrust_parameters_marker = true;
8911
+ get id() {
8912
+ return this.metadata.id;
8913
+ }
8914
+ get projectId() {
8915
+ return this.metadata.project_id;
8916
+ }
8917
+ get name() {
8918
+ return this.metadata.name;
8919
+ }
8920
+ get slug() {
8921
+ return this.metadata.slug;
8922
+ }
8923
+ get version() {
8924
+ return this.metadata[TRANSACTION_ID_FIELD];
8925
+ }
8926
+ get schema() {
8927
+ return this.metadata.function_data.__schema;
8928
+ }
8929
+ get data() {
8930
+ return this.metadata.function_data.data ?? {};
8931
+ }
8932
+ validate(data) {
8933
+ if (typeof data !== "object" || data === null) {
8934
+ return false;
8935
+ }
8936
+ const schemaProps = this.schema.properties;
8937
+ if (typeof schemaProps !== "object" || schemaProps === null) {
8938
+ return true;
8939
+ }
8940
+ for (const key of Object.keys(schemaProps)) {
8941
+ if (!(key in data)) {
8942
+ const required = Array.isArray(this.schema.required) ? this.schema.required : [];
8943
+ if (required.includes(key)) {
8944
+ return false;
8945
+ }
8946
+ }
8947
+ }
8948
+ return true;
8949
+ }
8950
+ static isParameters(x) {
8951
+ return typeof x === "object" && x !== null && "__braintrust_parameters_marker" in x && // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
8952
+ x.__braintrust_parameters_marker === true;
8953
+ }
8954
+ };
8470
8955
  var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
8471
8956
  function setInitialTestState() {
8472
8957
  if (!_internalGetGlobalState()) {
@@ -8602,6 +9087,7 @@ __export(exports_exports, {
8602
9087
  CodePrompt: () => CodePrompt,
8603
9088
  ContextManager: () => ContextManager,
8604
9089
  DEFAULT_FETCH_BATCH_SIZE: () => DEFAULT_FETCH_BATCH_SIZE,
9090
+ DEFAULT_MAX_REQUEST_SIZE: () => DEFAULT_MAX_REQUEST_SIZE,
8605
9091
  Dataset: () => Dataset2,
8606
9092
  ERR_PERMALINK: () => ERR_PERMALINK,
8607
9093
  Eval: () => Eval,
@@ -8612,6 +9098,7 @@ __export(exports_exports, {
8612
9098
  IDGenerator: () => IDGenerator,
8613
9099
  JSONAttachment: () => JSONAttachment,
8614
9100
  LEGACY_CACHED_HEADER: () => LEGACY_CACHED_HEADER,
9101
+ LOGS3_OVERFLOW_REFERENCE_TYPE: () => LOGS3_OVERFLOW_REFERENCE_TYPE,
8615
9102
  LazyValue: () => LazyValue,
8616
9103
  Logger: () => Logger,
8617
9104
  LoginInvalidOrgError: () => LoginInvalidOrgError,
@@ -8636,8 +9123,10 @@ __export(exports_exports, {
8636
9123
  _exportsForTestingOnly: () => _exportsForTestingOnly,
8637
9124
  _internalGetGlobalState: () => _internalGetGlobalState,
8638
9125
  _internalSetInitialState: () => _internalSetInitialState,
9126
+ addAzureBlobHeaders: () => addAzureBlobHeaders,
8639
9127
  braintrustStreamChunkSchema: () => braintrustStreamChunkSchema,
8640
9128
  buildLocalSummary: () => buildLocalSummary,
9129
+ constructLogs3OverflowRequest: () => constructLogs3OverflowRequest,
8641
9130
  createFinalValuePassThroughStream: () => createFinalValuePassThroughStream,
8642
9131
  currentExperiment: () => currentExperiment,
8643
9132
  currentLogger: () => currentLogger,
@@ -8661,15 +9150,18 @@ __export(exports_exports, {
8661
9150
  initLogger: () => initLogger,
8662
9151
  invoke: () => invoke,
8663
9152
  isTemplateFormat: () => isTemplateFormat,
9153
+ loadParameters: () => loadParameters,
8664
9154
  loadPrompt: () => loadPrompt,
8665
9155
  log: () => log,
8666
9156
  logError: () => logError,
8667
9157
  login: () => login,
8668
9158
  loginToState: () => loginToState,
9159
+ logs3OverflowUploadSchema: () => logs3OverflowUploadSchema,
8669
9160
  newId: () => newId,
8670
9161
  parseCachedHeader: () => parseCachedHeader,
8671
9162
  parseTemplateFormat: () => parseTemplateFormat,
8672
9163
  permalink: () => permalink,
9164
+ pickLogs3OverflowObjectIds: () => pickLogs3OverflowObjectIds,
8673
9165
  projects: () => projects,
8674
9166
  promptContentsSchema: () => promptContentsSchema,
8675
9167
  promptDefinitionSchema: () => promptDefinitionSchema,
@@ -8690,6 +9182,8 @@ __export(exports_exports, {
8690
9182
  traceable: () => traceable,
8691
9183
  traced: () => traced,
8692
9184
  updateSpan: () => updateSpan,
9185
+ uploadLogs3OverflowPayload: () => uploadLogs3OverflowPayload,
9186
+ utf8ByteLength: () => utf8ByteLength,
8693
9187
  withCurrent: () => withCurrent,
8694
9188
  withDataset: () => withDataset,
8695
9189
  withExperiment: () => withExperiment,
@@ -12160,6 +12654,189 @@ function wrapMastraAgent(agent, _options) {
12160
12654
  }
12161
12655
 
12162
12656
  // src/wrappers/claude-agent-sdk/claude-agent-sdk.ts
12657
+ function getMcpServerMetadata(serverName, mcpServers) {
12658
+ if (!serverName || !mcpServers) {
12659
+ return {};
12660
+ }
12661
+ const serverConfig = mcpServers[serverName];
12662
+ if (!serverConfig) {
12663
+ return {};
12664
+ }
12665
+ const metadata = {};
12666
+ if (serverConfig.type) {
12667
+ metadata["mcp.type"] = serverConfig.type;
12668
+ } else if (typeof serverConfig === "object" && "transport" in serverConfig) {
12669
+ metadata["mcp.type"] = "sdk";
12670
+ }
12671
+ if (serverConfig.url) {
12672
+ metadata["mcp.url"] = serverConfig.url;
12673
+ }
12674
+ if (serverConfig.command) {
12675
+ metadata["mcp.command"] = serverConfig.command;
12676
+ if (serverConfig.args) {
12677
+ metadata["mcp.args"] = serverConfig.args.join(" ");
12678
+ }
12679
+ }
12680
+ return metadata;
12681
+ }
12682
+ function parseToolName(rawToolName) {
12683
+ const mcpMatch = rawToolName.match(/^mcp__([^_]+)__(.+)$/);
12684
+ if (mcpMatch) {
12685
+ const [, mcpServer, toolName] = mcpMatch;
12686
+ return {
12687
+ displayName: `tool: ${mcpServer}/${toolName}`,
12688
+ toolName,
12689
+ mcpServer,
12690
+ rawToolName
12691
+ };
12692
+ }
12693
+ return {
12694
+ displayName: `tool: ${rawToolName}`,
12695
+ toolName: rawToolName,
12696
+ rawToolName
12697
+ };
12698
+ }
12699
+ function createToolTracingHooks(resolveParentSpan, activeToolSpans, mcpServers, subAgentSpans, endedSubAgentSpans) {
12700
+ const preToolUse = async (input, toolUseID) => {
12701
+ if (input.hook_event_name !== "PreToolUse" || !toolUseID) {
12702
+ return {};
12703
+ }
12704
+ if (input.tool_name === "Task") {
12705
+ return {};
12706
+ }
12707
+ const parsed = parseToolName(input.tool_name);
12708
+ const mcpMetadata = getMcpServerMetadata(parsed.mcpServer, mcpServers);
12709
+ const parentExport = await resolveParentSpan(toolUseID);
12710
+ const toolSpan = startSpan({
12711
+ name: parsed.displayName,
12712
+ spanAttributes: { type: "tool" /* TOOL */ },
12713
+ event: {
12714
+ input: input.tool_input,
12715
+ metadata: {
12716
+ // GenAI semantic conventions
12717
+ "gen_ai.tool.name": parsed.toolName,
12718
+ "gen_ai.tool.call.id": toolUseID,
12719
+ // MCP-specific metadata
12720
+ ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
12721
+ ...mcpMetadata,
12722
+ // Claude SDK metadata
12723
+ "claude_agent_sdk.raw_tool_name": parsed.rawToolName,
12724
+ "claude_agent_sdk.session_id": input.session_id,
12725
+ "claude_agent_sdk.cwd": input.cwd
12726
+ }
12727
+ },
12728
+ parent: parentExport
12729
+ });
12730
+ activeToolSpans.set(toolUseID, toolSpan);
12731
+ return {};
12732
+ };
12733
+ const postToolUse = async (input, toolUseID) => {
12734
+ if (input.hook_event_name !== "PostToolUse" || !toolUseID) {
12735
+ return {};
12736
+ }
12737
+ const subAgentSpan = subAgentSpans.get(toolUseID);
12738
+ if (subAgentSpan) {
12739
+ try {
12740
+ const response = input.tool_response;
12741
+ const metadata = {};
12742
+ if (response?.status) {
12743
+ metadata["claude_agent_sdk.status"] = response.status;
12744
+ }
12745
+ if (response?.totalDurationMs) {
12746
+ metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs;
12747
+ }
12748
+ if (response?.totalToolUseCount !== void 0) {
12749
+ metadata["claude_agent_sdk.tool_use_count"] = response.totalToolUseCount;
12750
+ }
12751
+ subAgentSpan.log({
12752
+ output: response?.content,
12753
+ metadata
12754
+ });
12755
+ } finally {
12756
+ subAgentSpan.end();
12757
+ endedSubAgentSpans.add(toolUseID);
12758
+ }
12759
+ return {};
12760
+ }
12761
+ const toolSpan = activeToolSpans.get(toolUseID);
12762
+ if (!toolSpan) {
12763
+ return {};
12764
+ }
12765
+ try {
12766
+ toolSpan.log({ output: input.tool_response });
12767
+ } finally {
12768
+ toolSpan.end();
12769
+ activeToolSpans.delete(toolUseID);
12770
+ }
12771
+ return {};
12772
+ };
12773
+ const postToolUseFailure = async (input, toolUseID) => {
12774
+ if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) {
12775
+ return {};
12776
+ }
12777
+ const subAgentSpan = subAgentSpans.get(toolUseID);
12778
+ if (subAgentSpan) {
12779
+ try {
12780
+ subAgentSpan.log({ error: input.error });
12781
+ } finally {
12782
+ subAgentSpan.end();
12783
+ endedSubAgentSpans.add(toolUseID);
12784
+ }
12785
+ return {};
12786
+ }
12787
+ const toolSpan = activeToolSpans.get(toolUseID);
12788
+ if (!toolSpan) {
12789
+ return {};
12790
+ }
12791
+ const parsed = parseToolName(input.tool_name);
12792
+ try {
12793
+ toolSpan.log({
12794
+ error: input.error,
12795
+ metadata: {
12796
+ "gen_ai.tool.name": parsed.toolName,
12797
+ "gen_ai.tool.call.id": toolUseID,
12798
+ ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
12799
+ "claude_agent_sdk.is_interrupt": input.is_interrupt,
12800
+ "claude_agent_sdk.session_id": input.session_id
12801
+ }
12802
+ });
12803
+ } finally {
12804
+ toolSpan.end();
12805
+ activeToolSpans.delete(toolUseID);
12806
+ }
12807
+ return {};
12808
+ };
12809
+ return { preToolUse, postToolUse, postToolUseFailure };
12810
+ }
12811
+ function injectTracingHooks(options, resolveParentSpan, activeToolSpans, subAgentSpans, endedSubAgentSpans) {
12812
+ const mcpServers = options.mcpServers;
12813
+ const { preToolUse, postToolUse, postToolUseFailure } = createToolTracingHooks(
12814
+ resolveParentSpan,
12815
+ activeToolSpans,
12816
+ mcpServers,
12817
+ subAgentSpans,
12818
+ endedSubAgentSpans
12819
+ );
12820
+ const existingHooks = options.hooks ?? {};
12821
+ return {
12822
+ ...options,
12823
+ hooks: {
12824
+ ...existingHooks,
12825
+ PreToolUse: [
12826
+ ...existingHooks.PreToolUse ?? [],
12827
+ { hooks: [preToolUse] }
12828
+ ],
12829
+ PostToolUse: [
12830
+ ...existingHooks.PostToolUse ?? [],
12831
+ { hooks: [postToolUse] }
12832
+ ],
12833
+ PostToolUseFailure: [
12834
+ ...existingHooks.PostToolUseFailure ?? [],
12835
+ { hooks: [postToolUseFailure] }
12836
+ ]
12837
+ }
12838
+ };
12839
+ }
12163
12840
  function filterSerializableOptions(options) {
12164
12841
  const allowedKeys = [
12165
12842
  "model",
@@ -12184,18 +12861,45 @@ function filterSerializableOptions(options) {
12184
12861
  }
12185
12862
  return filtered;
12186
12863
  }
12864
+ function isAsyncIterable(value) {
12865
+ return value !== null && value !== void 0 && typeof value[Symbol.asyncIterator] === "function";
12866
+ }
12187
12867
  function wrapClaudeAgentQuery(queryFn, defaultThis) {
12188
12868
  const proxy = new Proxy(queryFn, {
12189
12869
  apply(target, thisArg, argArray) {
12190
12870
  const params = argArray[0] ?? {};
12191
12871
  const { prompt, options = {} } = params;
12872
+ const promptIsAsyncIterable = isAsyncIterable(prompt);
12873
+ let capturedPromptMessages;
12874
+ let promptForQuery = prompt;
12875
+ let promptStarted = false;
12876
+ let resolvePromptDone;
12877
+ const promptDone = new Promise((resolve) => {
12878
+ resolvePromptDone = resolve;
12879
+ });
12880
+ if (promptIsAsyncIterable) {
12881
+ capturedPromptMessages = [];
12882
+ const originalPrompt = prompt;
12883
+ const capturingPrompt = (async function* () {
12884
+ promptStarted = true;
12885
+ try {
12886
+ for await (const msg of originalPrompt) {
12887
+ capturedPromptMessages.push(msg);
12888
+ yield msg;
12889
+ }
12890
+ } finally {
12891
+ resolvePromptDone?.();
12892
+ }
12893
+ })();
12894
+ promptForQuery = capturingPrompt;
12895
+ }
12192
12896
  const span = startSpan({
12193
12897
  name: "Claude Agent",
12194
12898
  spanAttributes: {
12195
12899
  type: "task" /* TASK */
12196
12900
  },
12197
12901
  event: {
12198
- input: typeof prompt === "string" ? prompt : { type: "streaming", description: "AsyncIterable<SDKMessage>" },
12902
+ input: typeof prompt === "string" ? prompt : promptIsAsyncIterable ? void 0 : prompt !== void 0 ? String(prompt) : void 0,
12199
12903
  metadata: filterSerializableOptions(options)
12200
12904
  }
12201
12905
  });
@@ -12206,13 +12910,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12206
12910
  let currentMessageStartTime = getCurrentUnixTimestamp();
12207
12911
  const currentMessages = [];
12208
12912
  const createLLMSpan = async () => {
12913
+ const parentToolUseId = currentMessages[0]?.parent_tool_use_id ?? null;
12914
+ let parentSpanExport;
12915
+ if (parentToolUseId) {
12916
+ const subAgentSpan = subAgentSpans.get(parentToolUseId);
12917
+ parentSpanExport = subAgentSpan ? await subAgentSpan.export() : await span.export();
12918
+ } else {
12919
+ parentSpanExport = await span.export();
12920
+ }
12209
12921
  const finalMessageContent = await _createLLMSpanForMessages(
12210
12922
  currentMessages,
12211
12923
  prompt,
12212
12924
  finalResults,
12213
12925
  options,
12214
12926
  currentMessageStartTime,
12215
- await span.export()
12927
+ capturedPromptMessages,
12928
+ parentSpanExport
12216
12929
  );
12217
12930
  if (finalMessageContent) {
12218
12931
  finalResults.push(finalMessageContent);
@@ -12225,14 +12938,78 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12225
12938
  currentMessages.length = 0;
12226
12939
  };
12227
12940
  const invocationTarget = thisArg === proxy || thisArg === void 0 ? defaultThis ?? thisArg : thisArg;
12941
+ const activeToolSpans = /* @__PURE__ */ new Map();
12942
+ const subAgentSpans = /* @__PURE__ */ new Map();
12943
+ const endedSubAgentSpans = /* @__PURE__ */ new Set();
12944
+ const toolUseToParent = /* @__PURE__ */ new Map();
12945
+ const pendingSubAgentNames = /* @__PURE__ */ new Map();
12946
+ const resolveParentSpan = async (toolUseID) => {
12947
+ const parentToolUseId = toolUseToParent.get(toolUseID);
12948
+ if (parentToolUseId) {
12949
+ const subAgentSpan = subAgentSpans.get(parentToolUseId);
12950
+ if (subAgentSpan) {
12951
+ return subAgentSpan.export();
12952
+ }
12953
+ }
12954
+ return span.export();
12955
+ };
12956
+ const optionsWithHooks = injectTracingHooks(
12957
+ options,
12958
+ resolveParentSpan,
12959
+ activeToolSpans,
12960
+ subAgentSpans,
12961
+ endedSubAgentSpans
12962
+ );
12963
+ const modifiedArgArray = [
12964
+ {
12965
+ ...params,
12966
+ ...promptForQuery !== void 0 ? { prompt: promptForQuery } : {},
12967
+ options: optionsWithHooks
12968
+ }
12969
+ ];
12228
12970
  const originalGenerator = withCurrent(
12229
12971
  span,
12230
- () => Reflect.apply(target, invocationTarget, argArray)
12972
+ () => Reflect.apply(target, invocationTarget, modifiedArgArray)
12231
12973
  );
12232
12974
  const wrappedGenerator = (async function* () {
12233
12975
  try {
12234
12976
  for await (const message of originalGenerator) {
12235
12977
  const currentTime = getCurrentUnixTimestamp();
12978
+ if (message.type === "assistant" && Array.isArray(message.message?.content)) {
12979
+ const parentToolUseId = message.parent_tool_use_id ?? null;
12980
+ for (const block of message.message.content) {
12981
+ if (block.type === "tool_use" && block.id) {
12982
+ toolUseToParent.set(block.id, parentToolUseId);
12983
+ if (block.name === "Task" && block.input?.subagent_type) {
12984
+ pendingSubAgentNames.set(
12985
+ block.id,
12986
+ block.input.subagent_type
12987
+ );
12988
+ }
12989
+ }
12990
+ }
12991
+ }
12992
+ if ("parent_tool_use_id" in message) {
12993
+ const parentToolUseId = message.parent_tool_use_id;
12994
+ if (parentToolUseId && !subAgentSpans.has(parentToolUseId)) {
12995
+ const agentName = pendingSubAgentNames.get(parentToolUseId);
12996
+ const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent";
12997
+ const parentExport = await span.export();
12998
+ const subAgentSpan = startSpan({
12999
+ name: spanName,
13000
+ spanAttributes: { type: "task" /* TASK */ },
13001
+ event: {
13002
+ metadata: {
13003
+ ...agentName && {
13004
+ "claude_agent_sdk.agent_type": agentName
13005
+ }
13006
+ }
13007
+ },
13008
+ parent: parentExport
13009
+ });
13010
+ subAgentSpans.set(parentToolUseId, subAgentSpan);
13011
+ }
13012
+ }
12236
13013
  const messageId = message.message?.id;
12237
13014
  if (messageId && messageId !== currentMessageId) {
12238
13015
  await createLLMSpan();
@@ -12278,6 +13055,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12278
13055
  });
12279
13056
  throw error;
12280
13057
  } finally {
13058
+ for (const [id, subSpan] of subAgentSpans) {
13059
+ if (!endedSubAgentSpans.has(id)) {
13060
+ subSpan.end();
13061
+ }
13062
+ }
13063
+ subAgentSpans.clear();
13064
+ if (capturedPromptMessages) {
13065
+ if (promptStarted) {
13066
+ await promptDone;
13067
+ }
13068
+ if (capturedPromptMessages.length > 0) {
13069
+ span.log({
13070
+ input: _formatCapturedMessages(capturedPromptMessages)
13071
+ });
13072
+ }
13073
+ }
12281
13074
  span.end();
12282
13075
  }
12283
13076
  })();
@@ -12305,43 +13098,25 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12305
13098
  });
12306
13099
  return proxy;
12307
13100
  }
12308
- function wrapClaudeAgentTool(toolDef) {
12309
- const originalHandler = toolDef.handler;
12310
- const wrappedHandler = (args, extra) => traced(
12311
- async (span) => {
12312
- span.log({
12313
- input: args,
12314
- metadata: {
12315
- tool_name: toolDef.name,
12316
- tool_description: toolDef.description
12317
- }
12318
- });
12319
- const result = await originalHandler(args, extra);
12320
- span.log({
12321
- output: result
12322
- });
12323
- return result;
12324
- },
12325
- {
12326
- name: `${toolDef.name}`,
12327
- spanAttributes: {
12328
- type: "tool" /* TOOL */
13101
+ function _buildLLMInput(prompt, conversationHistory, capturedPromptMessages) {
13102
+ const promptMessages = [];
13103
+ if (typeof prompt === "string") {
13104
+ promptMessages.push({ content: prompt, role: "user" });
13105
+ } else if (capturedPromptMessages && capturedPromptMessages.length > 0) {
13106
+ for (const msg of capturedPromptMessages) {
13107
+ const role = msg.message?.role;
13108
+ const content = msg.message?.content;
13109
+ if (role && content !== void 0) {
13110
+ promptMessages.push({ content, role });
12329
13111
  }
12330
13112
  }
12331
- );
12332
- return {
12333
- ...toolDef,
12334
- handler: wrappedHandler
12335
- };
12336
- }
12337
- function _buildLLMInput(prompt, conversationHistory) {
12338
- const promptMessage = typeof prompt === "string" ? { content: prompt, role: "user" } : void 0;
12339
- const inputParts = [
12340
- ...promptMessage ? [promptMessage] : [],
12341
- ...conversationHistory
12342
- ];
13113
+ }
13114
+ const inputParts = [...promptMessages, ...conversationHistory];
12343
13115
  return inputParts.length > 0 ? inputParts : void 0;
12344
13116
  }
13117
+ function _formatCapturedMessages(messages) {
13118
+ return messages.length > 0 ? messages : [];
13119
+ }
12345
13120
  function _extractUsageFromMessage(message) {
12346
13121
  const metrics = {};
12347
13122
  let usage;
@@ -12375,7 +13150,7 @@ function _extractUsageFromMessage(message) {
12375
13150
  }
12376
13151
  return metrics;
12377
13152
  }
12378
- async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, parentSpan) {
13153
+ async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, capturedPromptMessages, parentSpan) {
12379
13154
  if (messages.length === 0) return void 0;
12380
13155
  const lastMessage = messages[messages.length - 1];
12381
13156
  if (lastMessage.type !== "assistant" || !lastMessage.message?.usage) {
@@ -12383,7 +13158,11 @@ async function _createLLMSpanForMessages(messages, prompt, conversationHistory,
12383
13158
  }
12384
13159
  const model = lastMessage.message.model || options.model;
12385
13160
  const usage = _extractUsageFromMessage(lastMessage);
12386
- const input = _buildLLMInput(prompt, conversationHistory);
13161
+ const input = _buildLLMInput(
13162
+ prompt,
13163
+ conversationHistory,
13164
+ capturedPromptMessages
13165
+ );
12387
13166
  const outputs = messages.map(
12388
13167
  (m) => m.message?.content && m.message?.role ? { content: m.message.content, role: m.message.role } : void 0
12389
13168
  ).filter((c) => c !== void 0);
@@ -12424,25 +13203,9 @@ function wrapClaudeAgentSDK(sdk) {
12424
13203
  return wrappedQuery;
12425
13204
  }
12426
13205
  if (prop === "tool" && typeof value === "function") {
12427
- const toolFn = value;
12428
- const wrappedToolFactory = new Proxy(toolFn, {
12429
- apply(toolTarget, thisArg, argArray) {
12430
- const invocationTarget = thisArg === receiver || thisArg === void 0 ? target : thisArg;
12431
- const toolDef = Reflect.apply(
12432
- toolTarget,
12433
- invocationTarget,
12434
- argArray
12435
- );
12436
- if (toolDef && typeof toolDef === "object" && "handler" in toolDef) {
12437
- return wrapClaudeAgentTool(
12438
- toolDef
12439
- );
12440
- }
12441
- return toolDef;
12442
- }
12443
- });
12444
- cache.set(prop, wrappedToolFactory);
12445
- return wrappedToolFactory;
13206
+ const bound = value.bind(target);
13207
+ cache.set(prop, bound);
13208
+ return bound;
12446
13209
  }
12447
13210
  if (typeof value === "function") {
12448
13211
  const bound = value.bind(target);
@@ -13227,7 +13990,7 @@ function isAsync(fn) {
13227
13990
  function isAsyncGenerator2(fn) {
13228
13991
  return fn[Symbol.toStringTag] === "AsyncGenerator";
13229
13992
  }
13230
- function isAsyncIterable(obj) {
13993
+ function isAsyncIterable2(obj) {
13231
13994
  return typeof obj[Symbol.asyncIterator] === "function";
13232
13995
  }
13233
13996
  function wrapAsync(asyncFn) {
@@ -13398,7 +14161,7 @@ var eachOfLimit$2 = (limit) => {
13398
14161
  if (isAsyncGenerator2(obj)) {
13399
14162
  return asyncEachOfLimit(obj, limit, iteratee, callback);
13400
14163
  }
13401
- if (isAsyncIterable(obj)) {
14164
+ if (isAsyncIterable2(obj)) {
13402
14165
  return asyncEachOfLimit(obj[Symbol.asyncIterator](), limit, iteratee, callback);
13403
14166
  }
13404
14167
  var nextElem = createIterator(obj);
@@ -14327,6 +15090,7 @@ var LocalTrace = class {
14327
15090
  spansFlushed = false;
14328
15091
  spansFlushPromise = null;
14329
15092
  cachedFetcher;
15093
+ threadCache = /* @__PURE__ */ new Map();
14330
15094
  constructor({
14331
15095
  objectType,
14332
15096
  objectId,
@@ -14397,6 +15161,36 @@ var LocalTrace = class {
14397
15161
  }
14398
15162
  return this.cachedFetcher.getSpans({ spanType });
14399
15163
  }
15164
+ /**
15165
+ * Get the thread (preprocessed messages) for this trace.
15166
+ * Calls the API with the project_default preprocessor (which falls back to "thread").
15167
+ */
15168
+ async getThread(options) {
15169
+ const cacheKey = options?.preprocessor ?? "project_default";
15170
+ if (!this.threadCache.has(cacheKey)) {
15171
+ const promise = this.fetchThread(options);
15172
+ this.threadCache.set(cacheKey, promise);
15173
+ }
15174
+ return this.threadCache.get(cacheKey);
15175
+ }
15176
+ async fetchThread(options) {
15177
+ await this.ensureSpansReady();
15178
+ await this.state.login({});
15179
+ const result = await invoke({
15180
+ globalFunction: options?.preprocessor ?? "project_default",
15181
+ functionType: "preprocessor",
15182
+ input: {
15183
+ trace_ref: {
15184
+ object_type: this.objectType,
15185
+ object_id: this.objectId,
15186
+ root_span_id: this.rootSpanId
15187
+ }
15188
+ },
15189
+ mode: "json",
15190
+ state: this.state
15191
+ });
15192
+ return Array.isArray(result) ? result : [];
15193
+ }
14400
15194
  async ensureSpansReady() {
14401
15195
  if (this.spansFlushed || !this.ensureSpansFlushed) {
14402
15196
  return;
@@ -14431,648 +15225,371 @@ var SimpleProgressReporter = class {
14431
15225
 
14432
15226
  // src/eval-parameters.ts
14433
15227
  import { z as z10 } from "zod/v3";
15228
+ import Ajv from "ajv";
14434
15229
 
14435
- // src/framework2.ts
15230
+ // src/prompt-schemas.ts
14436
15231
  import { z as z9 } from "zod/v3";
14437
- var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
14438
- var ProjectBuilder = class {
14439
- create(opts) {
14440
- return new Project2(opts);
15232
+ var promptContentsSchema = z9.union([
15233
+ z9.object({
15234
+ prompt: z9.string()
15235
+ }),
15236
+ z9.object({
15237
+ messages: z9.array(ChatCompletionMessageParam)
15238
+ })
15239
+ ]);
15240
+ var promptDefinitionSchema = promptContentsSchema.and(
15241
+ z9.object({
15242
+ model: z9.string(),
15243
+ params: ModelParams.optional(),
15244
+ templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
15245
+ })
15246
+ );
15247
+ var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
15248
+ z9.object({
15249
+ tools: z9.array(ToolFunctionDefinition).optional()
15250
+ })
15251
+ );
15252
+ function promptDefinitionToPromptData(promptDefinition, rawTools) {
15253
+ const promptBlock = "messages" in promptDefinition ? {
15254
+ type: "chat",
15255
+ messages: promptDefinition.messages,
15256
+ tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
15257
+ } : {
15258
+ type: "completion",
15259
+ content: promptDefinition.prompt
15260
+ };
15261
+ return {
15262
+ prompt: promptBlock,
15263
+ options: {
15264
+ model: promptDefinition.model,
15265
+ params: promptDefinition.params
15266
+ },
15267
+ ...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
15268
+ };
15269
+ }
15270
+
15271
+ // src/eval-parameters.ts
15272
+ var evalParametersSchema = z10.record(
15273
+ z10.string(),
15274
+ z10.union([
15275
+ z10.object({
15276
+ type: z10.literal("prompt"),
15277
+ default: promptDefinitionWithToolsSchema.optional(),
15278
+ description: z10.string().optional()
15279
+ }),
15280
+ z10.instanceof(z10.ZodType)
15281
+ // For Zod schemas
15282
+ ])
15283
+ );
15284
+ async function validateParameters(parameters, parameterSchema) {
15285
+ let resolvedSchema = parameterSchema;
15286
+ if (resolvedSchema instanceof Promise) {
15287
+ resolvedSchema = await resolvedSchema;
15288
+ }
15289
+ if (resolvedSchema === void 0 || resolvedSchema === null) {
15290
+ return parameters;
15291
+ }
15292
+ if (RemoteEvalParameters.isParameters(resolvedSchema)) {
15293
+ const mergedParameters = parameters && Object.keys(parameters).length > 0 ? {
15294
+ ...resolvedSchema.data,
15295
+ ...parameters
15296
+ } : resolvedSchema.data;
15297
+ return validateParametersWithJsonSchema(
15298
+ mergedParameters,
15299
+ resolvedSchema.schema
15300
+ );
14441
15301
  }
14442
- };
14443
- var projects = new ProjectBuilder();
14444
- var Project2 = class {
14445
- name;
14446
- id;
14447
- tools;
14448
- prompts;
14449
- scorers;
14450
- _publishableCodeFunctions = [];
14451
- _publishablePrompts = [];
14452
- constructor(args) {
14453
- _initializeSpanContext();
14454
- this.name = "name" in args ? args.name : void 0;
14455
- this.id = "id" in args ? args.id : void 0;
14456
- this.tools = new ToolBuilder(this);
14457
- this.prompts = new PromptBuilder(this);
14458
- this.scorers = new ScorerBuilder(this);
15302
+ return validateParametersWithZod(
15303
+ parameters,
15304
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15305
+ resolvedSchema
15306
+ );
15307
+ }
15308
+ function validateParametersWithZod(parameters, parameterSchema) {
15309
+ return Object.fromEntries(
15310
+ Object.entries(parameterSchema).map(([name, schema]) => {
15311
+ const value = parameters[name];
15312
+ try {
15313
+ if ("type" in schema && schema.type === "prompt") {
15314
+ const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
15315
+ schema.default,
15316
+ schema.default.tools
15317
+ ) : void 0;
15318
+ if (!promptData) {
15319
+ throw new Error(`Parameter '${name}' is required`);
15320
+ }
15321
+ return [name, Prompt2.fromPromptData(name, promptData)];
15322
+ } else {
15323
+ const schemaCasted = schema;
15324
+ return [name, schemaCasted.parse(value)];
15325
+ }
15326
+ } catch (e) {
15327
+ console.error("Error validating parameter", name, e);
15328
+ throw Error(
15329
+ `Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
15330
+ );
15331
+ }
15332
+ })
15333
+ );
15334
+ }
15335
+ function validateParametersWithJsonSchema(parameters, schema) {
15336
+ const ajv = new Ajv({ coerceTypes: true, useDefaults: true, strict: false });
15337
+ const validate = ajv.compile(schema);
15338
+ if (!validate(parameters)) {
15339
+ const errorMessages = validate.errors?.map((err) => {
15340
+ const path = err.instancePath || "root";
15341
+ return `${path}: ${err.message}`;
15342
+ }).join(", ");
15343
+ throw Error(`Invalid parameters: ${errorMessages}`);
14459
15344
  }
14460
- addPrompt(prompt) {
14461
- this._publishablePrompts.push(prompt);
14462
- if (globalThis._lazy_load) {
14463
- globalThis._evals.prompts.push(prompt);
14464
- }
15345
+ return parameters;
15346
+ }
15347
+
15348
+ // src/framework.ts
15349
+ function BaseExperiment(options = {}) {
15350
+ return { _type: "BaseExperiment", ...options };
15351
+ }
15352
+ var EvalResultWithSummary = class {
15353
+ constructor(summary, results) {
15354
+ this.summary = summary;
15355
+ this.results = results;
14465
15356
  }
14466
- addCodeFunction(fn) {
14467
- this._publishableCodeFunctions.push(fn);
14468
- if (globalThis._lazy_load) {
14469
- globalThis._evals.functions.push(fn);
14470
- }
15357
+ /**
15358
+ * @deprecated Use `summary` instead.
15359
+ */
15360
+ toString() {
15361
+ return JSON.stringify(this.summary);
14471
15362
  }
14472
- async publish() {
14473
- if (globalThis._lazy_load) {
14474
- console.warn("publish() is a no-op when running `braintrust push`.");
14475
- return;
14476
- }
14477
- await login();
14478
- const projectMap = new ProjectNameIdMap();
14479
- const functionDefinitions = [];
14480
- if (this._publishableCodeFunctions.length > 0) {
14481
- console.warn(
14482
- "Code functions cannot be published directly. Use `braintrust push` instead."
14483
- );
14484
- }
14485
- if (this._publishablePrompts.length > 0) {
14486
- for (const prompt of this._publishablePrompts) {
14487
- const functionDefinition = await prompt.toFunctionDefinition(projectMap);
14488
- functionDefinitions.push(functionDefinition);
14489
- }
14490
- }
14491
- await _internalGetGlobalState().apiConn().post_json("insert-functions", {
14492
- functions: functionDefinitions
14493
- });
15363
+ [Symbol.for("nodejs.util.inspect.custom")]() {
15364
+ return `EvalResultWithSummary(summary="...", results=[...])`;
15365
+ }
15366
+ toJSON() {
15367
+ return {
15368
+ summary: this.summary,
15369
+ results: this.results
15370
+ };
14494
15371
  }
14495
15372
  };
14496
- var ToolBuilder = class {
14497
- constructor(project) {
14498
- this.project = project;
15373
+ function makeEvalName(projectName, experimentName) {
15374
+ let out = projectName;
15375
+ if (experimentName) {
15376
+ out += ` [experimentName=${experimentName}]`;
14499
15377
  }
14500
- taskCounter = 0;
14501
- // This type definition is just a catch all so that the implementation can be
14502
- // less specific than the two more specific declarations above.
14503
- create(opts) {
14504
- this.taskCounter++;
14505
- opts = opts ?? {};
14506
- const { handler, name, slug, parameters, returns, ...rest } = opts;
14507
- let resolvedName = name ?? handler.name;
14508
- if (resolvedName.trim().length === 0) {
14509
- resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
14510
- }
14511
- const tool = new CodeFunction(this.project, {
14512
- handler,
14513
- name: resolvedName,
14514
- slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
14515
- type: "tool",
14516
- // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
14517
- parameters,
14518
- // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
14519
- returns,
14520
- ...rest
14521
- });
14522
- this.project.addCodeFunction(tool);
14523
- return tool;
15378
+ return out;
15379
+ }
15380
+ function initExperiment2(state, options = {}) {
15381
+ return init({
15382
+ state,
15383
+ ...options,
15384
+ setCurrent: false
15385
+ });
15386
+ }
15387
+ function callEvaluatorData(data) {
15388
+ const dataResult = typeof data === "function" ? data() : data;
15389
+ let baseExperiment = void 0;
15390
+ if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
15391
+ baseExperiment = dataResult.name;
14524
15392
  }
15393
+ return {
15394
+ data: dataResult,
15395
+ baseExperiment
15396
+ };
15397
+ }
15398
+ function isAsyncIterable3(value) {
15399
+ return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
15400
+ }
15401
+ function isIterable(value) {
15402
+ return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
15403
+ }
15404
+ globalThis._evals = {
15405
+ functions: [],
15406
+ prompts: [],
15407
+ parameters: [],
15408
+ evaluators: {},
15409
+ reporters: {}
14525
15410
  };
14526
- var ScorerBuilder = class {
14527
- constructor(project) {
14528
- this.project = project;
15411
+ function _initializeSpanContext() {
15412
+ globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
15413
+ }
15414
+ async function Eval(name, evaluator, reporterOrOpts) {
15415
+ const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
15416
+ let evalName = makeEvalName(name, evaluator.experimentName);
15417
+ if (globalThis._evals.evaluators[evalName]) {
15418
+ evalName = `${evalName}_${Object.keys(_evals).length}`;
14529
15419
  }
14530
- taskCounter = 0;
14531
- create(opts) {
14532
- this.taskCounter++;
14533
- let resolvedName = opts.name;
14534
- if (!resolvedName && "handler" in opts) {
14535
- resolvedName = opts.handler.name;
15420
+ if (globalThis._lazy_load) {
15421
+ globalThis._evals.evaluators[evalName] = {
15422
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15423
+ evaluator: {
15424
+ evalName,
15425
+ projectName: name,
15426
+ ...evaluator
15427
+ },
15428
+ reporter: options.reporter
15429
+ };
15430
+ _initializeSpanContext();
15431
+ return new EvalResultWithSummary(
15432
+ {
15433
+ scores: {},
15434
+ metrics: {},
15435
+ projectName: "",
15436
+ experimentName: ""
15437
+ },
15438
+ []
15439
+ );
15440
+ }
15441
+ const progressReporter = options.progress ?? new SimpleProgressReporter();
15442
+ const shouldCollectResults = options.returnResults ?? true;
15443
+ if (typeof options.reporter === "string") {
15444
+ throw new Error(
15445
+ "Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
15446
+ );
15447
+ }
15448
+ const resolvedReporter = options.reporter || defaultReporter;
15449
+ try {
15450
+ const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
15451
+ evaluator.data
15452
+ );
15453
+ const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
15454
+ ...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
15455
+ experiment: evaluator.experimentName,
15456
+ description: evaluator.description,
15457
+ metadata: evaluator.metadata,
15458
+ isPublic: evaluator.isPublic,
15459
+ update: evaluator.update,
15460
+ baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
15461
+ baseExperimentId: evaluator.baseExperimentId,
15462
+ gitMetadataSettings: evaluator.gitMetadataSettings,
15463
+ repoInfo: evaluator.repoInfo,
15464
+ dataset: Dataset2.isDataset(data) ? data : void 0
15465
+ });
15466
+ if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
15467
+ await experiment._waitForId();
14536
15468
  }
14537
- if (!resolvedName || resolvedName.trim().length === 0) {
14538
- resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
15469
+ if (experiment && options.onStart) {
15470
+ const summary = await experiment.summarize({ summarizeScores: false });
15471
+ options.onStart(summary);
14539
15472
  }
14540
- const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
14541
- if ("handler" in opts) {
14542
- const scorer = new CodeFunction(this.project, {
14543
- ...opts,
14544
- name: resolvedName,
14545
- slug,
14546
- type: "scorer"
14547
- });
14548
- this.project.addCodeFunction(scorer);
14549
- } else {
14550
- const promptBlock = "messages" in opts ? {
14551
- type: "chat",
14552
- messages: opts.messages
14553
- } : {
14554
- type: "completion",
14555
- content: opts.prompt
14556
- };
14557
- const promptData = {
14558
- prompt: promptBlock,
14559
- options: {
14560
- model: opts.model,
14561
- params: opts.params
14562
- },
14563
- parser: {
14564
- type: "llm_classifier",
14565
- use_cot: opts.useCot,
14566
- choice_scores: opts.choiceScores
14567
- }
15473
+ try {
15474
+ const evalDef = {
15475
+ evalName,
15476
+ projectName: name,
15477
+ ...evaluator,
15478
+ data
14568
15479
  };
14569
- const codePrompt = new CodePrompt(
14570
- this.project,
14571
- promptData,
14572
- [],
14573
- {
14574
- ...opts,
14575
- name: resolvedName,
14576
- slug
14577
- },
14578
- "scorer"
14579
- );
14580
- this.project.addPrompt(codePrompt);
14581
- }
14582
- }
14583
- };
14584
- var CodeFunction = class {
14585
- constructor(project, opts) {
14586
- this.project = project;
14587
- this.handler = opts.handler;
14588
- this.name = opts.name;
14589
- this.slug = opts.slug;
14590
- this.description = opts.description;
14591
- this.type = opts.type;
14592
- this.ifExists = opts.ifExists;
14593
- this.metadata = opts.metadata;
14594
- this.parameters = opts.parameters;
14595
- this.returns = opts.returns;
14596
- if (this.returns && !this.parameters) {
14597
- throw new Error("parameters are required if return type is defined");
15480
+ const enableCache = options.enableCache ?? true;
15481
+ let ret;
15482
+ if (options.parent) {
15483
+ ret = await withParent(
15484
+ options.parent,
15485
+ () => runEvaluator(
15486
+ null,
15487
+ evalDef,
15488
+ progressReporter,
15489
+ [],
15490
+ options.stream,
15491
+ options.parameters,
15492
+ shouldCollectResults,
15493
+ enableCache
15494
+ ),
15495
+ evaluator.state
15496
+ );
15497
+ } else {
15498
+ ret = await runEvaluator(
15499
+ experiment,
15500
+ evalDef,
15501
+ progressReporter,
15502
+ [],
15503
+ options.stream,
15504
+ options.parameters,
15505
+ shouldCollectResults,
15506
+ enableCache
15507
+ );
15508
+ }
15509
+ progressReporter.stop();
15510
+ resolvedReporter.reportEval(evalDef, ret, {
15511
+ verbose: true,
15512
+ jsonl: false
15513
+ });
15514
+ return ret;
15515
+ } finally {
15516
+ if (experiment) {
15517
+ await experiment.flush().catch(console.error);
15518
+ } else if (options.parent) {
15519
+ await flush().catch(console.error);
15520
+ }
14598
15521
  }
15522
+ } finally {
15523
+ progressReporter.stop();
14599
15524
  }
14600
- handler;
14601
- name;
14602
- slug;
14603
- type;
14604
- description;
14605
- parameters;
14606
- returns;
14607
- ifExists;
14608
- metadata;
14609
- key() {
14610
- return JSON.stringify([
14611
- this.project.id ?? "",
14612
- this.project.name ?? "",
14613
- this.slug
14614
- ]);
15525
+ }
15526
+ function Reporter(name, reporter) {
15527
+ const ret = { name, ...reporter };
15528
+ if (_evals.reporters[name]) {
15529
+ throw new Error(`Reporter ${name} already exists`);
14615
15530
  }
14616
- };
14617
- var CodePrompt = class {
14618
- project;
14619
- name;
14620
- slug;
14621
- prompt;
14622
- ifExists;
14623
- description;
14624
- id;
14625
- functionType;
14626
- toolFunctions;
14627
- metadata;
14628
- constructor(project, prompt, toolFunctions, opts, functionType) {
14629
- this.project = project;
14630
- this.name = opts.name;
14631
- this.slug = opts.slug;
14632
- this.prompt = prompt;
14633
- this.toolFunctions = toolFunctions;
14634
- this.ifExists = opts.ifExists;
14635
- this.description = opts.description;
14636
- this.id = opts.id;
14637
- this.functionType = functionType;
14638
- this.metadata = opts.metadata;
15531
+ if (globalThis._lazy_load) {
15532
+ _evals.reporters[name] = ret;
14639
15533
  }
14640
- async toFunctionDefinition(projectNameToId) {
14641
- const prompt_data = {
14642
- ...this.prompt
14643
- };
14644
- if (this.toolFunctions.length > 0) {
14645
- const resolvableToolFunctions = await Promise.all(
14646
- this.toolFunctions.map(async (fn) => {
14647
- if ("slug" in fn) {
14648
- return {
14649
- type: "slug",
14650
- project_id: await projectNameToId.resolve(fn.project),
14651
- slug: fn.slug
14652
- };
14653
- } else {
14654
- return fn;
14655
- }
14656
- })
14657
- );
14658
- prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
14659
- resolvableToolFunctions;
14660
- }
14661
- return {
14662
- project_id: await projectNameToId.resolve(this.project),
14663
- name: this.name,
14664
- slug: this.slug,
14665
- description: this.description ?? "",
14666
- function_data: {
14667
- type: "prompt"
14668
- },
14669
- function_type: this.functionType,
14670
- prompt_data,
14671
- if_exists: this.ifExists,
14672
- metadata: this.metadata
14673
- };
15534
+ return ret;
15535
+ }
15536
+ function serializeJSONWithPlainString(v) {
15537
+ if (typeof v === "string") {
15538
+ return v;
15539
+ } else {
15540
+ return JSON.stringify(v);
14674
15541
  }
14675
- };
14676
- var promptContentsSchema = z9.union([
14677
- z9.object({
14678
- prompt: z9.string()
14679
- }),
14680
- z9.object({
14681
- messages: z9.array(ChatCompletionMessageParam)
14682
- })
14683
- ]);
14684
- var promptDefinitionSchema = promptContentsSchema.and(
14685
- z9.object({
14686
- model: z9.string(),
14687
- params: ModelParams.optional(),
14688
- templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
14689
- })
14690
- );
14691
- var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
14692
- z9.object({
14693
- tools: z9.array(ToolFunctionDefinition).optional()
14694
- })
14695
- );
14696
- var PromptBuilder = class {
14697
- constructor(project) {
14698
- this.project = project;
14699
- }
14700
- create(opts) {
14701
- const toolFunctions = [];
14702
- const rawTools = [];
14703
- for (const tool of opts.tools ?? []) {
14704
- if (tool instanceof CodeFunction) {
14705
- toolFunctions.push(tool);
14706
- } else if ("type" in tool && !("function" in tool)) {
14707
- toolFunctions.push(tool);
14708
- } else {
14709
- rawTools.push(tool);
14710
- }
14711
- }
14712
- const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
14713
- const promptData = promptDefinitionToPromptData(opts, rawTools);
14714
- const promptRow = {
14715
- id: opts.id,
14716
- _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
14717
- name: opts.name,
14718
- slug,
14719
- prompt_data: promptData,
14720
- ...this.project.id !== void 0 ? { project_id: this.project.id } : {}
14721
- };
14722
- const prompt = new Prompt2(
14723
- promptRow,
14724
- {},
14725
- // It doesn't make sense to specify defaults here.
14726
- opts.noTrace ?? false
14727
- );
14728
- const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
14729
- ...opts,
14730
- slug
14731
- });
14732
- this.project.addPrompt(codePrompt);
14733
- return prompt;
14734
- }
14735
- };
14736
- function promptDefinitionToPromptData(promptDefinition, rawTools) {
14737
- const promptBlock = "messages" in promptDefinition ? {
14738
- type: "chat",
14739
- messages: promptDefinition.messages,
14740
- tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
14741
- } : {
14742
- type: "completion",
14743
- content: promptDefinition.prompt
14744
- };
14745
- return {
14746
- prompt: promptBlock,
14747
- options: {
14748
- model: promptDefinition.model,
14749
- params: promptDefinition.params
14750
- },
14751
- ...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
14752
- };
14753
15542
  }
14754
- var ProjectNameIdMap = class {
14755
- nameToId = {};
14756
- idToName = {};
14757
- async getId(projectName) {
14758
- if (!(projectName in this.nameToId)) {
14759
- const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
14760
- project_name: projectName
14761
- });
14762
- const result = z9.object({
14763
- project: Project
14764
- }).parse(response);
14765
- const projectId = result.project.id;
14766
- this.nameToId[projectName] = projectId;
14767
- this.idToName[projectId] = projectName;
14768
- }
14769
- return this.nameToId[projectName];
14770
- }
14771
- async getName(projectId) {
14772
- if (!(projectId in this.idToName)) {
14773
- const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
14774
- id: projectId
14775
- });
14776
- const result = z9.array(Project).nonempty().parse(response);
14777
- const projectName = result[0].name;
14778
- this.idToName[projectId] = projectName;
14779
- this.nameToId[projectName] = projectId;
14780
- }
14781
- return this.idToName[projectId];
14782
- }
14783
- async resolve(project) {
14784
- if (project.id) {
14785
- return project.id;
14786
- }
14787
- return this.getId(project.name);
14788
- }
14789
- };
14790
-
14791
- // src/eval-parameters.ts
14792
- var evalParametersSchema = z10.record(
14793
- z10.string(),
14794
- z10.union([
14795
- z10.object({
14796
- type: z10.literal("prompt"),
14797
- default: promptDefinitionWithToolsSchema.optional(),
14798
- description: z10.string().optional()
14799
- }),
14800
- z10.instanceof(z10.ZodType)
14801
- // For Zod schemas
14802
- ])
14803
- );
14804
- function validateParameters(parameters, parameterSchema) {
14805
- return Object.fromEntries(
14806
- Object.entries(parameterSchema).map(([name, schema]) => {
14807
- const value = parameters[name];
14808
- try {
14809
- if ("type" in schema && schema.type === "prompt") {
14810
- const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
14811
- schema.default,
14812
- schema.default.tools
14813
- ) : void 0;
14814
- if (!promptData) {
14815
- throw new Error(`Parameter '${name}' is required`);
14816
- }
14817
- return [name, Prompt2.fromPromptData(name, promptData)];
14818
- } else {
14819
- const schemaCasted = schema;
14820
- return [name, schemaCasted.parse(value)];
14821
- }
14822
- } catch (e) {
14823
- console.error("Error validating parameter", name, e);
14824
- throw Error(
14825
- `Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
14826
- );
14827
- }
14828
- })
15543
+ function evaluateFilter(object, filter2) {
15544
+ const { path, pattern } = filter2;
15545
+ const key = path.reduce(
15546
+ (acc, p) => typeof acc === "object" && acc !== null ? (
15547
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15548
+ acc[p]
15549
+ ) : void 0,
15550
+ object
14829
15551
  );
14830
- }
14831
-
14832
- // src/framework.ts
14833
- function BaseExperiment(options = {}) {
14834
- return { _type: "BaseExperiment", ...options };
14835
- }
14836
- var EvalResultWithSummary = class {
14837
- constructor(summary, results) {
14838
- this.summary = summary;
14839
- this.results = results;
14840
- }
14841
- /**
14842
- * @deprecated Use `summary` instead.
14843
- */
14844
- toString() {
14845
- return JSON.stringify(this.summary);
14846
- }
14847
- [Symbol.for("nodejs.util.inspect.custom")]() {
14848
- return `EvalResultWithSummary(summary="...", results=[...])`;
14849
- }
14850
- toJSON() {
14851
- return {
14852
- summary: this.summary,
14853
- results: this.results
14854
- };
14855
- }
14856
- };
14857
- function makeEvalName(projectName, experimentName) {
14858
- let out = projectName;
14859
- if (experimentName) {
14860
- out += ` [experimentName=${experimentName}]`;
14861
- }
14862
- return out;
14863
- }
14864
- function initExperiment2(state, options = {}) {
14865
- return init({
14866
- state,
14867
- ...options,
14868
- setCurrent: false
14869
- });
14870
- }
14871
- function callEvaluatorData(data) {
14872
- const dataResult = typeof data === "function" ? data() : data;
14873
- let baseExperiment = void 0;
14874
- if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
14875
- baseExperiment = dataResult.name;
15552
+ if (key === void 0) {
15553
+ return false;
14876
15554
  }
14877
- return {
14878
- data: dataResult,
14879
- baseExperiment
14880
- };
15555
+ return pattern.test(serializeJSONWithPlainString(key));
14881
15556
  }
14882
- function isAsyncIterable2(value) {
14883
- return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
15557
+ function scorerName(scorer, scorer_idx) {
15558
+ return scorer.name || `scorer_${scorer_idx}`;
14884
15559
  }
14885
- function isIterable(value) {
14886
- return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
15560
+ async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
15561
+ return await runEvaluatorInternal(
15562
+ experiment,
15563
+ evaluator,
15564
+ progressReporter,
15565
+ filters,
15566
+ stream,
15567
+ parameters,
15568
+ collectResults,
15569
+ enableCache
15570
+ );
14887
15571
  }
14888
- globalThis._evals = {
14889
- functions: [],
14890
- prompts: [],
14891
- evaluators: {},
14892
- reporters: {}
15572
+ var defaultErrorScoreHandler = ({
15573
+ rootSpan,
15574
+ data: _,
15575
+ unhandledScores
15576
+ }) => {
15577
+ const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
15578
+ rootSpan.log({ scores });
15579
+ return scores;
14893
15580
  };
14894
- function _initializeSpanContext() {
14895
- globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
14896
- }
14897
- async function Eval(name, evaluator, reporterOrOpts) {
14898
- const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
14899
- let evalName = makeEvalName(name, evaluator.experimentName);
14900
- if (globalThis._evals.evaluators[evalName]) {
14901
- evalName = `${evalName}_${Object.keys(_evals).length}`;
14902
- }
14903
- if (globalThis._lazy_load) {
14904
- globalThis._evals.evaluators[evalName] = {
14905
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
14906
- evaluator: {
14907
- evalName,
14908
- projectName: name,
14909
- ...evaluator
14910
- },
14911
- reporter: options.reporter
14912
- };
14913
- _initializeSpanContext();
14914
- return new EvalResultWithSummary(
14915
- {
14916
- scores: {},
14917
- metrics: {},
14918
- projectName: "",
14919
- experimentName: ""
14920
- },
14921
- []
14922
- );
14923
- }
14924
- const progressReporter = options.progress ?? new SimpleProgressReporter();
14925
- const shouldCollectResults = options.returnResults ?? true;
14926
- if (typeof options.reporter === "string") {
14927
- throw new Error(
14928
- "Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
14929
- );
14930
- }
14931
- const resolvedReporter = options.reporter || defaultReporter;
14932
- try {
14933
- const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
14934
- evaluator.data
14935
- );
14936
- const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
14937
- ...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
14938
- experiment: evaluator.experimentName,
14939
- description: evaluator.description,
14940
- metadata: evaluator.metadata,
14941
- isPublic: evaluator.isPublic,
14942
- update: evaluator.update,
14943
- baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
14944
- baseExperimentId: evaluator.baseExperimentId,
14945
- gitMetadataSettings: evaluator.gitMetadataSettings,
14946
- repoInfo: evaluator.repoInfo,
14947
- dataset: Dataset2.isDataset(data) ? data : void 0
14948
- });
14949
- if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
14950
- await experiment._waitForId();
14951
- }
14952
- if (experiment && options.onStart) {
14953
- const summary = await experiment.summarize({ summarizeScores: false });
14954
- options.onStart(summary);
14955
- }
14956
- try {
14957
- const evalDef = {
14958
- evalName,
14959
- projectName: name,
14960
- ...evaluator,
14961
- data
14962
- };
14963
- const enableCache = options.enableCache ?? true;
14964
- let ret;
14965
- if (options.parent) {
14966
- ret = await withParent(
14967
- options.parent,
14968
- () => runEvaluator(
14969
- null,
14970
- evalDef,
14971
- progressReporter,
14972
- [],
14973
- options.stream,
14974
- options.parameters,
14975
- shouldCollectResults,
14976
- enableCache
14977
- ),
14978
- evaluator.state
14979
- );
14980
- } else {
14981
- ret = await runEvaluator(
14982
- experiment,
14983
- evalDef,
14984
- progressReporter,
14985
- [],
14986
- options.stream,
14987
- options.parameters,
14988
- shouldCollectResults,
14989
- enableCache
14990
- );
14991
- }
14992
- progressReporter.stop();
14993
- resolvedReporter.reportEval(evalDef, ret, {
14994
- verbose: true,
14995
- jsonl: false
14996
- });
14997
- return ret;
14998
- } finally {
14999
- if (experiment) {
15000
- await experiment.flush().catch(console.error);
15001
- } else if (options.parent) {
15002
- await flush().catch(console.error);
15003
- }
15004
- }
15005
- } finally {
15006
- progressReporter.stop();
15007
- }
15008
- }
15009
- function Reporter(name, reporter) {
15010
- const ret = { name, ...reporter };
15011
- if (_evals.reporters[name]) {
15012
- throw new Error(`Reporter ${name} already exists`);
15013
- }
15014
- if (globalThis._lazy_load) {
15015
- _evals.reporters[name] = ret;
15016
- }
15017
- return ret;
15018
- }
15019
- function serializeJSONWithPlainString(v) {
15020
- if (typeof v === "string") {
15021
- return v;
15022
- } else {
15023
- return JSON.stringify(v);
15024
- }
15025
- }
15026
- function evaluateFilter(object, filter2) {
15027
- const { path, pattern } = filter2;
15028
- const key = path.reduce(
15029
- (acc, p) => typeof acc === "object" && acc !== null ? (
15030
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15031
- acc[p]
15032
- ) : void 0,
15033
- object
15034
- );
15035
- if (key === void 0) {
15036
- return false;
15037
- }
15038
- return pattern.test(serializeJSONWithPlainString(key));
15039
- }
15040
- function scorerName(scorer, scorer_idx) {
15041
- return scorer.name || `scorer_${scorer_idx}`;
15042
- }
15043
- async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
15044
- return await runEvaluatorInternal(
15045
- experiment,
15046
- evaluator,
15047
- progressReporter,
15048
- filters,
15049
- stream,
15050
- parameters,
15051
- collectResults,
15052
- enableCache
15053
- );
15054
- }
15055
- var defaultErrorScoreHandler = ({
15056
- rootSpan,
15057
- data: _,
15058
- unhandledScores
15059
- }) => {
15060
- const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
15061
- rootSpan.log({ scores });
15062
- return scores;
15063
- };
15064
- async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
15065
- if (enableCache) {
15066
- (evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
15581
+ async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
15582
+ if (enableCache) {
15583
+ (evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
15067
15584
  }
15068
15585
  try {
15069
15586
  if (typeof evaluator.data === "string") {
15070
15587
  throw new Error("Unimplemented: string data paths");
15071
15588
  }
15072
15589
  let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
15073
- parameters = validateParameters(
15590
+ parameters = await validateParameters(
15074
15591
  parameters ?? {},
15075
- evaluator.parameters ?? {}
15592
+ evaluator.parameters
15076
15593
  );
15077
15594
  if ("_type" in dataResult) {
15078
15595
  if (dataResult._type !== "BaseExperiment") {
@@ -15099,7 +15616,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
15099
15616
  }
15100
15617
  const resolvedDataResult = dataResult instanceof Promise ? await dataResult : dataResult;
15101
15618
  const dataIterable = (() => {
15102
- if (isAsyncIterable2(resolvedDataResult)) {
15619
+ if (isAsyncIterable3(resolvedDataResult)) {
15103
15620
  return resolvedDataResult;
15104
15621
  }
15105
15622
  if (Array.isArray(resolvedDataResult) || isIterable(resolvedDataResult)) {
@@ -15231,6 +15748,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
15231
15748
  } else {
15232
15749
  rootSpan.log({ output, metadata, expected });
15233
15750
  }
15751
+ if (evaluator.flushBeforeScoring) {
15752
+ await rootSpan.flush();
15753
+ }
15234
15754
  const scoringArgs = {
15235
15755
  input: datum.input,
15236
15756
  expected: "expected" in datum ? datum.expected : void 0,
@@ -15477,206 +15997,646 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
15477
15997
  spanCache?.stop();
15478
15998
  }
15479
15999
  }
15480
- }
15481
- var warning = (text) => `Warning: ${text}`;
15482
- function logError2(e, verbose) {
15483
- if (!verbose) {
15484
- console.error(`${e}`);
15485
- } else {
15486
- console.error(e);
16000
+ }
16001
+ var warning = (text) => `Warning: ${text}`;
16002
+ function logError2(e, verbose) {
16003
+ if (!verbose) {
16004
+ console.error(`${e}`);
16005
+ } else {
16006
+ console.error(e);
16007
+ }
16008
+ }
16009
+ function accumulateScores(accumulator, scores) {
16010
+ for (const [name, score] of Object.entries(scores)) {
16011
+ if (score === null || score === void 0) {
16012
+ continue;
16013
+ }
16014
+ const existing = accumulator[name] ?? { total: 0, count: 0 };
16015
+ accumulator[name] = {
16016
+ total: existing.total + score,
16017
+ count: existing.count + 1
16018
+ };
16019
+ }
16020
+ }
16021
+ function ensureScoreAccumulator(results) {
16022
+ const accumulator = {};
16023
+ for (const result of results) {
16024
+ accumulateScores(accumulator, result.scores);
16025
+ }
16026
+ return accumulator;
16027
+ }
16028
+ function buildLocalSummary(evaluator, results, precomputedScores) {
16029
+ const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
16030
+ return {
16031
+ projectName: evaluator.projectName,
16032
+ experimentName: evaluator.evalName,
16033
+ scores: Object.fromEntries(
16034
+ Object.entries(scoresByName).map(([name, { total, count }]) => [
16035
+ name,
16036
+ {
16037
+ name,
16038
+ score: count === 0 ? 0 : total / count,
16039
+ improvements: 0,
16040
+ regressions: 0
16041
+ }
16042
+ ])
16043
+ )
16044
+ };
16045
+ }
16046
+ function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
16047
+ if (failingResults.length > 0) {
16048
+ console.error(
16049
+ warning(
16050
+ `Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
16051
+ )
16052
+ );
16053
+ if (jsonl) {
16054
+ console.log(
16055
+ JSON.stringify({
16056
+ evaluatorName: evaluator.evalName,
16057
+ errors: failingResults.map(
16058
+ (r) => `${r.error instanceof Error ? r.error.stack : r.error}`
16059
+ )
16060
+ })
16061
+ );
16062
+ } else {
16063
+ for (const result of failingResults) {
16064
+ logError2(result.error, verbose);
16065
+ }
16066
+ }
16067
+ if (!verbose && !jsonl) {
16068
+ console.error(warning("Add --verbose to see full stack traces."));
16069
+ }
16070
+ }
16071
+ }
16072
+ var defaultReporter = {
16073
+ name: "Braintrust default reporter",
16074
+ async reportEval(evaluator, result, { verbose, jsonl }) {
16075
+ const { results, summary } = result;
16076
+ const failingResults = results.filter(
16077
+ (r) => r.error !== void 0
16078
+ );
16079
+ if (failingResults.length > 0) {
16080
+ reportFailures(evaluator, failingResults, { verbose, jsonl });
16081
+ }
16082
+ if (jsonl) {
16083
+ isomorph_default.writeln(JSON.stringify(summary));
16084
+ } else {
16085
+ isomorph_default.writeln("Experiment summary");
16086
+ isomorph_default.writeln("==================");
16087
+ if (summary.comparisonExperimentName) {
16088
+ isomorph_default.writeln(
16089
+ `${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
16090
+ );
16091
+ isomorph_default.writeln("");
16092
+ }
16093
+ const hasScores = Object.keys(summary.scores).length > 0;
16094
+ const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
16095
+ const hasComparison = !!summary.comparisonExperimentName;
16096
+ if (hasScores || hasMetrics) {
16097
+ if (hasComparison) {
16098
+ isomorph_default.writeln(
16099
+ "Name Value Change Improvements Regressions"
16100
+ );
16101
+ isomorph_default.writeln(
16102
+ "----------------------------------------------------------------"
16103
+ );
16104
+ }
16105
+ for (const score of Object.values(summary.scores)) {
16106
+ const scorePercent = (score.score * 100).toFixed(2);
16107
+ const scoreValue = `${scorePercent}%`;
16108
+ if (hasComparison) {
16109
+ let diffString = "-";
16110
+ if (!isEmpty2(score.diff)) {
16111
+ const diffPercent = (score.diff * 100).toFixed(2);
16112
+ const diffSign = score.diff > 0 ? "+" : "";
16113
+ diffString = `${diffSign}${diffPercent}%`;
16114
+ }
16115
+ const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
16116
+ const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
16117
+ isomorph_default.writeln(
16118
+ `${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
16119
+ );
16120
+ } else {
16121
+ isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
16122
+ }
16123
+ }
16124
+ for (const metric of Object.values(summary.metrics ?? {})) {
16125
+ const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
16126
+ const formattedValue = metric.metric.toFixed(fractionDigits);
16127
+ const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
16128
+ if (hasComparison) {
16129
+ let diffString = "-";
16130
+ if (!isEmpty2(metric.diff)) {
16131
+ const diffPercent = (metric.diff * 100).toFixed(2);
16132
+ const diffSign = metric.diff > 0 ? "+" : "";
16133
+ diffString = `${diffSign}${diffPercent}%`;
16134
+ }
16135
+ const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
16136
+ const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
16137
+ isomorph_default.writeln(
16138
+ `${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
16139
+ );
16140
+ } else {
16141
+ isomorph_default.writeln(
16142
+ `${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
16143
+ );
16144
+ }
16145
+ }
16146
+ }
16147
+ if (summary.experimentUrl) {
16148
+ isomorph_default.writeln("");
16149
+ isomorph_default.writeln(`View results for ${summary.experimentName}`);
16150
+ isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
16151
+ }
16152
+ }
16153
+ isomorph_default.writeln("");
16154
+ return failingResults.length === 0;
16155
+ },
16156
+ async reportRun(evalReports) {
16157
+ return evalReports.every((r) => r);
16158
+ }
16159
+ };
16160
+
16161
+ // src/framework2.ts
16162
+ import { z as z11 } from "zod/v3";
16163
+ var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
16164
+ var ProjectBuilder = class {
16165
+ create(opts) {
16166
+ return new Project2(opts);
16167
+ }
16168
+ };
16169
+ var projects = new ProjectBuilder();
16170
+ var Project2 = class {
16171
+ name;
16172
+ id;
16173
+ tools;
16174
+ prompts;
16175
+ parameters;
16176
+ scorers;
16177
+ _publishableCodeFunctions = [];
16178
+ _publishablePrompts = [];
16179
+ _publishableParameters = [];
16180
+ constructor(args) {
16181
+ _initializeSpanContext();
16182
+ this.name = "name" in args ? args.name : void 0;
16183
+ this.id = "id" in args ? args.id : void 0;
16184
+ this.tools = new ToolBuilder(this);
16185
+ this.prompts = new PromptBuilder(this);
16186
+ this.parameters = new ParametersBuilder(this);
16187
+ this.scorers = new ScorerBuilder(this);
16188
+ }
16189
+ addPrompt(prompt) {
16190
+ this._publishablePrompts.push(prompt);
16191
+ if (globalThis._lazy_load) {
16192
+ globalThis._evals.prompts.push(prompt);
16193
+ }
16194
+ }
16195
+ addParameters(parameters) {
16196
+ this._publishableParameters.push(parameters);
16197
+ if (globalThis._lazy_load) {
16198
+ if (globalThis._evals.parameters == null)
16199
+ globalThis._evals.parameters = [];
16200
+ globalThis._evals.parameters.push(parameters);
16201
+ }
16202
+ }
16203
+ addCodeFunction(fn) {
16204
+ this._publishableCodeFunctions.push(fn);
16205
+ if (globalThis._lazy_load) {
16206
+ globalThis._evals.functions.push(fn);
16207
+ }
16208
+ }
16209
+ async publish() {
16210
+ if (globalThis._lazy_load) {
16211
+ console.warn("publish() is a no-op when running `braintrust push`.");
16212
+ return;
16213
+ }
16214
+ await login();
16215
+ const projectMap = new ProjectNameIdMap();
16216
+ const functionDefinitions = [];
16217
+ if (this._publishableCodeFunctions.length > 0) {
16218
+ console.warn(
16219
+ "Code functions cannot be published directly. Use `braintrust push` instead."
16220
+ );
16221
+ }
16222
+ if (this._publishablePrompts.length > 0) {
16223
+ for (const prompt of this._publishablePrompts) {
16224
+ const functionDefinition = await prompt.toFunctionDefinition(projectMap);
16225
+ functionDefinitions.push(functionDefinition);
16226
+ }
16227
+ }
16228
+ await _internalGetGlobalState().apiConn().post_json("insert-functions", {
16229
+ functions: functionDefinitions
16230
+ });
16231
+ }
16232
+ };
16233
+ var ToolBuilder = class {
16234
+ constructor(project) {
16235
+ this.project = project;
16236
+ }
16237
+ taskCounter = 0;
16238
+ // This type definition is just a catch all so that the implementation can be
16239
+ // less specific than the two more specific declarations above.
16240
+ create(opts) {
16241
+ this.taskCounter++;
16242
+ opts = opts ?? {};
16243
+ const { handler, name, slug, parameters, returns, ...rest } = opts;
16244
+ let resolvedName = name ?? handler.name;
16245
+ if (resolvedName.trim().length === 0) {
16246
+ resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
16247
+ }
16248
+ const tool = new CodeFunction(this.project, {
16249
+ handler,
16250
+ name: resolvedName,
16251
+ slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
16252
+ type: "tool",
16253
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
16254
+ parameters,
16255
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
16256
+ returns,
16257
+ ...rest
16258
+ });
16259
+ this.project.addCodeFunction(tool);
16260
+ return tool;
16261
+ }
16262
+ };
16263
+ var ScorerBuilder = class {
16264
+ constructor(project) {
16265
+ this.project = project;
16266
+ }
16267
+ taskCounter = 0;
16268
+ create(opts) {
16269
+ this.taskCounter++;
16270
+ let resolvedName = opts.name;
16271
+ if (!resolvedName && "handler" in opts) {
16272
+ resolvedName = opts.handler.name;
16273
+ }
16274
+ if (!resolvedName || resolvedName.trim().length === 0) {
16275
+ resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
16276
+ }
16277
+ const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
16278
+ if ("handler" in opts) {
16279
+ const scorer = new CodeFunction(this.project, {
16280
+ ...opts,
16281
+ name: resolvedName,
16282
+ slug,
16283
+ type: "scorer"
16284
+ });
16285
+ this.project.addCodeFunction(scorer);
16286
+ } else {
16287
+ const promptBlock = "messages" in opts ? {
16288
+ type: "chat",
16289
+ messages: opts.messages
16290
+ } : {
16291
+ type: "completion",
16292
+ content: opts.prompt
16293
+ };
16294
+ const promptData = {
16295
+ prompt: promptBlock,
16296
+ options: {
16297
+ model: opts.model,
16298
+ params: opts.params
16299
+ },
16300
+ parser: {
16301
+ type: "llm_classifier",
16302
+ use_cot: opts.useCot,
16303
+ choice_scores: opts.choiceScores
16304
+ }
16305
+ };
16306
+ const codePrompt = new CodePrompt(
16307
+ this.project,
16308
+ promptData,
16309
+ [],
16310
+ {
16311
+ ...opts,
16312
+ name: resolvedName,
16313
+ slug
16314
+ },
16315
+ "scorer"
16316
+ );
16317
+ this.project.addPrompt(codePrompt);
16318
+ }
16319
+ }
16320
+ };
16321
+ var CodeFunction = class {
16322
+ constructor(project, opts) {
16323
+ this.project = project;
16324
+ this.handler = opts.handler;
16325
+ this.name = opts.name;
16326
+ this.slug = opts.slug;
16327
+ this.description = opts.description;
16328
+ this.type = opts.type;
16329
+ this.ifExists = opts.ifExists;
16330
+ this.metadata = opts.metadata;
16331
+ this.parameters = opts.parameters;
16332
+ this.returns = opts.returns;
16333
+ if (this.returns && !this.parameters) {
16334
+ throw new Error("parameters are required if return type is defined");
16335
+ }
16336
+ }
16337
+ handler;
16338
+ name;
16339
+ slug;
16340
+ type;
16341
+ description;
16342
+ parameters;
16343
+ returns;
16344
+ ifExists;
16345
+ metadata;
16346
+ key() {
16347
+ return JSON.stringify([
16348
+ this.project.id ?? "",
16349
+ this.project.name ?? "",
16350
+ this.slug
16351
+ ]);
16352
+ }
16353
+ };
16354
+ var CodePrompt = class {
16355
+ project;
16356
+ name;
16357
+ slug;
16358
+ prompt;
16359
+ ifExists;
16360
+ description;
16361
+ id;
16362
+ functionType;
16363
+ toolFunctions;
16364
+ metadata;
16365
+ constructor(project, prompt, toolFunctions, opts, functionType) {
16366
+ this.project = project;
16367
+ this.name = opts.name;
16368
+ this.slug = opts.slug;
16369
+ this.prompt = prompt;
16370
+ this.toolFunctions = toolFunctions;
16371
+ this.ifExists = opts.ifExists;
16372
+ this.description = opts.description;
16373
+ this.id = opts.id;
16374
+ this.functionType = functionType;
16375
+ this.metadata = opts.metadata;
16376
+ }
16377
+ async toFunctionDefinition(projectNameToId) {
16378
+ const prompt_data = {
16379
+ ...this.prompt
16380
+ };
16381
+ if (this.toolFunctions.length > 0) {
16382
+ const resolvableToolFunctions = await Promise.all(
16383
+ this.toolFunctions.map(async (fn) => {
16384
+ if ("slug" in fn) {
16385
+ return {
16386
+ type: "slug",
16387
+ project_id: await projectNameToId.resolve(fn.project),
16388
+ slug: fn.slug
16389
+ };
16390
+ } else {
16391
+ return fn;
16392
+ }
16393
+ })
16394
+ );
16395
+ prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
16396
+ resolvableToolFunctions;
16397
+ }
16398
+ return {
16399
+ project_id: await projectNameToId.resolve(this.project),
16400
+ name: this.name,
16401
+ slug: this.slug,
16402
+ description: this.description ?? "",
16403
+ function_data: {
16404
+ type: "prompt"
16405
+ },
16406
+ function_type: this.functionType,
16407
+ prompt_data,
16408
+ if_exists: this.ifExists,
16409
+ metadata: this.metadata
16410
+ };
16411
+ }
16412
+ };
16413
+ var PromptBuilder = class {
16414
+ constructor(project) {
16415
+ this.project = project;
16416
+ }
16417
+ create(opts) {
16418
+ const toolFunctions = [];
16419
+ const rawTools = [];
16420
+ for (const tool of opts.tools ?? []) {
16421
+ if (tool instanceof CodeFunction) {
16422
+ toolFunctions.push(tool);
16423
+ } else if ("type" in tool && !("function" in tool)) {
16424
+ toolFunctions.push(tool);
16425
+ } else {
16426
+ rawTools.push(tool);
16427
+ }
16428
+ }
16429
+ const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
16430
+ const promptData = promptDefinitionToPromptData(opts, rawTools);
16431
+ const promptRow = {
16432
+ id: opts.id,
16433
+ _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
16434
+ name: opts.name,
16435
+ slug,
16436
+ prompt_data: promptData,
16437
+ ...this.project.id !== void 0 ? { project_id: this.project.id } : {}
16438
+ };
16439
+ const prompt = new Prompt2(
16440
+ promptRow,
16441
+ {},
16442
+ // It doesn't make sense to specify defaults here.
16443
+ opts.noTrace ?? false
16444
+ );
16445
+ const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
16446
+ ...opts,
16447
+ slug
16448
+ });
16449
+ this.project.addPrompt(codePrompt);
16450
+ return prompt;
16451
+ }
16452
+ };
16453
+ var CodeParameters = class {
16454
+ project;
16455
+ name;
16456
+ slug;
16457
+ description;
16458
+ schema;
16459
+ ifExists;
16460
+ metadata;
16461
+ constructor(project, opts) {
16462
+ this.project = project;
16463
+ this.name = opts.name;
16464
+ this.slug = opts.slug;
16465
+ this.description = opts.description;
16466
+ this.schema = opts.schema;
16467
+ this.ifExists = opts.ifExists;
16468
+ this.metadata = opts.metadata;
15487
16469
  }
15488
- }
15489
- function accumulateScores(accumulator, scores) {
15490
- for (const [name, score] of Object.entries(scores)) {
15491
- if (score === null || score === void 0) {
15492
- continue;
15493
- }
15494
- const existing = accumulator[name] ?? { total: 0, count: 0 };
15495
- accumulator[name] = {
15496
- total: existing.total + score,
15497
- count: existing.count + 1
16470
+ async toFunctionDefinition(projectNameToId) {
16471
+ return {
16472
+ project_id: await projectNameToId.resolve(this.project),
16473
+ name: this.name,
16474
+ slug: this.slug,
16475
+ description: this.description ?? "",
16476
+ function_type: "parameters",
16477
+ function_data: {
16478
+ type: "parameters",
16479
+ data: {},
16480
+ __schema: serializeEvalParameterstoParametersSchema(this.schema)
16481
+ },
16482
+ if_exists: this.ifExists,
16483
+ metadata: this.metadata
15498
16484
  };
15499
16485
  }
15500
- }
15501
- function ensureScoreAccumulator(results) {
15502
- const accumulator = {};
15503
- for (const result of results) {
15504
- accumulateScores(accumulator, result.scores);
16486
+ };
16487
+ var ParametersBuilder = class {
16488
+ constructor(project) {
16489
+ this.project = project;
15505
16490
  }
15506
- return accumulator;
15507
- }
15508
- function buildLocalSummary(evaluator, results, precomputedScores) {
15509
- const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
15510
- return {
15511
- projectName: evaluator.projectName,
15512
- experimentName: evaluator.evalName,
15513
- scores: Object.fromEntries(
15514
- Object.entries(scoresByName).map(([name, { total, count }]) => [
15515
- name,
15516
- {
15517
- name,
15518
- score: count === 0 ? 0 : total / count,
15519
- improvements: 0,
15520
- regressions: 0
15521
- }
15522
- ])
15523
- )
15524
- };
15525
- }
15526
- function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
15527
- if (failingResults.length > 0) {
15528
- console.error(
15529
- warning(
15530
- `Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
15531
- )
15532
- );
15533
- if (jsonl) {
15534
- console.log(
15535
- JSON.stringify({
15536
- evaluatorName: evaluator.evalName,
15537
- errors: failingResults.map(
15538
- (r) => `${r.error instanceof Error ? r.error.stack : r.error}`
15539
- )
15540
- })
15541
- );
16491
+ create(opts) {
16492
+ const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
16493
+ const codeParameters = new CodeParameters(this.project, {
16494
+ name: opts.name,
16495
+ slug,
16496
+ description: opts.description,
16497
+ schema: opts.schema,
16498
+ ifExists: opts.ifExists,
16499
+ metadata: opts.metadata
16500
+ });
16501
+ this.project.addParameters(codeParameters);
16502
+ return opts.schema;
16503
+ }
16504
+ };
16505
+ function serializeEvalParameterstoParametersSchema(parameters) {
16506
+ const properties = {};
16507
+ const required = [];
16508
+ for (const [name, value] of Object.entries(parameters)) {
16509
+ if ("type" in value && value.type === "prompt") {
16510
+ const defaultPromptData = value.default ? promptDefinitionToPromptData(value.default) : void 0;
16511
+ properties[name] = {
16512
+ type: "object",
16513
+ "x-bt-type": "prompt",
16514
+ ...value.description ? { description: value.description } : {},
16515
+ ...defaultPromptData ? { default: defaultPromptData } : {}
16516
+ };
16517
+ if (!defaultPromptData) {
16518
+ required.push(name);
16519
+ }
15542
16520
  } else {
15543
- for (const result of failingResults) {
15544
- logError2(result.error, verbose);
16521
+ const schemaObj = zodToJsonSchema(value);
16522
+ properties[name] = schemaObj;
16523
+ if (!("default" in schemaObj)) {
16524
+ required.push(name);
15545
16525
  }
15546
16526
  }
15547
- if (!verbose && !jsonl) {
15548
- console.error(warning("Add --verbose to see full stack traces."));
15549
- }
15550
16527
  }
16528
+ return {
16529
+ type: "object",
16530
+ properties,
16531
+ ...required.length > 0 ? { required } : {},
16532
+ additionalProperties: true
16533
+ };
15551
16534
  }
15552
- var defaultReporter = {
15553
- name: "Braintrust default reporter",
15554
- async reportEval(evaluator, result, { verbose, jsonl }) {
15555
- const { results, summary } = result;
15556
- const failingResults = results.filter(
15557
- (r) => r.error !== void 0
15558
- );
15559
- if (failingResults.length > 0) {
15560
- reportFailures(evaluator, failingResults, { verbose, jsonl });
16535
+ var ProjectNameIdMap = class {
16536
+ nameToId = {};
16537
+ idToName = {};
16538
+ async getId(projectName) {
16539
+ if (!(projectName in this.nameToId)) {
16540
+ const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
16541
+ project_name: projectName
16542
+ });
16543
+ const result = z11.object({
16544
+ project: Project
16545
+ }).parse(response);
16546
+ const projectId = result.project.id;
16547
+ this.nameToId[projectName] = projectId;
16548
+ this.idToName[projectId] = projectName;
15561
16549
  }
15562
- if (jsonl) {
15563
- isomorph_default.writeln(JSON.stringify(summary));
15564
- } else {
15565
- isomorph_default.writeln("Experiment summary");
15566
- isomorph_default.writeln("==================");
15567
- if (summary.comparisonExperimentName) {
15568
- isomorph_default.writeln(
15569
- `${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
15570
- );
15571
- isomorph_default.writeln("");
15572
- }
15573
- const hasScores = Object.keys(summary.scores).length > 0;
15574
- const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
15575
- const hasComparison = !!summary.comparisonExperimentName;
15576
- if (hasScores || hasMetrics) {
15577
- if (hasComparison) {
15578
- isomorph_default.writeln(
15579
- "Name Value Change Improvements Regressions"
15580
- );
15581
- isomorph_default.writeln(
15582
- "----------------------------------------------------------------"
15583
- );
15584
- }
15585
- for (const score of Object.values(summary.scores)) {
15586
- const scorePercent = (score.score * 100).toFixed(2);
15587
- const scoreValue = `${scorePercent}%`;
15588
- if (hasComparison) {
15589
- let diffString = "-";
15590
- if (!isEmpty2(score.diff)) {
15591
- const diffPercent = (score.diff * 100).toFixed(2);
15592
- const diffSign = score.diff > 0 ? "+" : "";
15593
- diffString = `${diffSign}${diffPercent}%`;
15594
- }
15595
- const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
15596
- const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
15597
- isomorph_default.writeln(
15598
- `${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
15599
- );
15600
- } else {
15601
- isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
15602
- }
15603
- }
15604
- for (const metric of Object.values(summary.metrics ?? {})) {
15605
- const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
15606
- const formattedValue = metric.metric.toFixed(fractionDigits);
15607
- const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
15608
- if (hasComparison) {
15609
- let diffString = "-";
15610
- if (!isEmpty2(metric.diff)) {
15611
- const diffPercent = (metric.diff * 100).toFixed(2);
15612
- const diffSign = metric.diff > 0 ? "+" : "";
15613
- diffString = `${diffSign}${diffPercent}%`;
15614
- }
15615
- const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
15616
- const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
15617
- isomorph_default.writeln(
15618
- `${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
15619
- );
15620
- } else {
15621
- isomorph_default.writeln(
15622
- `${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
15623
- );
15624
- }
15625
- }
15626
- }
15627
- if (summary.experimentUrl) {
15628
- isomorph_default.writeln("");
15629
- isomorph_default.writeln(`View results for ${summary.experimentName}`);
15630
- isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
15631
- }
16550
+ return this.nameToId[projectName];
16551
+ }
16552
+ async getName(projectId) {
16553
+ if (!(projectId in this.idToName)) {
16554
+ const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
16555
+ id: projectId
16556
+ });
16557
+ const result = z11.array(Project).nonempty().parse(response);
16558
+ const projectName = result[0].name;
16559
+ this.idToName[projectId] = projectName;
16560
+ this.nameToId[projectName] = projectId;
15632
16561
  }
15633
- isomorph_default.writeln("");
15634
- return failingResults.length === 0;
15635
- },
15636
- async reportRun(evalReports) {
15637
- return evalReports.every((r) => r);
16562
+ return this.idToName[projectId];
16563
+ }
16564
+ async resolve(project) {
16565
+ if (project.id) {
16566
+ return project.id;
16567
+ }
16568
+ return this.getId(project.name);
15638
16569
  }
15639
16570
  };
15640
16571
 
15641
16572
  // dev/types.ts
15642
- import { z as z11 } from "zod/v3";
15643
- var evalBodySchema = z11.object({
15644
- name: z11.string(),
15645
- parameters: z11.record(z11.string(), z11.unknown()).nullish(),
16573
+ import { z as z12 } from "zod/v3";
16574
+ var evalBodySchema = z12.object({
16575
+ name: z12.string(),
16576
+ parameters: z12.record(z12.string(), z12.unknown()).nullish(),
15646
16577
  data: RunEval.shape.data,
15647
- scores: z11.array(
15648
- z11.object({
16578
+ scores: z12.array(
16579
+ z12.object({
15649
16580
  function_id: FunctionId,
15650
- name: z11.string()
16581
+ name: z12.string()
15651
16582
  })
15652
16583
  ).nullish(),
15653
- experiment_name: z11.string().nullish(),
15654
- project_id: z11.string().nullish(),
16584
+ experiment_name: z12.string().nullish(),
16585
+ project_id: z12.string().nullish(),
15655
16586
  parent: InvokeParent.optional(),
15656
- stream: z11.boolean().optional()
16587
+ stream: z12.boolean().optional()
15657
16588
  });
15658
- var evalParametersSerializedSchema = z11.record(
15659
- z11.string(),
15660
- z11.union([
15661
- z11.object({
15662
- type: z11.literal("prompt"),
16589
+ var staticParametersSchema = z12.record(
16590
+ z12.string(),
16591
+ z12.union([
16592
+ z12.object({
16593
+ type: z12.literal("prompt"),
15663
16594
  default: PromptData.optional(),
15664
- description: z11.string().optional()
16595
+ description: z12.string().optional()
15665
16596
  }),
15666
- z11.object({
15667
- type: z11.literal("data"),
15668
- schema: z11.record(z11.unknown()),
15669
- // JSON Schema
15670
- default: z11.unknown().optional(),
15671
- description: z11.string().optional()
16597
+ z12.object({
16598
+ type: z12.literal("data"),
16599
+ schema: z12.record(z12.unknown()),
16600
+ default: z12.unknown().optional(),
16601
+ description: z12.string().optional()
15672
16602
  })
15673
16603
  ])
15674
16604
  );
15675
- var evaluatorDefinitionSchema = z11.object({
15676
- parameters: evalParametersSerializedSchema.optional()
16605
+ var parametersSchema = z12.object({
16606
+ type: z12.literal("object"),
16607
+ properties: z12.record(z12.string(), z12.record(z12.unknown())),
16608
+ required: z12.array(z12.string()).optional(),
16609
+ additionalProperties: z12.boolean().optional()
16610
+ });
16611
+ var parametersSourceSchema = z12.object({
16612
+ parametersId: z12.string().optional(),
16613
+ slug: z12.string(),
16614
+ name: z12.string(),
16615
+ projectId: z12.string().optional(),
16616
+ version: z12.string().optional()
16617
+ });
16618
+ var parametersContainerSchema = z12.object({
16619
+ type: z12.literal("braintrust.parameters"),
16620
+ schema: parametersSchema,
16621
+ source: parametersSourceSchema
16622
+ });
16623
+ var staticParametersContainerSchema = z12.object({
16624
+ type: z12.literal("braintrust.staticParameters"),
16625
+ schema: staticParametersSchema,
16626
+ source: z12.null()
16627
+ });
16628
+ var serializedParametersContainerSchema = z12.union([
16629
+ parametersContainerSchema,
16630
+ staticParametersContainerSchema,
16631
+ // keeping this type here since old versions of the SDK will still pass the unwrapped schema and we need to handle this in the app
16632
+ staticParametersSchema
16633
+ ]);
16634
+ var evaluatorDefinitionSchema = z12.object({
16635
+ parameters: serializedParametersContainerSchema.optional(),
16636
+ scores: z12.array(z12.object({ name: z12.string() })).optional()
15677
16637
  });
15678
- var evaluatorDefinitionsSchema = z11.record(
15679
- z11.string(),
16638
+ var evaluatorDefinitionsSchema = z12.record(
16639
+ z12.string(),
15680
16640
  evaluatorDefinitionSchema
15681
16641
  );
15682
16642
 
@@ -15695,6 +16655,7 @@ export {
15695
16655
  CodePrompt,
15696
16656
  ContextManager,
15697
16657
  DEFAULT_FETCH_BATCH_SIZE,
16658
+ DEFAULT_MAX_REQUEST_SIZE,
15698
16659
  Dataset2 as Dataset,
15699
16660
  ERR_PERMALINK,
15700
16661
  Eval,
@@ -15705,6 +16666,7 @@ export {
15705
16666
  IDGenerator,
15706
16667
  JSONAttachment,
15707
16668
  LEGACY_CACHED_HEADER,
16669
+ LOGS3_OVERFLOW_REFERENCE_TYPE,
15708
16670
  LazyValue,
15709
16671
  Logger,
15710
16672
  LoginInvalidOrgError,
@@ -15729,8 +16691,10 @@ export {
15729
16691
  _exportsForTestingOnly,
15730
16692
  _internalGetGlobalState,
15731
16693
  _internalSetInitialState,
16694
+ addAzureBlobHeaders,
15732
16695
  braintrustStreamChunkSchema,
15733
16696
  buildLocalSummary,
16697
+ constructLogs3OverflowRequest,
15734
16698
  createFinalValuePassThroughStream,
15735
16699
  currentExperiment,
15736
16700
  currentLogger,
@@ -15755,15 +16719,18 @@ export {
15755
16719
  initLogger,
15756
16720
  invoke,
15757
16721
  isTemplateFormat,
16722
+ loadParameters,
15758
16723
  loadPrompt,
15759
16724
  log,
15760
16725
  logError,
15761
16726
  login,
15762
16727
  loginToState,
16728
+ logs3OverflowUploadSchema,
15763
16729
  newId,
15764
16730
  parseCachedHeader,
15765
16731
  parseTemplateFormat,
15766
16732
  permalink,
16733
+ pickLogs3OverflowObjectIds,
15767
16734
  projects,
15768
16735
  promptContentsSchema,
15769
16736
  promptDefinitionSchema,
@@ -15784,6 +16751,8 @@ export {
15784
16751
  traceable,
15785
16752
  traced,
15786
16753
  updateSpan,
16754
+ uploadLogs3OverflowPayload,
16755
+ utf8ByteLength,
15787
16756
  withCurrent,
15788
16757
  withDataset,
15789
16758
  withExperiment,