braintrust 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -332,11 +332,19 @@ function getIdGenerator() {
332
332
 
333
333
  // util/db_fields.ts
334
334
  var TRANSACTION_ID_FIELD = "_xact_id";
335
// Row key that stringifyWithOverflowMeta compares against `true` to set
// `is_delete` in a row's overflow metadata.
+ var OBJECT_DELETE_FIELD = "_object_delete";
335
336
  var IS_MERGE_FIELD = "_is_merge";
336
337
  var AUDIT_SOURCE_FIELD = "_audit_source";
337
338
  var AUDIT_METADATA_FIELD = "_audit_metadata";
338
339
  var VALID_SOURCES = ["app", "api", "external"];
339
- var PARENT_ID_FIELD = "_parent_id";
340
// Row keys that pickLogs3OverflowObjectIds copies (in this order) into the
// `object_ids` entry of a row's overflow metadata.
var OBJECT_ID_KEYS = [
  "experiment_id", "dataset_id", "prompt_session_id",
  "project_id", "log_id", "function_data"
];
340
348
 
341
349
  // util/span_identifier_v3.ts
342
350
  import * as uuid3 from "uuid";
@@ -1007,13 +1015,6 @@ function mergeDictsWithPathsHelper({
1007
1015
  function mergeDicts(mergeInto, mergeFrom) {
1008
1016
  return mergeDictsWithPaths({ mergeInto, mergeFrom, mergePaths: [] });
1009
1017
  }
1010
- function mapAt(m, k) {
1011
- const ret = m.get(k);
1012
- if (ret === void 0) {
1013
- throw new Error(`Map does not contain key ${k}`);
1014
- }
1015
- return ret;
1016
- }
1017
1018
  function recordFind(m, k) {
1018
1019
  return m[k];
1019
1020
  }
@@ -1028,72 +1029,8 @@ function getObjValueByPath(row, path2) {
1028
1029
  return curr;
1029
1030
  }
1030
1031
 
1031
- // util/graph_util.ts
1032
- function depthFirstSearch(args) {
1033
- const { graph, firstVisitF, lastVisitF } = args;
1034
- for (const vs of graph.values()) {
1035
- for (const v of vs.values()) {
1036
- if (!graph.has(v)) {
1037
- throw new Error(`Outgoing vertex ${v} must be a key in the graph`);
1038
- }
1039
- }
1040
- }
1041
- const firstVisitedVertices = /* @__PURE__ */ new Set();
1042
- const visitationOrder = args.visitationOrder ?? [...graph.keys()];
1043
- const events = visitationOrder.map((vertex) => ({ eventType: "first", vertex, extras: {} })).reverse();
1044
- while (events.length) {
1045
- const { eventType, vertex, extras } = events.pop();
1046
- if (eventType === "last") {
1047
- lastVisitF?.(vertex);
1048
- continue;
1049
- }
1050
- if (firstVisitedVertices.has(vertex)) {
1051
- continue;
1052
- }
1053
- firstVisitedVertices.add(vertex);
1054
- firstVisitF?.(vertex, { parentVertex: extras.parentVertex });
1055
- events.push({ eventType: "last", vertex, extras: {} });
1056
- mapAt(graph, vertex).forEach((child) => {
1057
- events.push({
1058
- eventType: "first",
1059
- vertex: child,
1060
- extras: { parentVertex: vertex }
1061
- });
1062
- });
1063
- }
1064
- }
1065
- function undirectedConnectedComponents(graph) {
1066
- const directedGraph = new Map(
1067
- [...graph.vertices].map((v) => [v, /* @__PURE__ */ new Set()])
1068
- );
1069
- for (const [i, j] of graph.edges) {
1070
- mapAt(directedGraph, i).add(j);
1071
- mapAt(directedGraph, j).add(i);
1072
- }
1073
- let labelCounter = 0;
1074
- const vertexLabels = /* @__PURE__ */ new Map();
1075
- const firstVisitF = (vertex, args) => {
1076
- const label = args?.parentVertex !== void 0 ? mapAt(vertexLabels, args?.parentVertex) : labelCounter++;
1077
- vertexLabels.set(vertex, label);
1078
- };
1079
- depthFirstSearch({ graph: directedGraph, firstVisitF });
1080
- const output = Array.from({ length: labelCounter }).map(() => []);
1081
- for (const [vertex, label] of vertexLabels.entries()) {
1082
- output[label].push(vertex);
1083
- }
1084
- return output;
1085
- }
1086
- function topologicalSort(graph, visitationOrder) {
1087
- const reverseOrdering = [];
1088
- const lastVisitF = (vertex) => {
1089
- reverseOrdering.push(vertex);
1090
- };
1091
- depthFirstSearch({ graph, lastVisitF, visitationOrder });
1092
- return reverseOrdering.reverse();
1093
- }
1094
-
1095
1032
  // util/merge_row_batch.ts
1096
- function generateMergedRowKey(row, useParentIdForId) {
1033
+ function generateMergedRowKey(row) {
1097
1034
  return JSON.stringify(
1098
1035
  [
1099
1036
  "org_id",
@@ -1102,7 +1039,7 @@ function generateMergedRowKey(row, useParentIdForId) {
1102
1039
  "dataset_id",
1103
1040
  "prompt_session_id",
1104
1041
  "log_id",
1105
- useParentIdForId ?? false ? PARENT_ID_FIELD : "id"
1042
+ "id"
1106
1043
  ].map((k) => row[k])
1107
1044
  );
1108
1045
  }
@@ -1156,96 +1093,34 @@ function mergeRowBatch(rows) {
1156
1093
  rowGroups.set(key, row);
1157
1094
  }
1158
1095
  }
1159
- const merged = [...rowGroups.values()];
1160
- const rowToLabel = new Map(
1161
- merged.map((r, i) => [generateMergedRowKey(r), i])
1162
- );
1163
- const graph = new Map(
1164
- Array.from({ length: merged.length }).map((_, i) => [i, /* @__PURE__ */ new Set()])
1165
- );
1166
- merged.forEach((r, i) => {
1167
- const parentId = r[PARENT_ID_FIELD];
1168
- if (!parentId) {
1169
- return;
1170
- }
1171
- const parentRowKey = generateMergedRowKey(
1172
- r,
1173
- true
1174
- /* useParentIdForId */
1175
- );
1176
- const parentLabel = rowToLabel.get(parentRowKey);
1177
- if (parentLabel !== void 0) {
1178
- mapAt(graph, parentLabel).add(i);
1179
- }
1180
- });
1181
- const connectedComponents = undirectedConnectedComponents({
1182
- vertices: new Set(graph.keys()),
1183
- edges: new Set(
1184
- [...graph.entries()].flatMap(
1185
- ([k, vs]) => [...vs].map((v) => {
1186
- const ret = [k, v];
1187
- return ret;
1188
- })
1189
- )
1190
- )
1191
- });
1192
- const buckets = connectedComponents.map(
1193
- (cc) => topologicalSort(
1194
- graph,
1195
- cc
1196
- /* visitationOrder */
1197
- )
1198
- );
1199
- return buckets.map((bucket) => bucket.map((i) => merged[i]));
1096
+ return [...rowGroups.values()];
1200
1097
  }
1201
1098
/**
 * Splits `items` into consecutive batches, preserving input order.
 *
 * An item joins the current batch only while the batch would stay strictly
 * below `batchMaxNumBytes` and currently holds fewer than `batchMaxNumItems`
 * entries; otherwise the current batch is closed first. A batch always
 * receives at least one item, so an item larger than the byte limit ends up
 * alone in its own batch instead of being dropped.
 *
 * @param {object} args
 * @param {Array} args.items - Items to batch.
 * @param {number} [args.batchMaxNumItems] - Maximum items per batch (default: unlimited).
 * @param {number} [args.batchMaxNumBytes] - Size budget per batch (default: unlimited).
 * @param {(item: any) => number} [args.getByteSize] - Size of one item. Defaults
 *   to `item.length`, which keeps callers that pass plain strings or arrays
 *   working without supplying a callback.
 * @returns {Array<Array>} The batches, in order.
 */
function batchItems(args) {
  const { items } = args;
  const batchMaxNumItems = args.batchMaxNumItems ?? Number.POSITIVE_INFINITY;
  const batchMaxNumBytes = args.batchMaxNumBytes ?? Number.POSITIVE_INFINITY;
  const getByteSize = args.getByteSize ?? ((item) => item.length);
  const output = [];
  let batch = [];
  let batchLen = 0;
  for (const item of items) {
    // Measure each item once and reuse the value for both the fit check and
    // the running total.
    const itemSize = getByteSize(item);
    const fits =
      itemSize + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems;
    if (batch.length > 0 && !fits) {
      output.push(batch);
      batch = [];
      batchLen = 0;
    }
    batch.push(item);
    batchLen += itemSize;
  }
  if (batch.length > 0) {
    output.push(batch);
  }
  return output;
}
@@ -1832,10 +1707,15 @@ var FunctionTypeEnum = z6.enum([
1832
1707
  "preprocessor",
1833
1708
  "facet",
1834
1709
  "classifier",
1835
- "tag"
1710
+ "tag",
1711
+ "parameters"
1836
1712
  ]);
1837
1713
  var NullableSavedFunctionId = z6.union([
1838
- z6.object({ type: z6.literal("function"), id: z6.string() }),
1714
+ z6.object({
1715
+ type: z6.literal("function"),
1716
+ id: z6.string(),
1717
+ version: z6.string().optional()
1718
+ }),
1839
1719
  z6.object({
1840
1720
  type: z6.literal("global"),
1841
1721
  name: z6.string(),
@@ -1843,6 +1723,67 @@ var NullableSavedFunctionId = z6.union([
1843
1723
  }),
1844
1724
  z6.null()
1845
1725
  ]);
1726
// Zod schema for a version-1 topic map report. `settings`, `query_settings`
// and `clusters` are required keys; `created_at` and `embedding_points` are
// optional. Referenced by TopicMapData.report.
+ var TopicMapReport = z6.object({
1727
+ version: z6.literal(1),
1728
+ created_at: z6.string().optional(),
1729
+ settings: z6.object({
1730
+ algorithm: z6.enum(["hdbscan", "kmeans", "hierarchical"]),
1731
+ dimension_reduction: z6.enum(["umap", "pca", "none"]),
1732
+ vector_field: z6.string(),
1733
+ embedding_model: z6.string(),
1734
+ n_clusters: z6.union([z6.number(), z6.null()]).optional(),
1735
+ umap_dimensions: z6.union([z6.number(), z6.null()]).optional(),
1736
+ min_cluster_size: z6.union([z6.number(), z6.null()]).optional(),
1737
+ min_samples: z6.union([z6.number(), z6.null()]).optional()
1738
+ }),
// Every key of query_settings is optional because of the trailing .partial().
1739
+ query_settings: z6.object({
1740
+ hierarchy_threshold: z6.union([z6.number(), z6.null()]),
1741
+ auto_naming: z6.boolean(),
1742
+ skip_cache: z6.boolean(),
1743
+ viz_mode: z6.enum(["bar", "scatter"]),
1744
+ naming_model: z6.string()
1745
+ }).partial(),
// Each cluster entry carries both `parent_cluster_id` and `parent_id`, each an
// optional number-or-null. NOTE(review): how the two differ is not visible here.
1746
+ clusters: z6.array(
1747
+ z6.object({
1748
+ cluster_id: z6.number(),
1749
+ parent_cluster_id: z6.union([z6.number(), z6.null()]).optional(),
1750
+ topic_id: z6.string(),
1751
+ count: z6.number(),
1752
+ sample_texts: z6.array(z6.string()),
1753
+ samples: z6.array(
1754
+ z6.object({
1755
+ id: z6.string(),
1756
+ text: z6.string(),
1757
+ root_span_id: z6.string(),
1758
+ span_id: z6.string()
1759
+ })
1760
+ ),
1761
+ name: z6.string().optional(),
1762
+ description: z6.string().optional(),
1763
+ keywords: z6.array(z6.string()).optional(),
1764
+ centroid: z6.array(z6.number()).optional(),
1765
+ parent_id: z6.union([z6.number(), z6.null()]).optional(),
1766
+ is_leaf: z6.boolean().optional(),
1767
+ depth: z6.number().optional()
1768
+ })
1769
+ ),
1770
+ embedding_points: z6.array(
1771
+ z6.object({
1772
+ x: z6.number(),
1773
+ y: z6.number(),
1774
+ cluster: z6.number(),
1775
+ text: z6.string().optional()
1776
+ })
1777
+ ).optional()
1778
+ });
1779
// Zod schema for function data discriminated by type "topic_map".
// `source_facet`, `embedding_model` and `bundle_key` are required strings;
// `distance_threshold` and the embedded `report` (TopicMapReport) are optional.
// Used by BatchedFacetData.topic_maps[*].topic_map_data and as a member of the
// FunctionData union.
+ var TopicMapData = z6.object({
1780
+ type: z6.literal("topic_map"),
1781
+ source_facet: z6.string(),
1782
+ embedding_model: z6.string(),
1783
+ bundle_key: z6.string(),
1784
+ distance_threshold: z6.number().optional(),
1785
+ report: TopicMapReport.optional()
1786
+ });
1846
1787
  var BatchedFacetData = z6.object({
1847
1788
  type: z6.literal("batched_facet"),
1848
1789
  preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
@@ -1851,9 +1792,17 @@ var BatchedFacetData = z6.object({
1851
1792
  name: z6.string(),
1852
1793
  prompt: z6.string(),
1853
1794
  model: z6.string().optional(),
1795
+ embedding_model: z6.string().optional(),
1854
1796
  no_match_pattern: z6.string().optional()
1855
1797
  })
1856
- )
1798
+ ),
1799
+ topic_maps: z6.record(
1800
+ z6.object({
1801
+ function_name: z6.string(),
1802
+ topic_map_id: z6.string().optional(),
1803
+ topic_map_data: TopicMapData
1804
+ })
1805
+ ).optional()
1857
1806
  });
1858
1807
  var BraintrustModelParams = z6.object({
1859
1808
  use_cache: z6.boolean(),
@@ -2064,6 +2013,18 @@ var ObjectReferenceNullish = z6.union([
2064
2013
  }),
2065
2014
  z6.null()
2066
2015
  ]);
2016
// Zod schema for a saved-function reference: either
//   { type: "function", id, version? }  or
//   { type: "global", name, function_type? }  (function_type defaults to "scorer").
// Unlike NullableSavedFunctionId, this union has no null member.
// It is declared ahead of DatasetEvent, whose classifications[].source
// references it.
+ var SavedFunctionId = z6.union([
2017
+ z6.object({
2018
+ type: z6.literal("function"),
2019
+ id: z6.string(),
2020
+ version: z6.string().optional()
2021
+ }),
2022
+ z6.object({
2023
+ type: z6.literal("global"),
2024
+ name: z6.string(),
2025
+ function_type: FunctionTypeEnum.optional().default("scorer")
2026
+ })
2027
+ ]);
2067
2028
  var DatasetEvent = z6.object({
2068
2029
  id: z6.string(),
2069
2030
  _xact_id: z6.string(),
@@ -2083,7 +2044,36 @@ var DatasetEvent = z6.object({
2083
2044
  is_root: z6.union([z6.boolean(), z6.null()]).optional(),
2084
2045
  origin: ObjectReferenceNullish.optional(),
2085
2046
  comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2086
- audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
2047
+ audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2048
+ facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2049
+ classifications: z6.union([
2050
+ z6.record(
2051
+ z6.array(
2052
+ z6.object({
2053
+ id: z6.string(),
2054
+ label: z6.string().optional(),
2055
+ confidence: z6.union([z6.number(), z6.null()]).optional(),
2056
+ metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2057
+ source: SavedFunctionId.and(
2058
+ z6.union([
2059
+ z6.object({
2060
+ type: z6.literal("function"),
2061
+ id: z6.string(),
2062
+ version: z6.string().optional()
2063
+ }),
2064
+ z6.object({
2065
+ type: z6.literal("global"),
2066
+ name: z6.string(),
2067
+ function_type: FunctionTypeEnum.optional().default("scorer")
2068
+ }),
2069
+ z6.null()
2070
+ ])
2071
+ ).optional()
2072
+ })
2073
+ )
2074
+ ),
2075
+ z6.null()
2076
+ ]).optional()
2087
2077
  });
2088
2078
  var EnvVar = z6.object({
2089
2079
  id: z6.string().uuid(),
@@ -2160,7 +2150,8 @@ var SpanType = z6.union([
2160
2150
  "automation",
2161
2151
  "facet",
2162
2152
  "preprocessor",
2163
- "classifier"
2153
+ "classifier",
2154
+ "review"
2164
2155
  ]),
2165
2156
  z6.null()
2166
2157
  ]);
@@ -2201,10 +2192,43 @@ var ExperimentEvent = z6.object({
2201
2192
  is_root: z6.union([z6.boolean(), z6.null()]).optional(),
2202
2193
  origin: ObjectReferenceNullish.optional(),
2203
2194
  comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2204
- audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
2195
+ audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2196
+ facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2197
+ classifications: z6.union([
2198
+ z6.record(
2199
+ z6.array(
2200
+ z6.object({
2201
+ id: z6.string(),
2202
+ label: z6.string().optional(),
2203
+ confidence: z6.union([z6.number(), z6.null()]).optional(),
2204
+ metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2205
+ source: SavedFunctionId.and(
2206
+ z6.union([
2207
+ z6.object({
2208
+ type: z6.literal("function"),
2209
+ id: z6.string(),
2210
+ version: z6.string().optional()
2211
+ }),
2212
+ z6.object({
2213
+ type: z6.literal("global"),
2214
+ name: z6.string(),
2215
+ function_type: FunctionTypeEnum.optional().default("scorer")
2216
+ }),
2217
+ z6.null()
2218
+ ])
2219
+ ).optional()
2220
+ })
2221
+ )
2222
+ ),
2223
+ z6.null()
2224
+ ]).optional()
2205
2225
  });
2206
2226
  var ExtendedSavedFunctionId = z6.union([
2207
- z6.object({ type: z6.literal("function"), id: z6.string() }),
2227
+ z6.object({
2228
+ type: z6.literal("function"),
2229
+ id: z6.string(),
2230
+ version: z6.string().optional()
2231
+ }),
2208
2232
  z6.object({
2209
2233
  type: z6.literal("global"),
2210
2234
  name: z6.string(),
@@ -2221,6 +2245,7 @@ var FacetData = z6.object({
2221
2245
  preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
2222
2246
  prompt: z6.string(),
2223
2247
  model: z6.string().optional(),
2248
+ embedding_model: z6.string().optional(),
2224
2249
  no_match_pattern: z6.string().optional()
2225
2250
  });
2226
2251
  var PromptBlockDataNullish = z6.union([
@@ -2310,14 +2335,6 @@ var PromptParserNullish = z6.union([
2310
2335
  }),
2311
2336
  z6.null()
2312
2337
  ]);
2313
- var SavedFunctionId = z6.union([
2314
- z6.object({ type: z6.literal("function"), id: z6.string() }),
2315
- z6.object({
2316
- type: z6.literal("global"),
2317
- name: z6.string(),
2318
- function_type: FunctionTypeEnum.optional().default("scorer")
2319
- })
2320
- ]);
2321
2338
  var PromptDataNullish = z6.union([
2322
2339
  z6.object({
2323
2340
  prompt: PromptBlockDataNullish,
@@ -2368,7 +2385,8 @@ var FunctionTypeEnumNullish = z6.union([
2368
2385
  "preprocessor",
2369
2386
  "facet",
2370
2387
  "classifier",
2371
- "tag"
2388
+ "tag",
2389
+ "parameters"
2372
2390
  ]),
2373
2391
  z6.null()
2374
2392
  ]);
@@ -2460,7 +2478,8 @@ var FunctionData = z6.union([
2460
2478
  type: z6.literal("remote_eval"),
2461
2479
  endpoint: z6.string(),
2462
2480
  eval_name: z6.string(),
2463
- parameters: z6.object({}).partial().passthrough()
2481
+ parameters: z6.object({}).partial().passthrough(),
2482
+ parameters_version: z6.union([z6.string(), z6.null()]).optional()
2464
2483
  }),
2465
2484
  z6.object({
2466
2485
  type: z6.literal("global"),
@@ -2469,7 +2488,18 @@ var FunctionData = z6.union([
2469
2488
  config: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional()
2470
2489
  }),
2471
2490
  FacetData,
2472
- BatchedFacetData
2491
+ BatchedFacetData,
2492
+ z6.object({
2493
+ type: z6.literal("parameters"),
2494
+ data: z6.object({}).partial().passthrough(),
2495
+ __schema: z6.object({
2496
+ type: z6.literal("object"),
2497
+ properties: z6.record(z6.object({}).partial().passthrough()),
2498
+ required: z6.array(z6.string()).optional(),
2499
+ additionalProperties: z6.boolean().optional()
2500
+ })
2501
+ }),
2502
+ TopicMapData.and(z6.unknown())
2473
2503
  ]);
2474
2504
  var Function = z6.object({
2475
2505
  id: z6.string().uuid(),
@@ -2499,7 +2529,13 @@ var Function = z6.object({
2499
2529
  z6.null()
2500
2530
  ]).optional()
2501
2531
  });
2502
- var FunctionFormat = z6.enum(["llm", "code", "global", "graph"]);
2532
// Zod enum of accepted function format names; this version adds "topic_map"
// to the previous set of "llm", "code", "global" and "graph".
+ var FunctionFormat = z6.enum([
2533
+ "llm",
2534
+ "code",
2535
+ "global",
2536
+ "graph",
2537
+ "topic_map"
2538
+ ]);
2503
2539
  var PromptData = z6.object({
2504
2540
  prompt: PromptBlockDataNullish,
2505
2541
  options: PromptOptionsNullish,
@@ -2582,13 +2618,14 @@ var FunctionObjectType = z6.enum([
2582
2618
  "custom_view",
2583
2619
  "preprocessor",
2584
2620
  "facet",
2585
- "classifier"
2621
+ "classifier",
2622
+ "parameters"
2586
2623
  ]);
2587
2624
  var FunctionOutputType = z6.enum([
2588
2625
  "completion",
2589
2626
  "score",
2590
2627
  "facet",
2591
- "tag",
2628
+ "classification",
2592
2629
  "any"
2593
2630
  ]);
2594
2631
  var GitMetadataSettings = z6.object({
@@ -2624,6 +2661,10 @@ var GroupScope = z6.object({
2624
2661
  idle_seconds: z6.number().optional()
2625
2662
  });
2626
2663
  var IfExists = z6.enum(["error", "ignore", "replace"]);
2664
// Zod schema accepting one of "auto", "click_to_load", "blocked", or null.
// Used (as an optional field) by Organization.image_rendering_mode.
+ var ImageRenderingMode = z6.union([
2665
+ z6.enum(["auto", "click_to_load", "blocked"]),
2666
+ z6.null()
2667
+ ]);
2627
2668
  var InvokeParent = z6.union([
2628
2669
  z6.object({
2629
2670
  object_type: z6.enum(["project_logs", "experiment", "playground_logs"]),
@@ -2716,7 +2757,8 @@ var Organization = z6.object({
2716
2757
  is_universal_api: z6.union([z6.boolean(), z6.null()]).optional(),
2717
2758
  proxy_url: z6.union([z6.string(), z6.null()]).optional(),
2718
2759
  realtime_url: z6.union([z6.string(), z6.null()]).optional(),
2719
- created: z6.union([z6.string(), z6.null()]).optional()
2760
+ created: z6.union([z6.string(), z6.null()]).optional(),
2761
+ image_rendering_mode: ImageRenderingMode.optional()
2720
2762
  });
2721
2763
  var ProjectSettings = z6.union([
2722
2764
  z6.object({
@@ -2857,7 +2899,36 @@ var ProjectLogsEvent = z6.object({
2857
2899
  origin: ObjectReferenceNullish.optional(),
2858
2900
  comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2859
2901
  audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
2860
- _async_scoring_state: z6.unknown().optional()
2902
+ _async_scoring_state: z6.unknown().optional(),
2903
+ facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2904
+ classifications: z6.union([
2905
+ z6.record(
2906
+ z6.array(
2907
+ z6.object({
2908
+ id: z6.string(),
2909
+ label: z6.string().optional(),
2910
+ confidence: z6.union([z6.number(), z6.null()]).optional(),
2911
+ metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
2912
+ source: SavedFunctionId.and(
2913
+ z6.union([
2914
+ z6.object({
2915
+ type: z6.literal("function"),
2916
+ id: z6.string(),
2917
+ version: z6.string().optional()
2918
+ }),
2919
+ z6.object({
2920
+ type: z6.literal("global"),
2921
+ name: z6.string(),
2922
+ function_type: FunctionTypeEnum.optional().default("scorer")
2923
+ }),
2924
+ z6.null()
2925
+ ])
2926
+ ).optional()
2927
+ })
2928
+ )
2929
+ ),
2930
+ z6.null()
2931
+ ]).optional()
2861
2932
  });
2862
2933
  var ProjectScoreType = z6.enum([
2863
2934
  "slider",
@@ -3159,12 +3230,15 @@ var View = z6.object({
3159
3230
  "datasets",
3160
3231
  "dataset",
3161
3232
  "prompts",
3233
+ "parameters",
3162
3234
  "tools",
3163
3235
  "scorers",
3164
3236
  "classifiers",
3165
3237
  "logs",
3166
3238
  "monitor",
3167
- "for_review"
3239
+ "for_review_project_log",
3240
+ "for_review_experiments",
3241
+ "for_review_datasets"
3168
3242
  ]),
3169
3243
  name: z6.string(),
3170
3244
  created: z6.union([z6.string(), z6.null()]).optional(),
@@ -3909,6 +3983,52 @@ var PromptCache = class {
3909
3983
  }
3910
3984
  };
3911
3985
 
3986
+ // src/prompt-cache/parameters-cache.ts
3987
/**
 * Builds the string key under which a parameters entry is cached.
 *
 * A truthy `id` wins and yields `parameters:id:<id>`. Otherwise the key is
 * `parameters:<projectId ?? projectName>:<slug>:<version or "latest">`.
 *
 * @param {{id?: string, projectId?: string, projectName?: string, slug?: string, version?: string}} key
 * @returns {string} The cache key.
 * @throws {Error} When no id is given and the project identifier or slug is missing.
 */
function createCacheKey2(key) {
  const { id, projectId, projectName, slug, version } = key;
  if (id) {
    return `parameters:id:${id}`;
  }

  const project = projectId ?? projectName;
  if (!project) {
    throw new Error("Either projectId or projectName must be provided");
  }
  if (!slug) {
    throw new Error("Slug must be provided when not using ID");
  }

  const versionPart = version ?? "latest";
  return `parameters:${project}:${slug}:${versionPart}`;
}
4000
/**
 * Two-level cache for parameters: a memory cache that is always consulted
 * first, backed by an optional disk cache. Entries are addressed by the
 * string produced by createCacheKey2.
 */
var ParametersCache = class {
  memoryCache;
  diskCache;

  /**
   * @param {{memoryCache: object, diskCache?: object}} options - `memoryCache`
   *   must offer synchronous get/set; `diskCache`, when present, is awaited.
   */
  constructor(options) {
    this.memoryCache = options.memoryCache;
    this.diskCache = options.diskCache;
  }

  /**
   * Looks `key` up in memory, then on disk. A truthy disk hit is copied into
   * the memory cache before being returned.
   *
   * @returns {Promise<any>} The cached value, or undefined on a miss.
   */
  async get(key) {
    const cacheKey = createCacheKey2(key);

    const fromMemory = this.memoryCache.get(cacheKey);
    if (fromMemory !== void 0) {
      return fromMemory;
    }
    if (!this.diskCache) {
      return void 0;
    }

    const fromDisk = await this.diskCache.get(cacheKey);
    if (!fromDisk) {
      return void 0;
    }
    this.memoryCache.set(cacheKey, fromDisk);
    return fromDisk;
  }

  /**
   * Stores `value` in memory and, when a disk cache is configured, on disk.
   */
  async set(key, value) {
    const cacheKey = createCacheKey2(key);
    this.memoryCache.set(cacheKey, value);
    if (!this.diskCache) {
      return;
    }
    await this.diskCache.set(cacheKey, value);
  }
};
4031
+
3912
4032
  // src/span-cache.ts
3913
4033
  var activeCaches = /* @__PURE__ */ new Set();
3914
4034
  var exitHandlersRegistered = false;
@@ -4199,7 +4319,24 @@ var SpanCache = class {
4199
4319
  // src/logger.ts
4200
4320
  var BRAINTRUST_ATTACHMENT = BraintrustAttachmentReference.shape.type.value;
4201
4321
  var EXTERNAL_ATTACHMENT = ExternalAttachmentReference.shape.type.value;
4322
// Value placed in `rows.type` by constructLogs3OverflowRequest.
+ var LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow";
4202
4323
  var BRAINTRUST_PARAMS = Object.keys(BraintrustModelParams.shape);
4324
// 6 MiB. Used by HTTPBackgroundLogger.getMaxRequestSize when there is neither
// a request-size override nor a positive server-reported limit. The inline
// value this replaces was annotated "6 MB for the AWS lambda gateway (from our
// own testing)".
+ var DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024;
4325
// Zod schema for a row whose function_type is "parameters". `id` and
// `project_id` must be UUID strings; `function_data.__schema` is a required
// record of unknown values while `function_data.data` is optional.
// NOTE(review): presumably validates rows fetched from the API — the consumer
// is not visible in this excerpt.
+ var parametersRowSchema = z8.object({
4326
+ id: z8.string().uuid(),
4327
+ _xact_id: z8.string(),
4328
+ project_id: z8.string().uuid(),
4329
+ name: z8.string(),
4330
+ slug: z8.string(),
4331
+ description: z8.union([z8.string(), z8.null()]).optional(),
4332
+ function_type: z8.literal("parameters"),
4333
+ function_data: z8.object({
4334
+ type: z8.literal("parameters"),
4335
+ data: z8.record(z8.unknown()).optional(),
4336
+ __schema: z8.record(z8.unknown())
4337
+ }),
4338
+ metadata: z8.union([z8.object({}).partial().passthrough(), z8.null()]).optional()
4339
+ });
4203
4340
  var LoginInvalidOrgError = class extends Error {
4204
4341
  constructor(message) {
4205
4342
  super(message);
@@ -4376,6 +4513,17 @@ var BraintrustState = class _BraintrustState {
4376
4513
  max: Number(isomorph_default.getEnv("BRAINTRUST_PROMPT_CACHE_DISK_MAX")) ?? 1 << 20
4377
4514
  }) : void 0;
4378
4515
  this.promptCache = new PromptCache({ memoryCache, diskCache });
4516
+ const parametersMemoryCache = new LRUCache({
4517
+ max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_MEMORY_MAX")) ?? 1 << 10
4518
+ });
4519
+ const parametersDiskCache = canUseDiskCache() ? new DiskCache({
4520
+ cacheDir: isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DIR") ?? `${isomorph_default.getEnv("HOME") ?? isomorph_default.homedir()}/.braintrust/parameters_cache`,
4521
+ max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DISK_MAX")) ?? 1 << 20
4522
+ }) : void 0;
4523
+ this.parametersCache = new ParametersCache({
4524
+ memoryCache: parametersMemoryCache,
4525
+ diskCache: parametersDiskCache
4526
+ });
4379
4527
  this.spanCache = new SpanCache({ disabled: loginParams.disableSpanCache });
4380
4528
  }
4381
4529
  id;
@@ -4405,6 +4553,7 @@ var BraintrustState = class _BraintrustState {
4405
4553
  _apiConn = null;
4406
4554
  _proxyConn = null;
4407
4555
  promptCache;
4556
+ parametersCache;
4408
4557
  spanCache;
4409
4558
  _idGenerator = null;
4410
4559
  _contextManager = null;
@@ -5659,8 +5808,100 @@ function castLogger(logger, asyncFlush) {
5659
5808
  }
5660
5809
  return logger;
5661
5810
  }
5811
// Zod schema describing a signed upload target: HTTP method ("PUT" or "POST"),
// the signed URL, optional string-valued headers and form fields, and a
// non-empty key.
// NOTE(review): the field names match what uploadLogs3OverflowPayload reads
// from its `upload` argument; where this schema is applied is not visible here.
+ var logs3OverflowUploadSchema = z8.object({
5812
+ method: z8.enum(["PUT", "POST"]),
5813
+ signedUrl: z8.string().url(),
5814
+ headers: z8.record(z8.string()).optional(),
5815
+ fields: z8.record(z8.string()).optional(),
5816
+ key: z8.string().min(1)
5817
+ });
5662
5818
/**
 * Builds the logs3 request body text from pre-serialized rows.
 *
 * @param {Array<{str: string}>} items - Entries whose `str` holds the row's
 *   serialized form; only `str` is used.
 * @returns {string} A JSON document with a `rows` array and `api_version` 2.
 */
function constructLogs3Data(items) {
  const serializedRows = items.map((entry) => entry.str);
  const rowsJson = constructJsonArray(serializedRows);
  return `{"rows": ${rowsJson}, "api_version": 2}`;
}
5821
/**
 * Builds the request object that points at an overflow payload by key instead
 * of carrying the rows inline.
 *
 * @param {string} key - Key of the overflow payload.
 * @returns {{rows: {type: string, key: string}, api_version: number}}
 */
function constructLogs3OverflowRequest(key) {
  const rows = { type: LOGS3_OVERFLOW_REFERENCE_TYPE, key };
  return { rows, api_version: 2 };
}
5830
/**
 * Copies the keys listed in OBJECT_ID_KEYS that are present on `row` (checked
 * with the `in` operator) into a new plain object.
 *
 * @param {object} row - Row to read from; it is not modified.
 * @returns {object} The subset of `row` restricted to the listed keys.
 */
function pickLogs3OverflowObjectIds(row) {
  return OBJECT_ID_KEYS.reduce((picked, idKey) => {
    if (idKey in row) {
      picked[idKey] = row[idKey];
    }
    return picked;
  }, {});
}
5839
/**
 * Throws when an overflow upload response is not OK. Reading the response
 * body is best-effort: if it cannot be read, the message carries an empty body.
 */
async function throwIfLogs3OverflowUploadFailed(response) {
  if (response.ok) {
    return;
  }
  const responseText = await response.text().catch(() => "");
  throw new Error(
    `Failed to upload logs3 overflow payload: ${response.status} ${responseText}`
  );
}

/**
 * Uploads an overflow payload to a signed URL.
 *
 * - `upload.method === "POST"`: sends a multipart form made of every entry in
 *   `upload.fields` followed by a `file` part holding `payload`. The part's
 *   type is `upload.fields["Content-Type"]`, or "application/json" when absent.
 *   Any Content-Type entry in `upload.headers` is dropped (case-insensitive)
 *   and the remaining headers are sent as-is.
 * - Any other method: sends `payload` as the body of a PUT request, with
 *   `upload.headers` first passed through `addAzureBlobHeaders`.
 *
 * @param {{method: string, signedUrl: string, headers?: Object<string, string>, fields?: Object<string, string>}} upload
 * @param {string} payload - Body to upload.
 * @param {Function} [fetchFn=fetch] - fetch-compatible function to use.
 * @returns {Promise<void>}
 * @throws {Error} When POST fields are missing, FormData/Blob are unavailable,
 *   or the server responds with a non-OK status.
 */
async function uploadLogs3OverflowPayload(upload, payload, fetchFn = fetch) {
  if (upload.method === "POST") {
    if (!upload.fields) {
      throw new Error("Missing logs3 overflow upload fields");
    }
    if (typeof FormData === "undefined" || typeof Blob === "undefined") {
      throw new Error("FormData is not available for logs3 overflow upload");
    }
    const form = new FormData();
    for (const [key, value] of Object.entries(upload.fields)) {
      form.append(key, value);
    }
    const contentType = upload.fields["Content-Type"] ?? "application/json";
    // The file part is appended last, after all the signed form fields.
    form.append("file", new Blob([payload], { type: contentType }));
    // Omit any caller-supplied Content-Type header for the multipart request.
    const postHeaders = Object.fromEntries(
      Object.entries(upload.headers ?? {}).filter(
        ([key]) => key.toLowerCase() !== "content-type"
      )
    );
    const postResponse = await fetchFn(upload.signedUrl, {
      method: "POST",
      headers: postHeaders,
      body: form
    });
    await throwIfLogs3OverflowUploadFailed(postResponse);
    return;
  }
  const headers = { ...upload.headers ?? {} };
  addAzureBlobHeaders(headers, upload.signedUrl);
  const response = await fetchFn(upload.signedUrl, {
    method: "PUT",
    headers,
    body: payload
  });
  await throwIfLogs3OverflowUploadFailed(response);
}
5886
/**
 * Serializes a row with JSON.stringify and pairs the text with the metadata
 * used for overflow handling.
 *
 * @param {object} item - Row to serialize.
 * @returns {{str: string, overflowMeta: {object_ids: object, is_delete: boolean, input_row: {byte_size: number}}}}
 *   `str` is the JSON text; `object_ids` comes from pickLogs3OverflowObjectIds;
 *   `is_delete` is true only when the row's OBJECT_DELETE_FIELD value is
 *   exactly `true`; `byte_size` is utf8ByteLength of the JSON text.
 */
function stringifyWithOverflowMeta(item) {
  const str = JSON.stringify(item);
  const objectIds = pickLogs3OverflowObjectIds(item);
  const isDelete = item[OBJECT_DELETE_FIELD] === true;
  const byteSize = utf8ByteLength(str);
  const overflowMeta = {
    object_ids: objectIds,
    is_delete: isDelete,
    input_row: { byte_size: byteSize }
  };
  return { str, overflowMeta };
}
5900
/**
 * Returns the number of bytes `value` occupies when encoded as UTF-8.
 *
 * Uses `TextEncoder` when the runtime provides it. Otherwise the size is
 * derived from the string's code points, so non-ASCII text is still measured
 * in bytes rather than in UTF-16 code units (which `value.length` would give).
 *
 * @param {string} value - Text to measure.
 * @returns {number} UTF-8 encoded length in bytes.
 */
function utf8ByteLength(value) {
  if (typeof TextEncoder !== "undefined") {
    return new TextEncoder().encode(value).length;
  }
  let byteCount = 0;
  // Iterating a string yields whole code points; a lone surrogate comes out as
  // a single unit below 0x10000 and is counted as 3 bytes, matching the
  // U+FFFD replacement that TextEncoder would emit for it.
  for (const char of value) {
    const codePoint = char.codePointAt(0);
    if (codePoint < 0x80) {
      byteCount += 1;
    } else if (codePoint < 0x800) {
      byteCount += 2;
    } else if (codePoint < 0x10000) {
      byteCount += 3;
    } else {
      byteCount += 4;
    }
  }
  return byteCount;
}
5665
5906
  function now() {
5666
5907
  return (/* @__PURE__ */ new Date()).getTime();
@@ -5686,10 +5927,9 @@ var TestBackgroundLogger = class {
5686
5927
  events.push(await event.get());
5687
5928
  }
5688
5929
  }
5689
- const batch = mergeRowBatch(events);
5690
- let flatBatch = batch.flat();
5930
+ let batch = mergeRowBatch(events);
5691
5931
  if (this.maskingFunction) {
5692
- flatBatch = flatBatch.map((item) => {
5932
+ batch = batch.map((item) => {
5693
5933
  const maskedItem = { ...item };
5694
5934
  for (const field of REDACTION_FIELDS) {
5695
5935
  if (item[field] !== void 0) {
@@ -5714,7 +5954,7 @@ var TestBackgroundLogger = class {
5714
5954
  return maskedItem;
5715
5955
  });
5716
5956
  }
5717
- return flatBatch;
5957
+ return batch;
5718
5958
  }
5719
5959
  };
5720
5960
  var BACKGROUND_LOGGER_BASE_SLEEP_TIME_S = 1;
@@ -5727,8 +5967,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5727
5967
  onFlushError;
5728
5968
  maskingFunction = null;
5729
5969
  syncFlush = false;
5730
- // 6 MB for the AWS lambda gateway (from our own testing).
5731
- maxRequestSize = 6 * 1024 * 1024;
5970
+ maxRequestSizeOverride = null;
5971
+ _maxRequestSizePromise = null;
5732
5972
  defaultBatchSize = 100;
5733
5973
  numTries = 3;
5734
5974
  queueDropExceedingMaxsize = DEFAULT_QUEUE_SIZE;
@@ -5756,7 +5996,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5756
5996
  }
5757
5997
  const maxRequestSizeEnv = Number(isomorph_default.getEnv("BRAINTRUST_MAX_REQUEST_SIZE"));
5758
5998
  if (!isNaN(maxRequestSizeEnv)) {
5759
- this.maxRequestSize = maxRequestSizeEnv;
5999
+ this.maxRequestSizeOverride = maxRequestSizeEnv;
5760
6000
  }
5761
6001
  const numTriesEnv = Number(isomorph_default.getEnv("BRAINTRUST_NUM_RETRIES"));
5762
6002
  if (!isNaN(numTriesEnv)) {
@@ -5818,6 +6058,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5818
6058
  }
5819
6059
  }
5820
6060
  }
6061
+ getMaxRequestSize() {
6062
+ if (!this._maxRequestSizePromise) {
6063
+ this._maxRequestSizePromise = (async () => {
6064
+ let serverLimit = null;
6065
+ try {
6066
+ const conn = await this.apiConn.get();
6067
+ const versionInfo = await conn.get_json("version");
6068
+ serverLimit = z8.object({ logs3_payload_max_bytes: z8.number().nullish() }).parse(versionInfo).logs3_payload_max_bytes ?? null;
6069
+ } catch (e) {
6070
+ console.warn("Failed to fetch version info for payload limit:", e);
6071
+ }
6072
+ const validServerLimit = serverLimit !== null && serverLimit > 0 ? serverLimit : null;
6073
+ const canUseOverflow = validServerLimit !== null;
6074
+ let maxRequestSize = DEFAULT_MAX_REQUEST_SIZE;
6075
+ if (this.maxRequestSizeOverride !== null) {
6076
+ maxRequestSize = validServerLimit !== null ? Math.min(this.maxRequestSizeOverride, validServerLimit) : this.maxRequestSizeOverride;
6077
+ } else if (validServerLimit !== null) {
6078
+ maxRequestSize = validServerLimit;
6079
+ }
6080
+ return { maxRequestSize, canUseOverflow };
6081
+ })();
6082
+ }
6083
+ return this._maxRequestSizePromise;
6084
+ }
5821
6085
  async flush() {
5822
6086
  if (this.syncFlush) {
5823
6087
  this.triggerActiveFlush();
@@ -5861,33 +6125,33 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5861
6125
  if (allItems.length === 0) {
5862
6126
  return;
5863
6127
  }
5864
- const allItemsStr = allItems.map(
5865
- (bucket) => bucket.map((item) => JSON.stringify(item))
6128
+ const allItemsWithMeta = allItems.map(
6129
+ (item) => stringifyWithOverflowMeta(item)
5866
6130
  );
5867
- const batchSets = batchItems({
5868
- items: allItemsStr,
6131
+ const maxRequestSizeResult = await this.getMaxRequestSize();
6132
+ const batches = batchItems({
6133
+ items: allItemsWithMeta,
5869
6134
  batchMaxNumItems: batchSize,
5870
- batchMaxNumBytes: this.maxRequestSize / 2
6135
+ batchMaxNumBytes: maxRequestSizeResult.maxRequestSize / 2,
6136
+ getByteSize: (item) => item.str.length
5871
6137
  });
5872
- for (const batchSet of batchSets) {
5873
- const postPromises = batchSet.map(
5874
- (batch) => (async () => {
5875
- try {
5876
- await this.submitLogsRequest(batch);
5877
- return { type: "success" };
5878
- } catch (e) {
5879
- return { type: "error", value: e };
5880
- }
5881
- })()
6138
+ const postPromises = batches.map(
6139
+ (batch) => (async () => {
6140
+ try {
6141
+ await this.submitLogsRequest(batch, maxRequestSizeResult);
6142
+ return { type: "success" };
6143
+ } catch (e) {
6144
+ return { type: "error", value: e };
6145
+ }
6146
+ })()
6147
+ );
6148
+ const results = await Promise.all(postPromises);
6149
+ const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
6150
+ if (failingResultErrors.length) {
6151
+ throw new AggregateError(
6152
+ failingResultErrors,
6153
+ `Encountered the following errors while logging:`
5882
6154
  );
5883
- const results = await Promise.all(postPromises);
5884
- const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
5885
- if (failingResultErrors.length) {
5886
- throw new AggregateError(
5887
- failingResultErrors,
5888
- `Encountered the following errors while logging:`
5889
- );
5890
- }
5891
6155
  }
5892
6156
  const attachmentErrors = [];
5893
6157
  for (const attachment of attachments) {
@@ -5917,32 +6181,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5917
6181
  items.forEach((item) => extractAttachments(item, attachments));
5918
6182
  let mergedItems = mergeRowBatch(items);
5919
6183
  if (this.maskingFunction) {
5920
- mergedItems = mergedItems.map(
5921
- (batch) => batch.map((item) => {
5922
- const maskedItem = { ...item };
5923
- for (const field of REDACTION_FIELDS) {
5924
- if (item[field] !== void 0) {
5925
- const maskedValue = applyMaskingToField(
5926
- this.maskingFunction,
5927
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
5928
- item[field],
5929
- field
5930
- );
5931
- if (maskedValue instanceof MaskingError) {
5932
- delete maskedItem[field];
5933
- if (maskedItem.error) {
5934
- maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
5935
- } else {
5936
- maskedItem.error = maskedValue.errorMsg;
5937
- }
6184
+ mergedItems = mergedItems.map((item) => {
6185
+ const maskedItem = { ...item };
6186
+ for (const field of REDACTION_FIELDS) {
6187
+ if (item[field] !== void 0) {
6188
+ const maskedValue = applyMaskingToField(
6189
+ this.maskingFunction,
6190
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
6191
+ item[field],
6192
+ field
6193
+ );
6194
+ if (maskedValue instanceof MaskingError) {
6195
+ delete maskedItem[field];
6196
+ if (maskedItem.error) {
6197
+ maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
5938
6198
  } else {
5939
- maskedItem[field] = maskedValue;
6199
+ maskedItem.error = maskedValue.errorMsg;
5940
6200
  }
6201
+ } else {
6202
+ maskedItem[field] = maskedValue;
5941
6203
  }
5942
6204
  }
5943
- return maskedItem;
5944
- })
5945
- );
6205
+ }
6206
+ return maskedItem;
6207
+ });
5946
6208
  }
5947
6209
  return [mergedItems, attachments];
5948
6210
  } catch (e) {
@@ -5969,20 +6231,73 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5969
6231
  }
5970
6232
  throw new Error("Impossible");
5971
6233
  }
5972
- async submitLogsRequest(items) {
6234
+ async requestLogs3OverflowUpload(conn, args) {
6235
+ let response;
6236
+ try {
6237
+ response = await conn.post_json("logs3/overflow", {
6238
+ content_type: "application/json",
6239
+ size_bytes: args.sizeBytes,
6240
+ rows: args.rows
6241
+ });
6242
+ } catch (error) {
6243
+ const errorStr = JSON.stringify(error);
6244
+ throw new Error(
6245
+ `Failed to request logs3 overflow upload URL: ${errorStr}`
6246
+ );
6247
+ }
6248
+ try {
6249
+ return logs3OverflowUploadSchema.parse(response);
6250
+ } catch (error) {
6251
+ if (error instanceof ZodError) {
6252
+ const errorStr = JSON.stringify(error.flatten());
6253
+ throw new Error(`Invalid response from API server: ${errorStr}`);
6254
+ }
6255
+ throw error;
6256
+ }
6257
+ }
6258
+ async _uploadLogs3OverflowPayload(conn, upload, payload) {
6259
+ await uploadLogs3OverflowPayload(upload, payload, conn.fetch.bind(conn));
6260
+ }
6261
+ async submitLogsRequest(items, {
6262
+ maxRequestSize,
6263
+ canUseOverflow
6264
+ }) {
5973
6265
  const conn = await this.apiConn.get();
5974
6266
  const dataStr = constructLogs3Data(items);
6267
+ const payloadBytes = utf8ByteLength(dataStr);
6268
+ const useOverflow = canUseOverflow && payloadBytes > maxRequestSize;
5975
6269
  if (this.allPublishPayloadsDir) {
5976
6270
  await _HTTPBackgroundLogger.writePayloadToDir({
5977
6271
  payloadDir: this.allPublishPayloadsDir,
5978
6272
  payload: dataStr
5979
6273
  });
5980
6274
  }
6275
+ let overflowUpload = null;
6276
+ const overflowRows = useOverflow ? items.map((item) => item.overflowMeta) : null;
5981
6277
  for (let i = 0; i < this.numTries; i++) {
5982
6278
  const startTime = now();
5983
6279
  let error = void 0;
5984
6280
  try {
5985
- await conn.post_json("logs3", dataStr);
6281
+ if (overflowRows) {
6282
+ if (!overflowUpload) {
6283
+ const currentUpload = await this.requestLogs3OverflowUpload(conn, {
6284
+ rows: overflowRows,
6285
+ sizeBytes: payloadBytes
6286
+ });
6287
+ await this._uploadLogs3OverflowPayload(
6288
+ conn,
6289
+ currentUpload,
6290
+ dataStr
6291
+ );
6292
+ overflowUpload = currentUpload;
6293
+ }
6294
+ await conn.post_json(
6295
+ "logs3",
6296
+ constructLogs3OverflowRequest(overflowUpload.key)
6297
+ );
6298
+ } else {
6299
+ await conn.post_json("logs3", dataStr);
6300
+ }
5986
6301
  } catch (e) {
5987
6302
  error = e;
5988
6303
  }
@@ -5998,7 +6313,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
5998
6313
  return `${error}`;
5999
6314
  }
6000
6315
  })();
6001
- const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}.${retryingText}
6316
+ const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${payloadBytes}.${retryingText}
6002
6317
  Error: ${errorText}`;
6003
6318
  if (!isRetrying && this.failedPublishPayloadsDir) {
6004
6319
  await _HTTPBackgroundLogger.writePayloadToDir({
@@ -6052,7 +6367,7 @@ Error: ${errorText}`;
6052
6367
  try {
6053
6368
  const [allItems, allAttachments] = await this.unwrapLazyValues(wrappedItems);
6054
6369
  const dataStr = constructLogs3Data(
6055
- allItems.map((x) => JSON.stringify(x))
6370
+ allItems.map((x) => stringifyWithOverflowMeta(x))
6056
6371
  );
6057
6372
  const attachmentStr = JSON.stringify(
6058
6373
  allAttachments.map((a) => a.debugInfo())
@@ -6619,23 +6934,144 @@ async function loadPrompt({
6619
6934
  }
6620
6935
  return prompt;
6621
6936
  }
6622
- function setMaskingFunction(maskingFunction) {
6623
- _globalState.setMaskingFunction(maskingFunction);
6624
- }
6625
- async function login(options = {}) {
6626
- const { forceLogin = false } = options || {};
6627
- if (_globalState.loggedIn && !forceLogin) {
6628
- let checkUpdatedParam2 = function(varname, arg, orig) {
6629
- if (!isEmpty2(arg) && !isEmpty2(orig) && arg !== orig) {
6630
- throw new Error(
6631
- `Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
6632
- );
6633
- }
6634
- };
6635
- var checkUpdatedParam = checkUpdatedParam2;
6636
- checkUpdatedParam2("appUrl", options.appUrl, _globalState.appUrl);
6637
- checkUpdatedParam2(
6638
- "apiKey",
6937
+ async function loadParameters({
6938
+ projectName,
6939
+ projectId,
6940
+ slug,
6941
+ version,
6942
+ environment,
6943
+ id,
6944
+ appUrl,
6945
+ apiKey,
6946
+ orgName,
6947
+ fetch: fetch2,
6948
+ forceLogin,
6949
+ state: stateArg
6950
+ }) {
6951
+ if (version && environment) {
6952
+ throw new Error(
6953
+ "Cannot specify both 'version' and 'environment' parameters. Please use only one (remove the other)."
6954
+ );
6955
+ }
6956
+ if (id) {
6957
+ } else if (isEmpty2(projectName) && isEmpty2(projectId)) {
6958
+ throw new Error("Must specify either projectName or projectId");
6959
+ } else if (isEmpty2(slug)) {
6960
+ throw new Error("Must specify slug");
6961
+ }
6962
+ const state = stateArg ?? _globalState;
6963
+ let response;
6964
+ try {
6965
+ await state.login({
6966
+ orgName,
6967
+ apiKey,
6968
+ appUrl,
6969
+ fetch: fetch2,
6970
+ forceLogin
6971
+ });
6972
+ if (id) {
6973
+ response = await state.apiConn().get_json(`v1/function/${id}`, {
6974
+ ...version && { version },
6975
+ ...environment && { environment }
6976
+ });
6977
+ if (response) {
6978
+ response = { objects: [response] };
6979
+ }
6980
+ } else {
6981
+ response = await state.apiConn().get_json("v1/function", {
6982
+ project_name: projectName,
6983
+ project_id: projectId,
6984
+ slug,
6985
+ version,
6986
+ function_type: "parameters",
6987
+ ...environment && { environment }
6988
+ });
6989
+ }
6990
+ } catch (e) {
6991
+ if (environment || version) {
6992
+ throw new Error(`Parameters not found with specified parameters: ${e}`);
6993
+ }
6994
+ console.warn(
6995
+ "Failed to load parameters, attempting to fall back to cache:",
6996
+ e
6997
+ );
6998
+ let parameters2;
6999
+ if (id) {
7000
+ parameters2 = await state.parametersCache.get({ id });
7001
+ if (!parameters2) {
7002
+ throw new Error(
7003
+ `Parameters with id ${id} not found (not found on server or in local cache): ${e}`
7004
+ );
7005
+ }
7006
+ } else {
7007
+ parameters2 = await state.parametersCache.get({
7008
+ slug,
7009
+ projectId,
7010
+ projectName,
7011
+ version: version ?? "latest"
7012
+ });
7013
+ if (!parameters2) {
7014
+ throw new Error(
7015
+ `Parameters ${slug} (version ${version ?? "latest"}) not found in ${[
7016
+ projectName ?? projectId
7017
+ ]} (not found on server or in local cache): ${e}`
7018
+ );
7019
+ }
7020
+ }
7021
+ return parameters2;
7022
+ }
7023
+ if (!("objects" in response) || response.objects.length === 0) {
7024
+ if (id) {
7025
+ throw new Error(`Parameters with id ${id} not found.`);
7026
+ } else {
7027
+ throw new Error(
7028
+ `Parameters ${slug} not found in ${[projectName ?? projectId]}`
7029
+ );
7030
+ }
7031
+ } else if (response.objects.length > 1) {
7032
+ if (id) {
7033
+ throw new Error(
7034
+ `Multiple parameters found with id ${id}. This should never happen.`
7035
+ );
7036
+ } else {
7037
+ throw new Error(
7038
+ `Multiple parameters found with slug ${slug} in project ${projectName ?? projectId}. This should never happen.`
7039
+ );
7040
+ }
7041
+ }
7042
+ const metadata = parametersRowSchema.parse(response["objects"][0]);
7043
+ const parameters = new RemoteEvalParameters(metadata);
7044
+ try {
7045
+ if (id) {
7046
+ await state.parametersCache.set({ id }, parameters);
7047
+ } else if (slug) {
7048
+ await state.parametersCache.set(
7049
+ { slug, projectId, projectName, version: version ?? "latest" },
7050
+ parameters
7051
+ );
7052
+ }
7053
+ } catch (e) {
7054
+ console.warn("Failed to set parameters in cache:", e);
7055
+ }
7056
+ return parameters;
7057
+ }
7058
+ function setMaskingFunction(maskingFunction) {
7059
+ _globalState.setMaskingFunction(maskingFunction);
7060
+ }
7061
+ async function login(options = {}) {
7062
+ const { forceLogin = false } = options || {};
7063
+ if (_globalState.loggedIn && !forceLogin) {
7064
+ let checkUpdatedParam2 = function(varname, arg, orig) {
7065
+ if (!isEmpty2(arg) && !isEmpty2(orig) && arg !== orig) {
7066
+ throw new Error(
7067
+ `Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
7068
+ );
7069
+ }
7070
+ };
7071
+ var checkUpdatedParam = checkUpdatedParam2;
7072
+ checkUpdatedParam2("appUrl", options.appUrl, _globalState.appUrl);
7073
+ checkUpdatedParam2(
7074
+ "apiKey",
6639
7075
  options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
6640
7076
  _globalState.loginToken
6641
7077
  );
@@ -8689,6 +9125,55 @@ var Prompt2 = class _Prompt {
8689
9125
  );
8690
9126
  }
8691
9127
  };
9128
+ var RemoteEvalParameters = class {
9129
+ constructor(metadata) {
9130
+ this.metadata = metadata;
9131
+ }
9132
+ __braintrust_parameters_marker = true;
9133
+ get id() {
9134
+ return this.metadata.id;
9135
+ }
9136
+ get projectId() {
9137
+ return this.metadata.project_id;
9138
+ }
9139
+ get name() {
9140
+ return this.metadata.name;
9141
+ }
9142
+ get slug() {
9143
+ return this.metadata.slug;
9144
+ }
9145
+ get version() {
9146
+ return this.metadata[TRANSACTION_ID_FIELD];
9147
+ }
9148
+ get schema() {
9149
+ return this.metadata.function_data.__schema;
9150
+ }
9151
+ get data() {
9152
+ return this.metadata.function_data.data ?? {};
9153
+ }
9154
+ validate(data) {
9155
+ if (typeof data !== "object" || data === null) {
9156
+ return false;
9157
+ }
9158
+ const schemaProps = this.schema.properties;
9159
+ if (typeof schemaProps !== "object" || schemaProps === null) {
9160
+ return true;
9161
+ }
9162
+ for (const key of Object.keys(schemaProps)) {
9163
+ if (!(key in data)) {
9164
+ const required = Array.isArray(this.schema.required) ? this.schema.required : [];
9165
+ if (required.includes(key)) {
9166
+ return false;
9167
+ }
9168
+ }
9169
+ }
9170
+ return true;
9171
+ }
9172
+ static isParameters(x) {
9173
+ return typeof x === "object" && x !== null && "__braintrust_parameters_marker" in x && // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
9174
+ x.__braintrust_parameters_marker === true;
9175
+ }
9176
+ };
8692
9177
  var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
8693
9178
  function setInitialTestState() {
8694
9179
  if (!_internalGetGlobalState()) {
@@ -8829,6 +9314,7 @@ __export(exports_exports, {
8829
9314
  CodePrompt: () => CodePrompt,
8830
9315
  ContextManager: () => ContextManager,
8831
9316
  DEFAULT_FETCH_BATCH_SIZE: () => DEFAULT_FETCH_BATCH_SIZE,
9317
+ DEFAULT_MAX_REQUEST_SIZE: () => DEFAULT_MAX_REQUEST_SIZE,
8832
9318
  Dataset: () => Dataset2,
8833
9319
  ERR_PERMALINK: () => ERR_PERMALINK,
8834
9320
  Eval: () => Eval,
@@ -8839,6 +9325,7 @@ __export(exports_exports, {
8839
9325
  IDGenerator: () => IDGenerator,
8840
9326
  JSONAttachment: () => JSONAttachment,
8841
9327
  LEGACY_CACHED_HEADER: () => LEGACY_CACHED_HEADER,
9328
+ LOGS3_OVERFLOW_REFERENCE_TYPE: () => LOGS3_OVERFLOW_REFERENCE_TYPE,
8842
9329
  LazyValue: () => LazyValue,
8843
9330
  Logger: () => Logger,
8844
9331
  LoginInvalidOrgError: () => LoginInvalidOrgError,
@@ -8863,8 +9350,10 @@ __export(exports_exports, {
8863
9350
  _exportsForTestingOnly: () => _exportsForTestingOnly,
8864
9351
  _internalGetGlobalState: () => _internalGetGlobalState,
8865
9352
  _internalSetInitialState: () => _internalSetInitialState,
9353
+ addAzureBlobHeaders: () => addAzureBlobHeaders,
8866
9354
  braintrustStreamChunkSchema: () => braintrustStreamChunkSchema,
8867
9355
  buildLocalSummary: () => buildLocalSummary,
9356
+ constructLogs3OverflowRequest: () => constructLogs3OverflowRequest,
8868
9357
  createFinalValuePassThroughStream: () => createFinalValuePassThroughStream,
8869
9358
  currentExperiment: () => currentExperiment,
8870
9359
  currentLogger: () => currentLogger,
@@ -8888,15 +9377,18 @@ __export(exports_exports, {
8888
9377
  initLogger: () => initLogger,
8889
9378
  invoke: () => invoke,
8890
9379
  isTemplateFormat: () => isTemplateFormat,
9380
+ loadParameters: () => loadParameters,
8891
9381
  loadPrompt: () => loadPrompt,
8892
9382
  log: () => log,
8893
9383
  logError: () => logError,
8894
9384
  login: () => login,
8895
9385
  loginToState: () => loginToState,
9386
+ logs3OverflowUploadSchema: () => logs3OverflowUploadSchema,
8896
9387
  newId: () => newId,
8897
9388
  parseCachedHeader: () => parseCachedHeader,
8898
9389
  parseTemplateFormat: () => parseTemplateFormat,
8899
9390
  permalink: () => permalink,
9391
+ pickLogs3OverflowObjectIds: () => pickLogs3OverflowObjectIds,
8900
9392
  projects: () => projects,
8901
9393
  promptContentsSchema: () => promptContentsSchema,
8902
9394
  promptDefinitionSchema: () => promptDefinitionSchema,
@@ -8917,6 +9409,8 @@ __export(exports_exports, {
8917
9409
  traceable: () => traceable,
8918
9410
  traced: () => traced,
8919
9411
  updateSpan: () => updateSpan,
9412
+ uploadLogs3OverflowPayload: () => uploadLogs3OverflowPayload,
9413
+ utf8ByteLength: () => utf8ByteLength,
8920
9414
  withCurrent: () => withCurrent,
8921
9415
  withDataset: () => withDataset,
8922
9416
  withExperiment: () => withExperiment,
@@ -12387,6 +12881,189 @@ function wrapMastraAgent(agent, _options) {
12387
12881
  }
12388
12882
 
12389
12883
  // src/wrappers/claude-agent-sdk/claude-agent-sdk.ts
12884
+ function getMcpServerMetadata(serverName, mcpServers) {
12885
+ if (!serverName || !mcpServers) {
12886
+ return {};
12887
+ }
12888
+ const serverConfig = mcpServers[serverName];
12889
+ if (!serverConfig) {
12890
+ return {};
12891
+ }
12892
+ const metadata = {};
12893
+ if (serverConfig.type) {
12894
+ metadata["mcp.type"] = serverConfig.type;
12895
+ } else if (typeof serverConfig === "object" && "transport" in serverConfig) {
12896
+ metadata["mcp.type"] = "sdk";
12897
+ }
12898
+ if (serverConfig.url) {
12899
+ metadata["mcp.url"] = serverConfig.url;
12900
+ }
12901
+ if (serverConfig.command) {
12902
+ metadata["mcp.command"] = serverConfig.command;
12903
+ if (serverConfig.args) {
12904
+ metadata["mcp.args"] = serverConfig.args.join(" ");
12905
+ }
12906
+ }
12907
+ return metadata;
12908
+ }
12909
+ function parseToolName(rawToolName) {
12910
+ const mcpMatch = rawToolName.match(/^mcp__([^_]+)__(.+)$/);
12911
+ if (mcpMatch) {
12912
+ const [, mcpServer, toolName] = mcpMatch;
12913
+ return {
12914
+ displayName: `tool: ${mcpServer}/${toolName}`,
12915
+ toolName,
12916
+ mcpServer,
12917
+ rawToolName
12918
+ };
12919
+ }
12920
+ return {
12921
+ displayName: `tool: ${rawToolName}`,
12922
+ toolName: rawToolName,
12923
+ rawToolName
12924
+ };
12925
+ }
12926
+ function createToolTracingHooks(resolveParentSpan, activeToolSpans, mcpServers, subAgentSpans, endedSubAgentSpans) {
12927
+ const preToolUse = async (input, toolUseID) => {
12928
+ if (input.hook_event_name !== "PreToolUse" || !toolUseID) {
12929
+ return {};
12930
+ }
12931
+ if (input.tool_name === "Task") {
12932
+ return {};
12933
+ }
12934
+ const parsed = parseToolName(input.tool_name);
12935
+ const mcpMetadata = getMcpServerMetadata(parsed.mcpServer, mcpServers);
12936
+ const parentExport = await resolveParentSpan(toolUseID);
12937
+ const toolSpan = startSpan({
12938
+ name: parsed.displayName,
12939
+ spanAttributes: { type: "tool" /* TOOL */ },
12940
+ event: {
12941
+ input: input.tool_input,
12942
+ metadata: {
12943
+ // GenAI semantic conventions
12944
+ "gen_ai.tool.name": parsed.toolName,
12945
+ "gen_ai.tool.call.id": toolUseID,
12946
+ // MCP-specific metadata
12947
+ ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
12948
+ ...mcpMetadata,
12949
+ // Claude SDK metadata
12950
+ "claude_agent_sdk.raw_tool_name": parsed.rawToolName,
12951
+ "claude_agent_sdk.session_id": input.session_id,
12952
+ "claude_agent_sdk.cwd": input.cwd
12953
+ }
12954
+ },
12955
+ parent: parentExport
12956
+ });
12957
+ activeToolSpans.set(toolUseID, toolSpan);
12958
+ return {};
12959
+ };
12960
+ const postToolUse = async (input, toolUseID) => {
12961
+ if (input.hook_event_name !== "PostToolUse" || !toolUseID) {
12962
+ return {};
12963
+ }
12964
+ const subAgentSpan = subAgentSpans.get(toolUseID);
12965
+ if (subAgentSpan) {
12966
+ try {
12967
+ const response = input.tool_response;
12968
+ const metadata = {};
12969
+ if (response?.status) {
12970
+ metadata["claude_agent_sdk.status"] = response.status;
12971
+ }
12972
+ if (response?.totalDurationMs) {
12973
+ metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs;
12974
+ }
12975
+ if (response?.totalToolUseCount !== void 0) {
12976
+ metadata["claude_agent_sdk.tool_use_count"] = response.totalToolUseCount;
12977
+ }
12978
+ subAgentSpan.log({
12979
+ output: response?.content,
12980
+ metadata
12981
+ });
12982
+ } finally {
12983
+ subAgentSpan.end();
12984
+ endedSubAgentSpans.add(toolUseID);
12985
+ }
12986
+ return {};
12987
+ }
12988
+ const toolSpan = activeToolSpans.get(toolUseID);
12989
+ if (!toolSpan) {
12990
+ return {};
12991
+ }
12992
+ try {
12993
+ toolSpan.log({ output: input.tool_response });
12994
+ } finally {
12995
+ toolSpan.end();
12996
+ activeToolSpans.delete(toolUseID);
12997
+ }
12998
+ return {};
12999
+ };
13000
+ const postToolUseFailure = async (input, toolUseID) => {
13001
+ if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) {
13002
+ return {};
13003
+ }
13004
+ const subAgentSpan = subAgentSpans.get(toolUseID);
13005
+ if (subAgentSpan) {
13006
+ try {
13007
+ subAgentSpan.log({ error: input.error });
13008
+ } finally {
13009
+ subAgentSpan.end();
13010
+ endedSubAgentSpans.add(toolUseID);
13011
+ }
13012
+ return {};
13013
+ }
13014
+ const toolSpan = activeToolSpans.get(toolUseID);
13015
+ if (!toolSpan) {
13016
+ return {};
13017
+ }
13018
+ const parsed = parseToolName(input.tool_name);
13019
+ try {
13020
+ toolSpan.log({
13021
+ error: input.error,
13022
+ metadata: {
13023
+ "gen_ai.tool.name": parsed.toolName,
13024
+ "gen_ai.tool.call.id": toolUseID,
13025
+ ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
13026
+ "claude_agent_sdk.is_interrupt": input.is_interrupt,
13027
+ "claude_agent_sdk.session_id": input.session_id
13028
+ }
13029
+ });
13030
+ } finally {
13031
+ toolSpan.end();
13032
+ activeToolSpans.delete(toolUseID);
13033
+ }
13034
+ return {};
13035
+ };
13036
+ return { preToolUse, postToolUse, postToolUseFailure };
13037
+ }
13038
+ function injectTracingHooks(options, resolveParentSpan, activeToolSpans, subAgentSpans, endedSubAgentSpans) {
13039
+ const mcpServers = options.mcpServers;
13040
+ const { preToolUse, postToolUse, postToolUseFailure } = createToolTracingHooks(
13041
+ resolveParentSpan,
13042
+ activeToolSpans,
13043
+ mcpServers,
13044
+ subAgentSpans,
13045
+ endedSubAgentSpans
13046
+ );
13047
+ const existingHooks = options.hooks ?? {};
13048
+ return {
13049
+ ...options,
13050
+ hooks: {
13051
+ ...existingHooks,
13052
+ PreToolUse: [
13053
+ ...existingHooks.PreToolUse ?? [],
13054
+ { hooks: [preToolUse] }
13055
+ ],
13056
+ PostToolUse: [
13057
+ ...existingHooks.PostToolUse ?? [],
13058
+ { hooks: [postToolUse] }
13059
+ ],
13060
+ PostToolUseFailure: [
13061
+ ...existingHooks.PostToolUseFailure ?? [],
13062
+ { hooks: [postToolUseFailure] }
13063
+ ]
13064
+ }
13065
+ };
13066
+ }
12390
13067
  function filterSerializableOptions(options) {
12391
13068
  const allowedKeys = [
12392
13069
  "model",
@@ -12411,18 +13088,45 @@ function filterSerializableOptions(options) {
12411
13088
  }
12412
13089
  return filtered;
12413
13090
  }
13091
+ function isAsyncIterable(value) {
13092
+ return value !== null && value !== void 0 && typeof value[Symbol.asyncIterator] === "function";
13093
+ }
12414
13094
  function wrapClaudeAgentQuery(queryFn, defaultThis) {
12415
13095
  const proxy = new Proxy(queryFn, {
12416
13096
  apply(target, thisArg, argArray) {
12417
13097
  const params = argArray[0] ?? {};
12418
13098
  const { prompt, options = {} } = params;
13099
+ const promptIsAsyncIterable = isAsyncIterable(prompt);
13100
+ let capturedPromptMessages;
13101
+ let promptForQuery = prompt;
13102
+ let promptStarted = false;
13103
+ let resolvePromptDone;
13104
+ const promptDone = new Promise((resolve) => {
13105
+ resolvePromptDone = resolve;
13106
+ });
13107
+ if (promptIsAsyncIterable) {
13108
+ capturedPromptMessages = [];
13109
+ const originalPrompt = prompt;
13110
+ const capturingPrompt = (async function* () {
13111
+ promptStarted = true;
13112
+ try {
13113
+ for await (const msg of originalPrompt) {
13114
+ capturedPromptMessages.push(msg);
13115
+ yield msg;
13116
+ }
13117
+ } finally {
13118
+ resolvePromptDone?.();
13119
+ }
13120
+ })();
13121
+ promptForQuery = capturingPrompt;
13122
+ }
12419
13123
  const span = startSpan({
12420
13124
  name: "Claude Agent",
12421
13125
  spanAttributes: {
12422
13126
  type: "task" /* TASK */
12423
13127
  },
12424
13128
  event: {
12425
- input: typeof prompt === "string" ? prompt : { type: "streaming", description: "AsyncIterable<SDKMessage>" },
13129
+ input: typeof prompt === "string" ? prompt : promptIsAsyncIterable ? void 0 : prompt !== void 0 ? String(prompt) : void 0,
12426
13130
  metadata: filterSerializableOptions(options)
12427
13131
  }
12428
13132
  });
@@ -12433,13 +13137,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12433
13137
  let currentMessageStartTime = getCurrentUnixTimestamp();
12434
13138
  const currentMessages = [];
12435
13139
  const createLLMSpan = async () => {
13140
+ const parentToolUseId = currentMessages[0]?.parent_tool_use_id ?? null;
13141
+ let parentSpanExport;
13142
+ if (parentToolUseId) {
13143
+ const subAgentSpan = subAgentSpans.get(parentToolUseId);
13144
+ parentSpanExport = subAgentSpan ? await subAgentSpan.export() : await span.export();
13145
+ } else {
13146
+ parentSpanExport = await span.export();
13147
+ }
12436
13148
  const finalMessageContent = await _createLLMSpanForMessages(
12437
13149
  currentMessages,
12438
13150
  prompt,
12439
13151
  finalResults,
12440
13152
  options,
12441
13153
  currentMessageStartTime,
12442
- await span.export()
13154
+ capturedPromptMessages,
13155
+ parentSpanExport
12443
13156
  );
12444
13157
  if (finalMessageContent) {
12445
13158
  finalResults.push(finalMessageContent);
@@ -12452,14 +13165,78 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12452
13165
  currentMessages.length = 0;
12453
13166
  };
12454
13167
  const invocationTarget = thisArg === proxy || thisArg === void 0 ? defaultThis ?? thisArg : thisArg;
13168
+ const activeToolSpans = /* @__PURE__ */ new Map();
13169
+ const subAgentSpans = /* @__PURE__ */ new Map();
13170
+ const endedSubAgentSpans = /* @__PURE__ */ new Set();
13171
+ const toolUseToParent = /* @__PURE__ */ new Map();
13172
+ const pendingSubAgentNames = /* @__PURE__ */ new Map();
13173
+ const resolveParentSpan = async (toolUseID) => {
13174
+ const parentToolUseId = toolUseToParent.get(toolUseID);
13175
+ if (parentToolUseId) {
13176
+ const subAgentSpan = subAgentSpans.get(parentToolUseId);
13177
+ if (subAgentSpan) {
13178
+ return subAgentSpan.export();
13179
+ }
13180
+ }
13181
+ return span.export();
13182
+ };
13183
+ const optionsWithHooks = injectTracingHooks(
13184
+ options,
13185
+ resolveParentSpan,
13186
+ activeToolSpans,
13187
+ subAgentSpans,
13188
+ endedSubAgentSpans
13189
+ );
13190
+ const modifiedArgArray = [
13191
+ {
13192
+ ...params,
13193
+ ...promptForQuery !== void 0 ? { prompt: promptForQuery } : {},
13194
+ options: optionsWithHooks
13195
+ }
13196
+ ];
12455
13197
  const originalGenerator = withCurrent(
12456
13198
  span,
12457
- () => Reflect.apply(target, invocationTarget, argArray)
13199
+ () => Reflect.apply(target, invocationTarget, modifiedArgArray)
12458
13200
  );
12459
13201
  const wrappedGenerator = (async function* () {
12460
13202
  try {
12461
13203
  for await (const message of originalGenerator) {
12462
13204
  const currentTime = getCurrentUnixTimestamp();
13205
+ if (message.type === "assistant" && Array.isArray(message.message?.content)) {
13206
+ const parentToolUseId = message.parent_tool_use_id ?? null;
13207
+ for (const block of message.message.content) {
13208
+ if (block.type === "tool_use" && block.id) {
13209
+ toolUseToParent.set(block.id, parentToolUseId);
13210
+ if (block.name === "Task" && block.input?.subagent_type) {
13211
+ pendingSubAgentNames.set(
13212
+ block.id,
13213
+ block.input.subagent_type
13214
+ );
13215
+ }
13216
+ }
13217
+ }
13218
+ }
13219
+ if ("parent_tool_use_id" in message) {
13220
+ const parentToolUseId = message.parent_tool_use_id;
13221
+ if (parentToolUseId && !subAgentSpans.has(parentToolUseId)) {
13222
+ const agentName = pendingSubAgentNames.get(parentToolUseId);
13223
+ const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent";
13224
+ const parentExport = await span.export();
13225
+ const subAgentSpan = startSpan({
13226
+ name: spanName,
13227
+ spanAttributes: { type: "task" /* TASK */ },
13228
+ event: {
13229
+ metadata: {
13230
+ ...agentName && {
13231
+ "claude_agent_sdk.agent_type": agentName
13232
+ }
13233
+ }
13234
+ },
13235
+ parent: parentExport
13236
+ });
13237
+ subAgentSpans.set(parentToolUseId, subAgentSpan);
13238
+ }
13239
+ }
12463
13240
  const messageId = message.message?.id;
12464
13241
  if (messageId && messageId !== currentMessageId) {
12465
13242
  await createLLMSpan();
@@ -12505,6 +13282,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12505
13282
  });
12506
13283
  throw error;
12507
13284
  } finally {
13285
+ for (const [id, subSpan] of subAgentSpans) {
13286
+ if (!endedSubAgentSpans.has(id)) {
13287
+ subSpan.end();
13288
+ }
13289
+ }
13290
+ subAgentSpans.clear();
13291
+ if (capturedPromptMessages) {
13292
+ if (promptStarted) {
13293
+ await promptDone;
13294
+ }
13295
+ if (capturedPromptMessages.length > 0) {
13296
+ span.log({
13297
+ input: _formatCapturedMessages(capturedPromptMessages)
13298
+ });
13299
+ }
13300
+ }
12508
13301
  span.end();
12509
13302
  }
12510
13303
  })();
@@ -12532,43 +13325,25 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
12532
13325
  });
12533
13326
  return proxy;
12534
13327
  }
12535
- function wrapClaudeAgentTool(toolDef) {
12536
- const originalHandler = toolDef.handler;
12537
- const wrappedHandler = (args, extra) => traced(
12538
- async (span) => {
12539
- span.log({
12540
- input: args,
12541
- metadata: {
12542
- tool_name: toolDef.name,
12543
- tool_description: toolDef.description
12544
- }
12545
- });
12546
- const result = await originalHandler(args, extra);
12547
- span.log({
12548
- output: result
12549
- });
12550
- return result;
12551
- },
12552
- {
12553
- name: `${toolDef.name}`,
12554
- spanAttributes: {
12555
- type: "tool" /* TOOL */
13328
+ function _buildLLMInput(prompt, conversationHistory, capturedPromptMessages) {
13329
+ const promptMessages = [];
13330
+ if (typeof prompt === "string") {
13331
+ promptMessages.push({ content: prompt, role: "user" });
13332
+ } else if (capturedPromptMessages && capturedPromptMessages.length > 0) {
13333
+ for (const msg of capturedPromptMessages) {
13334
+ const role = msg.message?.role;
13335
+ const content = msg.message?.content;
13336
+ if (role && content !== void 0) {
13337
+ promptMessages.push({ content, role });
12556
13338
  }
12557
13339
  }
12558
- );
12559
- return {
12560
- ...toolDef,
12561
- handler: wrappedHandler
12562
- };
12563
- }
12564
- function _buildLLMInput(prompt, conversationHistory) {
12565
- const promptMessage = typeof prompt === "string" ? { content: prompt, role: "user" } : void 0;
12566
- const inputParts = [
12567
- ...promptMessage ? [promptMessage] : [],
12568
- ...conversationHistory
12569
- ];
13340
+ }
13341
+ const inputParts = [...promptMessages, ...conversationHistory];
12570
13342
  return inputParts.length > 0 ? inputParts : void 0;
12571
13343
  }
13344
+ function _formatCapturedMessages(messages) {
13345
+ return messages.length > 0 ? messages : [];
13346
+ }
12572
13347
  function _extractUsageFromMessage(message) {
12573
13348
  const metrics = {};
12574
13349
  let usage;
@@ -12602,7 +13377,7 @@ function _extractUsageFromMessage(message) {
12602
13377
  }
12603
13378
  return metrics;
12604
13379
  }
12605
- async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, parentSpan) {
13380
+ async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, capturedPromptMessages, parentSpan) {
12606
13381
  if (messages.length === 0) return void 0;
12607
13382
  const lastMessage = messages[messages.length - 1];
12608
13383
  if (lastMessage.type !== "assistant" || !lastMessage.message?.usage) {
@@ -12610,7 +13385,11 @@ async function _createLLMSpanForMessages(messages, prompt, conversationHistory,
12610
13385
  }
12611
13386
  const model = lastMessage.message.model || options.model;
12612
13387
  const usage = _extractUsageFromMessage(lastMessage);
12613
- const input = _buildLLMInput(prompt, conversationHistory);
13388
+ const input = _buildLLMInput(
13389
+ prompt,
13390
+ conversationHistory,
13391
+ capturedPromptMessages
13392
+ );
12614
13393
  const outputs = messages.map(
12615
13394
  (m) => m.message?.content && m.message?.role ? { content: m.message.content, role: m.message.role } : void 0
12616
13395
  ).filter((c) => c !== void 0);
@@ -12651,25 +13430,9 @@ function wrapClaudeAgentSDK(sdk) {
12651
13430
  return wrappedQuery;
12652
13431
  }
12653
13432
  if (prop === "tool" && typeof value === "function") {
12654
- const toolFn = value;
12655
- const wrappedToolFactory = new Proxy(toolFn, {
12656
- apply(toolTarget, thisArg, argArray) {
12657
- const invocationTarget = thisArg === receiver || thisArg === void 0 ? target : thisArg;
12658
- const toolDef = Reflect.apply(
12659
- toolTarget,
12660
- invocationTarget,
12661
- argArray
12662
- );
12663
- if (toolDef && typeof toolDef === "object" && "handler" in toolDef) {
12664
- return wrapClaudeAgentTool(
12665
- toolDef
12666
- );
12667
- }
12668
- return toolDef;
12669
- }
12670
- });
12671
- cache.set(prop, wrappedToolFactory);
12672
- return wrappedToolFactory;
13433
+ const bound = value.bind(target);
13434
+ cache.set(prop, bound);
13435
+ return bound;
12673
13436
  }
12674
13437
  if (typeof value === "function") {
12675
13438
  const bound = value.bind(target);
@@ -13454,7 +14217,7 @@ function isAsync(fn) {
13454
14217
  function isAsyncGenerator2(fn) {
13455
14218
  return fn[Symbol.toStringTag] === "AsyncGenerator";
13456
14219
  }
13457
- function isAsyncIterable(obj) {
14220
+ function isAsyncIterable2(obj) {
13458
14221
  return typeof obj[Symbol.asyncIterator] === "function";
13459
14222
  }
13460
14223
  function wrapAsync(asyncFn) {
@@ -13625,7 +14388,7 @@ var eachOfLimit$2 = (limit) => {
13625
14388
  if (isAsyncGenerator2(obj)) {
13626
14389
  return asyncEachOfLimit(obj, limit, iteratee, callback);
13627
14390
  }
13628
- if (isAsyncIterable(obj)) {
14391
+ if (isAsyncIterable2(obj)) {
13629
14392
  return asyncEachOfLimit(obj[Symbol.asyncIterator](), limit, iteratee, callback);
13630
14393
  }
13631
14394
  var nextElem = createIterator(obj);
@@ -14554,6 +15317,7 @@ var LocalTrace = class {
14554
15317
  spansFlushed = false;
14555
15318
  spansFlushPromise = null;
14556
15319
  cachedFetcher;
15320
+ threadCache = /* @__PURE__ */ new Map();
14557
15321
  constructor({
14558
15322
  objectType,
14559
15323
  objectId,
@@ -14624,6 +15388,36 @@ var LocalTrace = class {
14624
15388
  }
14625
15389
  return this.cachedFetcher.getSpans({ spanType });
14626
15390
  }
15391
+ /**
15392
+ * Get the thread (preprocessed messages) for this trace.
15393
+ * Calls the API with the project_default preprocessor (which falls back to "thread").
15394
+ */
15395
+ async getThread(options) {
15396
+ const cacheKey = options?.preprocessor ?? "project_default";
15397
+ if (!this.threadCache.has(cacheKey)) {
15398
+ const promise = this.fetchThread(options);
15399
+ this.threadCache.set(cacheKey, promise);
15400
+ }
15401
+ return this.threadCache.get(cacheKey);
15402
+ }
15403
+ async fetchThread(options) {
15404
+ await this.ensureSpansReady();
15405
+ await this.state.login({});
15406
+ const result = await invoke({
15407
+ globalFunction: options?.preprocessor ?? "project_default",
15408
+ functionType: "preprocessor",
15409
+ input: {
15410
+ trace_ref: {
15411
+ object_type: this.objectType,
15412
+ object_id: this.objectId,
15413
+ root_span_id: this.rootSpanId
15414
+ }
15415
+ },
15416
+ mode: "json",
15417
+ state: this.state
15418
+ });
15419
+ return Array.isArray(result) ? result : [];
15420
+ }
14627
15421
  async ensureSpansReady() {
14628
15422
  if (this.spansFlushed || !this.ensureSpansFlushed) {
14629
15423
  return;
@@ -14658,648 +15452,371 @@ var SimpleProgressReporter = class {
14658
15452
 
14659
15453
  // src/eval-parameters.ts
14660
15454
  import { z as z10 } from "zod/v3";
15455
+ import Ajv from "ajv";
14661
15456
 
14662
- // src/framework2.ts
15457
+ // src/prompt-schemas.ts
14663
15458
  import { z as z9 } from "zod/v3";
14664
- var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
14665
- var ProjectBuilder = class {
14666
- create(opts) {
14667
- return new Project2(opts);
15459
+ var promptContentsSchema = z9.union([
15460
+ z9.object({
15461
+ prompt: z9.string()
15462
+ }),
15463
+ z9.object({
15464
+ messages: z9.array(ChatCompletionMessageParam)
15465
+ })
15466
+ ]);
15467
+ var promptDefinitionSchema = promptContentsSchema.and(
15468
+ z9.object({
15469
+ model: z9.string(),
15470
+ params: ModelParams.optional(),
15471
+ templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
15472
+ })
15473
+ );
15474
+ var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
15475
+ z9.object({
15476
+ tools: z9.array(ToolFunctionDefinition).optional()
15477
+ })
15478
+ );
15479
+ function promptDefinitionToPromptData(promptDefinition, rawTools) {
15480
+ const promptBlock = "messages" in promptDefinition ? {
15481
+ type: "chat",
15482
+ messages: promptDefinition.messages,
15483
+ tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
15484
+ } : {
15485
+ type: "completion",
15486
+ content: promptDefinition.prompt
15487
+ };
15488
+ return {
15489
+ prompt: promptBlock,
15490
+ options: {
15491
+ model: promptDefinition.model,
15492
+ params: promptDefinition.params
15493
+ },
15494
+ ...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
15495
+ };
15496
+ }
15497
+
15498
+ // src/eval-parameters.ts
15499
+ var evalParametersSchema = z10.record(
15500
+ z10.string(),
15501
+ z10.union([
15502
+ z10.object({
15503
+ type: z10.literal("prompt"),
15504
+ default: promptDefinitionWithToolsSchema.optional(),
15505
+ description: z10.string().optional()
15506
+ }),
15507
+ z10.instanceof(z10.ZodType)
15508
+ // For Zod schemas
15509
+ ])
15510
+ );
15511
+ async function validateParameters(parameters, parameterSchema) {
15512
+ let resolvedSchema = parameterSchema;
15513
+ if (resolvedSchema instanceof Promise) {
15514
+ resolvedSchema = await resolvedSchema;
15515
+ }
15516
+ if (resolvedSchema === void 0 || resolvedSchema === null) {
15517
+ return parameters;
15518
+ }
15519
+ if (RemoteEvalParameters.isParameters(resolvedSchema)) {
15520
+ const mergedParameters = parameters && Object.keys(parameters).length > 0 ? {
15521
+ ...resolvedSchema.data,
15522
+ ...parameters
15523
+ } : resolvedSchema.data;
15524
+ return validateParametersWithJsonSchema(
15525
+ mergedParameters,
15526
+ resolvedSchema.schema
15527
+ );
14668
15528
  }
14669
- };
14670
- var projects = new ProjectBuilder();
14671
- var Project2 = class {
14672
- name;
14673
- id;
14674
- tools;
14675
- prompts;
14676
- scorers;
14677
- _publishableCodeFunctions = [];
14678
- _publishablePrompts = [];
14679
- constructor(args) {
14680
- _initializeSpanContext();
14681
- this.name = "name" in args ? args.name : void 0;
14682
- this.id = "id" in args ? args.id : void 0;
14683
- this.tools = new ToolBuilder(this);
14684
- this.prompts = new PromptBuilder(this);
14685
- this.scorers = new ScorerBuilder(this);
15529
+ return validateParametersWithZod(
15530
+ parameters,
15531
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15532
+ resolvedSchema
15533
+ );
15534
+ }
15535
+ function validateParametersWithZod(parameters, parameterSchema) {
15536
+ return Object.fromEntries(
15537
+ Object.entries(parameterSchema).map(([name, schema]) => {
15538
+ const value = parameters[name];
15539
+ try {
15540
+ if ("type" in schema && schema.type === "prompt") {
15541
+ const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
15542
+ schema.default,
15543
+ schema.default.tools
15544
+ ) : void 0;
15545
+ if (!promptData) {
15546
+ throw new Error(`Parameter '${name}' is required`);
15547
+ }
15548
+ return [name, Prompt2.fromPromptData(name, promptData)];
15549
+ } else {
15550
+ const schemaCasted = schema;
15551
+ return [name, schemaCasted.parse(value)];
15552
+ }
15553
+ } catch (e) {
15554
+ console.error("Error validating parameter", name, e);
15555
+ throw Error(
15556
+ `Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
15557
+ );
15558
+ }
15559
+ })
15560
+ );
15561
+ }
15562
+ function validateParametersWithJsonSchema(parameters, schema) {
15563
+ const ajv = new Ajv({ coerceTypes: true, useDefaults: true, strict: false });
15564
+ const validate = ajv.compile(schema);
15565
+ if (!validate(parameters)) {
15566
+ const errorMessages = validate.errors?.map((err) => {
15567
+ const path2 = err.instancePath || "root";
15568
+ return `${path2}: ${err.message}`;
15569
+ }).join(", ");
15570
+ throw Error(`Invalid parameters: ${errorMessages}`);
14686
15571
  }
14687
- addPrompt(prompt) {
14688
- this._publishablePrompts.push(prompt);
14689
- if (globalThis._lazy_load) {
14690
- globalThis._evals.prompts.push(prompt);
14691
- }
15572
+ return parameters;
15573
+ }
15574
+
15575
+ // src/framework.ts
15576
+ function BaseExperiment(options = {}) {
15577
+ return { _type: "BaseExperiment", ...options };
15578
+ }
15579
+ var EvalResultWithSummary = class {
15580
+ constructor(summary, results) {
15581
+ this.summary = summary;
15582
+ this.results = results;
14692
15583
  }
14693
- addCodeFunction(fn) {
14694
- this._publishableCodeFunctions.push(fn);
14695
- if (globalThis._lazy_load) {
14696
- globalThis._evals.functions.push(fn);
14697
- }
15584
+ /**
15585
+ * @deprecated Use `summary` instead.
15586
+ */
15587
+ toString() {
15588
+ return JSON.stringify(this.summary);
14698
15589
  }
14699
- async publish() {
14700
- if (globalThis._lazy_load) {
14701
- console.warn("publish() is a no-op when running `braintrust push`.");
14702
- return;
14703
- }
14704
- await login();
14705
- const projectMap = new ProjectNameIdMap();
14706
- const functionDefinitions = [];
14707
- if (this._publishableCodeFunctions.length > 0) {
14708
- console.warn(
14709
- "Code functions cannot be published directly. Use `braintrust push` instead."
14710
- );
14711
- }
14712
- if (this._publishablePrompts.length > 0) {
14713
- for (const prompt of this._publishablePrompts) {
14714
- const functionDefinition = await prompt.toFunctionDefinition(projectMap);
14715
- functionDefinitions.push(functionDefinition);
14716
- }
14717
- }
14718
- await _internalGetGlobalState().apiConn().post_json("insert-functions", {
14719
- functions: functionDefinitions
14720
- });
15590
+ [Symbol.for("nodejs.util.inspect.custom")]() {
15591
+ return `EvalResultWithSummary(summary="...", results=[...])`;
15592
+ }
15593
+ toJSON() {
15594
+ return {
15595
+ summary: this.summary,
15596
+ results: this.results
15597
+ };
14721
15598
  }
14722
15599
  };
14723
- var ToolBuilder = class {
14724
- constructor(project) {
14725
- this.project = project;
15600
+ function makeEvalName(projectName, experimentName) {
15601
+ let out = projectName;
15602
+ if (experimentName) {
15603
+ out += ` [experimentName=${experimentName}]`;
14726
15604
  }
14727
- taskCounter = 0;
14728
- // This type definition is just a catch all so that the implementation can be
14729
- // less specific than the two more specific declarations above.
14730
- create(opts) {
14731
- this.taskCounter++;
14732
- opts = opts ?? {};
14733
- const { handler, name, slug, parameters, returns, ...rest } = opts;
14734
- let resolvedName = name ?? handler.name;
14735
- if (resolvedName.trim().length === 0) {
14736
- resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
14737
- }
14738
- const tool = new CodeFunction(this.project, {
14739
- handler,
14740
- name: resolvedName,
14741
- slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
14742
- type: "tool",
14743
- // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
14744
- parameters,
14745
- // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
14746
- returns,
14747
- ...rest
14748
- });
14749
- this.project.addCodeFunction(tool);
14750
- return tool;
15605
+ return out;
15606
+ }
15607
+ function initExperiment2(state, options = {}) {
15608
+ return init({
15609
+ state,
15610
+ ...options,
15611
+ setCurrent: false
15612
+ });
15613
+ }
15614
+ function callEvaluatorData(data) {
15615
+ const dataResult = typeof data === "function" ? data() : data;
15616
+ let baseExperiment = void 0;
15617
+ if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
15618
+ baseExperiment = dataResult.name;
14751
15619
  }
15620
+ return {
15621
+ data: dataResult,
15622
+ baseExperiment
15623
+ };
15624
+ }
15625
+ function isAsyncIterable3(value) {
15626
+ return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
15627
+ }
15628
+ function isIterable(value) {
15629
+ return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
15630
+ }
15631
+ globalThis._evals = {
15632
+ functions: [],
15633
+ prompts: [],
15634
+ parameters: [],
15635
+ evaluators: {},
15636
+ reporters: {}
14752
15637
  };
14753
- var ScorerBuilder = class {
14754
- constructor(project) {
14755
- this.project = project;
15638
+ function _initializeSpanContext() {
15639
+ globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
15640
+ }
15641
+ async function Eval(name, evaluator, reporterOrOpts) {
15642
+ const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
15643
+ let evalName = makeEvalName(name, evaluator.experimentName);
15644
+ if (globalThis._evals.evaluators[evalName]) {
15645
+ evalName = `${evalName}_${Object.keys(_evals).length}`;
14756
15646
  }
14757
- taskCounter = 0;
14758
- create(opts) {
14759
- this.taskCounter++;
14760
- let resolvedName = opts.name;
14761
- if (!resolvedName && "handler" in opts) {
14762
- resolvedName = opts.handler.name;
15647
+ if (globalThis._lazy_load) {
15648
+ globalThis._evals.evaluators[evalName] = {
15649
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15650
+ evaluator: {
15651
+ evalName,
15652
+ projectName: name,
15653
+ ...evaluator
15654
+ },
15655
+ reporter: options.reporter
15656
+ };
15657
+ _initializeSpanContext();
15658
+ return new EvalResultWithSummary(
15659
+ {
15660
+ scores: {},
15661
+ metrics: {},
15662
+ projectName: "",
15663
+ experimentName: ""
15664
+ },
15665
+ []
15666
+ );
15667
+ }
15668
+ const progressReporter = options.progress ?? new SimpleProgressReporter();
15669
+ const shouldCollectResults = options.returnResults ?? true;
15670
+ if (typeof options.reporter === "string") {
15671
+ throw new Error(
15672
+ "Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
15673
+ );
15674
+ }
15675
+ const resolvedReporter = options.reporter || defaultReporter;
15676
+ try {
15677
+ const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
15678
+ evaluator.data
15679
+ );
15680
+ const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
15681
+ ...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
15682
+ experiment: evaluator.experimentName,
15683
+ description: evaluator.description,
15684
+ metadata: evaluator.metadata,
15685
+ isPublic: evaluator.isPublic,
15686
+ update: evaluator.update,
15687
+ baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
15688
+ baseExperimentId: evaluator.baseExperimentId,
15689
+ gitMetadataSettings: evaluator.gitMetadataSettings,
15690
+ repoInfo: evaluator.repoInfo,
15691
+ dataset: Dataset2.isDataset(data) ? data : void 0
15692
+ });
15693
+ if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
15694
+ await experiment._waitForId();
14763
15695
  }
14764
- if (!resolvedName || resolvedName.trim().length === 0) {
14765
- resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
15696
+ if (experiment && options.onStart) {
15697
+ const summary = await experiment.summarize({ summarizeScores: false });
15698
+ options.onStart(summary);
14766
15699
  }
14767
- const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
14768
- if ("handler" in opts) {
14769
- const scorer = new CodeFunction(this.project, {
14770
- ...opts,
14771
- name: resolvedName,
14772
- slug,
14773
- type: "scorer"
14774
- });
14775
- this.project.addCodeFunction(scorer);
14776
- } else {
14777
- const promptBlock = "messages" in opts ? {
14778
- type: "chat",
14779
- messages: opts.messages
14780
- } : {
14781
- type: "completion",
14782
- content: opts.prompt
14783
- };
14784
- const promptData = {
14785
- prompt: promptBlock,
14786
- options: {
14787
- model: opts.model,
14788
- params: opts.params
14789
- },
14790
- parser: {
14791
- type: "llm_classifier",
14792
- use_cot: opts.useCot,
14793
- choice_scores: opts.choiceScores
14794
- }
15700
+ try {
15701
+ const evalDef = {
15702
+ evalName,
15703
+ projectName: name,
15704
+ ...evaluator,
15705
+ data
14795
15706
  };
14796
- const codePrompt = new CodePrompt(
14797
- this.project,
14798
- promptData,
14799
- [],
14800
- {
14801
- ...opts,
14802
- name: resolvedName,
14803
- slug
14804
- },
14805
- "scorer"
14806
- );
14807
- this.project.addPrompt(codePrompt);
14808
- }
14809
- }
14810
- };
14811
- var CodeFunction = class {
14812
- constructor(project, opts) {
14813
- this.project = project;
14814
- this.handler = opts.handler;
14815
- this.name = opts.name;
14816
- this.slug = opts.slug;
14817
- this.description = opts.description;
14818
- this.type = opts.type;
14819
- this.ifExists = opts.ifExists;
14820
- this.metadata = opts.metadata;
14821
- this.parameters = opts.parameters;
14822
- this.returns = opts.returns;
14823
- if (this.returns && !this.parameters) {
14824
- throw new Error("parameters are required if return type is defined");
15707
+ const enableCache = options.enableCache ?? true;
15708
+ let ret;
15709
+ if (options.parent) {
15710
+ ret = await withParent(
15711
+ options.parent,
15712
+ () => runEvaluator(
15713
+ null,
15714
+ evalDef,
15715
+ progressReporter,
15716
+ [],
15717
+ options.stream,
15718
+ options.parameters,
15719
+ shouldCollectResults,
15720
+ enableCache
15721
+ ),
15722
+ evaluator.state
15723
+ );
15724
+ } else {
15725
+ ret = await runEvaluator(
15726
+ experiment,
15727
+ evalDef,
15728
+ progressReporter,
15729
+ [],
15730
+ options.stream,
15731
+ options.parameters,
15732
+ shouldCollectResults,
15733
+ enableCache
15734
+ );
15735
+ }
15736
+ progressReporter.stop();
15737
+ resolvedReporter.reportEval(evalDef, ret, {
15738
+ verbose: true,
15739
+ jsonl: false
15740
+ });
15741
+ return ret;
15742
+ } finally {
15743
+ if (experiment) {
15744
+ await experiment.flush().catch(console.error);
15745
+ } else if (options.parent) {
15746
+ await flush().catch(console.error);
15747
+ }
14825
15748
  }
15749
+ } finally {
15750
+ progressReporter.stop();
14826
15751
  }
14827
- handler;
14828
- name;
14829
- slug;
14830
- type;
14831
- description;
14832
- parameters;
14833
- returns;
14834
- ifExists;
14835
- metadata;
14836
- key() {
14837
- return JSON.stringify([
14838
- this.project.id ?? "",
14839
- this.project.name ?? "",
14840
- this.slug
14841
- ]);
15752
+ }
15753
+ function Reporter(name, reporter) {
15754
+ const ret = { name, ...reporter };
15755
+ if (_evals.reporters[name]) {
15756
+ throw new Error(`Reporter ${name} already exists`);
14842
15757
  }
14843
- };
14844
- var CodePrompt = class {
14845
- project;
14846
- name;
14847
- slug;
14848
- prompt;
14849
- ifExists;
14850
- description;
14851
- id;
14852
- functionType;
14853
- toolFunctions;
14854
- metadata;
14855
- constructor(project, prompt, toolFunctions, opts, functionType) {
14856
- this.project = project;
14857
- this.name = opts.name;
14858
- this.slug = opts.slug;
14859
- this.prompt = prompt;
14860
- this.toolFunctions = toolFunctions;
14861
- this.ifExists = opts.ifExists;
14862
- this.description = opts.description;
14863
- this.id = opts.id;
14864
- this.functionType = functionType;
14865
- this.metadata = opts.metadata;
15758
+ if (globalThis._lazy_load) {
15759
+ _evals.reporters[name] = ret;
14866
15760
  }
14867
- async toFunctionDefinition(projectNameToId) {
14868
- const prompt_data = {
14869
- ...this.prompt
14870
- };
14871
- if (this.toolFunctions.length > 0) {
14872
- const resolvableToolFunctions = await Promise.all(
14873
- this.toolFunctions.map(async (fn) => {
14874
- if ("slug" in fn) {
14875
- return {
14876
- type: "slug",
14877
- project_id: await projectNameToId.resolve(fn.project),
14878
- slug: fn.slug
14879
- };
14880
- } else {
14881
- return fn;
14882
- }
14883
- })
14884
- );
14885
- prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
14886
- resolvableToolFunctions;
14887
- }
14888
- return {
14889
- project_id: await projectNameToId.resolve(this.project),
14890
- name: this.name,
14891
- slug: this.slug,
14892
- description: this.description ?? "",
14893
- function_data: {
14894
- type: "prompt"
14895
- },
14896
- function_type: this.functionType,
14897
- prompt_data,
14898
- if_exists: this.ifExists,
14899
- metadata: this.metadata
14900
- };
15761
+ return ret;
15762
+ }
15763
+ function serializeJSONWithPlainString(v) {
15764
+ if (typeof v === "string") {
15765
+ return v;
15766
+ } else {
15767
+ return JSON.stringify(v);
14901
15768
  }
14902
- };
14903
- var promptContentsSchema = z9.union([
14904
- z9.object({
14905
- prompt: z9.string()
14906
- }),
14907
- z9.object({
14908
- messages: z9.array(ChatCompletionMessageParam)
14909
- })
14910
- ]);
14911
- var promptDefinitionSchema = promptContentsSchema.and(
14912
- z9.object({
14913
- model: z9.string(),
14914
- params: ModelParams.optional(),
14915
- templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
14916
- })
14917
- );
14918
- var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
14919
- z9.object({
14920
- tools: z9.array(ToolFunctionDefinition).optional()
14921
- })
14922
- );
14923
- var PromptBuilder = class {
14924
- constructor(project) {
14925
- this.project = project;
14926
- }
14927
- create(opts) {
14928
- const toolFunctions = [];
14929
- const rawTools = [];
14930
- for (const tool of opts.tools ?? []) {
14931
- if (tool instanceof CodeFunction) {
14932
- toolFunctions.push(tool);
14933
- } else if ("type" in tool && !("function" in tool)) {
14934
- toolFunctions.push(tool);
14935
- } else {
14936
- rawTools.push(tool);
14937
- }
14938
- }
14939
- const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
14940
- const promptData = promptDefinitionToPromptData(opts, rawTools);
14941
- const promptRow = {
14942
- id: opts.id,
14943
- _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
14944
- name: opts.name,
14945
- slug,
14946
- prompt_data: promptData,
14947
- ...this.project.id !== void 0 ? { project_id: this.project.id } : {}
14948
- };
14949
- const prompt = new Prompt2(
14950
- promptRow,
14951
- {},
14952
- // It doesn't make sense to specify defaults here.
14953
- opts.noTrace ?? false
14954
- );
14955
- const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
14956
- ...opts,
14957
- slug
14958
- });
14959
- this.project.addPrompt(codePrompt);
14960
- return prompt;
14961
- }
14962
- };
14963
- function promptDefinitionToPromptData(promptDefinition, rawTools) {
14964
- const promptBlock = "messages" in promptDefinition ? {
14965
- type: "chat",
14966
- messages: promptDefinition.messages,
14967
- tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
14968
- } : {
14969
- type: "completion",
14970
- content: promptDefinition.prompt
14971
- };
14972
- return {
14973
- prompt: promptBlock,
14974
- options: {
14975
- model: promptDefinition.model,
14976
- params: promptDefinition.params
14977
- },
14978
- ...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
14979
- };
14980
15769
  }
14981
- var ProjectNameIdMap = class {
14982
- nameToId = {};
14983
- idToName = {};
14984
- async getId(projectName) {
14985
- if (!(projectName in this.nameToId)) {
14986
- const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
14987
- project_name: projectName
14988
- });
14989
- const result = z9.object({
14990
- project: Project
14991
- }).parse(response);
14992
- const projectId = result.project.id;
14993
- this.nameToId[projectName] = projectId;
14994
- this.idToName[projectId] = projectName;
14995
- }
14996
- return this.nameToId[projectName];
14997
- }
14998
- async getName(projectId) {
14999
- if (!(projectId in this.idToName)) {
15000
- const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
15001
- id: projectId
15002
- });
15003
- const result = z9.array(Project).nonempty().parse(response);
15004
- const projectName = result[0].name;
15005
- this.idToName[projectId] = projectName;
15006
- this.nameToId[projectName] = projectId;
15007
- }
15008
- return this.idToName[projectId];
15009
- }
15010
- async resolve(project) {
15011
- if (project.id) {
15012
- return project.id;
15013
- }
15014
- return this.getId(project.name);
15015
- }
15016
- };
15017
-
15018
- // src/eval-parameters.ts
15019
- var evalParametersSchema = z10.record(
15020
- z10.string(),
15021
- z10.union([
15022
- z10.object({
15023
- type: z10.literal("prompt"),
15024
- default: promptDefinitionWithToolsSchema.optional(),
15025
- description: z10.string().optional()
15026
- }),
15027
- z10.instanceof(z10.ZodType)
15028
- // For Zod schemas
15029
- ])
15030
- );
15031
- function validateParameters(parameters, parameterSchema) {
15032
- return Object.fromEntries(
15033
- Object.entries(parameterSchema).map(([name, schema]) => {
15034
- const value = parameters[name];
15035
- try {
15036
- if ("type" in schema && schema.type === "prompt") {
15037
- const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
15038
- schema.default,
15039
- schema.default.tools
15040
- ) : void 0;
15041
- if (!promptData) {
15042
- throw new Error(`Parameter '${name}' is required`);
15043
- }
15044
- return [name, Prompt2.fromPromptData(name, promptData)];
15045
- } else {
15046
- const schemaCasted = schema;
15047
- return [name, schemaCasted.parse(value)];
15048
- }
15049
- } catch (e) {
15050
- console.error("Error validating parameter", name, e);
15051
- throw Error(
15052
- `Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
15053
- );
15054
- }
15055
- })
15770
+ function evaluateFilter(object, filter2) {
15771
+ const { path: path2, pattern } = filter2;
15772
+ const key = path2.reduce(
15773
+ (acc, p) => typeof acc === "object" && acc !== null ? (
15774
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15775
+ acc[p]
15776
+ ) : void 0,
15777
+ object
15056
15778
  );
15057
- }
15058
-
15059
- // src/framework.ts
15060
- function BaseExperiment(options = {}) {
15061
- return { _type: "BaseExperiment", ...options };
15062
- }
15063
- var EvalResultWithSummary = class {
15064
- constructor(summary, results) {
15065
- this.summary = summary;
15066
- this.results = results;
15067
- }
15068
- /**
15069
- * @deprecated Use `summary` instead.
15070
- */
15071
- toString() {
15072
- return JSON.stringify(this.summary);
15073
- }
15074
- [Symbol.for("nodejs.util.inspect.custom")]() {
15075
- return `EvalResultWithSummary(summary="...", results=[...])`;
15076
- }
15077
- toJSON() {
15078
- return {
15079
- summary: this.summary,
15080
- results: this.results
15081
- };
15082
- }
15083
- };
15084
- function makeEvalName(projectName, experimentName) {
15085
- let out = projectName;
15086
- if (experimentName) {
15087
- out += ` [experimentName=${experimentName}]`;
15088
- }
15089
- return out;
15090
- }
15091
- function initExperiment2(state, options = {}) {
15092
- return init({
15093
- state,
15094
- ...options,
15095
- setCurrent: false
15096
- });
15097
- }
15098
- function callEvaluatorData(data) {
15099
- const dataResult = typeof data === "function" ? data() : data;
15100
- let baseExperiment = void 0;
15101
- if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
15102
- baseExperiment = dataResult.name;
15779
+ if (key === void 0) {
15780
+ return false;
15103
15781
  }
15104
- return {
15105
- data: dataResult,
15106
- baseExperiment
15107
- };
15782
+ return pattern.test(serializeJSONWithPlainString(key));
15108
15783
  }
15109
- function isAsyncIterable2(value) {
15110
- return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
15784
+ function scorerName(scorer, scorer_idx) {
15785
+ return scorer.name || `scorer_${scorer_idx}`;
15111
15786
  }
15112
- function isIterable(value) {
15113
- return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
15787
+ async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
15788
+ return await runEvaluatorInternal(
15789
+ experiment,
15790
+ evaluator,
15791
+ progressReporter,
15792
+ filters,
15793
+ stream,
15794
+ parameters,
15795
+ collectResults,
15796
+ enableCache
15797
+ );
15114
15798
  }
15115
- globalThis._evals = {
15116
- functions: [],
15117
- prompts: [],
15118
- evaluators: {},
15119
- reporters: {}
15799
+ var defaultErrorScoreHandler = ({
15800
+ rootSpan,
15801
+ data: _,
15802
+ unhandledScores
15803
+ }) => {
15804
+ const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
15805
+ rootSpan.log({ scores });
15806
+ return scores;
15120
15807
  };
15121
- function _initializeSpanContext() {
15122
- globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
15123
- }
15124
- async function Eval(name, evaluator, reporterOrOpts) {
15125
- const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
15126
- let evalName = makeEvalName(name, evaluator.experimentName);
15127
- if (globalThis._evals.evaluators[evalName]) {
15128
- evalName = `${evalName}_${Object.keys(_evals).length}`;
15129
- }
15130
- if (globalThis._lazy_load) {
15131
- globalThis._evals.evaluators[evalName] = {
15132
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15133
- evaluator: {
15134
- evalName,
15135
- projectName: name,
15136
- ...evaluator
15137
- },
15138
- reporter: options.reporter
15139
- };
15140
- _initializeSpanContext();
15141
- return new EvalResultWithSummary(
15142
- {
15143
- scores: {},
15144
- metrics: {},
15145
- projectName: "",
15146
- experimentName: ""
15147
- },
15148
- []
15149
- );
15150
- }
15151
- const progressReporter = options.progress ?? new SimpleProgressReporter();
15152
- const shouldCollectResults = options.returnResults ?? true;
15153
- if (typeof options.reporter === "string") {
15154
- throw new Error(
15155
- "Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
15156
- );
15157
- }
15158
- const resolvedReporter = options.reporter || defaultReporter;
15159
- try {
15160
- const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
15161
- evaluator.data
15162
- );
15163
- const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
15164
- ...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
15165
- experiment: evaluator.experimentName,
15166
- description: evaluator.description,
15167
- metadata: evaluator.metadata,
15168
- isPublic: evaluator.isPublic,
15169
- update: evaluator.update,
15170
- baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
15171
- baseExperimentId: evaluator.baseExperimentId,
15172
- gitMetadataSettings: evaluator.gitMetadataSettings,
15173
- repoInfo: evaluator.repoInfo,
15174
- dataset: Dataset2.isDataset(data) ? data : void 0
15175
- });
15176
- if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
15177
- await experiment._waitForId();
15178
- }
15179
- if (experiment && options.onStart) {
15180
- const summary = await experiment.summarize({ summarizeScores: false });
15181
- options.onStart(summary);
15182
- }
15183
- try {
15184
- const evalDef = {
15185
- evalName,
15186
- projectName: name,
15187
- ...evaluator,
15188
- data
15189
- };
15190
- const enableCache = options.enableCache ?? true;
15191
- let ret;
15192
- if (options.parent) {
15193
- ret = await withParent(
15194
- options.parent,
15195
- () => runEvaluator(
15196
- null,
15197
- evalDef,
15198
- progressReporter,
15199
- [],
15200
- options.stream,
15201
- options.parameters,
15202
- shouldCollectResults,
15203
- enableCache
15204
- ),
15205
- evaluator.state
15206
- );
15207
- } else {
15208
- ret = await runEvaluator(
15209
- experiment,
15210
- evalDef,
15211
- progressReporter,
15212
- [],
15213
- options.stream,
15214
- options.parameters,
15215
- shouldCollectResults,
15216
- enableCache
15217
- );
15218
- }
15219
- progressReporter.stop();
15220
- resolvedReporter.reportEval(evalDef, ret, {
15221
- verbose: true,
15222
- jsonl: false
15223
- });
15224
- return ret;
15225
- } finally {
15226
- if (experiment) {
15227
- await experiment.flush().catch(console.error);
15228
- } else if (options.parent) {
15229
- await flush().catch(console.error);
15230
- }
15231
- }
15232
- } finally {
15233
- progressReporter.stop();
15234
- }
15235
- }
15236
- function Reporter(name, reporter) {
15237
- const ret = { name, ...reporter };
15238
- if (_evals.reporters[name]) {
15239
- throw new Error(`Reporter ${name} already exists`);
15240
- }
15241
- if (globalThis._lazy_load) {
15242
- _evals.reporters[name] = ret;
15243
- }
15244
- return ret;
15245
- }
15246
- function serializeJSONWithPlainString(v) {
15247
- if (typeof v === "string") {
15248
- return v;
15249
- } else {
15250
- return JSON.stringify(v);
15251
- }
15252
- }
15253
- function evaluateFilter(object, filter2) {
15254
- const { path: path2, pattern } = filter2;
15255
- const key = path2.reduce(
15256
- (acc, p) => typeof acc === "object" && acc !== null ? (
15257
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
15258
- acc[p]
15259
- ) : void 0,
15260
- object
15261
- );
15262
- if (key === void 0) {
15263
- return false;
15264
- }
15265
- return pattern.test(serializeJSONWithPlainString(key));
15266
- }
15267
- function scorerName(scorer, scorer_idx) {
15268
- return scorer.name || `scorer_${scorer_idx}`;
15269
- }
15270
- async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
15271
- return await runEvaluatorInternal(
15272
- experiment,
15273
- evaluator,
15274
- progressReporter,
15275
- filters,
15276
- stream,
15277
- parameters,
15278
- collectResults,
15279
- enableCache
15280
- );
15281
- }
15282
- var defaultErrorScoreHandler = ({
15283
- rootSpan,
15284
- data: _,
15285
- unhandledScores
15286
- }) => {
15287
- const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
15288
- rootSpan.log({ scores });
15289
- return scores;
15290
- };
15291
- async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
15292
- if (enableCache) {
15293
- (evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
15808
+ async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
15809
+ if (enableCache) {
15810
+ (evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
15294
15811
  }
15295
15812
  try {
15296
15813
  if (typeof evaluator.data === "string") {
15297
15814
  throw new Error("Unimplemented: string data paths");
15298
15815
  }
15299
15816
  let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
15300
- parameters = validateParameters(
15817
+ parameters = await validateParameters(
15301
15818
  parameters ?? {},
15302
- evaluator.parameters ?? {}
15819
+ evaluator.parameters
15303
15820
  );
15304
15821
  if ("_type" in dataResult) {
15305
15822
  if (dataResult._type !== "BaseExperiment") {
@@ -15326,7 +15843,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
15326
15843
  }
15327
15844
  const resolvedDataResult = dataResult instanceof Promise ? await dataResult : dataResult;
15328
15845
  const dataIterable = (() => {
15329
- if (isAsyncIterable2(resolvedDataResult)) {
15846
+ if (isAsyncIterable3(resolvedDataResult)) {
15330
15847
  return resolvedDataResult;
15331
15848
  }
15332
15849
  if (Array.isArray(resolvedDataResult) || isIterable(resolvedDataResult)) {
@@ -15458,6 +15975,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
15458
15975
  } else {
15459
15976
  rootSpan.log({ output, metadata, expected });
15460
15977
  }
15978
+ if (evaluator.flushBeforeScoring) {
15979
+ await rootSpan.flush();
15980
+ }
15461
15981
  const scoringArgs = {
15462
15982
  input: datum.input,
15463
15983
  expected: "expected" in datum ? datum.expected : void 0,
@@ -15704,206 +16224,646 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
15704
16224
  spanCache?.stop();
15705
16225
  }
15706
16226
  }
15707
- }
15708
- var warning = (text) => `Warning: ${text}`;
15709
- function logError2(e, verbose) {
15710
- if (!verbose) {
15711
- console.error(`${e}`);
15712
- } else {
15713
- console.error(e);
16227
+ }
16228
// Prefixes a message with the "Warning: " label used by the console diagnostics in this file.
var warning = function (text) {
  return `Warning: ${text}`;
};
16229
/**
 * Writes an error to the console's error stream.
 *
 * @param {*} e - The error (or any value) to report.
 * @param {boolean} verbose - When truthy, `e` is handed to `console.error`
 *   unchanged; otherwise it is first converted to its string form.
 */
function logError2(e, verbose) {
  const payload = verbose ? e : `${e}`;
  console.error(payload);
}
16236
/**
 * Folds one result's scores into a running per-scorer accumulator.
 *
 * For every entry of `scores` whose value is neither `null` nor `undefined`,
 * the matching `accumulator[name]` record is replaced by a new
 * `{ total, count }` object with the score added to `total` and `count`
 * incremented by one.
 *
 * @param {Record<string, {total: number, count: number}>} accumulator -
 *   Mutated in place.
 * @param {Record<string, number | null | undefined> | null | undefined} scores -
 *   Scores keyed by scorer name; a missing map is treated as empty.
 * @returns {void}
 */
function accumulateScores(accumulator, scores) {
  for (const [name, score] of Object.entries(scores ?? {})) {
    if (score === null || score === void 0) {
      continue;
    }
    // Look at own properties only. A plain `accumulator[name]` read would find
    // inherited members such as `constructor` or `toString` for scorers that
    // happen to use those names, and the running total would become NaN.
    const existing = Object.hasOwn(accumulator, name)
      ? accumulator[name]
      : { total: 0, count: 0 };
    accumulator[name] = {
      total: existing.total + score,
      count: existing.count + 1
    };
  }
}
16248
/**
 * Builds a fresh score accumulator from a collection of results by feeding
 * each result's `scores` through `accumulateScores`.
 *
 * @param {Iterable<{scores: object}>} results - Results to aggregate.
 * @returns {object} The accumulator keyed by score name.
 */
function ensureScoreAccumulator(results) {
  const totals = {};
  for (const { scores } of results) {
    accumulateScores(totals, scores);
  }
  return totals;
}
16255
/**
 * Produces a summary object from locally available results.
 *
 * @param {{projectName: *, evalName: *}} evaluator - Supplies the project and
 *   experiment names copied into the summary.
 * @param {Iterable<{scores: object}>} results - Only consulted when
 *   `precomputedScores` is null or undefined.
 * @param {Record<string, {total: number, count: number}>} [precomputedScores] -
 *   Already-aggregated totals; takes precedence over `results`.
 * @returns {{projectName: *, experimentName: *, scores: object}} Each score
 *   entry holds the mean (`total / count`, or 0 when `count` is 0) with
 *   `improvements` and `regressions` fixed at 0.
 */
function buildLocalSummary(evaluator, results, precomputedScores) {
  const totalsByScorer = precomputedScores ?? ensureScoreAccumulator(results);
  const toSummaryEntry = ([name, { total, count }]) => {
    const score = count === 0 ? 0 : total / count;
    return [name, { name, score, improvements: 0, regressions: 0 }];
  };
  const summaryEntries = Object.entries(totalsByScorer).map(toSummaryEntry);
  return {
    projectName: evaluator.projectName,
    experimentName: evaluator.evalName,
    scores: Object.fromEntries(summaryEntries)
  };
}
16273
/**
 * Reports the failing results of an evaluator on the console.
 *
 * Nothing is printed when there are no failures. Otherwise a warning line is
 * written to stderr, followed by either a single JSON line on stdout (`jsonl`)
 * or one `logError2` call per failure. In non-JSON, non-verbose mode a hint
 * about `--verbose` is appended.
 *
 * @param {{evalName: *}} evaluator - Evaluator whose name appears in the output.
 * @param {Array<{error: *}>} failingResults - Results that carry an error.
 * @param {{verbose: boolean, jsonl: boolean}} options - Output mode flags.
 * @returns {void}
 */
function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
  const failureCount = failingResults.length;
  if (!(failureCount > 0)) {
    return;
  }
  const pluralSuffix = failureCount === 1 ? "" : "s";
  console.error(
    warning(
      `Evaluator ${evaluator.evalName} failed with ${failureCount} error${pluralSuffix}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
    )
  );
  if (jsonl) {
    // Error instances are represented by their stack; anything else is stringified.
    const describe = (r) => `${r.error instanceof Error ? r.error.stack : r.error}`;
    console.log(
      JSON.stringify({
        evaluatorName: evaluator.evalName,
        errors: failingResults.map(describe)
      })
    );
    return;
  }
  for (const failure of failingResults) {
    logError2(failure.error, verbose);
  }
  if (!verbose) {
    console.error(warning("Add --verbose to see full stack traces."));
  }
}
16299
// Reporter object with a `name`, a per-evaluator `reportEval` and a final `reportRun`.
var defaultReporter = {
  name: "Braintrust default reporter",
  /**
   * Prints the summary of one evaluation through `isomorph_default.writeln`.
   *
   * Failing results (those whose `error` is not undefined) are first passed to
   * `reportFailures`. In `jsonl` mode the summary is written as one JSON line;
   * otherwise a text report with scores, metrics and the experiment URL is
   * written. A trailing empty line is always written.
   *
   * @param {object} evaluator - Forwarded to `reportFailures`.
   * @param {{results: Array, summary: object}} result - Evaluation outcome.
   * @param {{verbose: boolean, jsonl: boolean}} options - Output mode flags.
   * @returns {Promise<boolean>} True when no result carried an error.
   */
  async reportEval(evaluator, result, { verbose, jsonl }) {
    const { results, summary } = result;
    const failingResults = results.filter((r) => r.error !== void 0);
    if (failingResults.length > 0) {
      reportFailures(evaluator, failingResults, { verbose, jsonl });
    }
    const emit = (line) => isomorph_default.writeln(line);
    if (jsonl) {
      emit(JSON.stringify(summary));
    } else {
      emit("Experiment summary");
      emit("==================");
      if (summary.comparisonExperimentName) {
        emit(
          `${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
        );
        emit("");
      }
      const hasScores = Object.keys(summary.scores).length > 0;
      const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
      const hasComparison = !!summary.comparisonExperimentName;
      // Signed percentage for a diff, or "-" when the diff is empty.
      const describeDiff = (diff) => {
        if (isEmpty2(diff)) {
          return "-";
        }
        const diffPercent = (diff * 100).toFixed(2);
        const diffSign = diff > 0 ? "+" : "";
        return `${diffSign}${diffPercent}%`;
      };
      // Positive counts are printed as numbers; everything else as "-".
      const describeCount = (count) => (count > 0 ? count.toString() : "-");
      // Scores and metrics share the same two row layouts.
      const emitRow = (entry, value) => {
        if (hasComparison) {
          const diffString = describeDiff(entry.diff);
          const improvements = describeCount(entry.improvements);
          const regressions = describeCount(entry.regressions);
          emit(
            `${entry.name.padEnd(18)} ${value.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
          );
        } else {
          emit(`${entry.name.padEnd(20)} ${value.padStart(15)}`);
        }
      };
      if (hasScores || hasMetrics) {
        if (hasComparison) {
          emit("Name Value Change Improvements Regressions");
          emit("----------------------------------------------------------------");
        }
        for (const score of Object.values(summary.scores)) {
          const scorePercent = (score.score * 100).toFixed(2);
          emitRow(score, `${scorePercent}%`);
        }
        for (const metric of Object.values(summary.metrics ?? {})) {
          const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
          const formattedValue = metric.metric.toFixed(fractionDigits);
          // A "$" unit is written before the number, any other unit after it.
          const metricValue =
            metric.unit === "$"
              ? `${metric.unit}${formattedValue}`
              : `${formattedValue}${metric.unit}`;
          emitRow(metric, metricValue);
        }
      }
      if (summary.experimentUrl) {
        emit("");
        emit(`View results for ${summary.experimentName}`);
        emit(`See results at ${summary.experimentUrl}`);
      }
    }
    emit("");
    return failingResults.length === 0;
  },
  /**
   * Combines the per-evaluator outcomes of a run.
   *
   * @param {Array<*>} evalReports - Values returned by `reportEval`.
   * @returns {Promise<boolean>} True when every report is truthy.
   */
  async reportRun(evalReports) {
    return evalReports.every(Boolean);
  }
};
16387
+
16388
+ // src/framework2.ts
16389
+ import { z as z11 } from "zod/v3";
16390
+ var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
16391
// Factory whose only job is to construct Project2 instances.
var ProjectBuilder = class {
  /**
   * @param {object} opts - Passed unchanged to the Project2 constructor.
   * @returns {Project2} The newly constructed project.
   */
  create(opts) {
    const project = new Project2(opts);
    return project;
  }
};
// Module-level builder instance.
var projects = new ProjectBuilder();
16397
/**
 * A project handle that collects prompts, parameters and code functions
 * registered through its builders, and can publish the collected prompts.
 */
var Project2 = class {
  name;
  id;
  tools;
  prompts;
  parameters;
  scorers;
  _publishableCodeFunctions = [];
  _publishablePrompts = [];
  _publishableParameters = [];
  /**
   * @param {object} args - May carry `name` and/or `id`; whichever key is
   *   present (per the `in` operator) is copied onto the instance.
   */
  constructor(args) {
    _initializeSpanContext();
    if ("name" in args) {
      this.name = args.name;
    }
    if ("id" in args) {
      this.id = args.id;
    }
    this.tools = new ToolBuilder(this);
    this.prompts = new PromptBuilder(this);
    this.parameters = new ParametersBuilder(this);
    this.scorers = new ScorerBuilder(this);
  }
  /** Records a prompt; also pushes it onto `globalThis._evals.prompts` when `globalThis._lazy_load` is truthy. */
  addPrompt(prompt) {
    this._publishablePrompts.push(prompt);
    if (!globalThis._lazy_load) {
      return;
    }
    globalThis._evals.prompts.push(prompt);
  }
  /** Records a parameters definition; in lazy-load mode it is also appended to `globalThis._evals.parameters`, creating that list if absent. */
  addParameters(parameters) {
    this._publishableParameters.push(parameters);
    if (!globalThis._lazy_load) {
      return;
    }
    globalThis._evals.parameters ??= [];
    globalThis._evals.parameters.push(parameters);
  }
  /** Records a code function; also pushes it onto `globalThis._evals.functions` when `globalThis._lazy_load` is truthy. */
  addCodeFunction(fn) {
    this._publishableCodeFunctions.push(fn);
    if (!globalThis._lazy_load) {
      return;
    }
    globalThis._evals.functions.push(fn);
  }
  /**
   * Logs in, converts every recorded prompt to a function definition and
   * posts the list to the "insert-functions" endpoint. Recorded code functions
   * only trigger a console warning, and recorded parameters are not part of
   * the payload. In lazy-load mode this only warns and returns.
   *
   * @returns {Promise<void>}
   */
  async publish() {
    if (globalThis._lazy_load) {
      console.warn("publish() is a no-op when running `braintrust push`.");
      return;
    }
    await login();
    const projectMap = new ProjectNameIdMap();
    if (this._publishableCodeFunctions.length > 0) {
      console.warn(
        "Code functions cannot be published directly. Use `braintrust push` instead."
      );
    }
    const functionDefinitions = [];
    // Converted one at a time, in registration order, sharing one project map.
    for (const prompt of this._publishablePrompts) {
      functionDefinitions.push(await prompt.toFunctionDefinition(projectMap));
    }
    const connection = _internalGetGlobalState().apiConn();
    await connection.post_json("insert-functions", {
      functions: functionDefinitions
    });
  }
};
16460
/**
 * Creates tool-type code functions and registers them on a project.
 */
var ToolBuilder = class {
  /**
   * @param {object} project - Project that receives tools via `addCodeFunction`.
   */
  constructor(project) {
    this.project = project;
  }
  // Counts create() calls; used to number tools that get a generated name.
  taskCounter = 0;
  /**
   * Builds a `CodeFunction` of type "tool" and adds it to the project.
   *
   * The implementation is deliberately loose about the shape of `opts` so it
   * can serve several more specific call signatures.
   *
   * @param {object} [opts] - Tool options: `handler`, optional `name`, `slug`,
   *   `parameters`, `returns`; any other keys are forwarded to `CodeFunction`.
   * @returns {CodeFunction} The created tool.
   * @throws {TypeError} When neither `name` nor `handler` is supplied, since no
   *   tool name can be derived.
   */
  create(opts) {
    this.taskCounter++;
    const { handler, name, slug, parameters, returns, ...rest } = opts ?? {};
    if (name == null && handler == null) {
      // Previously this surfaced as "Cannot read properties of undefined
      // (reading 'name')"; say what is actually missing instead.
      throw new TypeError(
        "ToolBuilder.create: cannot derive a tool name because neither `name` nor `handler` was provided"
      );
    }
    let resolvedName = name ?? handler.name;
    // Same fallback rule as ScorerBuilder: a missing or blank name gets a
    // generated one, rather than crashing on `.trim()` of a non-string.
    if (!resolvedName || resolvedName.trim().length === 0) {
      resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
    }
    const tool = new CodeFunction(this.project, {
      handler,
      name: resolvedName,
      slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
      type: "tool",
      parameters,
      returns,
      ...rest
    });
    this.project.addCodeFunction(tool);
    return tool;
  }
};
16490
/**
 * Creates scorers and registers them on a project: options with a `handler`
 * key become a `CodeFunction`, all others become a `CodePrompt`.
 */
var ScorerBuilder = class {
  /**
   * @param {object} project - Receives scorers via `addCodeFunction` / `addPrompt`.
   */
  constructor(project) {
    this.project = project;
  }
  // Counts create() calls; used to number scorers that get a generated name.
  taskCounter = 0;
  /**
   * Registers a scorer on the project.
   *
   * @param {object} opts - Either code-scorer options (with `handler`) or
   *   prompt-scorer options (`messages` or `prompt`, plus `model`, `params`,
   *   `useCot`, `choiceScores`). `name` and `slug` are optional in both forms.
   * @returns {undefined} Nothing is returned.
   */
  create(opts) {
    this.taskCounter++;
    const explicitName = opts.name;
    // Fall back to the handler's own name only when no usable name was given.
    const candidate =
      explicitName || !("handler" in opts) ? explicitName : opts.handler.name;
    const resolvedName =
      candidate && candidate.trim().length > 0
        ? candidate
        : `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
    const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
    if ("handler" in opts) {
      const scorer = new CodeFunction(this.project, {
        ...opts,
        name: resolvedName,
        slug,
        type: "scorer"
      });
      this.project.addCodeFunction(scorer);
      return;
    }
    // Prompt-based scorer: chat form when `messages` is present, completion form otherwise.
    let promptBlock;
    if ("messages" in opts) {
      promptBlock = { type: "chat", messages: opts.messages };
    } else {
      promptBlock = { type: "completion", content: opts.prompt };
    }
    const promptData = {
      prompt: promptBlock,
      options: { model: opts.model, params: opts.params },
      parser: {
        type: "llm_classifier",
        use_cot: opts.useCot,
        choice_scores: opts.choiceScores
      }
    };
    const promptOpts = { ...opts, name: resolvedName, slug };
    const codePrompt = new CodePrompt(this.project, promptData, [], promptOpts, "scorer");
    this.project.addPrompt(codePrompt);
  }
};
16548
/**
 * Plain record describing a code-backed function (for example a tool or a
 * scorer) that belongs to a project.
 */
var CodeFunction = class {
  handler;
  name;
  slug;
  type;
  description;
  parameters;
  returns;
  ifExists;
  metadata;
  /**
   * @param {object} project - Owning project; its `id` and `name` feed `key()`.
   * @param {object} opts - Source of `handler`, `name`, `slug`, `description`,
   *   `type`, `ifExists`, `metadata`, `parameters` and `returns`.
   * @throws {Error} When `returns` is set but `parameters` is not.
   */
  constructor(project, opts) {
    this.project = project;
    const {
      handler,
      name,
      slug,
      description,
      type,
      ifExists,
      metadata,
      parameters,
      returns
    } = opts;
    this.handler = handler;
    this.name = name;
    this.slug = slug;
    this.description = description;
    this.type = type;
    this.ifExists = ifExists;
    this.metadata = metadata;
    this.parameters = parameters;
    this.returns = returns;
    const hasReturnsWithoutParameters = this.returns && !this.parameters;
    if (hasReturnsWithoutParameters) {
      throw new Error("parameters are required if return type is defined");
    }
  }
  /**
   * @returns {string} JSON array of project id, project name and slug, with
   *   a missing id or name rendered as an empty string.
   */
  key() {
    const { id, name } = this.project;
    return JSON.stringify([id ?? "", name ?? "", this.slug]);
  }
};
16581
/**
 * Record describing a prompt that belongs to a project, convertible into a
 * function-definition payload.
 */
var CodePrompt = class {
  project;
  name;
  slug;
  prompt;
  ifExists;
  description;
  id;
  functionType;
  toolFunctions;
  metadata;
  /**
   * @param {object} project - Owning project.
   * @param {object} prompt - Prompt data, spread into `prompt_data` on conversion.
   * @param {Array<object>} toolFunctions - Tool references attached to the prompt.
   * @param {object} opts - Source of `name`, `slug`, `ifExists`, `description`,
   *   `id` and `metadata`.
   * @param {string} [functionType] - Emitted as `function_type`.
   */
  constructor(project, prompt, toolFunctions, opts, functionType) {
    const { name, slug, ifExists, description, id, metadata } = opts;
    this.project = project;
    this.name = name;
    this.slug = slug;
    this.prompt = prompt;
    this.toolFunctions = toolFunctions;
    this.ifExists = ifExists;
    this.description = description;
    this.id = id;
    this.functionType = functionType;
    this.metadata = metadata;
  }
  /**
   * Builds the function-definition object for this prompt.
   *
   * @param {{resolve: function(object): Promise<*>}} projectNameToId -
   *   Resolves a project to its id.
   * @returns {Promise<object>} Definition with `project_id`, `name`, `slug`,
   *   `description` (empty string when unset), `function_data`,
   *   `function_type`, `prompt_data`, `if_exists` and `metadata`.
   */
  async toFunctionDefinition(projectNameToId) {
    const prompt_data = { ...this.prompt };
    if (this.toolFunctions.length > 0) {
      // Entries with a `slug` key become slug references carrying the resolved
      // project id; every other entry is passed through untouched.
      const toResolvable = async (fn) => {
        if (!("slug" in fn)) {
          return fn;
        }
        return {
          type: "slug",
          project_id: await projectNameToId.resolve(fn.project),
          slug: fn.slug
        };
      };
      const resolvableToolFunctions = await Promise.all(
        this.toolFunctions.map((fn) => toResolvable(fn))
      );
      prompt_data.tool_functions = resolvableToolFunctions;
    }
    const projectId = await projectNameToId.resolve(this.project);
    return {
      project_id: projectId,
      name: this.name,
      slug: this.slug,
      description: this.description ?? "",
      function_data: { type: "prompt" },
      function_type: this.functionType,
      prompt_data,
      if_exists: this.ifExists,
      metadata: this.metadata
    };
  }
};
16640
/**
 * Creates prompts for a project: returns a `Prompt2` to the caller and
 * registers a matching `CodePrompt` on the project.
 */
var PromptBuilder = class {
  /**
   * @param {object} project - Receives the `CodePrompt` via `addPrompt`.
   */
  constructor(project) {
    this.project = project;
  }
  /**
   * @param {object} opts - Prompt definition. Reads `tools`, `slug`, `name`,
   *   `id`, `version` and `noTrace`; the whole object is also passed to
   *   `promptDefinitionToPromptData` and spread into the `CodePrompt` options.
   * @returns {Prompt2} Prompt built from the assembled row.
   */
  create(opts) {
    const toolFunctions = [];
    const rawTools = [];
    for (const tool of opts.tools ?? []) {
      // CodeFunction instances, and objects that have `type` but no `function`
      // key, are kept as tool functions; everything else is a raw tool.
      const goesToToolFunctions =
        tool instanceof CodeFunction || ("type" in tool && !("function" in tool));
      (goesToToolFunctions ? toolFunctions : rawTools).push(tool);
    }
    const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
    const promptData = promptDefinitionToPromptData(opts, rawTools);
    const projectId = this.project.id;
    const promptRow = {
      id: opts.id,
      _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
      name: opts.name,
      slug,
      prompt_data: promptData,
      // project_id is only present when the project has an id.
      ...(projectId !== void 0 ? { project_id: projectId } : {})
    };
    // It doesn't make sense to specify defaults here.
    const defaults = {};
    const prompt = new Prompt2(promptRow, defaults, opts.noTrace ?? false);
    const codePromptOpts = { ...opts, slug };
    this.project.addPrompt(
      new CodePrompt(this.project, promptData, toolFunctions, codePromptOpts)
    );
    return prompt;
  }
};
16680
+ var CodeParameters = class {
16681
+ project;
16682
+ name;
16683
+ slug;
16684
+ description;
16685
+ schema;
16686
+ ifExists;
16687
+ metadata;
16688
+ constructor(project, opts) {
16689
+ this.project = project;
16690
+ this.name = opts.name;
16691
+ this.slug = opts.slug;
16692
+ this.description = opts.description;
16693
+ this.schema = opts.schema;
16694
+ this.ifExists = opts.ifExists;
16695
+ this.metadata = opts.metadata;
15714
16696
  }
15715
- }
15716
- function accumulateScores(accumulator, scores) {
15717
- for (const [name, score] of Object.entries(scores)) {
15718
- if (score === null || score === void 0) {
15719
- continue;
15720
- }
15721
- const existing = accumulator[name] ?? { total: 0, count: 0 };
15722
- accumulator[name] = {
15723
- total: existing.total + score,
15724
- count: existing.count + 1
16697
+ async toFunctionDefinition(projectNameToId) {
16698
+ return {
16699
+ project_id: await projectNameToId.resolve(this.project),
16700
+ name: this.name,
16701
+ slug: this.slug,
16702
+ description: this.description ?? "",
16703
+ function_type: "parameters",
16704
+ function_data: {
16705
+ type: "parameters",
16706
+ data: {},
16707
+ __schema: serializeEvalParameterstoParametersSchema(this.schema)
16708
+ },
16709
+ if_exists: this.ifExists,
16710
+ metadata: this.metadata
15725
16711
  };
15726
16712
  }
15727
- }
15728
- function ensureScoreAccumulator(results) {
15729
- const accumulator = {};
15730
- for (const result of results) {
15731
- accumulateScores(accumulator, result.scores);
16713
+ };
16714
+ var ParametersBuilder = class {
16715
+ constructor(project) {
16716
+ this.project = project;
15732
16717
  }
15733
- return accumulator;
15734
- }
15735
- function buildLocalSummary(evaluator, results, precomputedScores) {
15736
- const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
15737
- return {
15738
- projectName: evaluator.projectName,
15739
- experimentName: evaluator.evalName,
15740
- scores: Object.fromEntries(
15741
- Object.entries(scoresByName).map(([name, { total, count }]) => [
15742
- name,
15743
- {
15744
- name,
15745
- score: count === 0 ? 0 : total / count,
15746
- improvements: 0,
15747
- regressions: 0
15748
- }
15749
- ])
15750
- )
15751
- };
15752
- }
15753
- function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
15754
- if (failingResults.length > 0) {
15755
- console.error(
15756
- warning(
15757
- `Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
15758
- )
15759
- );
15760
- if (jsonl) {
15761
- console.log(
15762
- JSON.stringify({
15763
- evaluatorName: evaluator.evalName,
15764
- errors: failingResults.map(
15765
- (r) => `${r.error instanceof Error ? r.error.stack : r.error}`
15766
- )
15767
- })
15768
- );
16718
+ create(opts) {
16719
+ const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
16720
+ const codeParameters = new CodeParameters(this.project, {
16721
+ name: opts.name,
16722
+ slug,
16723
+ description: opts.description,
16724
+ schema: opts.schema,
16725
+ ifExists: opts.ifExists,
16726
+ metadata: opts.metadata
16727
+ });
16728
+ this.project.addParameters(codeParameters);
16729
+ return opts.schema;
16730
+ }
16731
+ };
16732
+ function serializeEvalParameterstoParametersSchema(parameters) {
16733
+ const properties = {};
16734
+ const required = [];
16735
+ for (const [name, value] of Object.entries(parameters)) {
16736
+ if ("type" in value && value.type === "prompt") {
16737
+ const defaultPromptData = value.default ? promptDefinitionToPromptData(value.default) : void 0;
16738
+ properties[name] = {
16739
+ type: "object",
16740
+ "x-bt-type": "prompt",
16741
+ ...value.description ? { description: value.description } : {},
16742
+ ...defaultPromptData ? { default: defaultPromptData } : {}
16743
+ };
16744
+ if (!defaultPromptData) {
16745
+ required.push(name);
16746
+ }
15769
16747
  } else {
15770
- for (const result of failingResults) {
15771
- logError2(result.error, verbose);
16748
+ const schemaObj = zodToJsonSchema(value);
16749
+ properties[name] = schemaObj;
16750
+ if (!("default" in schemaObj)) {
16751
+ required.push(name);
15772
16752
  }
15773
16753
  }
15774
- if (!verbose && !jsonl) {
15775
- console.error(warning("Add --verbose to see full stack traces."));
15776
- }
15777
16754
  }
16755
+ return {
16756
+ type: "object",
16757
+ properties,
16758
+ ...required.length > 0 ? { required } : {},
16759
+ additionalProperties: true
16760
+ };
15778
16761
  }
15779
- var defaultReporter = {
15780
- name: "Braintrust default reporter",
15781
- async reportEval(evaluator, result, { verbose, jsonl }) {
15782
- const { results, summary } = result;
15783
- const failingResults = results.filter(
15784
- (r) => r.error !== void 0
15785
- );
15786
- if (failingResults.length > 0) {
15787
- reportFailures(evaluator, failingResults, { verbose, jsonl });
16762
+ var ProjectNameIdMap = class {
16763
+ nameToId = {};
16764
+ idToName = {};
16765
+ async getId(projectName) {
16766
+ if (!(projectName in this.nameToId)) {
16767
+ const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
16768
+ project_name: projectName
16769
+ });
16770
+ const result = z11.object({
16771
+ project: Project
16772
+ }).parse(response);
16773
+ const projectId = result.project.id;
16774
+ this.nameToId[projectName] = projectId;
16775
+ this.idToName[projectId] = projectName;
15788
16776
  }
15789
- if (jsonl) {
15790
- isomorph_default.writeln(JSON.stringify(summary));
15791
- } else {
15792
- isomorph_default.writeln("Experiment summary");
15793
- isomorph_default.writeln("==================");
15794
- if (summary.comparisonExperimentName) {
15795
- isomorph_default.writeln(
15796
- `${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
15797
- );
15798
- isomorph_default.writeln("");
15799
- }
15800
- const hasScores = Object.keys(summary.scores).length > 0;
15801
- const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
15802
- const hasComparison = !!summary.comparisonExperimentName;
15803
- if (hasScores || hasMetrics) {
15804
- if (hasComparison) {
15805
- isomorph_default.writeln(
15806
- "Name Value Change Improvements Regressions"
15807
- );
15808
- isomorph_default.writeln(
15809
- "----------------------------------------------------------------"
15810
- );
15811
- }
15812
- for (const score of Object.values(summary.scores)) {
15813
- const scorePercent = (score.score * 100).toFixed(2);
15814
- const scoreValue = `${scorePercent}%`;
15815
- if (hasComparison) {
15816
- let diffString = "-";
15817
- if (!isEmpty2(score.diff)) {
15818
- const diffPercent = (score.diff * 100).toFixed(2);
15819
- const diffSign = score.diff > 0 ? "+" : "";
15820
- diffString = `${diffSign}${diffPercent}%`;
15821
- }
15822
- const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
15823
- const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
15824
- isomorph_default.writeln(
15825
- `${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
15826
- );
15827
- } else {
15828
- isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
15829
- }
15830
- }
15831
- for (const metric of Object.values(summary.metrics ?? {})) {
15832
- const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
15833
- const formattedValue = metric.metric.toFixed(fractionDigits);
15834
- const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
15835
- if (hasComparison) {
15836
- let diffString = "-";
15837
- if (!isEmpty2(metric.diff)) {
15838
- const diffPercent = (metric.diff * 100).toFixed(2);
15839
- const diffSign = metric.diff > 0 ? "+" : "";
15840
- diffString = `${diffSign}${diffPercent}%`;
15841
- }
15842
- const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
15843
- const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
15844
- isomorph_default.writeln(
15845
- `${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
15846
- );
15847
- } else {
15848
- isomorph_default.writeln(
15849
- `${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
15850
- );
15851
- }
15852
- }
15853
- }
15854
- if (summary.experimentUrl) {
15855
- isomorph_default.writeln("");
15856
- isomorph_default.writeln(`View results for ${summary.experimentName}`);
15857
- isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
15858
- }
16777
+ return this.nameToId[projectName];
16778
+ }
16779
+ async getName(projectId) {
16780
+ if (!(projectId in this.idToName)) {
16781
+ const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
16782
+ id: projectId
16783
+ });
16784
+ const result = z11.array(Project).nonempty().parse(response);
16785
+ const projectName = result[0].name;
16786
+ this.idToName[projectId] = projectName;
16787
+ this.nameToId[projectName] = projectId;
15859
16788
  }
15860
- isomorph_default.writeln("");
15861
- return failingResults.length === 0;
15862
- },
15863
- async reportRun(evalReports) {
15864
- return evalReports.every((r) => r);
16789
+ return this.idToName[projectId];
16790
+ }
16791
+ async resolve(project) {
16792
+ if (project.id) {
16793
+ return project.id;
16794
+ }
16795
+ return this.getId(project.name);
15865
16796
  }
15866
16797
  };
15867
16798
 
15868
16799
  // dev/types.ts
15869
- import { z as z11 } from "zod/v3";
15870
- var evalBodySchema = z11.object({
15871
- name: z11.string(),
15872
- parameters: z11.record(z11.string(), z11.unknown()).nullish(),
16800
+ import { z as z12 } from "zod/v3";
16801
+ var evalBodySchema = z12.object({
16802
+ name: z12.string(),
16803
+ parameters: z12.record(z12.string(), z12.unknown()).nullish(),
15873
16804
  data: RunEval.shape.data,
15874
- scores: z11.array(
15875
- z11.object({
16805
+ scores: z12.array(
16806
+ z12.object({
15876
16807
  function_id: FunctionId,
15877
- name: z11.string()
16808
+ name: z12.string()
15878
16809
  })
15879
16810
  ).nullish(),
15880
- experiment_name: z11.string().nullish(),
15881
- project_id: z11.string().nullish(),
16811
+ experiment_name: z12.string().nullish(),
16812
+ project_id: z12.string().nullish(),
15882
16813
  parent: InvokeParent.optional(),
15883
- stream: z11.boolean().optional()
16814
+ stream: z12.boolean().optional()
15884
16815
  });
15885
- var evalParametersSerializedSchema = z11.record(
15886
- z11.string(),
15887
- z11.union([
15888
- z11.object({
15889
- type: z11.literal("prompt"),
16816
+ var staticParametersSchema = z12.record(
16817
+ z12.string(),
16818
+ z12.union([
16819
+ z12.object({
16820
+ type: z12.literal("prompt"),
15890
16821
  default: PromptData.optional(),
15891
- description: z11.string().optional()
16822
+ description: z12.string().optional()
15892
16823
  }),
15893
- z11.object({
15894
- type: z11.literal("data"),
15895
- schema: z11.record(z11.unknown()),
15896
- // JSON Schema
15897
- default: z11.unknown().optional(),
15898
- description: z11.string().optional()
16824
+ z12.object({
16825
+ type: z12.literal("data"),
16826
+ schema: z12.record(z12.unknown()),
16827
+ default: z12.unknown().optional(),
16828
+ description: z12.string().optional()
15899
16829
  })
15900
16830
  ])
15901
16831
  );
15902
- var evaluatorDefinitionSchema = z11.object({
15903
- parameters: evalParametersSerializedSchema.optional()
16832
+ var parametersSchema = z12.object({
16833
+ type: z12.literal("object"),
16834
+ properties: z12.record(z12.string(), z12.record(z12.unknown())),
16835
+ required: z12.array(z12.string()).optional(),
16836
+ additionalProperties: z12.boolean().optional()
16837
+ });
16838
+ var parametersSourceSchema = z12.object({
16839
+ parametersId: z12.string().optional(),
16840
+ slug: z12.string(),
16841
+ name: z12.string(),
16842
+ projectId: z12.string().optional(),
16843
+ version: z12.string().optional()
16844
+ });
16845
+ var parametersContainerSchema = z12.object({
16846
+ type: z12.literal("braintrust.parameters"),
16847
+ schema: parametersSchema,
16848
+ source: parametersSourceSchema
16849
+ });
16850
+ var staticParametersContainerSchema = z12.object({
16851
+ type: z12.literal("braintrust.staticParameters"),
16852
+ schema: staticParametersSchema,
16853
+ source: z12.null()
16854
+ });
16855
+ var serializedParametersContainerSchema = z12.union([
16856
+ parametersContainerSchema,
16857
+ staticParametersContainerSchema,
16858
+ // keeping this type here since old versions of the SDK will still pass the unwrapped schema and we need to handle this in the app
16859
+ staticParametersSchema
16860
+ ]);
16861
+ var evaluatorDefinitionSchema = z12.object({
16862
+ parameters: serializedParametersContainerSchema.optional(),
16863
+ scores: z12.array(z12.object({ name: z12.string() })).optional()
15904
16864
  });
15905
- var evaluatorDefinitionsSchema = z11.record(
15906
- z11.string(),
16865
+ var evaluatorDefinitionsSchema = z12.record(
16866
+ z12.string(),
15907
16867
  evaluatorDefinitionSchema
15908
16868
  );
15909
16869
 
@@ -15922,6 +16882,7 @@ export {
15922
16882
  CodePrompt,
15923
16883
  ContextManager,
15924
16884
  DEFAULT_FETCH_BATCH_SIZE,
16885
+ DEFAULT_MAX_REQUEST_SIZE,
15925
16886
  Dataset2 as Dataset,
15926
16887
  ERR_PERMALINK,
15927
16888
  Eval,
@@ -15932,6 +16893,7 @@ export {
15932
16893
  IDGenerator,
15933
16894
  JSONAttachment,
15934
16895
  LEGACY_CACHED_HEADER,
16896
+ LOGS3_OVERFLOW_REFERENCE_TYPE,
15935
16897
  LazyValue,
15936
16898
  Logger,
15937
16899
  LoginInvalidOrgError,
@@ -15956,8 +16918,10 @@ export {
15956
16918
  _exportsForTestingOnly,
15957
16919
  _internalGetGlobalState,
15958
16920
  _internalSetInitialState,
16921
+ addAzureBlobHeaders,
15959
16922
  braintrustStreamChunkSchema,
15960
16923
  buildLocalSummary,
16924
+ constructLogs3OverflowRequest,
15961
16925
  createFinalValuePassThroughStream,
15962
16926
  currentExperiment,
15963
16927
  currentLogger,
@@ -15982,15 +16946,18 @@ export {
15982
16946
  initLogger,
15983
16947
  invoke,
15984
16948
  isTemplateFormat,
16949
+ loadParameters,
15985
16950
  loadPrompt,
15986
16951
  log,
15987
16952
  logError,
15988
16953
  login,
15989
16954
  loginToState,
16955
+ logs3OverflowUploadSchema,
15990
16956
  newId,
15991
16957
  parseCachedHeader,
15992
16958
  parseTemplateFormat,
15993
16959
  permalink,
16960
+ pickLogs3OverflowObjectIds,
15994
16961
  projects,
15995
16962
  promptContentsSchema,
15996
16963
  promptDefinitionSchema,
@@ -16011,6 +16978,8 @@ export {
16011
16978
  traceable,
16012
16979
  traced,
16013
16980
  updateSpan,
16981
+ uploadLogs3OverflowPayload,
16982
+ utf8ByteLength,
16014
16983
  withCurrent,
16015
16984
  withDataset,
16016
16985
  withExperiment,