braintrust 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1275,7 +1275,7 @@ var require_package = __commonJS({
1275
1275
  "package.json"(exports2, module2) {
1276
1276
  module2.exports = {
1277
1277
  name: "braintrust",
1278
- version: "2.2.0",
1278
+ version: "2.2.1",
1279
1279
  description: "SDK for integrating Braintrust",
1280
1280
  repository: {
1281
1281
  type: "git",
@@ -1394,6 +1394,7 @@ var require_package = __commonJS({
1394
1394
  "@next/env": "^14.2.3",
1395
1395
  "@types/nunjucks": "^3.2.6",
1396
1396
  "@vercel/functions": "^1.0.2",
1397
+ ajv: "^8.17.1",
1397
1398
  argparse: "^2.0.1",
1398
1399
  boxen: "^8.0.1",
1399
1400
  chalk: "^4.1.2",
@@ -1523,11 +1524,19 @@ function getIdGenerator() {
1523
1524
 
1524
1525
  // util/db_fields.ts
1525
1526
  var TRANSACTION_ID_FIELD = "_xact_id";
1527
+ var OBJECT_DELETE_FIELD = "_object_delete";
1526
1528
  var IS_MERGE_FIELD = "_is_merge";
1527
1529
  var AUDIT_SOURCE_FIELD = "_audit_source";
1528
1530
  var AUDIT_METADATA_FIELD = "_audit_metadata";
1529
1531
  var VALID_SOURCES = ["app", "api", "external"];
1530
- var PARENT_ID_FIELD = "_parent_id";
1532
+ var OBJECT_ID_KEYS = [
1533
+ "experiment_id",
1534
+ "dataset_id",
1535
+ "prompt_session_id",
1536
+ "project_id",
1537
+ "log_id",
1538
+ "function_data"
1539
+ ];
1531
1540
 
1532
1541
  // util/span_identifier_v3.ts
1533
1542
  var uuid3 = __toESM(require("uuid"));
@@ -2205,13 +2214,6 @@ function mergeDictsWithPathsHelper({
2205
2214
  function mergeDicts(mergeInto, mergeFrom) {
2206
2215
  return mergeDictsWithPaths({ mergeInto, mergeFrom, mergePaths: [] });
2207
2216
  }
2208
- function mapAt(m, k) {
2209
- const ret = m.get(k);
2210
- if (ret === void 0) {
2211
- throw new Error(`Map does not contain key ${k}`);
2212
- }
2213
- return ret;
2214
- }
2215
2217
  function recordFind(m, k) {
2216
2218
  return m[k];
2217
2219
  }
@@ -2226,72 +2228,8 @@ function getObjValueByPath(row, path8) {
2226
2228
  return curr;
2227
2229
  }
2228
2230
 
2229
- // util/graph_util.ts
2230
- function depthFirstSearch(args) {
2231
- const { graph, firstVisitF, lastVisitF } = args;
2232
- for (const vs of graph.values()) {
2233
- for (const v of vs.values()) {
2234
- if (!graph.has(v)) {
2235
- throw new Error(`Outgoing vertex ${v} must be a key in the graph`);
2236
- }
2237
- }
2238
- }
2239
- const firstVisitedVertices = /* @__PURE__ */ new Set();
2240
- const visitationOrder = args.visitationOrder ?? [...graph.keys()];
2241
- const events = visitationOrder.map((vertex) => ({ eventType: "first", vertex, extras: {} })).reverse();
2242
- while (events.length) {
2243
- const { eventType, vertex, extras } = events.pop();
2244
- if (eventType === "last") {
2245
- lastVisitF?.(vertex);
2246
- continue;
2247
- }
2248
- if (firstVisitedVertices.has(vertex)) {
2249
- continue;
2250
- }
2251
- firstVisitedVertices.add(vertex);
2252
- firstVisitF?.(vertex, { parentVertex: extras.parentVertex });
2253
- events.push({ eventType: "last", vertex, extras: {} });
2254
- mapAt(graph, vertex).forEach((child) => {
2255
- events.push({
2256
- eventType: "first",
2257
- vertex: child,
2258
- extras: { parentVertex: vertex }
2259
- });
2260
- });
2261
- }
2262
- }
2263
- function undirectedConnectedComponents(graph) {
2264
- const directedGraph = new Map(
2265
- [...graph.vertices].map((v) => [v, /* @__PURE__ */ new Set()])
2266
- );
2267
- for (const [i, j] of graph.edges) {
2268
- mapAt(directedGraph, i).add(j);
2269
- mapAt(directedGraph, j).add(i);
2270
- }
2271
- let labelCounter = 0;
2272
- const vertexLabels = /* @__PURE__ */ new Map();
2273
- const firstVisitF = (vertex, args) => {
2274
- const label = args?.parentVertex !== void 0 ? mapAt(vertexLabels, args?.parentVertex) : labelCounter++;
2275
- vertexLabels.set(vertex, label);
2276
- };
2277
- depthFirstSearch({ graph: directedGraph, firstVisitF });
2278
- const output = Array.from({ length: labelCounter }).map(() => []);
2279
- for (const [vertex, label] of vertexLabels.entries()) {
2280
- output[label].push(vertex);
2281
- }
2282
- return output;
2283
- }
2284
- function topologicalSort(graph, visitationOrder) {
2285
- const reverseOrdering = [];
2286
- const lastVisitF = (vertex) => {
2287
- reverseOrdering.push(vertex);
2288
- };
2289
- depthFirstSearch({ graph, lastVisitF, visitationOrder });
2290
- return reverseOrdering.reverse();
2291
- }
2292
-
2293
2231
  // util/merge_row_batch.ts
2294
- function generateMergedRowKey(row, useParentIdForId) {
2232
+ function generateMergedRowKey(row) {
2295
2233
  return JSON.stringify(
2296
2234
  [
2297
2235
  "org_id",
@@ -2300,7 +2238,7 @@ function generateMergedRowKey(row, useParentIdForId) {
2300
2238
  "dataset_id",
2301
2239
  "prompt_session_id",
2302
2240
  "log_id",
2303
- useParentIdForId ?? false ? PARENT_ID_FIELD : "id"
2241
+ "id"
2304
2242
  ].map((k) => row[k])
2305
2243
  );
2306
2244
  }
@@ -2354,96 +2292,34 @@ function mergeRowBatch(rows) {
2354
2292
  rowGroups.set(key, row);
2355
2293
  }
2356
2294
  }
2357
- const merged = [...rowGroups.values()];
2358
- const rowToLabel = new Map(
2359
- merged.map((r, i) => [generateMergedRowKey(r), i])
2360
- );
2361
- const graph = new Map(
2362
- Array.from({ length: merged.length }).map((_, i) => [i, /* @__PURE__ */ new Set()])
2363
- );
2364
- merged.forEach((r, i) => {
2365
- const parentId = r[PARENT_ID_FIELD];
2366
- if (!parentId) {
2367
- return;
2368
- }
2369
- const parentRowKey = generateMergedRowKey(
2370
- r,
2371
- true
2372
- /* useParentIdForId */
2373
- );
2374
- const parentLabel = rowToLabel.get(parentRowKey);
2375
- if (parentLabel !== void 0) {
2376
- mapAt(graph, parentLabel).add(i);
2377
- }
2378
- });
2379
- const connectedComponents = undirectedConnectedComponents({
2380
- vertices: new Set(graph.keys()),
2381
- edges: new Set(
2382
- [...graph.entries()].flatMap(
2383
- ([k, vs]) => [...vs].map((v) => {
2384
- const ret = [k, v];
2385
- return ret;
2386
- })
2387
- )
2388
- )
2389
- });
2390
- const buckets = connectedComponents.map(
2391
- (cc) => topologicalSort(
2392
- graph,
2393
- cc
2394
- /* visitationOrder */
2395
- )
2396
- );
2397
- return buckets.map((bucket) => bucket.map((i) => merged[i]));
2295
+ return [...rowGroups.values()];
2398
2296
  }
2399
2297
  function batchItems(args) {
2400
- let { items } = args;
2298
+ const { items } = args;
2401
2299
  const batchMaxNumItems = args.batchMaxNumItems ?? Number.POSITIVE_INFINITY;
2402
2300
  const batchMaxNumBytes = args.batchMaxNumBytes ?? Number.POSITIVE_INFINITY;
2301
+ const getByteSize = args.getByteSize;
2403
2302
  const output = [];
2404
- let nextItems = [];
2405
- let batchSet = [];
2406
2303
  let batch = [];
2407
2304
  let batchLen = 0;
2408
2305
  function addToBatch(item) {
2409
2306
  batch.push(item);
2410
- batchLen += item.length;
2307
+ batchLen += getByteSize(item);
2411
2308
  }
2412
2309
  function flushBatch() {
2413
- batchSet.push(batch);
2310
+ output.push(batch);
2414
2311
  batch = [];
2415
2312
  batchLen = 0;
2416
2313
  }
2417
- while (items.length) {
2418
- for (const bucket of items) {
2419
- let i = 0;
2420
- for (const item of bucket) {
2421
- if (batch.length === 0 || item.length + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems) {
2422
- addToBatch(item);
2423
- } else if (i === 0) {
2424
- flushBatch();
2425
- addToBatch(item);
2426
- } else {
2427
- break;
2428
- }
2429
- ++i;
2430
- }
2431
- if (i < bucket.length) {
2432
- nextItems.push(bucket.slice(i));
2433
- }
2434
- if (batchLen >= batchMaxNumBytes || batch.length > batchMaxNumItems) {
2435
- flushBatch();
2436
- }
2437
- }
2438
- if (batch.length) {
2314
+ for (const item of items) {
2315
+ const itemSize = getByteSize(item);
2316
+ if (batch.length > 0 && !(itemSize + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems)) {
2439
2317
  flushBatch();
2440
2318
  }
2441
- if (batchSet.length) {
2442
- output.push(batchSet);
2443
- batchSet = [];
2444
- }
2445
- items = nextItems;
2446
- nextItems = [];
2319
+ addToBatch(item);
2320
+ }
2321
+ if (batch.length > 0) {
2322
+ flushBatch();
2447
2323
  }
2448
2324
  return output;
2449
2325
  }
@@ -3050,10 +2926,15 @@ var FunctionTypeEnum = import_v36.z.enum([
3050
2926
  "preprocessor",
3051
2927
  "facet",
3052
2928
  "classifier",
3053
- "tag"
2929
+ "tag",
2930
+ "parameters"
3054
2931
  ]);
3055
2932
  var NullableSavedFunctionId = import_v36.z.union([
3056
- import_v36.z.object({ type: import_v36.z.literal("function"), id: import_v36.z.string() }),
2933
+ import_v36.z.object({
2934
+ type: import_v36.z.literal("function"),
2935
+ id: import_v36.z.string(),
2936
+ version: import_v36.z.string().optional()
2937
+ }),
3057
2938
  import_v36.z.object({
3058
2939
  type: import_v36.z.literal("global"),
3059
2940
  name: import_v36.z.string(),
@@ -3061,6 +2942,67 @@ var NullableSavedFunctionId = import_v36.z.union([
3061
2942
  }),
3062
2943
  import_v36.z.null()
3063
2944
  ]);
2945
// Zod schema for a topic-map report. Only `version: 1` is accepted.
// `settings` is required in full except for its four numeric tuning fields;
// every key of `query_settings` is optional (`.partial()`); `embedding_points`
// may be omitted entirely.
var TopicMapReport = (() => {
  const { z } = import_v36;
  // Builds a fresh "number | null, optional" schema for each field using it.
  const optionalNullableNumber = () => z.union([z.number(), z.null()]).optional();

  const settings = z.object({
    algorithm: z.enum(["hdbscan", "kmeans", "hierarchical"]),
    dimension_reduction: z.enum(["umap", "pca", "none"]),
    vector_field: z.string(),
    embedding_model: z.string(),
    n_clusters: optionalNullableNumber(),
    umap_dimensions: optionalNullableNumber(),
    min_cluster_size: optionalNullableNumber(),
    min_samples: optionalNullableNumber()
  });

  const querySettings = z.object({
    hierarchy_threshold: z.union([z.number(), z.null()]),
    auto_naming: z.boolean(),
    skip_cache: z.boolean(),
    viz_mode: z.enum(["bar", "scatter"]),
    naming_model: z.string()
  }).partial();

  const sample = z.object({
    id: z.string(),
    text: z.string(),
    root_span_id: z.string(),
    span_id: z.string()
  });

  const cluster = z.object({
    cluster_id: z.number(),
    parent_cluster_id: optionalNullableNumber(),
    topic_id: z.string(),
    count: z.number(),
    sample_texts: z.array(z.string()),
    samples: z.array(sample),
    name: z.string().optional(),
    description: z.string().optional(),
    keywords: z.array(z.string()).optional(),
    centroid: z.array(z.number()).optional(),
    parent_id: optionalNullableNumber(),
    is_leaf: z.boolean().optional(),
    depth: z.number().optional()
  });

  const embeddingPoint = z.object({
    x: z.number(),
    y: z.number(),
    cluster: z.number(),
    text: z.string().optional()
  });

  return z.object({
    version: z.literal(1),
    created_at: z.string().optional(),
    settings,
    query_settings: querySettings,
    clusters: z.array(cluster),
    embedding_points: z.array(embeddingPoint).optional()
  });
})();
2998
// Zod schema for function data tagged `type: "topic_map"`. The three string
// fields are required; `distance_threshold` and the embedded `report`
// (validated by TopicMapReport) are optional.
var TopicMapData = (() => {
  const { z } = import_v36;
  const requiredText = () => z.string();
  return z.object({
    type: z.literal("topic_map"),
    source_facet: requiredText(),
    embedding_model: requiredText(),
    bundle_key: requiredText(),
    distance_threshold: z.number().optional(),
    report: TopicMapReport.optional()
  });
})();
3064
3006
  var BatchedFacetData = import_v36.z.object({
3065
3007
  type: import_v36.z.literal("batched_facet"),
3066
3008
  preprocessor: NullableSavedFunctionId.and(import_v36.z.unknown()).optional(),
@@ -3069,9 +3011,17 @@ var BatchedFacetData = import_v36.z.object({
3069
3011
  name: import_v36.z.string(),
3070
3012
  prompt: import_v36.z.string(),
3071
3013
  model: import_v36.z.string().optional(),
3014
+ embedding_model: import_v36.z.string().optional(),
3072
3015
  no_match_pattern: import_v36.z.string().optional()
3073
3016
  })
3074
- )
3017
+ ),
3018
+ topic_maps: import_v36.z.record(
3019
+ import_v36.z.object({
3020
+ function_name: import_v36.z.string(),
3021
+ topic_map_id: import_v36.z.string().optional(),
3022
+ topic_map_data: TopicMapData
3023
+ })
3024
+ ).optional()
3075
3025
  });
3076
3026
  var BraintrustModelParams = import_v36.z.object({
3077
3027
  use_cache: import_v36.z.boolean(),
@@ -3282,6 +3232,18 @@ var ObjectReferenceNullish = import_v36.z.union([
3282
3232
  }),
3283
3233
  import_v36.z.null()
3284
3234
  ]);
3235
// Zod schema for a reference to a saved function. Two shapes are accepted:
//   - { type: "function", id, version? }
//   - { type: "global", name, function_type? }, where a missing
//     `function_type` is filled in as "scorer".
var SavedFunctionId = (() => {
  const { z } = import_v36;
  const byFunctionId = z.object({
    type: z.literal("function"),
    id: z.string(),
    version: z.string().optional()
  });
  const byGlobalName = z.object({
    type: z.literal("global"),
    name: z.string(),
    function_type: FunctionTypeEnum.optional().default("scorer")
  });
  return z.union([byFunctionId, byGlobalName]);
})();
3285
3247
  var DatasetEvent = import_v36.z.object({
3286
3248
  id: import_v36.z.string(),
3287
3249
  _xact_id: import_v36.z.string(),
@@ -3301,7 +3263,36 @@ var DatasetEvent = import_v36.z.object({
3301
3263
  is_root: import_v36.z.union([import_v36.z.boolean(), import_v36.z.null()]).optional(),
3302
3264
  origin: ObjectReferenceNullish.optional(),
3303
3265
  comments: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
3304
- audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional()
3266
+ audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
3267
+ facets: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
3268
+ classifications: import_v36.z.union([
3269
+ import_v36.z.record(
3270
+ import_v36.z.array(
3271
+ import_v36.z.object({
3272
+ id: import_v36.z.string(),
3273
+ label: import_v36.z.string().optional(),
3274
+ confidence: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
3275
+ metadata: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
3276
+ source: SavedFunctionId.and(
3277
+ import_v36.z.union([
3278
+ import_v36.z.object({
3279
+ type: import_v36.z.literal("function"),
3280
+ id: import_v36.z.string(),
3281
+ version: import_v36.z.string().optional()
3282
+ }),
3283
+ import_v36.z.object({
3284
+ type: import_v36.z.literal("global"),
3285
+ name: import_v36.z.string(),
3286
+ function_type: FunctionTypeEnum.optional().default("scorer")
3287
+ }),
3288
+ import_v36.z.null()
3289
+ ])
3290
+ ).optional()
3291
+ })
3292
+ )
3293
+ ),
3294
+ import_v36.z.null()
3295
+ ]).optional()
3305
3296
  });
3306
3297
  var EnvVar = import_v36.z.object({
3307
3298
  id: import_v36.z.string().uuid(),
@@ -3378,7 +3369,8 @@ var SpanType = import_v36.z.union([
3378
3369
  "automation",
3379
3370
  "facet",
3380
3371
  "preprocessor",
3381
- "classifier"
3372
+ "classifier",
3373
+ "review"
3382
3374
  ]),
3383
3375
  import_v36.z.null()
3384
3376
  ]);
@@ -3419,10 +3411,43 @@ var ExperimentEvent = import_v36.z.object({
3419
3411
  is_root: import_v36.z.union([import_v36.z.boolean(), import_v36.z.null()]).optional(),
3420
3412
  origin: ObjectReferenceNullish.optional(),
3421
3413
  comments: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
3422
- audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional()
3414
+ audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
3415
+ facets: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
3416
+ classifications: import_v36.z.union([
3417
+ import_v36.z.record(
3418
+ import_v36.z.array(
3419
+ import_v36.z.object({
3420
+ id: import_v36.z.string(),
3421
+ label: import_v36.z.string().optional(),
3422
+ confidence: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
3423
+ metadata: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
3424
+ source: SavedFunctionId.and(
3425
+ import_v36.z.union([
3426
+ import_v36.z.object({
3427
+ type: import_v36.z.literal("function"),
3428
+ id: import_v36.z.string(),
3429
+ version: import_v36.z.string().optional()
3430
+ }),
3431
+ import_v36.z.object({
3432
+ type: import_v36.z.literal("global"),
3433
+ name: import_v36.z.string(),
3434
+ function_type: FunctionTypeEnum.optional().default("scorer")
3435
+ }),
3436
+ import_v36.z.null()
3437
+ ])
3438
+ ).optional()
3439
+ })
3440
+ )
3441
+ ),
3442
+ import_v36.z.null()
3443
+ ]).optional()
3423
3444
  });
3424
3445
  var ExtendedSavedFunctionId = import_v36.z.union([
3425
- import_v36.z.object({ type: import_v36.z.literal("function"), id: import_v36.z.string() }),
3446
+ import_v36.z.object({
3447
+ type: import_v36.z.literal("function"),
3448
+ id: import_v36.z.string(),
3449
+ version: import_v36.z.string().optional()
3450
+ }),
3426
3451
  import_v36.z.object({
3427
3452
  type: import_v36.z.literal("global"),
3428
3453
  name: import_v36.z.string(),
@@ -3439,6 +3464,7 @@ var FacetData = import_v36.z.object({
3439
3464
  preprocessor: NullableSavedFunctionId.and(import_v36.z.unknown()).optional(),
3440
3465
  prompt: import_v36.z.string(),
3441
3466
  model: import_v36.z.string().optional(),
3467
+ embedding_model: import_v36.z.string().optional(),
3442
3468
  no_match_pattern: import_v36.z.string().optional()
3443
3469
  });
3444
3470
  var PromptBlockDataNullish = import_v36.z.union([
@@ -3528,14 +3554,6 @@ var PromptParserNullish = import_v36.z.union([
3528
3554
  }),
3529
3555
  import_v36.z.null()
3530
3556
  ]);
3531
- var SavedFunctionId = import_v36.z.union([
3532
- import_v36.z.object({ type: import_v36.z.literal("function"), id: import_v36.z.string() }),
3533
- import_v36.z.object({
3534
- type: import_v36.z.literal("global"),
3535
- name: import_v36.z.string(),
3536
- function_type: FunctionTypeEnum.optional().default("scorer")
3537
- })
3538
- ]);
3539
3557
  var PromptDataNullish = import_v36.z.union([
3540
3558
  import_v36.z.object({
3541
3559
  prompt: PromptBlockDataNullish,
@@ -3586,7 +3604,8 @@ var FunctionTypeEnumNullish = import_v36.z.union([
3586
3604
  "preprocessor",
3587
3605
  "facet",
3588
3606
  "classifier",
3589
- "tag"
3607
+ "tag",
3608
+ "parameters"
3590
3609
  ]),
3591
3610
  import_v36.z.null()
3592
3611
  ]);
@@ -3678,7 +3697,8 @@ var FunctionData = import_v36.z.union([
3678
3697
  type: import_v36.z.literal("remote_eval"),
3679
3698
  endpoint: import_v36.z.string(),
3680
3699
  eval_name: import_v36.z.string(),
3681
- parameters: import_v36.z.object({}).partial().passthrough()
3700
+ parameters: import_v36.z.object({}).partial().passthrough(),
3701
+ parameters_version: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional()
3682
3702
  }),
3683
3703
  import_v36.z.object({
3684
3704
  type: import_v36.z.literal("global"),
@@ -3687,7 +3707,18 @@ var FunctionData = import_v36.z.union([
3687
3707
  config: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional()
3688
3708
  }),
3689
3709
  FacetData,
3690
- BatchedFacetData
3710
+ BatchedFacetData,
3711
+ import_v36.z.object({
3712
+ type: import_v36.z.literal("parameters"),
3713
+ data: import_v36.z.object({}).partial().passthrough(),
3714
+ __schema: import_v36.z.object({
3715
+ type: import_v36.z.literal("object"),
3716
+ properties: import_v36.z.record(import_v36.z.object({}).partial().passthrough()),
3717
+ required: import_v36.z.array(import_v36.z.string()).optional(),
3718
+ additionalProperties: import_v36.z.boolean().optional()
3719
+ })
3720
+ }),
3721
+ TopicMapData.and(import_v36.z.unknown())
3691
3722
  ]);
3692
3723
  var Function2 = import_v36.z.object({
3693
3724
  id: import_v36.z.string().uuid(),
@@ -3717,7 +3748,13 @@ var Function2 = import_v36.z.object({
3717
3748
  import_v36.z.null()
3718
3749
  ]).optional()
3719
3750
  });
3720
- var FunctionFormat = import_v36.z.enum(["llm", "code", "global", "graph"]);
3751
+ var FunctionFormat = import_v36.z.enum([
3752
+ "llm",
3753
+ "code",
3754
+ "global",
3755
+ "graph",
3756
+ "topic_map"
3757
+ ]);
3721
3758
  var PromptData = import_v36.z.object({
3722
3759
  prompt: PromptBlockDataNullish,
3723
3760
  options: PromptOptionsNullish,
@@ -3800,13 +3837,14 @@ var FunctionObjectType = import_v36.z.enum([
3800
3837
  "custom_view",
3801
3838
  "preprocessor",
3802
3839
  "facet",
3803
- "classifier"
3840
+ "classifier",
3841
+ "parameters"
3804
3842
  ]);
3805
3843
  var FunctionOutputType = import_v36.z.enum([
3806
3844
  "completion",
3807
3845
  "score",
3808
3846
  "facet",
3809
- "tag",
3847
+ "classification",
3810
3848
  "any"
3811
3849
  ]);
3812
3850
  var GitMetadataSettings = import_v36.z.object({
@@ -3842,6 +3880,10 @@ var GroupScope = import_v36.z.object({
3842
3880
  idle_seconds: import_v36.z.number().optional()
3843
3881
  });
3844
3882
  var IfExists = import_v36.z.enum(["error", "ignore", "replace"]);
3883
// Zod schema accepting one of the three rendering-mode strings, or null.
var ImageRenderingMode = (() => {
  const { z } = import_v36;
  const mode = z.enum(["auto", "click_to_load", "blocked"]);
  return z.union([mode, z.null()]);
})();
3845
3887
  var InvokeParent = import_v36.z.union([
3846
3888
  import_v36.z.object({
3847
3889
  object_type: import_v36.z.enum(["project_logs", "experiment", "playground_logs"]),
@@ -3934,7 +3976,8 @@ var Organization = import_v36.z.object({
3934
3976
  is_universal_api: import_v36.z.union([import_v36.z.boolean(), import_v36.z.null()]).optional(),
3935
3977
  proxy_url: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
3936
3978
  realtime_url: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
3937
- created: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional()
3979
+ created: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
3980
+ image_rendering_mode: ImageRenderingMode.optional()
3938
3981
  });
3939
3982
  var ProjectSettings = import_v36.z.union([
3940
3983
  import_v36.z.object({
@@ -4075,7 +4118,36 @@ var ProjectLogsEvent = import_v36.z.object({
4075
4118
  origin: ObjectReferenceNullish.optional(),
4076
4119
  comments: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
4077
4120
  audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
4078
- _async_scoring_state: import_v36.z.unknown().optional()
4121
+ _async_scoring_state: import_v36.z.unknown().optional(),
4122
+ facets: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
4123
+ classifications: import_v36.z.union([
4124
+ import_v36.z.record(
4125
+ import_v36.z.array(
4126
+ import_v36.z.object({
4127
+ id: import_v36.z.string(),
4128
+ label: import_v36.z.string().optional(),
4129
+ confidence: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
4130
+ metadata: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
4131
+ source: SavedFunctionId.and(
4132
+ import_v36.z.union([
4133
+ import_v36.z.object({
4134
+ type: import_v36.z.literal("function"),
4135
+ id: import_v36.z.string(),
4136
+ version: import_v36.z.string().optional()
4137
+ }),
4138
+ import_v36.z.object({
4139
+ type: import_v36.z.literal("global"),
4140
+ name: import_v36.z.string(),
4141
+ function_type: FunctionTypeEnum.optional().default("scorer")
4142
+ }),
4143
+ import_v36.z.null()
4144
+ ])
4145
+ ).optional()
4146
+ })
4147
+ )
4148
+ ),
4149
+ import_v36.z.null()
4150
+ ]).optional()
4079
4151
  });
4080
4152
  var ProjectScoreType = import_v36.z.enum([
4081
4153
  "slider",
@@ -4377,12 +4449,15 @@ var View = import_v36.z.object({
4377
4449
  "datasets",
4378
4450
  "dataset",
4379
4451
  "prompts",
4452
+ "parameters",
4380
4453
  "tools",
4381
4454
  "scorers",
4382
4455
  "classifiers",
4383
4456
  "logs",
4384
4457
  "monitor",
4385
- "for_review"
4458
+ "for_review_project_log",
4459
+ "for_review_experiments",
4460
+ "for_review_datasets"
4386
4461
  ]),
4387
4462
  name: import_v36.z.string(),
4388
4463
  created: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
@@ -5139,6 +5214,52 @@ var PromptCache = class {
5139
5214
  }
5140
5215
  };
5141
5216
 
5217
+ // src/prompt-cache/parameters-cache.ts
5218
/**
 * Builds the string key under which a parameters object is cached.
 *
 * A truthy `key.id` wins and yields `parameters:id:<id>`; the other fields
 * are then ignored. Otherwise the key is
 * `parameters:<projectId ?? projectName>:<slug>:<version ?? "latest">`.
 *
 * @param {{id?: string, projectId?: string, projectName?: string, slug?: string, version?: string}} key
 * @returns {string} The cache key.
 * @throws {Error} When no id is given and either the project
 *   (`projectId`/`projectName`) or the `slug` is missing.
 */
function createCacheKey2(key) {
  const { id, projectId, projectName, slug, version } = key;
  if (id) {
    return `parameters:id:${id}`;
  }
  const project = projectId ?? projectName;
  if (!project) {
    throw new Error("Either projectId or projectName must be provided");
  }
  if (!slug) {
    throw new Error("Slug must be provided when not using ID");
  }
  return ["parameters", project, slug, version ?? "latest"].join(":");
}
5231
/**
 * Two-level cache for parameters objects: a memory cache backed by an
 * optional disk cache. Entries are keyed by `createCacheKey2(key)`.
 */
var ParametersCache = class {
  memoryCache;
  diskCache;
  /**
   * @param {{memoryCache: object, diskCache?: object}} options - The memory
   *   cache is used with synchronous `get`/`set`; the disk cache, when
   *   present, is awaited on `get`/`set`.
   */
  constructor(options) {
    const { memoryCache, diskCache } = options;
    this.memoryCache = memoryCache;
    this.diskCache = diskCache;
  }
  /**
   * Looks the key up in memory first, then on disk. A disk hit is copied
   * into the memory cache before it is returned.
   *
   * @returns {Promise<any>} The cached value, or `undefined` on a miss.
   */
  async get(key) {
    const cacheKey = createCacheKey2(key);
    const fromMemory = this.memoryCache.get(cacheKey);
    if (fromMemory !== void 0) {
      return fromMemory;
    }
    if (!this.diskCache) {
      return void 0;
    }
    const fromDisk = await this.diskCache.get(cacheKey);
    if (!fromDisk) {
      // Any falsy disk result counts as a miss.
      return void 0;
    }
    this.memoryCache.set(cacheKey, fromDisk);
    return fromDisk;
  }
  /**
   * Stores the value in memory and then, when a disk cache is configured,
   * on disk as well.
   */
  async set(key, value) {
    const cacheKey = createCacheKey2(key);
    this.memoryCache.set(cacheKey, value);
    if (!this.diskCache) {
      return;
    }
    await this.diskCache.set(cacheKey, value);
  }
};
5262
+
5142
5263
  // src/span-cache.ts
5143
5264
  var activeCaches = /* @__PURE__ */ new Set();
5144
5265
  var exitHandlersRegistered = false;
@@ -5429,7 +5550,24 @@ var SpanCache = class {
5429
5550
  // src/logger.ts
5430
5551
  var BRAINTRUST_ATTACHMENT = BraintrustAttachmentReference.shape.type.value;
5431
5552
  var EXTERNAL_ATTACHMENT = ExternalAttachmentReference.shape.type.value;
5553
+ var LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow";
5432
5554
  var BRAINTRUST_PARAMS = Object.keys(BraintrustModelParams.shape);
5555
+ var DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024;
5556
// Zod schema for a function row whose `function_type` and
// `function_data.type` are both the literal "parameters". `id` and
// `project_id` must be UUID strings; `description` and `metadata` may be
// null or absent; `function_data.data` is optional while
// `function_data.__schema` is required.
var parametersRowSchema = (() => {
  const { z } = import_v38;
  const functionData = z.object({
    type: z.literal("parameters"),
    data: z.record(z.unknown()).optional(),
    __schema: z.record(z.unknown())
  });
  const looseObjectOrNull = z.union([z.object({}).partial().passthrough(), z.null()]);
  return z.object({
    id: z.string().uuid(),
    _xact_id: z.string(),
    project_id: z.string().uuid(),
    name: z.string(),
    slug: z.string(),
    description: z.union([z.string(), z.null()]).optional(),
    function_type: z.literal("parameters"),
    function_data: functionData,
    metadata: looseObjectOrNull.optional()
  });
})();
5433
5571
  var LoginInvalidOrgError = class extends Error {
5434
5572
  constructor(message) {
5435
5573
  super(message);
@@ -5606,6 +5744,17 @@ var BraintrustState = class _BraintrustState {
5606
5744
  max: Number(isomorph_default.getEnv("BRAINTRUST_PROMPT_CACHE_DISK_MAX")) ?? 1 << 20
5607
5745
  }) : void 0;
5608
5746
  this.promptCache = new PromptCache({ memoryCache, diskCache });
5747
+ const parametersMemoryCache = new LRUCache({
5748
+ max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_MEMORY_MAX")) ?? 1 << 10
5749
+ });
5750
+ const parametersDiskCache = canUseDiskCache() ? new DiskCache({
5751
+ cacheDir: isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DIR") ?? `${isomorph_default.getEnv("HOME") ?? isomorph_default.homedir()}/.braintrust/parameters_cache`,
5752
+ max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DISK_MAX")) ?? 1 << 20
5753
+ }) : void 0;
5754
+ this.parametersCache = new ParametersCache({
5755
+ memoryCache: parametersMemoryCache,
5756
+ diskCache: parametersDiskCache
5757
+ });
5609
5758
  this.spanCache = new SpanCache({ disabled: loginParams.disableSpanCache });
5610
5759
  }
5611
5760
  id;
@@ -5635,6 +5784,7 @@ var BraintrustState = class _BraintrustState {
5635
5784
  _apiConn = null;
5636
5785
  _proxyConn = null;
5637
5786
  promptCache;
5787
+ parametersCache;
5638
5788
  spanCache;
5639
5789
  _idGenerator = null;
5640
5790
  _contextManager = null;
@@ -6736,8 +6886,100 @@ function castLogger(logger, asyncFlush) {
6736
6886
  }
6737
6887
  return logger;
6738
6888
  }
6889
// Zod schema for the upload instructions of a logs3 overflow payload:
// the HTTP method ("PUT" or "POST"), a signed URL, optional string-valued
// `headers` and `fields` records, and a non-empty `key`.
var logs3OverflowUploadSchema = (() => {
  const { z } = import_v38;
  const stringRecord = () => z.record(z.string());
  return z.object({
    method: z.enum(["PUT", "POST"]),
    signedUrl: z.string().url(),
    headers: stringRecord().optional(),
    fields: stringRecord().optional(),
    key: z.string().min(1)
  });
})();
6739
6896
  function constructLogs3Data(items) {
6740
- return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
6897
+ return `{"rows": ${constructJsonArray(items.map((i) => i.str))}, "api_version": 2}`;
6898
+ }
6899
/**
 * Builds a logs3 request object whose `rows` is a reference
 * (`{ type: LOGS3_OVERFLOW_REFERENCE_TYPE, key }`) instead of inline rows.
 *
 * @param {string} key - Key to place in the reference.
 * @returns {{rows: {type: string, key: string}, api_version: number}}
 */
function constructLogs3OverflowRequest(key) {
  const rows = { type: LOGS3_OVERFLOW_REFERENCE_TYPE, key };
  return { rows, api_version: 2 };
}
6908
/**
 * Copies from `row` every property named in OBJECT_ID_KEYS that is present
 * on it (checked with the `in` operator, so inherited properties and
 * explicitly-undefined values are included).
 *
 * @param {object} row
 * @returns {object} A new object holding only the matching keys, in
 *   OBJECT_ID_KEYS order.
 */
function pickLogs3OverflowObjectIds(row) {
  const presentKeys = OBJECT_ID_KEYS.filter((key) => key in row);
  return Object.fromEntries(presentKeys.map((key) => [key, row[key]]));
}
6917
/**
 * Uploads `payload` to `upload.signedUrl`.
 *
 * - `upload.method === "POST"`: sends a multipart form holding every entry
 *   of `upload.fields` plus the payload as a `file` part. The part's type is
 *   `fields["Content-Type"]`, or "application/json" when that is absent.
 *   Any content-type entry in `upload.headers` is dropped (matched
 *   case-insensitively) and the remaining headers are sent as-is.
 * - Any other method: sends the payload as the body of a PUT, using a copy
 *   of `upload.headers` that is first passed to `addAzureBlobHeaders`
 *   (defined elsewhere in this file).
 *
 * @param {{method: string, signedUrl: string, headers?: object, fields?: object}} upload
 * @param {*} payload - Body to upload.
 * @param {Function} [fetchFn=fetch] - fetch-compatible function.
 * @returns {Promise<void>}
 * @throws {Error} When a POST upload has no `fields`, when `FormData`/`Blob`
 *   are unavailable for a POST, or when the response is not `ok`.
 */
async function uploadLogs3OverflowPayload(upload, payload, fetchFn = fetch) {
  // Shared by both paths. The response text is best-effort: a failure to
  // read it is replaced with an empty string.
  const ensureOk = async (res) => {
    if (res.ok) {
      return;
    }
    const detail = await res.text().catch(() => "");
    throw new Error(
      `Failed to upload logs3 overflow payload: ${res.status} ${detail}`
    );
  };

  if (upload.method !== "POST") {
    const putHeaders = { ...upload.headers ?? {} };
    addAzureBlobHeaders(putHeaders, upload.signedUrl);
    const putResponse = await fetchFn(upload.signedUrl, {
      method: "PUT",
      headers: putHeaders,
      body: payload
    });
    await ensureOk(putResponse);
    return;
  }

  if (!upload.fields) {
    throw new Error("Missing logs3 overflow upload fields");
  }
  if (typeof FormData === "undefined" || typeof Blob === "undefined") {
    throw new Error("FormData is not available for logs3 overflow upload");
  }
  const form = new FormData();
  Object.entries(upload.fields).forEach(([name, value]) => {
    form.append(name, value);
  });
  const contentType = upload.fields["Content-Type"] ?? "application/json";
  form.append("file", new Blob([payload], { type: contentType }));
  // Drop any caller-supplied content-type header before sending the form.
  const postHeaders = Object.fromEntries(
    Object.entries(upload.headers ?? {}).filter(
      ([name]) => name.toLowerCase() !== "content-type"
    )
  );
  const postResponse = await fetchFn(upload.signedUrl, {
    method: "POST",
    headers: postHeaders,
    body: form
  });
  await ensureOk(postResponse);
}
6964
/**
 * Serializes `item` with JSON.stringify and pairs the text with metadata
 * derived from the item.
 *
 * @param {object} item - Row to serialize.
 * @returns {{str: string, overflowMeta: {object_ids: object, is_delete: boolean, input_row: {byte_size: number}}}}
 *   `object_ids` comes from `pickLogs3OverflowObjectIds(item)`, `is_delete`
 *   is true only when `item[OBJECT_DELETE_FIELD] === true`, and `byte_size`
 *   is `utf8ByteLength` of the serialized text.
 */
function stringifyWithOverflowMeta(item) {
  const serialized = JSON.stringify(item);
  const object_ids = pickLogs3OverflowObjectIds(item);
  const is_delete = item[OBJECT_DELETE_FIELD] === true;
  const input_row = { byte_size: utf8ByteLength(serialized) };
  return {
    str: serialized,
    overflowMeta: { object_ids, is_delete, input_row }
  };
}
6978
/**
 * Returns the number of bytes `value` occupies when encoded as UTF-8.
 *
 * Uses `TextEncoder` when the runtime provides it. Otherwise the length is
 * computed from the UTF-16 code units, so non-ASCII text is still measured
 * in bytes rather than in code units (which would undercount it).
 *
 * @param {string} value
 * @returns {number} UTF-8 byte length.
 */
function utf8ByteLength(value) {
  if (typeof TextEncoder !== "undefined") {
    return new TextEncoder().encode(value).length;
  }
  let bytes = 0;
  for (let i = 0; i < value.length; i++) {
    const unit = value.charCodeAt(i);
    if (unit < 0x80) {
      bytes += 1;
    } else if (unit < 0x800) {
      bytes += 2;
    } else if (unit >= 0xd800 && unit <= 0xdbff && i + 1 < value.length) {
      const next = value.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // Valid surrogate pair: one code point above U+FFFF, 4 bytes.
        bytes += 4;
        i++;
      } else {
        // Unpaired high surrogate: counted as U+FFFD (3 bytes), matching
        // what TextEncoder emits for it.
        bytes += 3;
      }
    } else {
      // Remaining BMP code units, including unpaired surrogates.
      bytes += 3;
    }
  }
  return bytes;
}
6742
6984
  function now() {
6743
6985
  return (/* @__PURE__ */ new Date()).getTime();
@@ -6752,8 +6994,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
6752
6994
  onFlushError;
6753
6995
  maskingFunction = null;
6754
6996
  syncFlush = false;
6755
- // 6 MB for the AWS lambda gateway (from our own testing).
6756
- maxRequestSize = 6 * 1024 * 1024;
6997
+ maxRequestSizeOverride = null;
6998
+ _maxRequestSizePromise = null;
6757
6999
  defaultBatchSize = 100;
6758
7000
  numTries = 3;
6759
7001
  queueDropExceedingMaxsize = DEFAULT_QUEUE_SIZE;
@@ -6781,7 +7023,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
6781
7023
  }
6782
7024
  const maxRequestSizeEnv = Number(isomorph_default.getEnv("BRAINTRUST_MAX_REQUEST_SIZE"));
6783
7025
  if (!isNaN(maxRequestSizeEnv)) {
6784
- this.maxRequestSize = maxRequestSizeEnv;
7026
+ this.maxRequestSizeOverride = maxRequestSizeEnv;
6785
7027
  }
6786
7028
  const numTriesEnv = Number(isomorph_default.getEnv("BRAINTRUST_NUM_RETRIES"));
6787
7029
  if (!isNaN(numTriesEnv)) {
@@ -6843,6 +7085,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
6843
7085
  }
6844
7086
  }
6845
7087
  }
7088
+ getMaxRequestSize() {
7089
+ if (!this._maxRequestSizePromise) {
7090
+ this._maxRequestSizePromise = (async () => {
7091
+ let serverLimit = null;
7092
+ try {
7093
+ const conn = await this.apiConn.get();
7094
+ const versionInfo = await conn.get_json("version");
7095
+ serverLimit = import_v38.z.object({ logs3_payload_max_bytes: import_v38.z.number().nullish() }).parse(versionInfo).logs3_payload_max_bytes ?? null;
7096
+ } catch (e) {
7097
+ console.warn("Failed to fetch version info for payload limit:", e);
7098
+ }
7099
+ const validServerLimit = serverLimit !== null && serverLimit > 0 ? serverLimit : null;
7100
+ const canUseOverflow = validServerLimit !== null;
7101
+ let maxRequestSize = DEFAULT_MAX_REQUEST_SIZE;
7102
+ if (this.maxRequestSizeOverride !== null) {
7103
+ maxRequestSize = validServerLimit !== null ? Math.min(this.maxRequestSizeOverride, validServerLimit) : this.maxRequestSizeOverride;
7104
+ } else if (validServerLimit !== null) {
7105
+ maxRequestSize = validServerLimit;
7106
+ }
7107
+ return { maxRequestSize, canUseOverflow };
7108
+ })();
7109
+ }
7110
+ return this._maxRequestSizePromise;
7111
+ }
6846
7112
  async flush() {
6847
7113
  if (this.syncFlush) {
6848
7114
  this.triggerActiveFlush();
@@ -6886,33 +7152,33 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
6886
7152
  if (allItems.length === 0) {
6887
7153
  return;
6888
7154
  }
6889
- const allItemsStr = allItems.map(
6890
- (bucket) => bucket.map((item) => JSON.stringify(item))
7155
+ const allItemsWithMeta = allItems.map(
7156
+ (item) => stringifyWithOverflowMeta(item)
6891
7157
  );
6892
- const batchSets = batchItems({
6893
- items: allItemsStr,
7158
+ const maxRequestSizeResult = await this.getMaxRequestSize();
7159
+ const batches = batchItems({
7160
+ items: allItemsWithMeta,
6894
7161
  batchMaxNumItems: batchSize,
6895
- batchMaxNumBytes: this.maxRequestSize / 2
7162
+ batchMaxNumBytes: maxRequestSizeResult.maxRequestSize / 2,
7163
+ getByteSize: (item) => item.str.length
6896
7164
  });
6897
- for (const batchSet of batchSets) {
6898
- const postPromises = batchSet.map(
6899
- (batch) => (async () => {
6900
- try {
6901
- await this.submitLogsRequest(batch);
6902
- return { type: "success" };
6903
- } catch (e) {
6904
- return { type: "error", value: e };
6905
- }
6906
- })()
7165
+ const postPromises = batches.map(
7166
+ (batch) => (async () => {
7167
+ try {
7168
+ await this.submitLogsRequest(batch, maxRequestSizeResult);
7169
+ return { type: "success" };
7170
+ } catch (e) {
7171
+ return { type: "error", value: e };
7172
+ }
7173
+ })()
7174
+ );
7175
+ const results = await Promise.all(postPromises);
7176
+ const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
7177
+ if (failingResultErrors.length) {
7178
+ throw new AggregateError(
7179
+ failingResultErrors,
7180
+ `Encountered the following errors while logging:`
6907
7181
  );
6908
- const results = await Promise.all(postPromises);
6909
- const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
6910
- if (failingResultErrors.length) {
6911
- throw new AggregateError(
6912
- failingResultErrors,
6913
- `Encountered the following errors while logging:`
6914
- );
6915
- }
6916
7182
  }
6917
7183
  const attachmentErrors = [];
6918
7184
  for (const attachment of attachments) {
@@ -6942,32 +7208,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
6942
7208
  items.forEach((item) => extractAttachments(item, attachments));
6943
7209
  let mergedItems = mergeRowBatch(items);
6944
7210
  if (this.maskingFunction) {
6945
- mergedItems = mergedItems.map(
6946
- (batch) => batch.map((item) => {
6947
- const maskedItem = { ...item };
6948
- for (const field of REDACTION_FIELDS) {
6949
- if (item[field] !== void 0) {
6950
- const maskedValue = applyMaskingToField(
6951
- this.maskingFunction,
6952
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
6953
- item[field],
6954
- field
6955
- );
6956
- if (maskedValue instanceof MaskingError) {
6957
- delete maskedItem[field];
6958
- if (maskedItem.error) {
6959
- maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
6960
- } else {
6961
- maskedItem.error = maskedValue.errorMsg;
6962
- }
7211
+ mergedItems = mergedItems.map((item) => {
7212
+ const maskedItem = { ...item };
7213
+ for (const field of REDACTION_FIELDS) {
7214
+ if (item[field] !== void 0) {
7215
+ const maskedValue = applyMaskingToField(
7216
+ this.maskingFunction,
7217
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
7218
+ item[field],
7219
+ field
7220
+ );
7221
+ if (maskedValue instanceof MaskingError) {
7222
+ delete maskedItem[field];
7223
+ if (maskedItem.error) {
7224
+ maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
6963
7225
  } else {
6964
- maskedItem[field] = maskedValue;
7226
+ maskedItem.error = maskedValue.errorMsg;
6965
7227
  }
7228
+ } else {
7229
+ maskedItem[field] = maskedValue;
6966
7230
  }
6967
7231
  }
6968
- return maskedItem;
6969
- })
6970
- );
7232
+ }
7233
+ return maskedItem;
7234
+ });
6971
7235
  }
6972
7236
  return [mergedItems, attachments];
6973
7237
  } catch (e) {
@@ -6994,20 +7258,73 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
6994
7258
  }
6995
7259
  throw new Error("Impossible");
6996
7260
  }
6997
- async submitLogsRequest(items) {
7261
+ async requestLogs3OverflowUpload(conn, args) {
7262
+ let response;
7263
+ try {
7264
+ response = await conn.post_json("logs3/overflow", {
7265
+ content_type: "application/json",
7266
+ size_bytes: args.sizeBytes,
7267
+ rows: args.rows
7268
+ });
7269
+ } catch (error2) {
7270
+ const errorStr = JSON.stringify(error2);
7271
+ throw new Error(
7272
+ `Failed to request logs3 overflow upload URL: ${errorStr}`
7273
+ );
7274
+ }
7275
+ try {
7276
+ return logs3OverflowUploadSchema.parse(response);
7277
+ } catch (error2) {
7278
+ if (error2 instanceof import_v38.ZodError) {
7279
+ const errorStr = JSON.stringify(error2.flatten());
7280
+ throw new Error(`Invalid response from API server: ${errorStr}`);
7281
+ }
7282
+ throw error2;
7283
+ }
7284
+ }
7285
+ async _uploadLogs3OverflowPayload(conn, upload, payload) {
7286
+ await uploadLogs3OverflowPayload(upload, payload, conn.fetch.bind(conn));
7287
+ }
7288
+ async submitLogsRequest(items, {
7289
+ maxRequestSize,
7290
+ canUseOverflow
7291
+ }) {
6998
7292
  const conn = await this.apiConn.get();
6999
7293
  const dataStr = constructLogs3Data(items);
7294
+ const payloadBytes = utf8ByteLength(dataStr);
7295
+ const useOverflow = canUseOverflow && payloadBytes > maxRequestSize;
7000
7296
  if (this.allPublishPayloadsDir) {
7001
7297
  await _HTTPBackgroundLogger.writePayloadToDir({
7002
7298
  payloadDir: this.allPublishPayloadsDir,
7003
7299
  payload: dataStr
7004
7300
  });
7005
7301
  }
7302
+ let overflowUpload = null;
7303
+ const overflowRows = useOverflow ? items.map((item) => item.overflowMeta) : null;
7006
7304
  for (let i = 0; i < this.numTries; i++) {
7007
7305
  const startTime = now();
7008
7306
  let error2 = void 0;
7009
7307
  try {
7010
- await conn.post_json("logs3", dataStr);
7308
+ if (overflowRows) {
7309
+ if (!overflowUpload) {
7310
+ const currentUpload = await this.requestLogs3OverflowUpload(conn, {
7311
+ rows: overflowRows,
7312
+ sizeBytes: payloadBytes
7313
+ });
7314
+ await this._uploadLogs3OverflowPayload(
7315
+ conn,
7316
+ currentUpload,
7317
+ dataStr
7318
+ );
7319
+ overflowUpload = currentUpload;
7320
+ }
7321
+ await conn.post_json(
7322
+ "logs3",
7323
+ constructLogs3OverflowRequest(overflowUpload.key)
7324
+ );
7325
+ } else {
7326
+ await conn.post_json("logs3", dataStr);
7327
+ }
7011
7328
  } catch (e) {
7012
7329
  error2 = e;
7013
7330
  }
@@ -7023,7 +7340,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
7023
7340
  return `${error2}`;
7024
7341
  }
7025
7342
  })();
7026
- const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}.${retryingText}
7343
+ const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${payloadBytes}.${retryingText}
7027
7344
  Error: ${errorText}`;
7028
7345
  if (!isRetrying && this.failedPublishPayloadsDir) {
7029
7346
  await _HTTPBackgroundLogger.writePayloadToDir({
@@ -7077,7 +7394,7 @@ Error: ${errorText}`;
7077
7394
  try {
7078
7395
  const [allItems, allAttachments] = await this.unwrapLazyValues(wrappedItems);
7079
7396
  const dataStr = constructLogs3Data(
7080
- allItems.map((x) => JSON.stringify(x))
7397
+ allItems.map((x) => stringifyWithOverflowMeta(x))
7081
7398
  );
7082
7399
  const attachmentStr = JSON.stringify(
7083
7400
  allAttachments.map((a) => a.debugInfo())
@@ -9287,22 +9604,71 @@ var Prompt2 = class _Prompt {
9287
9604
  );
9288
9605
  }
9289
9606
  };
9290
- var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
9291
-
9292
- // src/cli/reporters/progress.ts
9293
- var import_chalk = __toESM(require("chalk"));
9294
- var cliProgress = __toESM(require("cli-progress"));
9295
-
9296
- // src/reporters/progress.ts
9297
- var SimpleProgressReporter = class {
9298
- start(name, _total) {
9299
- console.log(`Running evaluator ${name}`);
9607
/**
 * Read-only view over a parameters record (`metadata`), exposing its
 * identifying fields, its JSON schema and its stored values.
 */
var RemoteEvalParameters = class {
  // Marker property checked by `isParameters`.
  __braintrust_parameters_marker = true;
  constructor(metadata) {
    this.metadata = metadata;
  }
  get id() {
    return this.metadata.id;
  }
  get projectId() {
    return this.metadata.project_id;
  }
  get name() {
    return this.metadata.name;
  }
  get slug() {
    return this.metadata.slug;
  }
  get version() {
    return this.metadata[TRANSACTION_ID_FIELD];
  }
  get schema() {
    return this.metadata.function_data.__schema;
  }
  /** Stored values, or an empty object when none are present. */
  get data() {
    return this.metadata.function_data.data ?? {};
  }
  /**
   * Shallow presence check: returns false when `data` is not an object, or when
   * a key that is both declared in `schema.properties` and listed in
   * `schema.required` is missing from `data`. Value types are not checked.
   */
  validate(data) {
    if (data === null || typeof data !== "object") {
      return false;
    }
    const { properties, required } = this.schema;
    if (properties === null || typeof properties !== "object") {
      return true;
    }
    const requiredKeys = Array.isArray(required) ? required : [];
    const missingRequired = Object.keys(properties).some(
      (key) => !(key in data) && requiredKeys.includes(key)
    );
    return !missingRequired;
  }
  /** True when `x` is an object carrying the parameters marker set to `true`. */
  static isParameters(x) {
    if (typeof x !== "object" || x === null) {
      return false;
    }
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    return "__braintrust_parameters_marker" in x && x.__braintrust_parameters_marker === true;
  }
};
9656
+ var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
9657
+
9658
+ // src/cli/reporters/progress.ts
9659
+ var import_chalk = __toESM(require("chalk"));
9660
+ var cliProgress = __toESM(require("cli-progress"));
9661
+
9662
+ // src/reporters/progress.ts
9663
+ var SimpleProgressReporter = class {
9664
+ start(name, _total) {
9665
+ console.log(`Running evaluator ${name}`);
9666
+ }
9667
+ stop() {
9668
+ }
9669
+ increment(_name) {
9670
+ }
9671
+ setTotal(_name, _total) {
9306
9672
  }
9307
9673
  };
9308
9674
 
@@ -10385,6 +10751,85 @@ function waterfall(tasks, callback) {
10385
10751
  }
10386
10752
  var waterfall$1 = awaitify(waterfall);
10387
10753
 
10754
+ // src/functions/invoke.ts
10755
/**
 * Invoke a function through the proxy connection's `function/invoke` endpoint.
 *
 * Logs in with the supplied credentials, resolves the parent span reference,
 * validates the function-identifying arguments with `FunctionId`, and posts
 * the request.
 *
 * @param args Login options (`orgName`, `apiKey`, `appUrl`, `forceLogin`,
 *   `fetch`), request fields (`input`, `messages`, `parent`, `metadata`,
 *   `tags`, `stream`, `mode`, `strict`), optional `state`, `schema`,
 *   `projectId`, plus the function-identifying fields.
 * @returns A `BraintrustStream` when `stream` is truthy; otherwise the JSON
 *   response, passed through `schema.parse` when a schema is given.
 * @throws Error when the function id arguments are invalid or a streaming
 *   response has no body.
 */
async function invoke(args) {
  const {
    orgName,
    apiKey,
    appUrl,
    forceLogin,
    fetch: fetch2,
    input,
    messages,
    parent: parentArg,
    metadata,
    tags,
    state: stateArg,
    stream,
    mode,
    schema,
    strict,
    projectId,
    ...functionIdArgs
  } = args;
  const state = stateArg ?? _internalGetGlobalState();
  await state.login({ orgName, apiKey, appUrl, forceLogin, fetch: fetch2 });

  // A string parent is used as-is; anything else is exported. Without an
  // explicit parent, the current span parent object is exported instead.
  let parent;
  if (!parentArg) {
    parent = await getSpanParentObject().export();
  } else if (typeof parentArg === "string") {
    parent = parentArg;
  } else {
    parent = await parentArg.export();
  }

  const parsedFunctionId = FunctionId.safeParse({
    function_id: functionIdArgs.function_id,
    project_name: functionIdArgs.projectName,
    slug: functionIdArgs.slug,
    global_function: functionIdArgs.globalFunction,
    function_type: functionIdArgs.functionType,
    prompt_session_id: functionIdArgs.promptSessionId,
    prompt_session_function_id: functionIdArgs.promptSessionFunctionId,
    version: functionIdArgs.version
  });
  if (!parsedFunctionId.success) {
    throw new Error(
      `Invalid function ID arguments: ${parsedFunctionId.error.message}`
    );
  }

  const request = {
    ...parsedFunctionId.data,
    input,
    messages,
    parent,
    metadata,
    tags,
    stream,
    mode,
    strict
  };
  const headers = {
    Accept: stream ? "text/event-stream" : "application/json"
  };
  if (projectId) {
    headers["x-bt-project-id"] = projectId;
  }
  if (orgName) {
    headers["x-bt-org-name"] = orgName;
  }

  const resp = await state.proxyConn().post(`function/invoke`, request, {
    headers
  });
  if (!stream) {
    const data = await resp.json();
    return schema ? schema.parse(data) : data;
  }
  if (!resp.body) {
    throw new Error("Received empty stream body");
  }
  return new BraintrustStream(resp.body);
}
10832
+
10388
10833
  // src/trace.ts
10389
10834
  var SpanFetcher = class _SpanFetcher extends ObjectFetcher {
10390
10835
  constructor(objectType, _objectId, rootSpanId, _state, spanTypeFilter) {
@@ -10520,6 +10965,7 @@ var LocalTrace = class {
10520
10965
  spansFlushed = false;
10521
10966
  spansFlushPromise = null;
10522
10967
  cachedFetcher;
10968
+ threadCache = /* @__PURE__ */ new Map();
10523
10969
  constructor({
10524
10970
  objectType,
10525
10971
  objectId,
@@ -10590,6 +11036,36 @@ var LocalTrace = class {
10590
11036
  }
10591
11037
  return this.cachedFetcher.getSpans({ spanType });
10592
11038
  }
11039
+ /**
11040
+ * Get the thread (preprocessed messages) for this trace.
11041
+ * Calls the API with the project_default preprocessor (which falls back to "thread").
11042
+ */
11043
+ async getThread(options) {
11044
+ const cacheKey = options?.preprocessor ?? "project_default";
11045
+ if (!this.threadCache.has(cacheKey)) {
11046
+ const promise = this.fetchThread(options);
11047
+ this.threadCache.set(cacheKey, promise);
11048
+ }
11049
+ return this.threadCache.get(cacheKey);
11050
+ }
11051
+ async fetchThread(options) {
11052
+ await this.ensureSpansReady();
11053
+ await this.state.login({});
11054
+ const result = await invoke({
11055
+ globalFunction: options?.preprocessor ?? "project_default",
11056
+ functionType: "preprocessor",
11057
+ input: {
11058
+ trace_ref: {
11059
+ object_type: this.objectType,
11060
+ object_id: this.objectId,
11061
+ root_span_id: this.rootSpanId
11062
+ }
11063
+ },
11064
+ mode: "json",
11065
+ state: this.state
11066
+ });
11067
+ return Array.isArray(result) ? result : [];
11068
+ }
10593
11069
  async ensureSpansReady() {
10594
11070
  if (this.spansFlushed || !this.ensureSpansFlushed) {
10595
11071
  return;
@@ -10611,660 +11087,383 @@ var LocalTrace = class {
10611
11087
 
10612
11088
  // src/eval-parameters.ts
10613
11089
  var import_v310 = require("zod/v3");
11090
+ var import_ajv = __toESM(require("ajv"));
10614
11091
 
10615
- // src/framework2.ts
11092
+ // src/prompt-schemas.ts
10616
11093
  var import_v39 = require("zod/v3");
10617
- var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
10618
- var ProjectBuilder = class {
10619
- create(opts) {
10620
- return new Project2(opts);
11094
// A prompt body is either a single completion string or a list of chat messages.
var promptContentsSchema = import_v39.z.union([
  import_v39.z.object({ prompt: import_v39.z.string() }),
  import_v39.z.object({ messages: import_v39.z.array(ChatCompletionMessageParam) })
]);

// Prompt body plus the model to run it with, optional model params and an
// optional template format.
var promptDefinitionSchema = promptContentsSchema.and(
  import_v39.z.object({
    model: import_v39.z.string(),
    params: ModelParams.optional(),
    templateFormat: import_v39.z.enum(["mustache", "nunjucks", "none"]).optional()
  })
);

// Prompt definition that may additionally carry tool function definitions.
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
  import_v39.z.object({ tools: import_v39.z.array(ToolFunctionDefinition).optional() })
);
11114
/**
 * Convert a prompt definition into the prompt-data shape
 * (`{ prompt, options, template_format? }`).
 *
 * @param promptDefinition Definition with either `messages` (chat) or `prompt`
 *   (completion), plus `model`, optional `params` and `templateFormat`.
 * @param rawTools Optional tool definitions; for chat prompts a non-empty list
 *   is stored as a JSON string under `prompt.tools`.
 * @returns The prompt data. `template_format` is present only when
 *   `templateFormat` is truthy.
 */
function promptDefinitionToPromptData(promptDefinition, rawTools) {
  let promptBlock;
  if ("messages" in promptDefinition) {
    const hasTools = rawTools && rawTools.length > 0;
    promptBlock = {
      type: "chat",
      messages: promptDefinition.messages,
      tools: hasTools ? JSON.stringify(rawTools) : void 0
    };
  } else {
    promptBlock = {
      type: "completion",
      content: promptDefinition.prompt
    };
  }
  const promptData = {
    prompt: promptBlock,
    options: {
      model: promptDefinition.model,
      params: promptDefinition.params
    }
  };
  if (promptDefinition.templateFormat) {
    promptData.template_format = promptDefinition.templateFormat;
  }
  return promptData;
}
11132
+
11133
+ // src/eval-parameters.ts
11134
// Map of parameter name -> parameter spec. A spec is either a "prompt"
// descriptor (with optional default and description) or a Zod schema instance.
var evalParametersSchema = import_v310.z.record(
  import_v310.z.string(),
  import_v310.z.union([
    // Prompt-typed parameter.
    import_v310.z.object({
      type: import_v310.z.literal("prompt"),
      default: promptDefinitionWithToolsSchema.optional(),
      description: import_v310.z.string().optional()
    }),
    // For Zod schemas
    import_v310.z.instanceof(import_v310.z.ZodType)
  ])
);
11146
/**
 * Validate `parameters` against `parameterSchema`.
 *
 * - A promise schema is awaited first.
 * - With no schema (`undefined`/`null`), `parameters` is returned untouched.
 * - With a `RemoteEvalParameters` schema, the stored values are combined with
 *   the caller's values (caller wins) and checked against its JSON schema.
 * - Otherwise the schema is treated as a map of Zod/prompt specs.
 *
 * @param parameters Caller-supplied values; may be nullish.
 * @param parameterSchema Schema, promise of a schema, or nullish.
 * @returns The validated parameters.
 */
async function validateParameters(parameters, parameterSchema) {
  let resolvedSchema = parameterSchema;
  if (resolvedSchema instanceof Promise) {
    resolvedSchema = await resolvedSchema;
  }
  if (resolvedSchema === void 0 || resolvedSchema === null) {
    return parameters;
  }
  if (RemoteEvalParameters.isParameters(resolvedSchema)) {
    // Always merge into a fresh object. The JSON-schema validator runs with
    // `useDefaults`/`coerceTypes`, which write into the object they validate;
    // passing `resolvedSchema.data` directly would mutate the stored values.
    // (The copy is shallow: nested objects are still shared.)
    const mergedParameters = {
      ...resolvedSchema.data,
      ...parameters
    };
    return validateParametersWithJsonSchema(
      mergedParameters,
      resolvedSchema.schema
    );
  }
  return validateParametersWithZod(
    parameters,
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    resolvedSchema
  );
}
11170
/**
 * Validate `parameters` against a map of parameter specs.
 *
 * For a spec with `type === "prompt"`, the value is parsed with `PromptData`
 * (or built from the spec's default) and wrapped in a `Prompt2`. Any other
 * spec is treated as a schema and its `parse` is applied to the value.
 *
 * @param parameters Caller-supplied values; a nullish value is treated as
 *   "no values supplied", so each spec sees `undefined`.
 * @param parameterSchema Map of parameter name -> spec.
 * @returns A new object with one validated entry per spec.
 * @throws Error `Invalid parameter '<name>': ...` (original error attached as
 *   `cause`) when a value fails validation or a prompt has no value/default.
 */
function validateParametersWithZod(parameters, parameterSchema) {
  return Object.fromEntries(
    Object.entries(parameterSchema).map(([name, schema]) => {
      // Optional access: `parameters` may be undefined/null when the caller
      // supplied nothing; defaults (or a clear error) should still apply.
      const value = parameters?.[name];
      try {
        if ("type" in schema && schema.type === "prompt") {
          const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
            schema.default,
            schema.default.tools
          ) : void 0;
          if (!promptData) {
            throw new Error(`Parameter '${name}' is required`);
          }
          return [name, Prompt2.fromPromptData(name, promptData)];
        } else {
          const schemaCasted = schema;
          return [name, schemaCasted.parse(value)];
        }
      } catch (e) {
        console.error("Error validating parameter", name, e);
        throw new Error(
          `Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`,
          { cause: e }
        );
      }
    })
  );
}
11197
/**
 * Validate `parameters` against a JSON schema using Ajv (configured with
 * `coerceTypes`, `useDefaults` and non-strict mode).
 *
 * @param parameters The object to validate; returned as-is on success.
 * @param schema The JSON schema to compile.
 * @returns The same `parameters` object that was passed in.
 * @throws Error `Invalid parameters: ...` listing each failure as
 *   `<instancePath or "root">: <message>`.
 */
function validateParametersWithJsonSchema(parameters, schema) {
  const ajv = new import_ajv.default({ coerceTypes: true, useDefaults: true, strict: false });
  const validate = ajv.compile(schema);
  const isValid = validate(parameters);
  if (isValid) {
    return parameters;
  }
  const describeFailure = (err) => {
    const path8 = err.instancePath || "root";
    return `${path8}: ${err.message}`;
  };
  const errorMessages = validate.errors?.map(describeFailure).join(", ");
  throw Error(`Invalid parameters: ${errorMessages}`);
}
11209
+
11210
+ // src/framework.ts
11211
/**
 * Pairs an evaluation `summary` with its per-row `results`.
 */
var EvalResultWithSummary = class {
  constructor(summary, results) {
    this.summary = summary;
    this.results = results;
  }
  /**
   * JSON text of the summary only.
   * @deprecated Use `summary` instead.
   */
  toString() {
    const { summary } = this;
    return JSON.stringify(summary);
  }
  // Custom Node.js inspect output: a fixed placeholder string rather than the
  // full contents.
  [Symbol.for("nodejs.util.inspect.custom")]() {
    return `EvalResultWithSummary(summary="...", results=[...])`;
  }
  /** Plain-object form used by `JSON.stringify`. */
  toJSON() {
    const { summary, results } = this;
    return { summary, results };
  }
};
10676
- var ToolBuilder = class {
10677
- constructor(project) {
10678
- this.project = project;
11232
/**
 * Build the display name for an eval: the project name, followed by
 * ` [experimentName=<name>]` when an experiment name is given.
 */
function makeEvalName(projectName, experimentName) {
  if (!experimentName) {
    return projectName;
  }
  return projectName + ` [experimentName=${experimentName}]`;
}
11239
/**
 * Call `init` with the given `state` and `options`, always forcing
 * `setCurrent: false`. Keys in `options` override `state`.
 */
function initExperiment(state, options = {}) {
  const initArgs = { state, ...options };
  // Applied last so callers cannot turn it back on through `options`.
  initArgs.setCurrent = false;
  return init(initArgs);
}
11246
/**
 * Resolve an evaluator's `data` option.
 *
 * @param data Either the data itself or a zero-argument function returning it.
 * @returns `{ data, baseExperiment }`, where `baseExperiment` is the value's
 *   `name` when it has `_type === "BaseExperiment"`, otherwise `undefined`.
 */
function callEvaluatorData(data) {
  const dataResult = typeof data === "function" ? data() : data;
  const isBaseExperiment = "_type" in dataResult && dataResult._type === "BaseExperiment";
  return {
    data: dataResult,
    baseExperiment: isBaseExperiment ? dataResult.name : void 0
  };
}
11257
/** True when `value` is a non-null object with a callable `Symbol.asyncIterator`. */
function isAsyncIterable2(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return typeof value[Symbol.asyncIterator] === "function";
}
11260
/**
 * True when `value` is a non-null object with a callable `Symbol.iterator`.
 * Primitives such as strings are reported as not iterable.
 */
function isIterable(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return typeof value[Symbol.iterator] === "function";
}
11263
// Global registry, reset to empty when this module is evaluated: lists for
// functions, prompts and parameters, plus name-keyed maps for evaluators and
// reporters.
globalThis._evals = {
  functions: [],
  prompts: [],
  parameters: [],
  evaluators: {},
  reporters: {}
};
10706
- var ScorerBuilder = class {
10707
- constructor(project) {
10708
- this.project = project;
11270
/**
 * Publish this module's span helpers (`currentSpan`, `withCurrent`,
 * `startSpan`, `NOOP_SPAN`) on `globalThis._spanContext`.
 */
function _initializeSpanContext() {
  const spanContext = {
    currentSpan: currentSpan,
    withCurrent: withCurrent,
    startSpan: startSpan,
    NOOP_SPAN: NOOP_SPAN
  };
  globalThis._spanContext = spanContext;
}
11273
/**
 * Define and (unless lazily loaded) run an evaluation.
 *
 * When `globalThis._lazy_load` is set, the evaluator is only registered in
 * `globalThis._evals.evaluators` and an empty result is returned. Otherwise an
 * experiment is initialized (unless `options.parent` or `options.noSendLogs`
 * is set), the evaluator is run, the result is passed to the reporter, and
 * pending logs are flushed.
 *
 * @param name Project name; also the base of the eval's display name.
 * @param evaluator Evaluator definition (data, experiment settings, ...).
 * @param reporterOrOpts A reporter name, a reporter object (anything with a
 *   `name` property), or an options object.
 * @returns The evaluator result (an `EvalResultWithSummary` with empty fields
 *   in lazy-load mode).
 * @throws Error when a reporter name (string) is given outside lazy-load mode.
 */
async function Eval(name, evaluator, reporterOrOpts) {
  const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
  let evalName = makeEvalName(name, evaluator.experimentName);
  const registeredEvaluators = globalThis._evals.evaluators;
  if (registeredEvaluators[evalName]) {
    // Disambiguate duplicate names. The suffix is derived from the number of
    // registered evaluators (not from the number of sections in `_evals`,
    // which never changes) and is bumped until the name is actually unused,
    // so repeated duplicates never overwrite each other.
    const baseName = evalName;
    let suffix = Object.keys(registeredEvaluators).length;
    do {
      evalName = `${baseName}_${suffix}`;
      suffix += 1;
    } while (registeredEvaluators[evalName]);
  }
  if (globalThis._lazy_load) {
    // Registration-only mode: record the evaluator and return a placeholder.
    globalThis._evals.evaluators[evalName] = {
      // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
      evaluator: {
        evalName,
        projectName: name,
        ...evaluator
      },
      reporter: options.reporter
    };
    _initializeSpanContext();
    return new EvalResultWithSummary(
      {
        scores: {},
        metrics: {},
        projectName: "",
        experimentName: ""
      },
      []
    );
  }
  const progressReporter = options.progress ?? new SimpleProgressReporter();
  const shouldCollectResults = options.returnResults ?? true;
  if (typeof options.reporter === "string") {
    throw new Error(
      "Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
    );
  }
  const resolvedReporter = options.reporter || defaultReporter;
  try {
    const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
      evaluator.data
    );
    // No experiment is created when logging under an existing parent or when
    // log sending is disabled.
    const experiment = options.parent || options.noSendLogs ? null : initExperiment(evaluator.state, {
      ...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
      experiment: evaluator.experimentName,
      description: evaluator.description,
      metadata: evaluator.metadata,
      isPublic: evaluator.isPublic,
      update: evaluator.update,
      baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
      baseExperimentId: evaluator.baseExperimentId,
      gitMetadataSettings: evaluator.gitMetadataSettings,
      repoInfo: evaluator.repoInfo,
      dataset: Dataset2.isDataset(data) ? data : void 0
    });
    if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
      await experiment._waitForId();
    }
    if (experiment && options.onStart) {
      const summary = await experiment.summarize({ summarizeScores: false });
      options.onStart(summary);
    }
    try {
      const evalDef = {
        evalName,
        projectName: name,
        ...evaluator,
        data
      };
      const enableCache = options.enableCache ?? true;
      let ret;
      if (options.parent) {
        ret = await withParent(
          options.parent,
          () => runEvaluator(
            null,
            evalDef,
            progressReporter,
            [],
            options.stream,
            options.parameters,
            shouldCollectResults,
            enableCache
          ),
          evaluator.state
        );
      } else {
        ret = await runEvaluator(
          experiment,
          evalDef,
          progressReporter,
          [],
          options.stream,
          options.parameters,
          shouldCollectResults,
          enableCache
        );
      }
      progressReporter.stop();
      resolvedReporter.reportEval(evalDef, ret, {
        verbose: true,
        jsonl: false
      });
      return ret;
    } finally {
      // Flush errors are logged, not thrown, so they cannot mask the eval's
      // own result or error.
      if (experiment) {
        await experiment.flush().catch(console.error);
      } else if (options.parent) {
        await flush().catch(console.error);
      }
    }
  } finally {
    progressReporter.stop();
  }
}
11385
+ function serializeJSONWithPlainString(v) {
11386
+ if (typeof v === "string") {
11387
+ return v;
11388
+ } else {
11389
+ return JSON.stringify(v);
10779
11390
  }
10780
- handler;
10781
- name;
10782
- slug;
10783
- type;
10784
- description;
10785
- parameters;
10786
- returns;
10787
- ifExists;
10788
- metadata;
10789
- key() {
10790
- return JSON.stringify([
10791
- this.project.id ?? "",
10792
- this.project.name ?? "",
10793
- this.slug
10794
- ]);
11391
+ }
11392
+ function deserializePlainStringAsJSON2(s) {
11393
+ try {
11394
+ return { value: JSON.parse(s), error: void 0 };
11395
+ } catch (e) {
11396
+ return { value: s, error: e };
10795
11397
  }
10796
- };
10797
- var CodePrompt = class {
10798
- project;
10799
- name;
10800
- slug;
10801
- prompt;
10802
- ifExists;
10803
- description;
10804
- id;
10805
- functionType;
10806
- toolFunctions;
10807
- metadata;
10808
- constructor(project, prompt, toolFunctions, opts, functionType) {
10809
- this.project = project;
10810
- this.name = opts.name;
10811
- this.slug = opts.slug;
10812
- this.prompt = prompt;
10813
- this.toolFunctions = toolFunctions;
10814
- this.ifExists = opts.ifExists;
10815
- this.description = opts.description;
10816
- this.id = opts.id;
10817
- this.functionType = functionType;
10818
- this.metadata = opts.metadata;
10819
- }
10820
- async toFunctionDefinition(projectNameToId) {
10821
- const prompt_data = {
10822
- ...this.prompt
10823
- };
10824
- if (this.toolFunctions.length > 0) {
10825
- const resolvableToolFunctions = await Promise.all(
10826
- this.toolFunctions.map(async (fn) => {
10827
- if ("slug" in fn) {
10828
- return {
10829
- type: "slug",
10830
- project_id: await projectNameToId.resolve(fn.project),
10831
- slug: fn.slug
10832
- };
10833
- } else {
10834
- return fn;
10835
- }
10836
- })
10837
- );
10838
- prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
10839
- resolvableToolFunctions;
11398
+ }
11399
+ function parseFilters(filters) {
11400
+ const result = [];
11401
+ for (const f of filters) {
11402
+ const equalsIdx = f.indexOf("=");
11403
+ if (equalsIdx === -1) {
11404
+ throw new Error(`Invalid filter ${f}`);
10840
11405
  }
10841
- return {
10842
- project_id: await projectNameToId.resolve(this.project),
10843
- name: this.name,
10844
- slug: this.slug,
10845
- description: this.description ?? "",
10846
- function_data: {
10847
- type: "prompt"
10848
- },
10849
- function_type: this.functionType,
10850
- prompt_data,
10851
- if_exists: this.ifExists,
10852
- metadata: this.metadata
10853
- };
10854
- }
10855
- };
10856
- var promptContentsSchema = import_v39.z.union([
10857
- import_v39.z.object({
10858
- prompt: import_v39.z.string()
10859
- }),
10860
- import_v39.z.object({
10861
- messages: import_v39.z.array(ChatCompletionMessageParam)
10862
- })
10863
- ]);
10864
- var promptDefinitionSchema = promptContentsSchema.and(
10865
- import_v39.z.object({
10866
- model: import_v39.z.string(),
10867
- params: ModelParams.optional(),
10868
- templateFormat: import_v39.z.enum(["mustache", "nunjucks", "none"]).optional()
10869
- })
10870
- );
10871
- var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
10872
- import_v39.z.object({
10873
- tools: import_v39.z.array(ToolFunctionDefinition).optional()
10874
- })
10875
- );
10876
- var PromptBuilder = class {
10877
- constructor(project) {
10878
- this.project = project;
10879
- }
10880
- create(opts) {
10881
- const toolFunctions = [];
10882
- const rawTools = [];
10883
- for (const tool of opts.tools ?? []) {
10884
- if (tool instanceof CodeFunction) {
10885
- toolFunctions.push(tool);
10886
- } else if ("type" in tool && !("function" in tool)) {
10887
- toolFunctions.push(tool);
10888
- } else {
10889
- rawTools.push(tool);
10890
- }
11406
+ const [path8, value] = [f.slice(0, equalsIdx), f.slice(equalsIdx + 1)];
11407
+ let deserializedValue = deserializePlainStringAsJSON2(value).value;
11408
+ if (typeof deserializedValue !== "string") {
11409
+ deserializedValue = value;
10891
11410
  }
10892
- const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
10893
- const promptData = promptDefinitionToPromptData(opts, rawTools);
10894
- const promptRow = {
10895
- id: opts.id,
10896
- _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
10897
- name: opts.name,
10898
- slug,
10899
- prompt_data: promptData,
10900
- ...this.project.id !== void 0 ? { project_id: this.project.id } : {}
10901
- };
10902
- const prompt = new Prompt2(
10903
- promptRow,
10904
- {},
10905
- // It doesn't make sense to specify defaults here.
10906
- opts.noTrace ?? false
10907
- );
10908
- const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
10909
- ...opts,
10910
- slug
11411
+ result.push({
11412
+ path: path8.split("."),
11413
+ pattern: new RegExp(deserializedValue)
10911
11414
  });
10912
- this.project.addPrompt(codePrompt);
10913
- return prompt;
10914
11415
  }
10915
- };
10916
- function promptDefinitionToPromptData(promptDefinition, rawTools) {
10917
- const promptBlock = "messages" in promptDefinition ? {
10918
- type: "chat",
10919
- messages: promptDefinition.messages,
10920
- tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
10921
- } : {
10922
- type: "completion",
10923
- content: promptDefinition.prompt
10924
- };
10925
- return {
10926
- prompt: promptBlock,
10927
- options: {
10928
- model: promptDefinition.model,
10929
- params: promptDefinition.params
10930
- },
10931
- ...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
10932
- };
11416
+ return result;
10933
11417
  }
10934
- var ProjectNameIdMap = class {
10935
- nameToId = {};
10936
- idToName = {};
10937
- async getId(projectName) {
10938
- if (!(projectName in this.nameToId)) {
10939
- const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
10940
- project_name: projectName
10941
- });
10942
- const result = import_v39.z.object({
10943
- project: Project
10944
- }).parse(response);
10945
- const projectId = result.project.id;
10946
- this.nameToId[projectName] = projectId;
10947
- this.idToName[projectId] = projectName;
10948
- }
10949
- return this.nameToId[projectName];
11418
+ function evaluateFilter(object, filter2) {
11419
+ const { path: path8, pattern } = filter2;
11420
+ const key = path8.reduce(
11421
+ (acc, p) => typeof acc === "object" && acc !== null ? (
11422
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
11423
+ acc[p]
11424
+ ) : void 0,
11425
+ object
11426
+ );
11427
+ if (key === void 0) {
11428
+ return false;
10950
11429
  }
10951
- async getName(projectId) {
10952
- if (!(projectId in this.idToName)) {
10953
- const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
10954
- id: projectId
10955
- });
10956
- const result = import_v39.z.array(Project).nonempty().parse(response);
10957
- const projectName = result[0].name;
10958
- this.idToName[projectId] = projectName;
10959
- this.nameToId[projectName] = projectId;
10960
- }
10961
- return this.idToName[projectId];
11430
+ return pattern.test(serializeJSONWithPlainString(key));
11431
+ }
11432
+ function scorerName(scorer, scorer_idx) {
11433
+ return scorer.name || `scorer_${scorer_idx}`;
11434
+ }
11435
+ async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
11436
+ return await runEvaluatorInternal(
11437
+ experiment,
11438
+ evaluator,
11439
+ progressReporter,
11440
+ filters,
11441
+ stream,
11442
+ parameters,
11443
+ collectResults,
11444
+ enableCache
11445
+ );
11446
+ }
11447
+ async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
11448
+ if (enableCache) {
11449
+ (evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
10962
11450
  }
10963
- async resolve(project) {
10964
- if (project.id) {
10965
- return project.id;
11451
+ try {
11452
+ if (typeof evaluator.data === "string") {
11453
+ throw new Error("Unimplemented: string data paths");
10966
11454
  }
10967
- return this.getId(project.name);
10968
- }
10969
- };
10970
-
10971
- // src/eval-parameters.ts
10972
- var evalParametersSchema = import_v310.z.record(
10973
- import_v310.z.string(),
10974
- import_v310.z.union([
10975
- import_v310.z.object({
10976
- type: import_v310.z.literal("prompt"),
10977
- default: promptDefinitionWithToolsSchema.optional(),
10978
- description: import_v310.z.string().optional()
10979
- }),
10980
- import_v310.z.instanceof(import_v310.z.ZodType)
10981
- // For Zod schemas
10982
- ])
10983
- );
10984
- function validateParameters(parameters, parameterSchema) {
10985
- return Object.fromEntries(
10986
- Object.entries(parameterSchema).map(([name, schema]) => {
10987
- const value = parameters[name];
10988
- try {
10989
- if ("type" in schema && schema.type === "prompt") {
10990
- const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
10991
- schema.default,
10992
- schema.default.tools
10993
- ) : void 0;
10994
- if (!promptData) {
10995
- throw new Error(`Parameter '${name}' is required`);
10996
- }
10997
- return [name, Prompt2.fromPromptData(name, promptData)];
10998
- } else {
10999
- const schemaCasted = schema;
11000
- return [name, schemaCasted.parse(value)];
11001
- }
11002
- } catch (e) {
11003
- console.error("Error validating parameter", name, e);
11004
- throw Error(
11005
- `Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
11006
- );
11007
- }
11008
- })
11009
- );
11010
- }
11011
-
11012
- // src/framework.ts
11013
- var EvalResultWithSummary = class {
11014
- constructor(summary, results) {
11015
- this.summary = summary;
11016
- this.results = results;
11017
- }
11018
- /**
11019
- * @deprecated Use `summary` instead.
11020
- */
11021
- toString() {
11022
- return JSON.stringify(this.summary);
11023
- }
11024
- [Symbol.for("nodejs.util.inspect.custom")]() {
11025
- return `EvalResultWithSummary(summary="...", results=[...])`;
11026
- }
11027
- toJSON() {
11028
- return {
11029
- summary: this.summary,
11030
- results: this.results
11031
- };
11032
- }
11033
- };
11034
- function makeEvalName(projectName, experimentName) {
11035
- let out = projectName;
11036
- if (experimentName) {
11037
- out += ` [experimentName=${experimentName}]`;
11038
- }
11039
- return out;
11040
- }
11041
- function initExperiment(state, options = {}) {
11042
- return init({
11043
- state,
11044
- ...options,
11045
- setCurrent: false
11046
- });
11047
- }
11048
- function callEvaluatorData(data) {
11049
- const dataResult = typeof data === "function" ? data() : data;
11050
- let baseExperiment = void 0;
11051
- if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
11052
- baseExperiment = dataResult.name;
11053
- }
11054
- return {
11055
- data: dataResult,
11056
- baseExperiment
11057
- };
11058
- }
11059
- function isAsyncIterable2(value) {
11060
- return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
11061
- }
11062
- function isIterable(value) {
11063
- return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
11064
- }
11065
- globalThis._evals = {
11066
- functions: [],
11067
- prompts: [],
11068
- evaluators: {},
11069
- reporters: {}
11070
- };
11071
- function _initializeSpanContext() {
11072
- globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
11073
- }
11074
- async function Eval(name, evaluator, reporterOrOpts) {
11075
- const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
11076
- let evalName = makeEvalName(name, evaluator.experimentName);
11077
- if (globalThis._evals.evaluators[evalName]) {
11078
- evalName = `${evalName}_${Object.keys(_evals).length}`;
11079
- }
11080
- if (globalThis._lazy_load) {
11081
- globalThis._evals.evaluators[evalName] = {
11082
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
11083
- evaluator: {
11084
- evalName,
11085
- projectName: name,
11086
- ...evaluator
11087
- },
11088
- reporter: options.reporter
11089
- };
11090
- _initializeSpanContext();
11091
- return new EvalResultWithSummary(
11092
- {
11093
- scores: {},
11094
- metrics: {},
11095
- projectName: "",
11096
- experimentName: ""
11097
- },
11098
- []
11099
- );
11100
- }
11101
- const progressReporter = options.progress ?? new SimpleProgressReporter();
11102
- const shouldCollectResults = options.returnResults ?? true;
11103
- if (typeof options.reporter === "string") {
11104
- throw new Error(
11105
- "Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
11106
- );
11107
- }
11108
- const resolvedReporter = options.reporter || defaultReporter;
11109
- try {
11110
- const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
11111
- evaluator.data
11112
- );
11113
- const experiment = options.parent || options.noSendLogs ? null : initExperiment(evaluator.state, {
11114
- ...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
11115
- experiment: evaluator.experimentName,
11116
- description: evaluator.description,
11117
- metadata: evaluator.metadata,
11118
- isPublic: evaluator.isPublic,
11119
- update: evaluator.update,
11120
- baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
11121
- baseExperimentId: evaluator.baseExperimentId,
11122
- gitMetadataSettings: evaluator.gitMetadataSettings,
11123
- repoInfo: evaluator.repoInfo,
11124
- dataset: Dataset2.isDataset(data) ? data : void 0
11125
- });
11126
- if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
11127
- await experiment._waitForId();
11128
- }
11129
- if (experiment && options.onStart) {
11130
- const summary = await experiment.summarize({ summarizeScores: false });
11131
- options.onStart(summary);
11132
- }
11133
- try {
11134
- const evalDef = {
11135
- evalName,
11136
- projectName: name,
11137
- ...evaluator,
11138
- data
11139
- };
11140
- const enableCache = options.enableCache ?? true;
11141
- let ret;
11142
- if (options.parent) {
11143
- ret = await withParent(
11144
- options.parent,
11145
- () => runEvaluator(
11146
- null,
11147
- evalDef,
11148
- progressReporter,
11149
- [],
11150
- options.stream,
11151
- options.parameters,
11152
- shouldCollectResults,
11153
- enableCache
11154
- ),
11155
- evaluator.state
11156
- );
11157
- } else {
11158
- ret = await runEvaluator(
11159
- experiment,
11160
- evalDef,
11161
- progressReporter,
11162
- [],
11163
- options.stream,
11164
- options.parameters,
11165
- shouldCollectResults,
11166
- enableCache
11167
- );
11168
- }
11169
- progressReporter.stop();
11170
- resolvedReporter.reportEval(evalDef, ret, {
11171
- verbose: true,
11172
- jsonl: false
11173
- });
11174
- return ret;
11175
- } finally {
11176
- if (experiment) {
11177
- await experiment.flush().catch(console.error);
11178
- } else if (options.parent) {
11179
- await flush().catch(console.error);
11180
- }
11181
- }
11182
- } finally {
11183
- progressReporter.stop();
11184
- }
11185
- }
11186
- function serializeJSONWithPlainString(v) {
11187
- if (typeof v === "string") {
11188
- return v;
11189
- } else {
11190
- return JSON.stringify(v);
11191
- }
11192
- }
11193
- function deserializePlainStringAsJSON2(s) {
11194
- try {
11195
- return { value: JSON.parse(s), error: void 0 };
11196
- } catch (e) {
11197
- return { value: s, error: e };
11198
- }
11199
- }
11200
- function parseFilters(filters) {
11201
- const result = [];
11202
- for (const f of filters) {
11203
- const equalsIdx = f.indexOf("=");
11204
- if (equalsIdx === -1) {
11205
- throw new Error(`Invalid filter ${f}`);
11206
- }
11207
- const [path8, value] = [f.slice(0, equalsIdx), f.slice(equalsIdx + 1)];
11208
- let deserializedValue = deserializePlainStringAsJSON2(value).value;
11209
- if (typeof deserializedValue !== "string") {
11210
- deserializedValue = value;
11211
- }
11212
- result.push({
11213
- path: path8.split("."),
11214
- pattern: new RegExp(deserializedValue)
11215
- });
11216
- }
11217
- return result;
11218
- }
11219
- function evaluateFilter(object, filter2) {
11220
- const { path: path8, pattern } = filter2;
11221
- const key = path8.reduce(
11222
- (acc, p) => typeof acc === "object" && acc !== null ? (
11223
- // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
11224
- acc[p]
11225
- ) : void 0,
11226
- object
11227
- );
11228
- if (key === void 0) {
11229
- return false;
11230
- }
11231
- return pattern.test(serializeJSONWithPlainString(key));
11232
- }
11233
- function scorerName(scorer, scorer_idx) {
11234
- return scorer.name || `scorer_${scorer_idx}`;
11235
- }
11236
- async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
11237
- return await runEvaluatorInternal(
11238
- experiment,
11239
- evaluator,
11240
- progressReporter,
11241
- filters,
11242
- stream,
11243
- parameters,
11244
- collectResults,
11245
- enableCache
11246
- );
11247
- }
11248
- async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
11249
- if (enableCache) {
11250
- (evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
11251
- }
11252
- try {
11253
- if (typeof evaluator.data === "string") {
11254
- throw new Error("Unimplemented: string data paths");
11255
- }
11256
- let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
11257
- parameters = validateParameters(
11258
- parameters ?? {},
11259
- evaluator.parameters ?? {}
11260
- );
11261
- if ("_type" in dataResult) {
11262
- if (dataResult._type !== "BaseExperiment") {
11263
- throw new Error("Invalid _type");
11264
- }
11265
- if (!experiment) {
11266
- throw new Error(
11267
- "Cannot use BaseExperiment() without connecting to Braintrust (you most likely set --no-send-logs)"
11455
+ let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
11456
+ parameters = await validateParameters(
11457
+ parameters ?? {},
11458
+ evaluator.parameters
11459
+ );
11460
+ if ("_type" in dataResult) {
11461
+ if (dataResult._type !== "BaseExperiment") {
11462
+ throw new Error("Invalid _type");
11463
+ }
11464
+ if (!experiment) {
11465
+ throw new Error(
11466
+ "Cannot use BaseExperiment() without connecting to Braintrust (you most likely set --no-send-logs)"
11268
11467
  );
11269
11468
  }
11270
11469
  let name = dataResult.name;
@@ -11415,6 +11614,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
11415
11614
  } else {
11416
11615
  rootSpan.log({ output, metadata, expected });
11417
11616
  }
11617
+ if (evaluator.flushBeforeScoring) {
11618
+ await rootSpan.flush();
11619
+ }
11418
11620
  const scoringArgs = {
11419
11621
  input: datum.input,
11420
11622
  expected: "expected" in datum ? datum.expected : void 0,
@@ -11654,869 +11856,1331 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
11654
11856
  summary,
11655
11857
  collectResults ? collectedResults : []
11656
11858
  );
11657
- } finally {
11658
- if (enableCache) {
11659
- const spanCache = (evaluator.state ?? _internalGetGlobalState())?.spanCache;
11660
- spanCache?.dispose();
11661
- spanCache?.stop();
11662
- }
11859
+ } finally {
11860
+ if (enableCache) {
11861
+ const spanCache = (evaluator.state ?? _internalGetGlobalState())?.spanCache;
11862
+ spanCache?.dispose();
11863
+ spanCache?.stop();
11864
+ }
11865
+ }
11866
+ }
11867
+ var error = (text) => `Error: ${text}`;
11868
+ var warning = (text) => `Warning: ${text}`;
11869
+ function logError2(e, verbose) {
11870
+ if (!verbose) {
11871
+ console.error(`${e}`);
11872
+ } else {
11873
+ console.error(e);
11874
+ }
11875
+ }
11876
+ function accumulateScores(accumulator, scores) {
11877
+ for (const [name, score] of Object.entries(scores)) {
11878
+ if (score === null || score === void 0) {
11879
+ continue;
11880
+ }
11881
+ const existing = accumulator[name] ?? { total: 0, count: 0 };
11882
+ accumulator[name] = {
11883
+ total: existing.total + score,
11884
+ count: existing.count + 1
11885
+ };
11886
+ }
11887
+ }
11888
+ function ensureScoreAccumulator(results) {
11889
+ const accumulator = {};
11890
+ for (const result of results) {
11891
+ accumulateScores(accumulator, result.scores);
11892
+ }
11893
+ return accumulator;
11894
+ }
11895
+ function buildLocalSummary(evaluator, results, precomputedScores) {
11896
+ const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
11897
+ return {
11898
+ projectName: evaluator.projectName,
11899
+ experimentName: evaluator.evalName,
11900
+ scores: Object.fromEntries(
11901
+ Object.entries(scoresByName).map(([name, { total, count }]) => [
11902
+ name,
11903
+ {
11904
+ name,
11905
+ score: count === 0 ? 0 : total / count,
11906
+ improvements: 0,
11907
+ regressions: 0
11908
+ }
11909
+ ])
11910
+ )
11911
+ };
11912
+ }
11913
+ function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
11914
+ if (failingResults.length > 0) {
11915
+ console.error(
11916
+ warning(
11917
+ `Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
11918
+ )
11919
+ );
11920
+ if (jsonl) {
11921
+ console.log(
11922
+ JSON.stringify({
11923
+ evaluatorName: evaluator.evalName,
11924
+ errors: failingResults.map(
11925
+ (r) => `${r.error instanceof Error ? r.error.stack : r.error}`
11926
+ )
11927
+ })
11928
+ );
11929
+ } else {
11930
+ for (const result of failingResults) {
11931
+ logError2(result.error, verbose);
11932
+ }
11933
+ }
11934
+ if (!verbose && !jsonl) {
11935
+ console.error(warning("Add --verbose to see full stack traces."));
11936
+ }
11937
+ }
11938
+ }
11939
+ var defaultReporter = {
11940
+ name: "Braintrust default reporter",
11941
+ async reportEval(evaluator, result, { verbose, jsonl }) {
11942
+ const { results, summary } = result;
11943
+ const failingResults = results.filter(
11944
+ (r) => r.error !== void 0
11945
+ );
11946
+ if (failingResults.length > 0) {
11947
+ reportFailures(evaluator, failingResults, { verbose, jsonl });
11948
+ }
11949
+ if (jsonl) {
11950
+ isomorph_default.writeln(JSON.stringify(summary));
11951
+ } else {
11952
+ isomorph_default.writeln("Experiment summary");
11953
+ isomorph_default.writeln("==================");
11954
+ if (summary.comparisonExperimentName) {
11955
+ isomorph_default.writeln(
11956
+ `${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
11957
+ );
11958
+ isomorph_default.writeln("");
11959
+ }
11960
+ const hasScores = Object.keys(summary.scores).length > 0;
11961
+ const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
11962
+ const hasComparison = !!summary.comparisonExperimentName;
11963
+ if (hasScores || hasMetrics) {
11964
+ if (hasComparison) {
11965
+ isomorph_default.writeln(
11966
+ "Name Value Change Improvements Regressions"
11967
+ );
11968
+ isomorph_default.writeln(
11969
+ "----------------------------------------------------------------"
11970
+ );
11971
+ }
11972
+ for (const score of Object.values(summary.scores)) {
11973
+ const scorePercent = (score.score * 100).toFixed(2);
11974
+ const scoreValue = `${scorePercent}%`;
11975
+ if (hasComparison) {
11976
+ let diffString = "-";
11977
+ if (!isEmpty2(score.diff)) {
11978
+ const diffPercent = (score.diff * 100).toFixed(2);
11979
+ const diffSign = score.diff > 0 ? "+" : "";
11980
+ diffString = `${diffSign}${diffPercent}%`;
11981
+ }
11982
+ const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
11983
+ const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
11984
+ isomorph_default.writeln(
11985
+ `${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
11986
+ );
11987
+ } else {
11988
+ isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
11989
+ }
11990
+ }
11991
+ for (const metric of Object.values(summary.metrics ?? {})) {
11992
+ const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
11993
+ const formattedValue = metric.metric.toFixed(fractionDigits);
11994
+ const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
11995
+ if (hasComparison) {
11996
+ let diffString = "-";
11997
+ if (!isEmpty2(metric.diff)) {
11998
+ const diffPercent = (metric.diff * 100).toFixed(2);
11999
+ const diffSign = metric.diff > 0 ? "+" : "";
12000
+ diffString = `${diffSign}${diffPercent}%`;
12001
+ }
12002
+ const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
12003
+ const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
12004
+ isomorph_default.writeln(
12005
+ `${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
12006
+ );
12007
+ } else {
12008
+ isomorph_default.writeln(
12009
+ `${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
12010
+ );
12011
+ }
12012
+ }
12013
+ }
12014
+ if (summary.experimentUrl) {
12015
+ isomorph_default.writeln("");
12016
+ isomorph_default.writeln(`View results for ${summary.experimentName}`);
12017
+ isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
12018
+ }
12019
+ }
12020
+ isomorph_default.writeln("");
12021
+ return failingResults.length === 0;
12022
+ },
12023
+ async reportRun(evalReports) {
12024
+ return evalReports.every((r) => r);
12025
+ }
12026
+ };
12027
+
12028
+ // src/cli/reporters/eval.ts
12029
+ var import_chalk2 = __toESM(require("chalk"));
12030
+ var import_termi_link = require("termi-link");
12031
+ var import_boxen = __toESM(require("boxen"));
12032
+ var import_cli_table3 = __toESM(require("cli-table3"));
12033
+ var import_pluralize = __toESM(require("pluralize"));
12034
+ function formatExperimentSummaryFancy(summary) {
12035
+ let comparisonLine = "";
12036
+ if (summary.comparisonExperimentName) {
12037
+ comparisonLine = `${summary.comparisonExperimentName} ${import_chalk2.default.gray("(baseline)")} \u2190 ${summary.experimentName} ${import_chalk2.default.gray("(comparison)")}
12038
+
12039
+ `;
12040
+ }
12041
+ const tableParts = [];
12042
+ const hasScores = Object.keys(summary.scores).length > 0;
12043
+ const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
12044
+ const hasComparison = !!summary.comparisonExperimentName;
12045
+ if (hasScores || hasMetrics) {
12046
+ const headers = [import_chalk2.default.gray("Name"), import_chalk2.default.gray("Value")];
12047
+ if (hasComparison) {
12048
+ headers.push(
12049
+ import_chalk2.default.gray("Change"),
12050
+ import_chalk2.default.gray("Improvements"),
12051
+ import_chalk2.default.gray("Regressions")
12052
+ );
12053
+ }
12054
+ const combinedTable = new import_cli_table3.default({
12055
+ head: hasComparison ? headers : [],
12056
+ style: { head: [], "padding-left": 0, "padding-right": 0, border: [] },
12057
+ chars: {
12058
+ top: "",
12059
+ "top-mid": "",
12060
+ "top-left": "",
12061
+ "top-right": "",
12062
+ bottom: "",
12063
+ "bottom-mid": "",
12064
+ "bottom-left": "",
12065
+ "bottom-right": "",
12066
+ left: "",
12067
+ "left-mid": "",
12068
+ mid: "",
12069
+ "mid-mid": "",
12070
+ right: "",
12071
+ "right-mid": "",
12072
+ middle: " "
12073
+ },
12074
+ colWidths: hasComparison ? [18, 10, 10, 13, 12] : [20, 15],
12075
+ colAligns: hasComparison ? ["left", "right", "right", "right", "right"] : ["left", "right"],
12076
+ wordWrap: false
12077
+ });
12078
+ const scoreValues = Object.values(summary.scores);
12079
+ for (const score of scoreValues) {
12080
+ const scorePercent = (score.score * 100).toFixed(2);
12081
+ const scoreValue = import_chalk2.default.white(`${scorePercent}%`);
12082
+ let diffString = "";
12083
+ if (!isEmpty2(score.diff)) {
12084
+ const diffPercent = (score.diff * 100).toFixed(2);
12085
+ const diffSign = score.diff > 0 ? "+" : "";
12086
+ const diffColor = score.diff > 0 ? import_chalk2.default.green : import_chalk2.default.red;
12087
+ diffString = diffColor(`${diffSign}${diffPercent}%`);
12088
+ } else {
12089
+ diffString = import_chalk2.default.gray("-");
12090
+ }
12091
+ const improvements = score.improvements > 0 ? import_chalk2.default.dim.green(score.improvements) : import_chalk2.default.gray("-");
12092
+ const regressions = score.regressions > 0 ? import_chalk2.default.dim.red(score.regressions) : import_chalk2.default.gray("-");
12093
+ const row = [`${import_chalk2.default.blue("\u25EF")} ${score.name}`, scoreValue];
12094
+ if (hasComparison) {
12095
+ row.push(diffString, improvements, regressions);
12096
+ }
12097
+ combinedTable.push(row);
12098
+ }
12099
+ const metricValues = Object.values(summary.metrics ?? {});
12100
+ for (const metric of metricValues) {
12101
+ const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
12102
+ const formattedValue = metric.metric.toFixed(fractionDigits);
12103
+ const metricValue = import_chalk2.default.white(
12104
+ metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`
12105
+ );
12106
+ let diffString = "";
12107
+ if (!isEmpty2(metric.diff)) {
12108
+ const diffPercent = (metric.diff * 100).toFixed(2);
12109
+ const diffSign = metric.diff > 0 ? "+" : "";
12110
+ const diffColor = metric.diff > 0 ? import_chalk2.default.green : import_chalk2.default.red;
12111
+ diffString = diffColor(`${diffSign}${diffPercent}%`);
12112
+ } else {
12113
+ diffString = import_chalk2.default.gray("-");
12114
+ }
12115
+ const improvements = metric.improvements > 0 ? import_chalk2.default.dim.green(metric.improvements) : import_chalk2.default.gray("-");
12116
+ const regressions = metric.regressions > 0 ? import_chalk2.default.dim.red(metric.regressions) : import_chalk2.default.gray("-");
12117
+ const row = [`${import_chalk2.default.magenta("\u25EF")} ${metric.name}`, metricValue];
12118
+ if (hasComparison) {
12119
+ row.push(diffString, improvements, regressions);
12120
+ }
12121
+ combinedTable.push(row);
12122
+ }
12123
+ tableParts.push(combinedTable.toString());
12124
+ }
12125
+ const content = [comparisonLine, ...tableParts].filter(Boolean).join("\n");
12126
+ const footer = summary.experimentUrl ? (0, import_termi_link.terminalLink)(
12127
+ `View results for ${summary.experimentName}`,
12128
+ summary.experimentUrl,
12129
+ { fallback: () => `See results at ${summary.experimentUrl}` }
12130
+ ) : "";
12131
+ const boxContent = [content, footer].filter(Boolean).join("\n\n");
12132
+ try {
12133
+ return "\n" + (0, import_boxen.default)(boxContent, {
12134
+ title: import_chalk2.default.gray("Experiment summary"),
12135
+ titleAlignment: "left",
12136
+ padding: 0.5,
12137
+ borderColor: "gray",
12138
+ borderStyle: "round"
12139
+ });
12140
+ } catch (error2) {
12141
+ return "\n" + import_chalk2.default.gray("Experiment summary") + "\n" + boxContent + "\n";
12142
+ }
12143
+ }
12144
// Yellow colouring used for reporter warnings.
var warning2 = import_chalk2.default.yellow;

/**
 * Reporter that prints a boxed, colourised experiment summary.
 *
 * `reportEval` warns on stderr when any result carries an `error`, optionally
 * dumps the failing results (JSON lines on stdout when `jsonl` is set, raw
 * objects on stderr when `verbose` is set), then writes the summary to stdout.
 * It resolves to `true` only when no result failed.
 *
 * `reportRun` resolves to `true` only when every per-eval report was truthy.
 */
var fancyReporter = {
  name: "Braintrust fancy reporter",
  async reportEval(evaluator, result, { verbose, jsonl }) {
    const { results, summary } = result;
    const failingResults = results.filter((entry) => entry.error !== void 0);
    const hasFailures = failingResults.length > 0;

    if (hasFailures) {
      const message = `Evaluator ${evaluator.evalName} failed with ${(0, import_pluralize.default)("error", failingResults.length, true)}. This evaluation ("${evaluator.evalName}") will not be fully logged.`;
      console.error(warning2(message));

      if (jsonl) {
        // One JSON document per line so the output stays machine-readable.
        failingResults.forEach((failure) => {
          process.stdout.write(JSON.stringify(failure));
          process.stdout.write("\n");
        });
      } else if (verbose) {
        failingResults.forEach((failure) => {
          console.error(failure);
        });
      }
    }

    const rendered = jsonl ? JSON.stringify(summary) : formatExperimentSummaryFancy(summary);
    process.stdout.write(rendered);
    process.stdout.write("\n");
    return !hasFailures;
  },
  async reportRun(evalReports) {
    return evalReports.every(Boolean);
  }
};
12179
+
12180
+ // src/node.ts
12181
+ var import_node_async_hooks = require("async_hooks");
12182
+ var path = __toESM(require("path"));
12183
+ var fs = __toESM(require("fs/promises"));
12184
+ var os = __toESM(require("os"));
12185
+ var fsSync = __toESM(require("fs"));
12186
+ var crypto = __toESM(require("crypto"));
12187
+
12188
+ // src/gitutil.ts
12189
+ var import_simple_git = require("simple-git");
12190
+ var COMMON_BASE_BRANCHES = ["main", "master", "develop"];
12191
/**
 * Returns a simple-git handle when `checkIsRepo()` reports a repository,
 * otherwise `null`. Any error raised while probing is also reported as `null`.
 */
async function currentRepo() {
  try {
    const repo = (0, import_simple_git.simpleGit)();
    const insideRepo = await repo.checkIsRepo();
    return insideRepo ? repo : null;
  } catch {
    return null;
  }
}
12203
// Memoised result of getBaseBranch(); stays null until the first successful lookup.
var _baseBranch = null;

/**
 * Resolves the `{ remote, branch }` pair used as the base branch and caches it
 * in `_baseBranch`, so later calls return the cached value regardless of the
 * `remote` argument.
 *
 * @param remote Optional remote name; defaults to the first configured remote.
 * @returns The cached or freshly computed `{ remote, branch }` object.
 * @throws Error when not inside a git repo or when no remote can be determined.
 */
async function getBaseBranch(remote = void 0) {
  if (_baseBranch !== null) {
    return _baseBranch;
  }

  const repo = await currentRepo();
  if (repo === null) {
    throw new Error("Not in a git repo");
  }

  const remoteName = remote ?? (await repo.getRemotes())[0]?.name;
  if (!remoteName) {
    throw new Error("No remote found");
  }

  const localBranches = new Set((await repo.branchLocal()).all);
  const candidates = COMMON_BASE_BRANCHES.filter((name) => localBranches.has(name));

  let branch = null;
  if (candidates.length === 1) {
    // Exactly one well-known base branch exists locally: use it directly.
    [branch] = candidates;
  } else {
    // Otherwise ask the remote for its HEAD branch; any failure falls back to "main".
    try {
      const remoteInfo = await repo.remote(["show", remoteName]);
      if (!remoteInfo) {
        throw new Error(`Could not find remote ${remoteName}`);
      }
      const headMatch = remoteInfo.match(/\s*HEAD branch:\s*(.*)$/m);
      if (!headMatch) {
        throw new Error(`Could not find HEAD branch in remote ${remoteName}`);
      }
      branch = headMatch[1];
    } catch {
      branch = "main";
    }
  }

  _baseBranch = { remote: remoteName, branch };
  return _baseBranch;
}
12240
/**
 * Finds the merge base between the working history and `<remote>/<base branch>`.
 *
 * Uses `HEAD` when `diffSummary()` reports changed files and `HEAD^` otherwise.
 *
 * @param remote Optional remote name forwarded to getBaseBranch().
 * @returns The trimmed `git merge-base` output, or `undefined` when that command fails.
 * @throws Error when not inside a git repo (and whatever getBaseBranch() throws).
 */
async function getBaseBranchAncestor(remote = void 0) {
  const repo = await currentRepo();
  if (repo === null) {
    throw new Error("Not in a git repo");
  }

  const base = await getBaseBranch(remote);
  const summary = await repo.diffSummary();
  const head = summary.files.length > 0 ? "HEAD" : "HEAD^";

  try {
    const mergeBase = await repo.raw(["merge-base", head, `${base.remote}/${base.branch}`]);
    return mergeBase.trim();
  } catch {
    return void 0;
  }
}
12259
/**
 * Lists up to `n` commit hashes between the base-branch ancestor and `HEAD`.
 *
 * @param n Maximum number of hashes to return (default 1000).
 * @param remote Optional remote name forwarded to getBaseBranchAncestor().
 * @returns Commit hashes, or an empty array when outside a repo or when no
 *   ancestor could be determined (a warning is logged if the lookup threw).
 */
async function getPastNAncestors(n = 1e3, remote = void 0) {
  const repo = await currentRepo();
  if (repo === null) {
    return [];
  }

  let ancestor;
  try {
    ancestor = await getBaseBranchAncestor(remote);
  } catch (err) {
    console.warn(
      "Skipping git metadata. This is likely because the repository has not been published to a remote yet.",
      `${err}`
    );
  }
  if (!ancestor) {
    return [];
  }

  const history = await repo.log({ from: ancestor, to: "HEAD", maxCount: n });
  return history.all.slice(0, n).map((entry) => entry.hash);
}
12279
/**
 * Runs `fn` and resolves to its (awaited) result; any synchronous throw or
 * rejection is swallowed and reported as `undefined`.
 */
async function attempt(fn) {
  let outcome;
  try {
    outcome = await fn();
  } catch {
    return void 0;
  }
  return outcome;
}
12286
/**
 * Truncates `s` so that its UTF-8 encoding is at most `byteLimit` bytes.
 *
 * The cut is moved back to the nearest character boundary so a multi-byte
 * character is never split. Splitting one makes the decoder emit U+FFFD,
 * which corrupts the text and, being three bytes long, can push the result
 * back over the limit.
 *
 * @param s String to truncate.
 * @param byteLimit Maximum size of the result in UTF-8 bytes (default 65536).
 * @returns `s` unchanged when it already fits, otherwise the longest prefix of
 *   whole characters that fits within `byteLimit` bytes.
 */
function truncateToByteLimit(s, byteLimit = 65536) {
  const encoded = new TextEncoder().encode(s);
  if (encoded.length <= byteLimit) {
    return s;
  }
  let end = Math.max(0, Math.floor(byteLimit));
  // UTF-8 continuation bytes look like 10xxxxxx. If the first dropped byte is
  // one, the cut is inside a character: back up to that character's lead byte.
  while (end > 0 && (encoded[end] & 0xc0) === 0x80) {
    end--;
  }
  return new TextDecoder().decode(encoded.subarray(0, end));
}
12294
/**
 * Collects git metadata according to `settings`.
 *
 * - `settings.collect === "none"`: returns `undefined` without touching git.
 * - no settings, `collect === "all"`, or no repo info available: returns
 *   whatever repoInfo() produced.
 * - otherwise: returns an object holding only the keys named in `settings.fields`.
 */
async function getRepoInfo(settings) {
  if (settings?.collect === "none") {
    return void 0;
  }

  const repo = await repoInfo();
  const wantsEverything = !settings || settings.collect === "all";
  if (!repo || wantsEverything) {
    return repo;
  }

  let picked = {};
  settings.fields?.forEach((fieldName) => {
    picked = { ...picked, [fieldName]: repo[fieldName] };
  });
  return picked;
}
12308
/**
 * Gathers metadata about the current git checkout.
 *
 * Each lookup goes through attempt(), so an individual failing git command
 * leaves that field `undefined` instead of aborting the whole call. `git_diff`
 * is only collected when the working tree is dirty, and is passed through
 * truncateToByteLimit().
 *
 * @returns `undefined` outside a git repo, otherwise an object with `commit`,
 *   `branch`, `tag`, `dirty`, `author_name`, `author_email`, `commit_message`,
 *   `commit_time` and `git_diff`.
 */
async function repoInfo() {
  const repo = await currentRepo();
  if (repo === null) {
    return void 0;
  }

  // Runs `git <args>` and trims the output; resolves to undefined on failure.
  const readTrimmed = (args) => attempt(async () => (await repo.raw(args)).trim());

  const dirty = (await repo.diffSummary()).files.length > 0;
  const commit = await attempt(async () => await repo.revparse(["HEAD"]));
  const commit_message = await readTrimmed(["log", "-1", "--pretty=%B"]);
  const commit_time = await readTrimmed(["log", "-1", "--pretty=%cI"]);
  const author_name = await readTrimmed(["log", "-1", "--pretty=%aN"]);
  const author_email = await readTrimmed(["log", "-1", "--pretty=%aE"]);
  const tag = await readTrimmed(["describe", "--tags", "--exact-match", "--always"]);
  const branch = await readTrimmed(["rev-parse", "--abbrev-ref", "HEAD"]);

  let git_diff = void 0;
  if (dirty) {
    git_diff = await attempt(
      async () => truncateToByteLimit(await repo.raw(["--no-ext-diff", "diff", "HEAD"]))
    );
  }

  return {
    commit,
    branch,
    tag,
    dirty,
    author_name,
    author_email,
    commit_message,
    commit_time,
    git_diff
  };
}
11665
- var error = (text) => `Error: ${text}`;
11666
- var warning = (text) => `Warning: ${text}`;
11667
- function logError2(e, verbose) {
11668
- if (!verbose) {
11669
- console.error(`${e}`);
11670
- } else {
11671
- console.error(e);
12358
+
12359
+ // src/stackutil.ts
12360
/**
 * Captures the current call stack as a list of
 * `{ functionName, fileName, lineNo }` entries, innermost frame first.
 *
 * Only stack lines of the form `at <name> (<file>:<line>:<col>)` are kept;
 * lines that do not match that shape are skipped. Returns an empty array when
 * the runtime does not expose a string `stack`.
 */
function getStackTrace() {
  const { stack } = new Error();
  if (typeof stack !== "string") {
    return [];
  }

  const framePattern = /at(.*)\((.*):(\d+):(\d+)\)/;
  const frames = [];
  // The first line is the error message itself, not a frame.
  const frameLines = stack.split("\n").slice(1);
  frameLines.forEach((line) => {
    const parts = line.match(framePattern);
    if (parts === null || parts.length !== 5) {
      return;
    }
    const [, rawName, fileName, rawLine] = parts;
    const lineNo = Number.parseInt(rawLine, 10);
    if (!Number.isNaN(lineNo)) {
      frames.push({ functionName: rawName.trim(), fileName, lineNo });
    }
  });
  return frames;
}
11686
- function ensureScoreAccumulator(results) {
11687
- const accumulator = {};
11688
- for (const result of results) {
11689
- accumulateScores(accumulator, result.scores);
11690
- }
11691
- return accumulator;
11692
- }
11693
- function buildLocalSummary(evaluator, results, precomputedScores) {
11694
- const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
11695
- return {
11696
- projectName: evaluator.projectName,
11697
- experimentName: evaluator.evalName,
11698
- scores: Object.fromEntries(
11699
- Object.entries(scoresByName).map(([name, { total, count }]) => [
11700
- name,
11701
- {
11702
- name,
11703
- score: count === 0 ? 0 : total / count,
11704
- improvements: 0,
11705
- regressions: 0
11706
- }
11707
- ])
11708
- )
11709
- };
11710
- }
11711
- function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
11712
- if (failingResults.length > 0) {
11713
- console.error(
11714
- warning(
11715
- `Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
11716
- )
11717
- );
11718
- if (jsonl) {
11719
- console.log(
11720
- JSON.stringify({
11721
- evaluatorName: evaluator.evalName,
11722
- errors: failingResults.map(
11723
- (r) => `${r.error instanceof Error ? r.error.stack : r.error}`
11724
- )
11725
- })
11726
- );
11727
- } else {
11728
- for (const result of failingResults) {
11729
- logError2(result.error, verbose);
11730
- }
12384
/**
 * Returns the first stack frame whose file lives in a different directory
 * than the innermost frame, as
 * `{ caller_functionname, caller_filename, caller_lineno }`.
 *
 * Returns `undefined` when no such frame exists (including when
 * `isomorph_default.pathDirname` is not configured, since every directory
 * then compares as `undefined`).
 */
function getCallerLocation() {
  let ownDir = void 0;
  for (const { functionName, fileName, lineNo } of getStackTrace()) {
    const frameDir = isomorph_default.pathDirname?.(fileName);
    // The first frame with a resolvable directory defines "this" directory.
    if (ownDir === void 0) {
      ownDir = frameDir;
    }
    if (frameDir !== ownDir) {
      return {
        caller_functionname: functionName,
        caller_filename: fileName,
        caller_lineno: lineNo
      };
    }
  }
  return void 0;
}
11737
- var defaultReporter = {
11738
- name: "Braintrust default reporter",
11739
- async reportEval(evaluator, result, { verbose, jsonl }) {
11740
- const { results, summary } = result;
11741
- const failingResults = results.filter(
11742
- (r) => r.error !== void 0
11743
- );
11744
- if (failingResults.length > 0) {
11745
- reportFailures(evaluator, failingResults, { verbose, jsonl });
11746
- }
11747
- if (jsonl) {
11748
- isomorph_default.writeln(JSON.stringify(summary));
11749
- } else {
11750
- isomorph_default.writeln("Experiment summary");
11751
- isomorph_default.writeln("==================");
11752
- if (summary.comparisonExperimentName) {
11753
- isomorph_default.writeln(
11754
- `${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
11755
- );
11756
- isomorph_default.writeln("");
11757
- }
11758
- const hasScores = Object.keys(summary.scores).length > 0;
11759
- const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
11760
- const hasComparison = !!summary.comparisonExperimentName;
11761
- if (hasScores || hasMetrics) {
11762
- if (hasComparison) {
11763
- isomorph_default.writeln(
11764
- "Name Value Change Improvements Regressions"
11765
- );
11766
- isomorph_default.writeln(
11767
- "----------------------------------------------------------------"
11768
- );
11769
- }
11770
- for (const score of Object.values(summary.scores)) {
11771
- const scorePercent = (score.score * 100).toFixed(2);
11772
- const scoreValue = `${scorePercent}%`;
11773
- if (hasComparison) {
11774
- let diffString = "-";
11775
- if (!isEmpty2(score.diff)) {
11776
- const diffPercent = (score.diff * 100).toFixed(2);
11777
- const diffSign = score.diff > 0 ? "+" : "";
11778
- diffString = `${diffSign}${diffPercent}%`;
11779
- }
11780
- const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
11781
- const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
11782
- isomorph_default.writeln(
11783
- `${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
11784
- );
11785
- } else {
11786
- isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
11787
- }
11788
- }
11789
- for (const metric of Object.values(summary.metrics ?? {})) {
11790
- const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
11791
- const formattedValue = metric.metric.toFixed(fractionDigits);
11792
- const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
11793
- if (hasComparison) {
11794
- let diffString = "-";
11795
- if (!isEmpty2(metric.diff)) {
11796
- const diffPercent = (metric.diff * 100).toFixed(2);
11797
- const diffSign = metric.diff > 0 ? "+" : "";
11798
- diffString = `${diffSign}${diffPercent}%`;
11799
- }
11800
- const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
11801
- const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
11802
- isomorph_default.writeln(
11803
- `${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
11804
- );
11805
- } else {
11806
- isomorph_default.writeln(
11807
- `${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
11808
- );
11809
- }
11810
- }
11811
- }
11812
- if (summary.experimentUrl) {
11813
- isomorph_default.writeln("");
11814
- isomorph_default.writeln(`View results for ${summary.experimentName}`);
11815
- isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
11816
- }
12401
+
12402
+ // src/node.ts
12403
+ var import_util9 = require("util");
12404
+ var zlib = __toESM(require("zlib"));
12405
/**
 * Installs the Node.js implementations of the platform hooks on
 * `isomorph_default` (git helpers, environment access, filesystem, paths,
 * compression, hashing), then calls `_internalSetInitialState()`.
 */
function configureNode() {
  Object.assign(isomorph_default, {
    getRepoInfo,
    getPastNAncestors,
    getEnv: (name) => process.env[name],
    getCallerLocation,
    newAsyncLocalStorage: () => new import_node_async_hooks.AsyncLocalStorage(),
    processOn: (event, handler) => {
      process.on(event, handler);
    },
    basename: path.basename,
    writeln: (text) => process.stdout.write(text + "\n"),
    pathJoin: path.join,
    pathDirname: path.dirname,
    mkdir: fs.mkdir,
    writeFile: fs.writeFile,
    readFile: fs.readFile,
    readdir: fs.readdir,
    stat: fs.stat,
    statSync: fsSync.statSync,
    utimes: fs.utimes,
    unlink: fs.unlink,
    homedir: os.homedir,
    tmpdir: os.tmpdir,
    writeFileSync: fsSync.writeFileSync,
    appendFileSync: fsSync.appendFileSync,
    readFileSync: (filename, encoding) => fsSync.readFileSync(filename, encoding),
    unlinkSync: fsSync.unlinkSync,
    openFile: fs.open,
    gzip: (0, import_util9.promisify)(zlib.gzip),
    gunzip: (0, import_util9.promisify)(zlib.gunzip),
    // Hex-encoded SHA-256 digest of `data`.
    hash: (data) => crypto.createHash("sha256").update(data).digest("hex")
  });
  _internalSetInitialState();
}
12438
+
12439
+ // src/cli/index.ts
12440
+ var import_env2 = require("@next/env");
12441
+
12442
+ // src/cli/functions/upload.ts
12443
+ var import_fs = __toESM(require("fs"));
12444
+ var import_path3 = __toESM(require("path"));
12445
+ var import_zlib = require("zlib");
12446
+ var import_v312 = require("zod/v3");
12447
+
12448
+ // src/cli/functions/infer-source.ts
12449
+ var import_source_map = require("source-map");
12450
+ var fs2 = __toESM(require("fs/promises"));
12451
+
12452
+ // src/cli/jest/nodeModulesPaths.ts
12453
+ var path2 = __toESM(require("path"));
12454
+
12455
+ // src/cli/jest/tryRealpath.ts
12456
+ var import_graceful_fs = require("graceful-fs");
12457
/**
 * Resolves `path8` to its real path via `realpathSync.native`.
 *
 * When resolution fails with `ENOENT` or `EISDIR` the input path is returned
 * unchanged; any other error is rethrown.
 */
function tryRealpath(path8) {
  try {
    return import_graceful_fs.realpathSync.native(path8);
  } catch (err) {
    const tolerated = err.code === "ENOENT" || err.code === "EISDIR";
    if (!tolerated) {
      throw err;
    }
    return path8;
  }
}
11836
12467
 
11837
- `;
12468
+ // src/cli/jest/nodeModulesPaths.ts
12469
/**
 * Builds the list of module lookup directories for `basedir`: one entry per
 * module directory name for `basedir` itself and for each of its ancestor
 * directories, followed by any extra `options.paths`.
 *
 * @param basedir Directory to start from; resolved to an absolute path.
 * @param options Optional settings:
 *   - `moduleDirectory`: iterable of module directory names (default
 *     `["node_modules"]`). Absolute entries are added once, as-is, and only
 *     for the entry equal to the resolved `basedir`.
 *   - `paths`: extra directories appended to the end of the result.
 * @returns Array of candidate directories, nearest first.
 */
function nodeModulesPaths(basedir, options) {
  const modules = options && options.moduleDirectory ? Array.from(options.moduleDirectory) : ["node_modules"];
  const basedirAbs = path2.resolve(basedir);

  // Drive-letter and UNC paths need a different join prefix than "/".
  let prefix = "/";
  if (/^([A-Za-z]:)/.test(basedirAbs)) {
    prefix = "";
  } else if (/^\\\\/.test(basedirAbs)) {
    prefix = "\\\\";
  }

  // Prefer the real (symlink-resolved) directory, falling back to the
  // absolute path if resolution throws.
  let physicalBasedir;
  try {
    physicalBasedir = tryRealpath(basedirAbs);
  } catch {
    physicalBasedir = basedirAbs;
  }

  // Walk upwards until path.parse() stops producing a new parent directory.
  const paths = [physicalBasedir];
  let parsed = path2.parse(physicalBasedir);
  while (parsed.dir !== paths[paths.length - 1]) {
    paths.push(parsed.dir);
    parsed = path2.parse(parsed.dir);
  }

  const dirs = paths.reduce((dirs2, aPath) => {
    for (const moduleDir of modules) {
      if (path2.isAbsolute(moduleDir)) {
        if (aPath === basedirAbs && moduleDir) {
          dirs2.push(moduleDir);
        }
      } else {
        dirs2.push(path2.join(prefix, aPath, moduleDir));
      }
    }
    return dirs2;
  }, []);

  // `options` is optional (see the guard on moduleDirectory above), so guard
  // here too instead of throwing a TypeError when it is omitted.
  if (options?.paths) {
    dirs.push(...options.paths);
  }
  return dirs;
}
12507
/**
 * Returns the entries of `require.resolve.paths("/")` that come after the
 * filesystem root's `node_modules` directory, or an empty array when that
 * directory is not in the list (or no list is available).
 */
function findGlobalPaths() {
  const resolvePaths = lookupResolvePaths();
  if (!resolvePaths) {
    return [];
  }
  const rootIndex = resolvePaths.indexOf(rootNodeModules());
  if (rootIndex === -1) {
    return [];
  }
  return resolvePaths.slice(rootIndex + 1);

  // `<filesystem root>/node_modules` for the current working directory.
  function rootNodeModules() {
    const { root } = path2.parse(process.cwd());
    return path2.join(root, "node_modules");
  }

  // Node's lookup path list for a module resolved from "/".
  function lookupResolvePaths() {
    rootNodeModulesPath = rootNodeModules();
    return require.resolve.paths("/");
  }
}
var rootNodeModulesPath;
var GlobalPaths = findGlobalPaths();
12518
+
12519
+ // src/cli/functions/load-module.ts
12520
+ var import_path = __toESM(require("path"));
12521
/**
 * Runs `evalFn` with `module.paths` temporarily replaced by the lookup
 * directories computed for the directory containing `inFile`, and restores
 * the previous `module.paths` afterwards, even when `evalFn` throws.
 *
 * @returns Whatever `evalFn` returns.
 */
function evalWithModuleContext(inFile, evalFn) {
  const savedPaths = [...module.paths];
  const inFileDir = import_path.default.dirname(inFile);
  try {
    module.paths = nodeModulesPaths(inFileDir, {});
    const outcome = evalFn();
    return outcome;
  } finally {
    module.paths = savedPaths;
  }
}
11942
- var warning2 = import_chalk2.default.yellow;
11943
- var fancyReporter = {
11944
- name: "Braintrust fancy reporter",
11945
- async reportEval(evaluator, result, { verbose, jsonl }) {
11946
- const { results, summary } = result;
11947
- const failingResults = results.filter(
11948
- (r) => r.error !== void 0
12530
/**
 * Evaluates `moduleText` as a CommonJS-style module body and returns a
 * shallow copy of what it registered on `globalThis._evals`.
 *
 * Before evaluation the function resets `globalThis._evals`, sets
 * `globalThis._lazy_load`, and publishes the current Braintrust state on
 * `globalThis.__inherited_braintrust_state`. The module body receives this
 * bundle's `require`/`module` plus `__filename`/`__dirname` derived from
 * `inFile`.
 *
 * NOTE(review): `moduleText` is executed via `new Function`, i.e. with full
 * process privileges. Callers must only pass code they already trust.
 */
function loadModule({
  inFile,
  moduleText
}) {
  const evaluate = () => {
    globalThis._evals = {
      functions: [],
      prompts: [],
      parameters: [],
      evaluators: {},
      reporters: {}
    };
    globalThis._lazy_load = true;
    globalThis.__inherited_braintrust_state = _internalGetGlobalState();

    const moduleFilename = inFile;
    const moduleDirname = (0, import_path.dirname)(moduleFilename);
    const runModule = new Function("require", "module", "__filename", "__dirname", moduleText);
    runModule(require, module, moduleFilename, moduleDirname);

    return { ...globalThis._evals };
  };
  return evalWithModuleContext(inFile, evaluate);
}
12555
+
12556
+ // src/cli/functions/infer-source.ts
12557
+ var import_path2 = __toESM(require("path"));
12558
/**
 * Reads the input file, the bundled output file and its source map in
 * parallel, then assembles the context used for source lookups:
 * the input file split into lines (keyed by path), the evaluated output
 * module, the output file split into lines, the source map's directory and
 * the source-map consumer.
 */
async function makeSourceMapContext({
  inFile,
  outFile,
  sourceMapFile
}) {
  // Reads and parses the source map, then wraps it in a SourceMapConsumer.
  const loadConsumer = async () => {
    const rawMap = await fs2.readFile(sourceMapFile, "utf8");
    const parsedMap = JSON.parse(rawMap);
    return new import_source_map.SourceMapConsumer(parsedMap);
  };

  const pending = [
    fs2.readFile(inFile, "utf8"),
    fs2.readFile(outFile, "utf8"),
    loadConsumer()
  ];
  const [inFileContents, outFileContents, sourceMap] = await Promise.all(pending);

  const inFiles = { [inFile]: inFileContents.split("\n") };
  const outFileModule = loadModule({ inFile, moduleText: outFileContents });
  const outFileLines = outFileContents.split("\n");
  const sourceMapDir = import_path2.default.dirname(sourceMapFile);
  return { inFiles, outFileModule, outFileLines, sourceMapDir, sourceMap };
}
12580
/**
 * Reports whether `fn`'s source text, as produced by
 * `Function.prototype.toString`, contains a `{ [native code] }` body.
 */
function isNative(fn) {
  const nativeBodyPattern = /\{\s*\[native code\]\s*\}/;
  const sourceText = Function.prototype.toString.call(fn);
  return nativeBodyPattern.test(sourceText);
}
12583
/**
 * Formats a code location for log messages: experiment locations become
 * `eval <eval_name> -> <position.type>`, everything else `task <index>`.
 */
function locationToString(location) {
  const isExperiment = location.type === "experiment";
  return isExperiment
    ? `eval ${location.eval_name} -> ${location.position.type}`
    : `task ${location.index}`;
}
12590
+ async function findCodeDefinition({
12591
+ location,
12592
+ ctx: { inFiles, outFileModule, outFileLines, sourceMapDir, sourceMap }
12593
+ }) {
12594
+ let fn = void 0;
12595
+ if (location.type === "experiment") {
12596
+ const evaluator = outFileModule.evaluators[location.eval_name]?.evaluator;
12597
+ if (!evaluator) {
12598
+ console.warn(
12599
+ warning(
12600
+ `Warning: failed to find evaluator for ${location.eval_name}. Will not display preview.`
11954
12601
  )
11955
12602
  );
11956
- if (jsonl) {
11957
- for (const result2 of failingResults) {
11958
- process.stdout.write(JSON.stringify(result2));
11959
- process.stdout.write("\n");
11960
- }
11961
- } else if (verbose) {
11962
- for (const result2 of failingResults) {
11963
- console.error(result2);
11964
- }
11965
- }
11966
- }
11967
- process.stdout.write(
11968
- jsonl ? JSON.stringify(summary) : formatExperimentSummaryFancy(summary)
11969
- );
11970
- process.stdout.write("\n");
11971
- return failingResults.length === 0;
11972
- },
11973
- async reportRun(evalReports) {
11974
- return evalReports.every((r) => r);
11975
- }
11976
- };
11977
-
11978
- // src/node.ts
11979
- var import_node_async_hooks = require("async_hooks");
11980
- var path = __toESM(require("path"));
11981
- var fs = __toESM(require("fs/promises"));
11982
- var os = __toESM(require("os"));
11983
- var fsSync = __toESM(require("fs"));
11984
- var crypto = __toESM(require("crypto"));
11985
-
11986
- // src/gitutil.ts
11987
- var import_simple_git = require("simple-git");
11988
- var COMMON_BASE_BRANCHES = ["main", "master", "develop"];
11989
- async function currentRepo() {
11990
- try {
11991
- const git = (0, import_simple_git.simpleGit)();
11992
- if (await git.checkIsRepo()) {
11993
- return git;
11994
- } else {
11995
- return null;
12603
+ return void 0;
11996
12604
  }
11997
- } catch (e) {
11998
- return null;
12605
+ fn = location.position.type === "task" ? evaluator.task : evaluator.scores[location.position.index];
12606
+ } else {
12607
+ fn = outFileModule.functions[location.index].handler;
11999
12608
  }
12000
- }
12001
- var _baseBranch = null;
12002
- async function getBaseBranch(remote = void 0) {
12003
- if (_baseBranch === null) {
12004
- const git = await currentRepo();
12005
- if (git === null) {
12006
- throw new Error("Not in a git repo");
12007
- }
12008
- const remoteName = remote ?? (await git.getRemotes())[0]?.name;
12009
- if (!remoteName) {
12010
- throw new Error("No remote found");
12011
- }
12012
- let branch = null;
12013
- const repoBranches = new Set((await git.branchLocal()).all);
12014
- const matchingBaseBranches = COMMON_BASE_BRANCHES.filter(
12015
- (b) => repoBranches.has(b)
12609
+ if (!fn) {
12610
+ console.warn(
12611
+ warning(
12612
+ `Warning: failed to find ${locationToString(location)}. Will not display preview.`
12613
+ )
12016
12614
  );
12017
- if (matchingBaseBranches.length === 1) {
12018
- branch = matchingBaseBranches[0];
12019
- } else {
12020
- try {
12021
- const remoteInfo = await git.remote(["show", remoteName]);
12022
- if (!remoteInfo) {
12023
- throw new Error(`Could not find remote ${remoteName}`);
12024
- }
12025
- const match = remoteInfo.match(/\s*HEAD branch:\s*(.*)$/m);
12026
- if (!match) {
12027
- throw new Error(`Could not find HEAD branch in remote ${remoteName}`);
12028
- }
12029
- branch = match[1];
12030
- } catch {
12031
- branch = "main";
12032
- }
12033
- }
12034
- _baseBranch = { remote: remoteName, branch };
12035
- }
12036
- return _baseBranch;
12037
- }
12038
- async function getBaseBranchAncestor(remote = void 0) {
12039
- const git = await currentRepo();
12040
- if (git === null) {
12041
- throw new Error("Not in a git repo");
12615
+ return void 0;
12042
12616
  }
12043
- const { remote: remoteName, branch: baseBranch } = await getBaseBranch(remote);
12044
- const isDirty = (await git.diffSummary()).files.length > 0;
12045
- const head = isDirty ? "HEAD" : "HEAD^";
12046
- try {
12047
- const ancestor = await git.raw([
12048
- "merge-base",
12049
- head,
12050
- `${remoteName}/${baseBranch}`
12051
- ]);
12052
- return ancestor.trim();
12053
- } catch (e) {
12617
+ const sourceCode = fn.toString();
12618
+ if (isNative(fn)) {
12054
12619
  return void 0;
12055
12620
  }
12056
- }
12057
- async function getPastNAncestors(n = 1e3, remote = void 0) {
12058
- const git = await currentRepo();
12059
- if (git === null) {
12060
- return [];
12621
+ let lineNumber = 0;
12622
+ let columnNumber = -1;
12623
+ for (const line of outFileLines) {
12624
+ const sourceDefinition = line.indexOf(sourceCode);
12625
+ if (sourceDefinition !== -1) {
12626
+ columnNumber = sourceDefinition;
12627
+ break;
12628
+ }
12629
+ lineNumber++;
12061
12630
  }
12062
- let ancestor = void 0;
12063
- try {
12064
- ancestor = await getBaseBranchAncestor(remote);
12065
- } catch (e) {
12631
+ if (columnNumber === -1) {
12066
12632
  console.warn(
12067
- "Skipping git metadata. This is likely because the repository has not been published to a remote yet.",
12068
- `${e}`
12633
+ warning(
12634
+ `Warning: failed to find code definition for ${fn.name}. Will not display preview.`
12635
+ )
12069
12636
  );
12070
- }
12071
- if (!ancestor) {
12072
- return [];
12073
- }
12074
- const commits = await git.log({ from: ancestor, to: "HEAD", maxCount: n });
12075
- return commits.all.slice(0, n).map((c) => c.hash);
12076
- }
12077
- async function attempt(fn) {
12078
- try {
12079
- return await fn();
12080
- } catch (e) {
12081
12637
  return void 0;
12082
12638
  }
12083
- }
12084
- function truncateToByteLimit(s, byteLimit = 65536) {
12085
- const encoded = new TextEncoder().encode(s);
12086
- if (encoded.length <= byteLimit) {
12087
- return s;
12088
- }
12089
- const truncated = encoded.subarray(0, byteLimit);
12090
- return new TextDecoder().decode(truncated);
12091
- }
12092
- async function getRepoInfo(settings) {
12093
- if (settings && settings.collect === "none") {
12639
+ const originalPosition = sourceMap.originalPositionFor({
12640
+ line: lineNumber + 1,
12641
+ column: columnNumber + 1
12642
+ });
12643
+ if (originalPosition.source === null || originalPosition.line === null) {
12094
12644
  return void 0;
12095
12645
  }
12096
- const repo = await repoInfo();
12097
- if (!repo || !settings || settings.collect === "all") {
12098
- return repo;
12646
+ if (!inFiles[originalPosition.source]) {
12647
+ const originalFile = import_path2.default.join(sourceMapDir, originalPosition.source);
12648
+ inFiles[originalPosition.source] = (await fs2.readFile(originalFile, "utf-8")).split("\n");
12099
12649
  }
12100
- let sanitized = {};
12101
- settings.fields?.forEach((field) => {
12102
- sanitized = { ...sanitized, [field]: repo[field] };
12103
- });
12104
- return sanitized;
12105
- }
12106
- async function repoInfo() {
12107
- const git = await currentRepo();
12108
- if (git === null) {
12650
+ const originalLines = inFiles[originalPosition.source];
12651
+ const ts = await getTsModule();
12652
+ if (!ts) {
12109
12653
  return void 0;
12110
12654
  }
12111
- let commit = void 0;
12112
- let commit_message = void 0;
12113
- let commit_time = void 0;
12114
- let author_name = void 0;
12115
- let author_email = void 0;
12116
- let tag = void 0;
12117
- let branch = void 0;
12118
- let git_diff = void 0;
12119
- const dirty = (await git.diffSummary()).files.length > 0;
12120
- commit = await attempt(async () => await git.revparse(["HEAD"]));
12121
- commit_message = await attempt(
12122
- async () => (await git.raw(["log", "-1", "--pretty=%B"])).trim()
12123
- );
12124
- commit_time = await attempt(
12125
- async () => (await git.raw(["log", "-1", "--pretty=%cI"])).trim()
12126
- );
12127
- author_name = await attempt(
12128
- async () => (await git.raw(["log", "-1", "--pretty=%aN"])).trim()
12129
- );
12130
- author_email = await attempt(
12131
- async () => (await git.raw(["log", "-1", "--pretty=%aE"])).trim()
12132
- );
12133
- tag = await attempt(
12134
- async () => (await git.raw(["describe", "--tags", "--exact-match", "--always"])).trim()
12655
+ const sourceFile = ts.createSourceFile(
12656
+ originalPosition.source,
12657
+ originalLines.join("\n"),
12658
+ ts.ScriptTarget.Latest,
12659
+ true
12135
12660
  );
12136
- branch = await attempt(
12137
- async () => (await git.raw(["rev-parse", "--abbrev-ref", "HEAD"])).trim()
12661
+ let functionNode = void 0;
12662
+ const targetPosition = ts.getPositionOfLineAndCharacter(
12663
+ sourceFile,
12664
+ originalPosition.line - 1,
12665
+ originalPosition.column || 0
12138
12666
  );
12139
- if (dirty) {
12140
- git_diff = await attempt(
12141
- async () => truncateToByteLimit(await git.raw(["--no-ext-diff", "diff", "HEAD"]))
12142
- );
12143
- }
12144
- return {
12145
- commit,
12146
- branch,
12147
- tag,
12148
- dirty,
12149
- author_name,
12150
- author_email,
12151
- commit_message,
12152
- commit_time,
12153
- git_diff
12154
- };
12155
- }
12156
-
12157
- // src/stackutil.ts
12158
- function getStackTrace() {
12159
- const trace = new Error().stack;
12160
- if (typeof trace !== "string") {
12161
- return [];
12162
- }
12163
- const traceLines = trace.split("\n");
12164
- const out = [];
12165
- const stackFrameRegex = /at(.*)\((.*):(\d+):(\d+)\)/;
12166
- for (const traceLine of traceLines.slice(1)) {
12167
- const matches = traceLine.match(stackFrameRegex);
12168
- if (matches === null || matches.length !== 5) {
12169
- continue;
12170
- }
12171
- const entry = {
12172
- functionName: matches[1].trim(),
12173
- fileName: matches[2],
12174
- lineNo: parseInt(matches[3])
12175
- };
12176
- if (!isNaN(entry.lineNo)) {
12177
- out.push(entry);
12667
+ ts.forEachChild(sourceFile, function visit(node) {
12668
+ if (node.pos <= targetPosition && targetPosition < node.end) {
12669
+ if (ts.isFunctionDeclaration(node) || ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
12670
+ functionNode = node;
12671
+ } else {
12672
+ ts.forEachChild(node, visit);
12673
+ }
12178
12674
  }
12675
+ });
12676
+ if (!functionNode) {
12677
+ return void 0;
12179
12678
  }
12180
- return out;
12679
+ const printer = ts.createPrinter();
12680
+ const functionDefinition = printer.printNode(
12681
+ ts.EmitHint.Unspecified,
12682
+ functionNode,
12683
+ sourceFile
12684
+ );
12685
+ return functionDefinition;
12181
12686
  }
12182
- function getCallerLocation() {
12183
- let thisDir = void 0;
12184
- const entries = getStackTrace();
12185
- for (const frame of entries) {
12186
- if (thisDir === void 0) {
12187
- thisDir = isomorph_default.pathDirname?.(frame.fileName);
12188
- }
12189
- if (isomorph_default.pathDirname?.(frame.fileName) !== thisDir) {
12190
- return {
12191
- caller_functionname: frame.functionName,
12192
- caller_filename: frame.fileName,
12193
- caller_lineno: frame.lineNo
12194
- };
12687
+ var tsModule = void 0;
12688
+ async function getTsModule() {
12689
+ if (!tsModule) {
12690
+ try {
12691
+ tsModule = require("typescript");
12692
+ } catch {
12693
+ console.warn(
12694
+ warning(
12695
+ "Failed to load TypeScript module. Will not use TypeScript to derive preview."
12696
+ )
12697
+ );
12195
12698
  }
12196
12699
  }
12197
- return void 0;
12700
+ return tsModule;
12198
12701
  }
12199
12702
 
12200
- // src/node.ts
12201
- var import_util10 = require("util");
12202
- var zlib = __toESM(require("zlib"));
12203
- function configureNode() {
12204
- isomorph_default.getRepoInfo = getRepoInfo;
12205
- isomorph_default.getPastNAncestors = getPastNAncestors;
12206
- isomorph_default.getEnv = (name) => process.env[name];
12207
- isomorph_default.getCallerLocation = getCallerLocation;
12208
- isomorph_default.newAsyncLocalStorage = () => new import_node_async_hooks.AsyncLocalStorage();
12209
- isomorph_default.processOn = (event, handler) => {
12210
- process.on(event, handler);
12211
- };
12212
- isomorph_default.basename = path.basename;
12213
- isomorph_default.writeln = (text) => process.stdout.write(text + "\n");
12214
- isomorph_default.pathJoin = path.join;
12215
- isomorph_default.pathDirname = path.dirname;
12216
- isomorph_default.mkdir = fs.mkdir;
12217
- isomorph_default.writeFile = fs.writeFile;
12218
- isomorph_default.readFile = fs.readFile;
12219
- isomorph_default.readdir = fs.readdir;
12220
- isomorph_default.stat = fs.stat;
12221
- isomorph_default.statSync = fsSync.statSync;
12222
- isomorph_default.utimes = fs.utimes;
12223
- isomorph_default.unlink = fs.unlink;
12224
- isomorph_default.homedir = os.homedir;
12225
- isomorph_default.tmpdir = os.tmpdir;
12226
- isomorph_default.writeFileSync = fsSync.writeFileSync;
12227
- isomorph_default.appendFileSync = fsSync.appendFileSync;
12228
- isomorph_default.readFileSync = (filename, encoding) => fsSync.readFileSync(filename, encoding);
12229
- isomorph_default.unlinkSync = fsSync.unlinkSync;
12230
- isomorph_default.openFile = fs.open;
12231
- isomorph_default.gzip = (0, import_util10.promisify)(zlib.gzip);
12232
- isomorph_default.gunzip = (0, import_util10.promisify)(zlib.gunzip);
12233
- isomorph_default.hash = (data) => crypto.createHash("sha256").update(data).digest("hex");
12234
- _internalSetInitialState();
12703
+ // src/zod/utils.ts
12704
+ var import_zod_to_json_schema = require("zod-to-json-schema");
12705
+ var z42 = __toESM(require("zod/v4"));
12706
+ function isZodV4(zodObject) {
12707
+ return typeof zodObject === "object" && zodObject !== null && "_zod" in zodObject && zodObject._zod !== void 0;
12708
+ }
12709
+ function zodToJsonSchema(schema) {
12710
+ if (isZodV4(schema)) {
12711
+ return z42.toJSONSchema(schema, {
12712
+ target: "draft-7"
12713
+ });
12714
+ }
12715
+ return (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
12235
12716
  }
12236
-
12237
- // src/cli/index.ts
12238
- var import_env2 = require("@next/env");
12239
12717
 
12240
12718
  // src/cli/functions/upload.ts
12241
- var import_fs = __toESM(require("fs"));
12242
- var import_path3 = __toESM(require("path"));
12243
- var import_zlib = require("zlib");
12244
- var import_v311 = require("zod/v3");
12245
-
12246
- // src/cli/functions/infer-source.ts
12247
- var import_source_map = require("source-map");
12248
- var fs2 = __toESM(require("fs/promises"));
12249
-
12250
- // src/cli/jest/nodeModulesPaths.ts
12251
- var path2 = __toESM(require("path"));
12719
+ var import_pluralize2 = __toESM(require("pluralize"));
12252
12720
 
12253
- // src/cli/jest/tryRealpath.ts
12254
- var import_graceful_fs = require("graceful-fs");
12255
- function tryRealpath(path8) {
12256
- try {
12257
- path8 = import_graceful_fs.realpathSync.native(path8);
12258
- } catch (error2) {
12259
- if (error2.code !== "ENOENT" && error2.code !== "EISDIR") {
12260
- throw error2;
12721
+ // src/framework2.ts
12722
+ var import_v311 = require("zod/v3");
12723
+ var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
12724
+ var ProjectBuilder = class {
12725
+ create(opts) {
12726
+ return new Project2(opts);
12727
+ }
12728
+ };
12729
+ var projects = new ProjectBuilder();
12730
+ var Project2 = class {
12731
+ name;
12732
+ id;
12733
+ tools;
12734
+ prompts;
12735
+ parameters;
12736
+ scorers;
12737
+ _publishableCodeFunctions = [];
12738
+ _publishablePrompts = [];
12739
+ _publishableParameters = [];
12740
+ constructor(args) {
12741
+ _initializeSpanContext();
12742
+ this.name = "name" in args ? args.name : void 0;
12743
+ this.id = "id" in args ? args.id : void 0;
12744
+ this.tools = new ToolBuilder(this);
12745
+ this.prompts = new PromptBuilder(this);
12746
+ this.parameters = new ParametersBuilder(this);
12747
+ this.scorers = new ScorerBuilder(this);
12748
+ }
12749
+ addPrompt(prompt) {
12750
+ this._publishablePrompts.push(prompt);
12751
+ if (globalThis._lazy_load) {
12752
+ globalThis._evals.prompts.push(prompt);
12753
+ }
12754
+ }
12755
+ addParameters(parameters) {
12756
+ this._publishableParameters.push(parameters);
12757
+ if (globalThis._lazy_load) {
12758
+ if (globalThis._evals.parameters == null)
12759
+ globalThis._evals.parameters = [];
12760
+ globalThis._evals.parameters.push(parameters);
12761
+ }
12762
+ }
12763
+ addCodeFunction(fn) {
12764
+ this._publishableCodeFunctions.push(fn);
12765
+ if (globalThis._lazy_load) {
12766
+ globalThis._evals.functions.push(fn);
12767
+ }
12768
+ }
12769
+ async publish() {
12770
+ if (globalThis._lazy_load) {
12771
+ console.warn("publish() is a no-op when running `braintrust push`.");
12772
+ return;
12773
+ }
12774
+ await login();
12775
+ const projectMap = new ProjectNameIdMap();
12776
+ const functionDefinitions = [];
12777
+ if (this._publishableCodeFunctions.length > 0) {
12778
+ console.warn(
12779
+ "Code functions cannot be published directly. Use `braintrust push` instead."
12780
+ );
12781
+ }
12782
+ if (this._publishablePrompts.length > 0) {
12783
+ for (const prompt of this._publishablePrompts) {
12784
+ const functionDefinition = await prompt.toFunctionDefinition(projectMap);
12785
+ functionDefinitions.push(functionDefinition);
12786
+ }
12787
+ }
12788
+ await _internalGetGlobalState().apiConn().post_json("insert-functions", {
12789
+ functions: functionDefinitions
12790
+ });
12791
+ }
12792
+ };
12793
+ var ToolBuilder = class {
12794
+ constructor(project) {
12795
+ this.project = project;
12796
+ }
12797
+ taskCounter = 0;
12798
+ // This type definition is just a catch all so that the implementation can be
12799
+ // less specific than the two more specific declarations above.
12800
+ create(opts) {
12801
+ this.taskCounter++;
12802
+ opts = opts ?? {};
12803
+ const { handler, name, slug, parameters, returns, ...rest } = opts;
12804
+ let resolvedName = name ?? handler.name;
12805
+ if (resolvedName.trim().length === 0) {
12806
+ resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
12807
+ }
12808
+ const tool = new CodeFunction(this.project, {
12809
+ handler,
12810
+ name: resolvedName,
12811
+ slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
12812
+ type: "tool",
12813
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
12814
+ parameters,
12815
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
12816
+ returns,
12817
+ ...rest
12818
+ });
12819
+ this.project.addCodeFunction(tool);
12820
+ return tool;
12821
+ }
12822
+ };
12823
+ var ScorerBuilder = class {
12824
+ constructor(project) {
12825
+ this.project = project;
12826
+ }
12827
+ taskCounter = 0;
12828
+ create(opts) {
12829
+ this.taskCounter++;
12830
+ let resolvedName = opts.name;
12831
+ if (!resolvedName && "handler" in opts) {
12832
+ resolvedName = opts.handler.name;
12833
+ }
12834
+ if (!resolvedName || resolvedName.trim().length === 0) {
12835
+ resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
12836
+ }
12837
+ const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
12838
+ if ("handler" in opts) {
12839
+ const scorer = new CodeFunction(this.project, {
12840
+ ...opts,
12841
+ name: resolvedName,
12842
+ slug,
12843
+ type: "scorer"
12844
+ });
12845
+ this.project.addCodeFunction(scorer);
12846
+ } else {
12847
+ const promptBlock = "messages" in opts ? {
12848
+ type: "chat",
12849
+ messages: opts.messages
12850
+ } : {
12851
+ type: "completion",
12852
+ content: opts.prompt
12853
+ };
12854
+ const promptData = {
12855
+ prompt: promptBlock,
12856
+ options: {
12857
+ model: opts.model,
12858
+ params: opts.params
12859
+ },
12860
+ parser: {
12861
+ type: "llm_classifier",
12862
+ use_cot: opts.useCot,
12863
+ choice_scores: opts.choiceScores
12864
+ }
12865
+ };
12866
+ const codePrompt = new CodePrompt(
12867
+ this.project,
12868
+ promptData,
12869
+ [],
12870
+ {
12871
+ ...opts,
12872
+ name: resolvedName,
12873
+ slug
12874
+ },
12875
+ "scorer"
12876
+ );
12877
+ this.project.addPrompt(codePrompt);
12261
12878
  }
12262
12879
  }
12263
- return path8;
12264
- }
12265
-
12266
- // src/cli/jest/nodeModulesPaths.ts
12267
- function nodeModulesPaths(basedir, options) {
12268
- const modules = options && options.moduleDirectory ? Array.from(options.moduleDirectory) : ["node_modules"];
12269
- const basedirAbs = path2.resolve(basedir);
12270
- let prefix = "/";
12271
- if (/^([A-Za-z]:)/.test(basedirAbs)) {
12272
- prefix = "";
12273
- } else if (/^\\\\/.test(basedirAbs)) {
12274
- prefix = "\\\\";
12275
- }
12276
- let physicalBasedir;
12277
- try {
12278
- physicalBasedir = tryRealpath(basedirAbs);
12279
- } catch {
12280
- physicalBasedir = basedirAbs;
12281
- }
12282
- const paths = [physicalBasedir];
12283
- let parsed = path2.parse(physicalBasedir);
12284
- while (parsed.dir !== paths[paths.length - 1]) {
12285
- paths.push(parsed.dir);
12286
- parsed = path2.parse(parsed.dir);
12287
- }
12288
- const dirs = paths.reduce((dirs2, aPath) => {
12289
- for (const moduleDir of modules) {
12290
- if (path2.isAbsolute(moduleDir)) {
12291
- if (aPath === basedirAbs && moduleDir) {
12292
- dirs2.push(moduleDir);
12293
- }
12294
- } else {
12295
- dirs2.push(path2.join(prefix, aPath, moduleDir));
12296
- }
12880
+ };
12881
+ var CodeFunction = class {
12882
+ constructor(project, opts) {
12883
+ this.project = project;
12884
+ this.handler = opts.handler;
12885
+ this.name = opts.name;
12886
+ this.slug = opts.slug;
12887
+ this.description = opts.description;
12888
+ this.type = opts.type;
12889
+ this.ifExists = opts.ifExists;
12890
+ this.metadata = opts.metadata;
12891
+ this.parameters = opts.parameters;
12892
+ this.returns = opts.returns;
12893
+ if (this.returns && !this.parameters) {
12894
+ throw new Error("parameters are required if return type is defined");
12297
12895
  }
12298
- return dirs2;
12299
- }, []);
12300
- if (options.paths) {
12301
- dirs.push(...options.paths);
12302
12896
  }
12303
- return dirs;
12304
- }
12305
- function findGlobalPaths() {
12306
- const { root } = path2.parse(process.cwd());
12307
- const globalPath = path2.join(root, "node_modules");
12308
- const resolvePaths = require.resolve.paths("/");
12309
- if (resolvePaths) {
12310
- const rootIndex = resolvePaths.indexOf(globalPath);
12311
- return rootIndex > -1 ? resolvePaths.slice(rootIndex + 1) : [];
12897
+ handler;
12898
+ name;
12899
+ slug;
12900
+ type;
12901
+ description;
12902
+ parameters;
12903
+ returns;
12904
+ ifExists;
12905
+ metadata;
12906
+ key() {
12907
+ return JSON.stringify([
12908
+ this.project.id ?? "",
12909
+ this.project.name ?? "",
12910
+ this.slug
12911
+ ]);
12312
12912
  }
12313
- return [];
12314
- }
12315
- var GlobalPaths = findGlobalPaths();
12316
-
12317
- // src/cli/functions/load-module.ts
12318
- var import_path = __toESM(require("path"));
12319
- function evalWithModuleContext(inFile, evalFn) {
12320
- const modulePaths = [...module.paths];
12321
- try {
12322
- module.paths = nodeModulesPaths(import_path.default.dirname(inFile), {});
12323
- return evalFn();
12324
- } finally {
12325
- module.paths = modulePaths;
12913
+ };
12914
+ var CodePrompt = class {
12915
+ project;
12916
+ name;
12917
+ slug;
12918
+ prompt;
12919
+ ifExists;
12920
+ description;
12921
+ id;
12922
+ functionType;
12923
+ toolFunctions;
12924
+ metadata;
12925
+ constructor(project, prompt, toolFunctions, opts, functionType) {
12926
+ this.project = project;
12927
+ this.name = opts.name;
12928
+ this.slug = opts.slug;
12929
+ this.prompt = prompt;
12930
+ this.toolFunctions = toolFunctions;
12931
+ this.ifExists = opts.ifExists;
12932
+ this.description = opts.description;
12933
+ this.id = opts.id;
12934
+ this.functionType = functionType;
12935
+ this.metadata = opts.metadata;
12326
12936
  }
12327
- }
12328
- function loadModule({
12329
- inFile,
12330
- moduleText
12331
- }) {
12332
- return evalWithModuleContext(inFile, () => {
12333
- globalThis._evals = {
12334
- functions: [],
12335
- prompts: [],
12336
- evaluators: {},
12337
- reporters: {}
12937
+ async toFunctionDefinition(projectNameToId) {
12938
+ const prompt_data = {
12939
+ ...this.prompt
12338
12940
  };
12339
- globalThis._lazy_load = true;
12340
- globalThis.__inherited_braintrust_state = _internalGetGlobalState();
12341
- const __filename2 = inFile;
12342
- const __dirname = (0, import_path.dirname)(__filename2);
12343
- new Function("require", "module", "__filename", "__dirname", moduleText)(
12344
- require,
12345
- module,
12346
- __filename2,
12347
- __dirname
12348
- );
12349
- return { ...globalThis._evals };
12350
- });
12351
- }
12352
-
12353
- // src/cli/functions/infer-source.ts
12354
- var import_path2 = __toESM(require("path"));
12355
- async function makeSourceMapContext({
12356
- inFile,
12357
- outFile,
12358
- sourceMapFile
12359
- }) {
12360
- const [inFileContents, outFileContents, sourceMap] = await Promise.all([
12361
- fs2.readFile(inFile, "utf8"),
12362
- fs2.readFile(outFile, "utf8"),
12363
- (async () => {
12364
- const sourceMap2 = await fs2.readFile(sourceMapFile, "utf8");
12365
- const sourceMapJSON = JSON.parse(sourceMap2);
12366
- return new import_source_map.SourceMapConsumer(sourceMapJSON);
12367
- })()
12368
- ]);
12369
- return {
12370
- inFiles: { [inFile]: inFileContents.split("\n") },
12371
- outFileModule: loadModule({ inFile, moduleText: outFileContents }),
12372
- outFileLines: outFileContents.split("\n"),
12373
- sourceMapDir: import_path2.default.dirname(sourceMapFile),
12374
- sourceMap
12375
- };
12376
- }
12377
- function isNative(fn) {
12378
- return /\{\s*\[native code\]\s*\}/.test(Function.prototype.toString.call(fn));
12379
- }
12380
- function locationToString(location) {
12381
- if (location.type === "experiment") {
12382
- return `eval ${location.eval_name} -> ${location.position.type}`;
12383
- } else {
12384
- return `task ${location.index}`;
12385
- }
12386
- }
12387
- async function findCodeDefinition({
12388
- location,
12389
- ctx: { inFiles, outFileModule, outFileLines, sourceMapDir, sourceMap }
12390
- }) {
12391
- let fn = void 0;
12392
- if (location.type === "experiment") {
12393
- const evaluator = outFileModule.evaluators[location.eval_name]?.evaluator;
12394
- if (!evaluator) {
12395
- console.warn(
12396
- warning(
12397
- `Warning: failed to find evaluator for ${location.eval_name}. Will not display preview.`
12398
- )
12941
+ if (this.toolFunctions.length > 0) {
12942
+ const resolvableToolFunctions = await Promise.all(
12943
+ this.toolFunctions.map(async (fn) => {
12944
+ if ("slug" in fn) {
12945
+ return {
12946
+ type: "slug",
12947
+ project_id: await projectNameToId.resolve(fn.project),
12948
+ slug: fn.slug
12949
+ };
12950
+ } else {
12951
+ return fn;
12952
+ }
12953
+ })
12399
12954
  );
12400
- return void 0;
12955
+ prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
12956
+ resolvableToolFunctions;
12401
12957
  }
12402
- fn = location.position.type === "task" ? evaluator.task : evaluator.scores[location.position.index];
12403
- } else {
12404
- fn = outFileModule.functions[location.index].handler;
12405
- }
12406
- if (!fn) {
12407
- console.warn(
12408
- warning(
12409
- `Warning: failed to find ${locationToString(location)}. Will not display preview.`
12410
- )
12411
- );
12412
- return void 0;
12958
+ return {
12959
+ project_id: await projectNameToId.resolve(this.project),
12960
+ name: this.name,
12961
+ slug: this.slug,
12962
+ description: this.description ?? "",
12963
+ function_data: {
12964
+ type: "prompt"
12965
+ },
12966
+ function_type: this.functionType,
12967
+ prompt_data,
12968
+ if_exists: this.ifExists,
12969
+ metadata: this.metadata
12970
+ };
12413
12971
  }
12414
- const sourceCode = fn.toString();
12415
- if (isNative(fn)) {
12416
- return void 0;
12972
+ };
12973
+ var PromptBuilder = class {
12974
+ constructor(project) {
12975
+ this.project = project;
12417
12976
  }
12418
- let lineNumber = 0;
12419
- let columnNumber = -1;
12420
- for (const line of outFileLines) {
12421
- const sourceDefinition = line.indexOf(sourceCode);
12422
- if (sourceDefinition !== -1) {
12423
- columnNumber = sourceDefinition;
12424
- break;
12977
+ create(opts) {
12978
+ const toolFunctions = [];
12979
+ const rawTools = [];
12980
+ for (const tool of opts.tools ?? []) {
12981
+ if (tool instanceof CodeFunction) {
12982
+ toolFunctions.push(tool);
12983
+ } else if ("type" in tool && !("function" in tool)) {
12984
+ toolFunctions.push(tool);
12985
+ } else {
12986
+ rawTools.push(tool);
12987
+ }
12425
12988
  }
12426
- lineNumber++;
12427
- }
12428
- if (columnNumber === -1) {
12429
- console.warn(
12430
- warning(
12431
- `Warning: failed to find code definition for ${fn.name}. Will not display preview.`
12432
- )
12989
+ const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
12990
+ const promptData = promptDefinitionToPromptData(opts, rawTools);
12991
+ const promptRow = {
12992
+ id: opts.id,
12993
+ _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
12994
+ name: opts.name,
12995
+ slug,
12996
+ prompt_data: promptData,
12997
+ ...this.project.id !== void 0 ? { project_id: this.project.id } : {}
12998
+ };
12999
+ const prompt = new Prompt2(
13000
+ promptRow,
13001
+ {},
13002
+ // It doesn't make sense to specify defaults here.
13003
+ opts.noTrace ?? false
12433
13004
  );
12434
- return void 0;
13005
+ const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
13006
+ ...opts,
13007
+ slug
13008
+ });
13009
+ this.project.addPrompt(codePrompt);
13010
+ return prompt;
12435
13011
  }
12436
- const originalPosition = sourceMap.originalPositionFor({
12437
- line: lineNumber + 1,
12438
- column: columnNumber + 1
12439
- });
12440
- if (originalPosition.source === null || originalPosition.line === null) {
12441
- return void 0;
13012
+ };
13013
+ var CodeParameters = class {
13014
+ project;
13015
+ name;
13016
+ slug;
13017
+ description;
13018
+ schema;
13019
+ ifExists;
13020
+ metadata;
13021
+ constructor(project, opts) {
13022
+ this.project = project;
13023
+ this.name = opts.name;
13024
+ this.slug = opts.slug;
13025
+ this.description = opts.description;
13026
+ this.schema = opts.schema;
13027
+ this.ifExists = opts.ifExists;
13028
+ this.metadata = opts.metadata;
12442
13029
  }
12443
- if (!inFiles[originalPosition.source]) {
12444
- const originalFile = import_path2.default.join(sourceMapDir, originalPosition.source);
12445
- inFiles[originalPosition.source] = (await fs2.readFile(originalFile, "utf-8")).split("\n");
13030
+ async toFunctionDefinition(projectNameToId) {
13031
+ return {
13032
+ project_id: await projectNameToId.resolve(this.project),
13033
+ name: this.name,
13034
+ slug: this.slug,
13035
+ description: this.description ?? "",
13036
+ function_type: "parameters",
13037
+ function_data: {
13038
+ type: "parameters",
13039
+ data: {},
13040
+ __schema: serializeEvalParameterstoParametersSchema(this.schema)
13041
+ },
13042
+ if_exists: this.ifExists,
13043
+ metadata: this.metadata
13044
+ };
12446
13045
  }
12447
- const originalLines = inFiles[originalPosition.source];
12448
- const ts = await getTsModule();
12449
- if (!ts) {
12450
- return void 0;
13046
+ };
13047
+ var ParametersBuilder = class {
13048
+ constructor(project) {
13049
+ this.project = project;
12451
13050
  }
12452
- const sourceFile = ts.createSourceFile(
12453
- originalPosition.source,
12454
- originalLines.join("\n"),
12455
- ts.ScriptTarget.Latest,
12456
- true
12457
- );
12458
- let functionNode = void 0;
12459
- const targetPosition = ts.getPositionOfLineAndCharacter(
12460
- sourceFile,
12461
- originalPosition.line - 1,
12462
- originalPosition.column || 0
12463
- );
12464
- ts.forEachChild(sourceFile, function visit(node) {
12465
- if (node.pos <= targetPosition && targetPosition < node.end) {
12466
- if (ts.isFunctionDeclaration(node) || ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
12467
- functionNode = node;
13051
+ create(opts) {
13052
+ const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
13053
+ const codeParameters = new CodeParameters(this.project, {
13054
+ name: opts.name,
13055
+ slug,
13056
+ description: opts.description,
13057
+ schema: opts.schema,
13058
+ ifExists: opts.ifExists,
13059
+ metadata: opts.metadata
13060
+ });
13061
+ this.project.addParameters(codeParameters);
13062
+ return opts.schema;
13063
+ }
13064
+ };
13065
+ function serializeEvalParametersToStaticParametersSchema(parameters) {
13066
+ return Object.fromEntries(
13067
+ Object.entries(parameters).map(([name, value]) => {
13068
+ if ("type" in value && value.type === "prompt") {
13069
+ return [
13070
+ name,
13071
+ {
13072
+ type: "prompt",
13073
+ default: value.default ? promptDefinitionToPromptData(value.default) : void 0,
13074
+ description: value.description
13075
+ }
13076
+ ];
12468
13077
  } else {
12469
- ts.forEachChild(node, visit);
13078
+ const schemaObj = zodToJsonSchema(value);
13079
+ return [
13080
+ name,
13081
+ {
13082
+ type: "data",
13083
+ schema: schemaObj,
13084
+ default: schemaObj.default,
13085
+ description: schemaObj.description
13086
+ }
13087
+ ];
12470
13088
  }
12471
- }
12472
- });
12473
- if (!functionNode) {
12474
- return void 0;
12475
- }
12476
- const printer = ts.createPrinter();
12477
- const functionDefinition = printer.printNode(
12478
- ts.EmitHint.Unspecified,
12479
- functionNode,
12480
- sourceFile
13089
+ })
12481
13090
  );
12482
- return functionDefinition;
12483
13091
  }
12484
- var tsModule = void 0;
12485
- async function getTsModule() {
12486
- if (!tsModule) {
12487
- try {
12488
- tsModule = require("typescript");
12489
- } catch {
12490
- console.warn(
12491
- warning(
12492
- "Failed to load TypeScript module. Will not use TypeScript to derive preview."
12493
- )
12494
- );
13092
+ function serializeEvalParameterstoParametersSchema(parameters) {
13093
+ const properties = {};
13094
+ const required = [];
13095
+ for (const [name, value] of Object.entries(parameters)) {
13096
+ if ("type" in value && value.type === "prompt") {
13097
+ const defaultPromptData = value.default ? promptDefinitionToPromptData(value.default) : void 0;
13098
+ properties[name] = {
13099
+ type: "object",
13100
+ "x-bt-type": "prompt",
13101
+ ...value.description ? { description: value.description } : {},
13102
+ ...defaultPromptData ? { default: defaultPromptData } : {}
13103
+ };
13104
+ if (!defaultPromptData) {
13105
+ required.push(name);
13106
+ }
13107
+ } else {
13108
+ const schemaObj = zodToJsonSchema(value);
13109
+ properties[name] = schemaObj;
13110
+ if (!("default" in schemaObj)) {
13111
+ required.push(name);
13112
+ }
12495
13113
  }
12496
13114
  }
12497
- return tsModule;
12498
- }
12499
-
12500
- // src/zod/utils.ts
12501
- var import_zod_to_json_schema = require("zod-to-json-schema");
12502
- var z42 = __toESM(require("zod/v4"));
12503
- function isZodV4(zodObject) {
12504
- return typeof zodObject === "object" && zodObject !== null && "_zod" in zodObject && zodObject._zod !== void 0;
13115
+ return {
13116
+ type: "object",
13117
+ properties,
13118
+ ...required.length > 0 ? { required } : {},
13119
+ additionalProperties: true
13120
+ };
12505
13121
  }
12506
- function zodToJsonSchema(schema) {
12507
- if (isZodV4(schema)) {
12508
- return z42.toJSONSchema(schema, {
12509
- target: "draft-7"
12510
- });
13122
+ function serializeRemoteEvalParametersContainer(parameters) {
13123
+ if (RemoteEvalParameters.isParameters(parameters)) {
13124
+ return {
13125
+ type: "braintrust.parameters",
13126
+ // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
13127
+ schema: parameters.schema,
13128
+ source: {
13129
+ parametersId: parameters.id,
13130
+ slug: parameters.slug,
13131
+ name: parameters.name,
13132
+ projectId: parameters.projectId,
13133
+ version: parameters.version
13134
+ }
13135
+ };
12511
13136
  }
12512
- return (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
13137
+ return {
13138
+ type: "braintrust.staticParameters",
13139
+ schema: serializeEvalParametersToStaticParametersSchema(parameters),
13140
+ source: null
13141
+ };
12513
13142
  }
13143
+ var ProjectNameIdMap = class {
13144
+ nameToId = {};
13145
+ idToName = {};
13146
+ async getId(projectName) {
13147
+ if (!(projectName in this.nameToId)) {
13148
+ const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
13149
+ project_name: projectName
13150
+ });
13151
+ const result = import_v311.z.object({
13152
+ project: Project
13153
+ }).parse(response);
13154
+ const projectId = result.project.id;
13155
+ this.nameToId[projectName] = projectId;
13156
+ this.idToName[projectId] = projectName;
13157
+ }
13158
+ return this.nameToId[projectName];
13159
+ }
13160
+ async getName(projectId) {
13161
+ if (!(projectId in this.idToName)) {
13162
+ const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
13163
+ id: projectId
13164
+ });
13165
+ const result = import_v311.z.array(Project).nonempty().parse(response);
13166
+ const projectName = result[0].name;
13167
+ this.idToName[projectId] = projectName;
13168
+ this.nameToId[projectName] = projectId;
13169
+ }
13170
+ return this.idToName[projectId];
13171
+ }
13172
+ async resolve(project) {
13173
+ if (project.id) {
13174
+ return project.id;
13175
+ }
13176
+ return this.getId(project.name);
13177
+ }
13178
+ };
12514
13179
 
12515
13180
  // src/cli/functions/upload.ts
12516
- var import_pluralize2 = __toESM(require("pluralize"));
12517
- var pathInfoSchema = import_v311.z.strictObject({
12518
- url: import_v311.z.string(),
12519
- bundleId: import_v311.z.string()
13181
+ var pathInfoSchema = import_v312.z.strictObject({
13182
+ url: import_v312.z.string(),
13183
+ bundleId: import_v312.z.string()
12520
13184
  }).strip();
12521
13185
  async function uploadHandleBundles({
12522
13186
  buildResults,
@@ -12563,6 +13227,11 @@ async function uploadHandleBundles({
12563
13227
  for (const prompt of result.evaluator.prompts) {
12564
13228
  prompts.push(await prompt.toFunctionDefinition(projectNameToId));
12565
13229
  }
13230
+ if (result.evaluator.parameters != null) {
13231
+ for (const param of result.evaluator.parameters) {
13232
+ prompts.push(await param.toFunctionDefinition(projectNameToId));
13233
+ }
13234
+ }
12566
13235
  }
12567
13236
  for (const evaluator of Object.values(result.evaluator.evaluators)) {
12568
13237
  const experiment = evalToExperiment?.[sourceFile]?.[evaluator.evaluator.evalName];
@@ -12858,7 +13527,7 @@ async function bundleCommand(args) {
12858
13527
  }
12859
13528
 
12860
13529
  // src/cli/util/pull.ts
12861
- var import_v312 = require("zod/v3");
13530
+ var import_v313 = require("zod/v3");
12862
13531
  var import_promises = __toESM(require("fs/promises"));
12863
13532
  var import_util13 = __toESM(require("util"));
12864
13533
  var import_path4 = __toESM(require("path"));
@@ -12873,7 +13542,7 @@ async function pullCommand(args) {
12873
13542
  ...args.id ? { ids: [args.id] } : {},
12874
13543
  ...args.version ? { version: loadPrettyXact(args.version) } : {}
12875
13544
  });
12876
- const functionObjects = import_v312.z.object({ objects: import_v312.z.array(import_v312.z.unknown()) }).parse(functions);
13545
+ const functionObjects = import_v313.z.object({ objects: import_v313.z.array(import_v313.z.unknown()) }).parse(functions);
12877
13546
  const projectNameToFunctions = {};
12878
13547
  const projectNameIdMap = new ProjectNameIdMap();
12879
13548
  for (const rawFunc of functionObjects.objects) {
@@ -13036,7 +13705,7 @@ function makeFunctionDefinition({
13036
13705
  const objectType = "prompt";
13037
13706
  const prompt = func.prompt_data.prompt;
13038
13707
  const promptContents = prompt.type === "completion" ? `prompt: ${doubleQuote(prompt.content)}` : `messages: ${safeStringify(prompt.messages).trimStart()}`;
13039
- const rawToolsParsed = prompt.type === "chat" && prompt.tools && prompt.tools.length > 0 ? import_v312.z.array(ToolFunctionDefinition).safeParse(JSON.parse(prompt.tools)) : void 0;
13708
+ const rawToolsParsed = prompt.type === "chat" && prompt.tools && prompt.tools.length > 0 ? import_v313.z.array(ToolFunctionDefinition).safeParse(JSON.parse(prompt.tools)) : void 0;
13040
13709
  if (rawToolsParsed && !rawToolsParsed.success) {
13041
13710
  console.warn(
13042
13711
  warning(
@@ -13148,7 +13817,7 @@ var import_express = __toESM(require("express"));
13148
13817
  var import_cors = __toESM(require("cors"));
13149
13818
 
13150
13819
  // dev/errorHandler.ts
13151
- var import_v313 = require("zod/v3");
13820
+ var import_v314 = require("zod/v3");
13152
13821
  var errorHandler = (err, req, res, next) => {
13153
13822
  if ("status" in err) {
13154
13823
  res.status(err.status).json({
@@ -13159,7 +13828,7 @@ var errorHandler = (err, req, res, next) => {
13159
13828
  });
13160
13829
  return;
13161
13830
  }
13162
- if (err instanceof import_v313.z.ZodError) {
13831
+ if (err instanceof import_v314.z.ZodError) {
13163
13832
  res.status(400).json({
13164
13833
  error: {
13165
13834
  message: "Invalid request",
@@ -13331,49 +14000,79 @@ function serializeSSEEvent(event) {
13331
14000
  }
13332
14001
 
13333
14002
  // dev/types.ts
13334
- var import_v314 = require("zod/v3");
13335
- var evalBodySchema = import_v314.z.object({
13336
- name: import_v314.z.string(),
13337
- parameters: import_v314.z.record(import_v314.z.string(), import_v314.z.unknown()).nullish(),
14003
+ var import_v315 = require("zod/v3");
14004
+ var evalBodySchema = import_v315.z.object({
14005
+ name: import_v315.z.string(),
14006
+ parameters: import_v315.z.record(import_v315.z.string(), import_v315.z.unknown()).nullish(),
13338
14007
  data: RunEval.shape.data,
13339
- scores: import_v314.z.array(
13340
- import_v314.z.object({
14008
+ scores: import_v315.z.array(
14009
+ import_v315.z.object({
13341
14010
  function_id: FunctionId,
13342
- name: import_v314.z.string()
14011
+ name: import_v315.z.string()
13343
14012
  })
13344
14013
  ).nullish(),
13345
- experiment_name: import_v314.z.string().nullish(),
13346
- project_id: import_v314.z.string().nullish(),
14014
+ experiment_name: import_v315.z.string().nullish(),
14015
+ project_id: import_v315.z.string().nullish(),
13347
14016
  parent: InvokeParent.optional(),
13348
- stream: import_v314.z.boolean().optional()
14017
+ stream: import_v315.z.boolean().optional()
13349
14018
  });
13350
- var evalParametersSerializedSchema = import_v314.z.record(
13351
- import_v314.z.string(),
13352
- import_v314.z.union([
13353
- import_v314.z.object({
13354
- type: import_v314.z.literal("prompt"),
14019
+ var staticParametersSchema = import_v315.z.record(
14020
+ import_v315.z.string(),
14021
+ import_v315.z.union([
14022
+ import_v315.z.object({
14023
+ type: import_v315.z.literal("prompt"),
13355
14024
  default: PromptData.optional(),
13356
- description: import_v314.z.string().optional()
14025
+ description: import_v315.z.string().optional()
13357
14026
  }),
13358
- import_v314.z.object({
13359
- type: import_v314.z.literal("data"),
13360
- schema: import_v314.z.record(import_v314.z.unknown()),
13361
- // JSON Schema
13362
- default: import_v314.z.unknown().optional(),
13363
- description: import_v314.z.string().optional()
14027
+ import_v315.z.object({
14028
+ type: import_v315.z.literal("data"),
14029
+ schema: import_v315.z.record(import_v315.z.unknown()),
14030
+ default: import_v315.z.unknown().optional(),
14031
+ description: import_v315.z.string().optional()
13364
14032
  })
13365
14033
  ])
13366
14034
  );
13367
- var evaluatorDefinitionSchema = import_v314.z.object({
13368
- parameters: evalParametersSerializedSchema.optional()
14035
+ var parametersSchema = import_v315.z.object({
14036
+ type: import_v315.z.literal("object"),
14037
+ properties: import_v315.z.record(import_v315.z.string(), import_v315.z.record(import_v315.z.unknown())),
14038
+ required: import_v315.z.array(import_v315.z.string()).optional(),
14039
+ additionalProperties: import_v315.z.boolean().optional()
13369
14040
  });
13370
- var evaluatorDefinitionsSchema = import_v314.z.record(
13371
- import_v314.z.string(),
14041
+ var parametersSourceSchema = import_v315.z.object({
14042
+ parametersId: import_v315.z.string().optional(),
14043
+ slug: import_v315.z.string(),
14044
+ name: import_v315.z.string(),
14045
+ projectId: import_v315.z.string().optional(),
14046
+ version: import_v315.z.string().optional()
14047
+ });
14048
+ var parametersContainerSchema = import_v315.z.object({
14049
+ type: import_v315.z.literal("braintrust.parameters"),
14050
+ schema: parametersSchema,
14051
+ source: parametersSourceSchema
14052
+ });
14053
+ var staticParametersContainerSchema = import_v315.z.object({
14054
+ type: import_v315.z.literal("braintrust.staticParameters"),
14055
+ schema: staticParametersSchema,
14056
+ source: import_v315.z.null()
14057
+ });
14058
+ var serializedParametersContainerSchema = import_v315.z.union([
14059
+ parametersContainerSchema,
14060
+ staticParametersContainerSchema,
14061
+ // keeping this type here since old versions of the SDK will still pass the unwrapped schema and we need to handle this in the app
14062
+ staticParametersSchema
14063
+ ]);
14064
+ var evaluatorDefinitionSchema = import_v315.z.object({
14065
+ parameters: serializedParametersContainerSchema.optional(),
14066
+ scores: import_v315.z.array(import_v315.z.object({ name: import_v315.z.string() })).optional()
14067
+ });
14068
+ var evaluatorDefinitionsSchema = import_v315.z.record(
14069
+ import_v315.z.string(),
13372
14070
  evaluatorDefinitionSchema
13373
14071
  );
13374
14072
 
13375
14073
  // dev/server.ts
13376
- var import_v315 = require("zod/v3");
14074
+ var import_v316 = require("zod/v3");
14075
+ var import_ajv2 = require("ajv");
13377
14076
  function runDevServer(evaluators, opts) {
13378
14077
  const allEvaluators = Object.fromEntries(
13379
14078
  evaluators.map((evaluator) => [evaluator.evalName, evaluator])
@@ -13408,20 +14107,27 @@ function runDevServer(evaluators, opts) {
13408
14107
  app.get("/", (req, res) => {
13409
14108
  res.send("Hello, world!");
13410
14109
  });
13411
- app.get("/list", checkAuthorized, (req, res) => {
13412
- const evalDefs = Object.fromEntries(
13413
- Object.entries(allEvaluators).map(([name, evaluator]) => [
13414
- name,
13415
- {
13416
- parameters: evaluator.parameters ? makeEvalParametersSchema(evaluator.parameters) : void 0,
14110
+ app.get(
14111
+ "/list",
14112
+ checkAuthorized,
14113
+ asyncHandler(async (req, res) => {
14114
+ const evalDefs = {};
14115
+ for (const [name, evaluator] of Object.entries(allEvaluators)) {
14116
+ let parameters;
14117
+ if (evaluator.parameters) {
14118
+ const resolvedParams = await Promise.resolve(evaluator.parameters);
14119
+ parameters = serializeRemoteEvalParametersContainer(resolvedParams);
14120
+ }
14121
+ evalDefs[name] = {
14122
+ parameters,
13417
14123
  scores: evaluator.scores.map((score, idx) => ({
13418
14124
  name: scorerName(score, idx)
13419
14125
  }))
13420
- }
13421
- ])
13422
- );
13423
- res.json(evalDefs);
13424
- });
14126
+ };
14127
+ }
14128
+ res.json(evalDefs);
14129
+ })
14130
+ );
13425
14131
  app.post(
13426
14132
  "/eval",
13427
14133
  checkAuthorized,
@@ -13446,18 +14152,12 @@ function runDevServer(evaluators, opts) {
13446
14152
  res.status(404).json({ error: `Evaluator '${name}' not found` });
13447
14153
  return;
13448
14154
  }
13449
- if (evaluator.parameters && Object.keys(evaluator.parameters).length > 0) {
14155
+ if (evaluator.parameters) {
13450
14156
  try {
13451
- if (!evaluator.parameters) {
13452
- res.status(400).json({
13453
- error: `Evaluator '${name}' does not accept parameters`
13454
- });
13455
- return;
13456
- }
13457
- validateParameters(parameters ?? {}, evaluator.parameters);
14157
+ await validateParameters(parameters ?? {}, evaluator.parameters);
13458
14158
  } catch (e) {
13459
14159
  console.error("Error validating parameters", e);
13460
- if (e instanceof import_v315.z.ZodError || e instanceof Error) {
14160
+ if (e instanceof import_v316.z.ZodError || e instanceof import_ajv2.ValidationError || e instanceof Error) {
13461
14161
  res.status(400).json({
13462
14162
  error: e.message
13463
14163
  });
@@ -13606,9 +14306,9 @@ async function getDataset(state, data) {
13606
14306
  return data.data;
13607
14307
  }
13608
14308
  }
13609
- var datasetFetchSchema = import_v315.z.object({
13610
- project_id: import_v315.z.string(),
13611
- name: import_v315.z.string()
14309
+ var datasetFetchSchema = import_v316.z.object({
14310
+ project_id: import_v316.z.string(),
14311
+ name: import_v316.z.string()
13612
14312
  });
13613
14313
  async function getDatasetById({
13614
14314
  state,
@@ -13617,7 +14317,7 @@ async function getDatasetById({
13617
14317
  const dataset = await state.appConn().post_json("api/dataset/get", {
13618
14318
  id: datasetId
13619
14319
  });
13620
- const parsed = import_v315.z.array(datasetFetchSchema).parse(dataset);
14320
+ const parsed = import_v316.z.array(datasetFetchSchema).parse(dataset);
13621
14321
  if (parsed.length === 0) {
13622
14322
  throw new Error(`Dataset '${datasetId}' not found`);
13623
14323
  }
@@ -13650,33 +14350,6 @@ function makeScorer(state, name, score, projectId) {
13650
14350
  });
13651
14351
  return ret;
13652
14352
  }
13653
- function makeEvalParametersSchema(parameters) {
13654
- return Object.fromEntries(
13655
- Object.entries(parameters).map(([name, value]) => {
13656
- if ("type" in value && value.type === "prompt") {
13657
- return [
13658
- name,
13659
- {
13660
- type: "prompt",
13661
- default: value.default ? promptDefinitionToPromptData(value.default) : void 0,
13662
- description: value.description
13663
- }
13664
- ];
13665
- } else {
13666
- const schemaObj = zodToJsonSchema(value);
13667
- return [
13668
- name,
13669
- {
13670
- type: "data",
13671
- schema: schemaObj,
13672
- default: schemaObj.default,
13673
- description: schemaObj.description
13674
- }
13675
- ];
13676
- }
13677
- })
13678
- );
13679
- }
13680
14353
 
13681
14354
  // src/cli/util/external-packages-plugin.ts
13682
14355
  function createMarkKnownPackagesExternalPlugin(additionalPackages = []) {
@@ -13901,6 +14574,7 @@ async function initFile({
13901
14574
  const evaluator = evaluateBuildResults(inFile, result) || {
13902
14575
  functions: [],
13903
14576
  prompts: [],
14577
+ parameters: [],
13904
14578
  evaluators: {},
13905
14579
  reporters: {}
13906
14580
  };