braintrust 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +4844 -3703
- package/dev/dist/index.d.ts +4844 -3703
- package/dev/dist/index.js +2068 -1402
- package/dev/dist/index.mjs +1954 -1288
- package/dist/browser.d.mts +16987 -8720
- package/dist/browser.d.ts +16987 -8720
- package/dist/browser.js +1810 -841
- package/dist/browser.mjs +2056 -1087
- package/dist/cli.js +2403 -1729
- package/dist/index.d.mts +16987 -8720
- package/dist/index.d.ts +16987 -8720
- package/dist/index.js +1810 -841
- package/dist/index.mjs +2056 -1087
- package/package.json +2 -1
- package/util/dist/index.d.mts +10 -8
- package/util/dist/index.d.ts +10 -8
- package/util/dist/index.js +27 -142
- package/util/dist/index.mjs +26 -141
package/dist/cli.js
CHANGED
|
@@ -1275,7 +1275,7 @@ var require_package = __commonJS({
|
|
|
1275
1275
|
"package.json"(exports2, module2) {
|
|
1276
1276
|
module2.exports = {
|
|
1277
1277
|
name: "braintrust",
|
|
1278
|
-
version: "2.2.0",
|
|
1278
|
+
version: "2.2.1",
|
|
1279
1279
|
description: "SDK for integrating Braintrust",
|
|
1280
1280
|
repository: {
|
|
1281
1281
|
type: "git",
|
|
@@ -1394,6 +1394,7 @@ var require_package = __commonJS({
|
|
|
1394
1394
|
"@next/env": "^14.2.3",
|
|
1395
1395
|
"@types/nunjucks": "^3.2.6",
|
|
1396
1396
|
"@vercel/functions": "^1.0.2",
|
|
1397
|
+
ajv: "^8.17.1",
|
|
1397
1398
|
argparse: "^2.0.1",
|
|
1398
1399
|
boxen: "^8.0.1",
|
|
1399
1400
|
chalk: "^4.1.2",
|
|
@@ -1523,11 +1524,19 @@ function getIdGenerator() {
|
|
|
1523
1524
|
|
|
1524
1525
|
// util/db_fields.ts
|
|
1525
1526
|
var TRANSACTION_ID_FIELD = "_xact_id";
|
|
1527
|
+
var OBJECT_DELETE_FIELD = "_object_delete";
|
|
1526
1528
|
var IS_MERGE_FIELD = "_is_merge";
|
|
1527
1529
|
var AUDIT_SOURCE_FIELD = "_audit_source";
|
|
1528
1530
|
var AUDIT_METADATA_FIELD = "_audit_metadata";
|
|
1529
1531
|
var VALID_SOURCES = ["app", "api", "external"];
|
|
1530
|
-
var
|
|
1532
|
+
var OBJECT_ID_KEYS = [
|
|
1533
|
+
"experiment_id",
|
|
1534
|
+
"dataset_id",
|
|
1535
|
+
"prompt_session_id",
|
|
1536
|
+
"project_id",
|
|
1537
|
+
"log_id",
|
|
1538
|
+
"function_data"
|
|
1539
|
+
];
|
|
1531
1540
|
|
|
1532
1541
|
// util/span_identifier_v3.ts
|
|
1533
1542
|
var uuid3 = __toESM(require("uuid"));
|
|
@@ -2205,13 +2214,6 @@ function mergeDictsWithPathsHelper({
|
|
|
2205
2214
|
function mergeDicts(mergeInto, mergeFrom) {
|
|
2206
2215
|
return mergeDictsWithPaths({ mergeInto, mergeFrom, mergePaths: [] });
|
|
2207
2216
|
}
|
|
2208
|
-
function mapAt(m, k) {
|
|
2209
|
-
const ret = m.get(k);
|
|
2210
|
-
if (ret === void 0) {
|
|
2211
|
-
throw new Error(`Map does not contain key ${k}`);
|
|
2212
|
-
}
|
|
2213
|
-
return ret;
|
|
2214
|
-
}
|
|
2215
2217
|
function recordFind(m, k) {
|
|
2216
2218
|
return m[k];
|
|
2217
2219
|
}
|
|
@@ -2226,72 +2228,8 @@ function getObjValueByPath(row, path8) {
|
|
|
2226
2228
|
return curr;
|
|
2227
2229
|
}
|
|
2228
2230
|
|
|
2229
|
-
// util/graph_util.ts
|
|
2230
|
-
function depthFirstSearch(args) {
|
|
2231
|
-
const { graph, firstVisitF, lastVisitF } = args;
|
|
2232
|
-
for (const vs of graph.values()) {
|
|
2233
|
-
for (const v of vs.values()) {
|
|
2234
|
-
if (!graph.has(v)) {
|
|
2235
|
-
throw new Error(`Outgoing vertex ${v} must be a key in the graph`);
|
|
2236
|
-
}
|
|
2237
|
-
}
|
|
2238
|
-
}
|
|
2239
|
-
const firstVisitedVertices = /* @__PURE__ */ new Set();
|
|
2240
|
-
const visitationOrder = args.visitationOrder ?? [...graph.keys()];
|
|
2241
|
-
const events = visitationOrder.map((vertex) => ({ eventType: "first", vertex, extras: {} })).reverse();
|
|
2242
|
-
while (events.length) {
|
|
2243
|
-
const { eventType, vertex, extras } = events.pop();
|
|
2244
|
-
if (eventType === "last") {
|
|
2245
|
-
lastVisitF?.(vertex);
|
|
2246
|
-
continue;
|
|
2247
|
-
}
|
|
2248
|
-
if (firstVisitedVertices.has(vertex)) {
|
|
2249
|
-
continue;
|
|
2250
|
-
}
|
|
2251
|
-
firstVisitedVertices.add(vertex);
|
|
2252
|
-
firstVisitF?.(vertex, { parentVertex: extras.parentVertex });
|
|
2253
|
-
events.push({ eventType: "last", vertex, extras: {} });
|
|
2254
|
-
mapAt(graph, vertex).forEach((child) => {
|
|
2255
|
-
events.push({
|
|
2256
|
-
eventType: "first",
|
|
2257
|
-
vertex: child,
|
|
2258
|
-
extras: { parentVertex: vertex }
|
|
2259
|
-
});
|
|
2260
|
-
});
|
|
2261
|
-
}
|
|
2262
|
-
}
|
|
2263
|
-
function undirectedConnectedComponents(graph) {
|
|
2264
|
-
const directedGraph = new Map(
|
|
2265
|
-
[...graph.vertices].map((v) => [v, /* @__PURE__ */ new Set()])
|
|
2266
|
-
);
|
|
2267
|
-
for (const [i, j] of graph.edges) {
|
|
2268
|
-
mapAt(directedGraph, i).add(j);
|
|
2269
|
-
mapAt(directedGraph, j).add(i);
|
|
2270
|
-
}
|
|
2271
|
-
let labelCounter = 0;
|
|
2272
|
-
const vertexLabels = /* @__PURE__ */ new Map();
|
|
2273
|
-
const firstVisitF = (vertex, args) => {
|
|
2274
|
-
const label = args?.parentVertex !== void 0 ? mapAt(vertexLabels, args?.parentVertex) : labelCounter++;
|
|
2275
|
-
vertexLabels.set(vertex, label);
|
|
2276
|
-
};
|
|
2277
|
-
depthFirstSearch({ graph: directedGraph, firstVisitF });
|
|
2278
|
-
const output = Array.from({ length: labelCounter }).map(() => []);
|
|
2279
|
-
for (const [vertex, label] of vertexLabels.entries()) {
|
|
2280
|
-
output[label].push(vertex);
|
|
2281
|
-
}
|
|
2282
|
-
return output;
|
|
2283
|
-
}
|
|
2284
|
-
function topologicalSort(graph, visitationOrder) {
|
|
2285
|
-
const reverseOrdering = [];
|
|
2286
|
-
const lastVisitF = (vertex) => {
|
|
2287
|
-
reverseOrdering.push(vertex);
|
|
2288
|
-
};
|
|
2289
|
-
depthFirstSearch({ graph, lastVisitF, visitationOrder });
|
|
2290
|
-
return reverseOrdering.reverse();
|
|
2291
|
-
}
|
|
2292
|
-
|
|
2293
2231
|
// util/merge_row_batch.ts
|
|
2294
|
-
function generateMergedRowKey(row, useParentIdForId) {
|
|
2232
|
+
function generateMergedRowKey(row) {
|
|
2295
2233
|
return JSON.stringify(
|
|
2296
2234
|
[
|
|
2297
2235
|
"org_id",
|
|
@@ -2300,7 +2238,7 @@ function generateMergedRowKey(row, useParentIdForId) {
|
|
|
2300
2238
|
"dataset_id",
|
|
2301
2239
|
"prompt_session_id",
|
|
2302
2240
|
"log_id",
|
|
2303
|
-
|
|
2241
|
+
"id"
|
|
2304
2242
|
].map((k) => row[k])
|
|
2305
2243
|
);
|
|
2306
2244
|
}
|
|
@@ -2354,96 +2292,34 @@ function mergeRowBatch(rows) {
|
|
|
2354
2292
|
rowGroups.set(key, row);
|
|
2355
2293
|
}
|
|
2356
2294
|
}
|
|
2357
|
-
|
|
2358
|
-
const rowToLabel = new Map(
|
|
2359
|
-
merged.map((r, i) => [generateMergedRowKey(r), i])
|
|
2360
|
-
);
|
|
2361
|
-
const graph = new Map(
|
|
2362
|
-
Array.from({ length: merged.length }).map((_, i) => [i, /* @__PURE__ */ new Set()])
|
|
2363
|
-
);
|
|
2364
|
-
merged.forEach((r, i) => {
|
|
2365
|
-
const parentId = r[PARENT_ID_FIELD];
|
|
2366
|
-
if (!parentId) {
|
|
2367
|
-
return;
|
|
2368
|
-
}
|
|
2369
|
-
const parentRowKey = generateMergedRowKey(
|
|
2370
|
-
r,
|
|
2371
|
-
true
|
|
2372
|
-
/* useParentIdForId */
|
|
2373
|
-
);
|
|
2374
|
-
const parentLabel = rowToLabel.get(parentRowKey);
|
|
2375
|
-
if (parentLabel !== void 0) {
|
|
2376
|
-
mapAt(graph, parentLabel).add(i);
|
|
2377
|
-
}
|
|
2378
|
-
});
|
|
2379
|
-
const connectedComponents = undirectedConnectedComponents({
|
|
2380
|
-
vertices: new Set(graph.keys()),
|
|
2381
|
-
edges: new Set(
|
|
2382
|
-
[...graph.entries()].flatMap(
|
|
2383
|
-
([k, vs]) => [...vs].map((v) => {
|
|
2384
|
-
const ret = [k, v];
|
|
2385
|
-
return ret;
|
|
2386
|
-
})
|
|
2387
|
-
)
|
|
2388
|
-
)
|
|
2389
|
-
});
|
|
2390
|
-
const buckets = connectedComponents.map(
|
|
2391
|
-
(cc) => topologicalSort(
|
|
2392
|
-
graph,
|
|
2393
|
-
cc
|
|
2394
|
-
/* visitationOrder */
|
|
2395
|
-
)
|
|
2396
|
-
);
|
|
2397
|
-
return buckets.map((bucket) => bucket.map((i) => merged[i]));
|
|
2295
|
+
return [...rowGroups.values()];
|
|
2398
2296
|
}
|
|
2399
2297
|
function batchItems(args) {
|
|
2400
|
-
|
|
2298
|
+
const { items } = args;
|
|
2401
2299
|
const batchMaxNumItems = args.batchMaxNumItems ?? Number.POSITIVE_INFINITY;
|
|
2402
2300
|
const batchMaxNumBytes = args.batchMaxNumBytes ?? Number.POSITIVE_INFINITY;
|
|
2301
|
+
const getByteSize = args.getByteSize;
|
|
2403
2302
|
const output = [];
|
|
2404
|
-
let nextItems = [];
|
|
2405
|
-
let batchSet = [];
|
|
2406
2303
|
let batch = [];
|
|
2407
2304
|
let batchLen = 0;
|
|
2408
2305
|
function addToBatch(item) {
|
|
2409
2306
|
batch.push(item);
|
|
2410
|
-
batchLen += item.length;
|
|
2307
|
+
batchLen += getByteSize(item);
|
|
2411
2308
|
}
|
|
2412
2309
|
function flushBatch() {
|
|
2413
|
-
|
|
2310
|
+
output.push(batch);
|
|
2414
2311
|
batch = [];
|
|
2415
2312
|
batchLen = 0;
|
|
2416
2313
|
}
|
|
2417
|
-
|
|
2418
|
-
|
|
2419
|
-
|
|
2420
|
-
for (const item of bucket) {
|
|
2421
|
-
if (batch.length === 0 || item.length + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems) {
|
|
2422
|
-
addToBatch(item);
|
|
2423
|
-
} else if (i === 0) {
|
|
2424
|
-
flushBatch();
|
|
2425
|
-
addToBatch(item);
|
|
2426
|
-
} else {
|
|
2427
|
-
break;
|
|
2428
|
-
}
|
|
2429
|
-
++i;
|
|
2430
|
-
}
|
|
2431
|
-
if (i < bucket.length) {
|
|
2432
|
-
nextItems.push(bucket.slice(i));
|
|
2433
|
-
}
|
|
2434
|
-
if (batchLen >= batchMaxNumBytes || batch.length > batchMaxNumItems) {
|
|
2435
|
-
flushBatch();
|
|
2436
|
-
}
|
|
2437
|
-
}
|
|
2438
|
-
if (batch.length) {
|
|
2314
|
+
for (const item of items) {
|
|
2315
|
+
const itemSize = getByteSize(item);
|
|
2316
|
+
if (batch.length > 0 && !(itemSize + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems)) {
|
|
2439
2317
|
flushBatch();
|
|
2440
2318
|
}
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
items = nextItems;
|
|
2446
|
-
nextItems = [];
|
|
2319
|
+
addToBatch(item);
|
|
2320
|
+
}
|
|
2321
|
+
if (batch.length > 0) {
|
|
2322
|
+
flushBatch();
|
|
2447
2323
|
}
|
|
2448
2324
|
return output;
|
|
2449
2325
|
}
|
|
@@ -3050,10 +2926,15 @@ var FunctionTypeEnum = import_v36.z.enum([
|
|
|
3050
2926
|
"preprocessor",
|
|
3051
2927
|
"facet",
|
|
3052
2928
|
"classifier",
|
|
3053
|
-
"tag"
|
|
2929
|
+
"tag",
|
|
2930
|
+
"parameters"
|
|
3054
2931
|
]);
|
|
3055
2932
|
var NullableSavedFunctionId = import_v36.z.union([
|
|
3056
|
-
import_v36.z.object({
|
|
2933
|
+
import_v36.z.object({
|
|
2934
|
+
type: import_v36.z.literal("function"),
|
|
2935
|
+
id: import_v36.z.string(),
|
|
2936
|
+
version: import_v36.z.string().optional()
|
|
2937
|
+
}),
|
|
3057
2938
|
import_v36.z.object({
|
|
3058
2939
|
type: import_v36.z.literal("global"),
|
|
3059
2940
|
name: import_v36.z.string(),
|
|
@@ -3061,6 +2942,67 @@ var NullableSavedFunctionId = import_v36.z.union([
|
|
|
3061
2942
|
}),
|
|
3062
2943
|
import_v36.z.null()
|
|
3063
2944
|
]);
|
|
2945
|
+
var TopicMapReport = import_v36.z.object({
|
|
2946
|
+
version: import_v36.z.literal(1),
|
|
2947
|
+
created_at: import_v36.z.string().optional(),
|
|
2948
|
+
settings: import_v36.z.object({
|
|
2949
|
+
algorithm: import_v36.z.enum(["hdbscan", "kmeans", "hierarchical"]),
|
|
2950
|
+
dimension_reduction: import_v36.z.enum(["umap", "pca", "none"]),
|
|
2951
|
+
vector_field: import_v36.z.string(),
|
|
2952
|
+
embedding_model: import_v36.z.string(),
|
|
2953
|
+
n_clusters: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
2954
|
+
umap_dimensions: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
2955
|
+
min_cluster_size: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
2956
|
+
min_samples: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional()
|
|
2957
|
+
}),
|
|
2958
|
+
query_settings: import_v36.z.object({
|
|
2959
|
+
hierarchy_threshold: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]),
|
|
2960
|
+
auto_naming: import_v36.z.boolean(),
|
|
2961
|
+
skip_cache: import_v36.z.boolean(),
|
|
2962
|
+
viz_mode: import_v36.z.enum(["bar", "scatter"]),
|
|
2963
|
+
naming_model: import_v36.z.string()
|
|
2964
|
+
}).partial(),
|
|
2965
|
+
clusters: import_v36.z.array(
|
|
2966
|
+
import_v36.z.object({
|
|
2967
|
+
cluster_id: import_v36.z.number(),
|
|
2968
|
+
parent_cluster_id: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
2969
|
+
topic_id: import_v36.z.string(),
|
|
2970
|
+
count: import_v36.z.number(),
|
|
2971
|
+
sample_texts: import_v36.z.array(import_v36.z.string()),
|
|
2972
|
+
samples: import_v36.z.array(
|
|
2973
|
+
import_v36.z.object({
|
|
2974
|
+
id: import_v36.z.string(),
|
|
2975
|
+
text: import_v36.z.string(),
|
|
2976
|
+
root_span_id: import_v36.z.string(),
|
|
2977
|
+
span_id: import_v36.z.string()
|
|
2978
|
+
})
|
|
2979
|
+
),
|
|
2980
|
+
name: import_v36.z.string().optional(),
|
|
2981
|
+
description: import_v36.z.string().optional(),
|
|
2982
|
+
keywords: import_v36.z.array(import_v36.z.string()).optional(),
|
|
2983
|
+
centroid: import_v36.z.array(import_v36.z.number()).optional(),
|
|
2984
|
+
parent_id: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
2985
|
+
is_leaf: import_v36.z.boolean().optional(),
|
|
2986
|
+
depth: import_v36.z.number().optional()
|
|
2987
|
+
})
|
|
2988
|
+
),
|
|
2989
|
+
embedding_points: import_v36.z.array(
|
|
2990
|
+
import_v36.z.object({
|
|
2991
|
+
x: import_v36.z.number(),
|
|
2992
|
+
y: import_v36.z.number(),
|
|
2993
|
+
cluster: import_v36.z.number(),
|
|
2994
|
+
text: import_v36.z.string().optional()
|
|
2995
|
+
})
|
|
2996
|
+
).optional()
|
|
2997
|
+
});
|
|
2998
|
+
var TopicMapData = import_v36.z.object({
|
|
2999
|
+
type: import_v36.z.literal("topic_map"),
|
|
3000
|
+
source_facet: import_v36.z.string(),
|
|
3001
|
+
embedding_model: import_v36.z.string(),
|
|
3002
|
+
bundle_key: import_v36.z.string(),
|
|
3003
|
+
distance_threshold: import_v36.z.number().optional(),
|
|
3004
|
+
report: TopicMapReport.optional()
|
|
3005
|
+
});
|
|
3064
3006
|
var BatchedFacetData = import_v36.z.object({
|
|
3065
3007
|
type: import_v36.z.literal("batched_facet"),
|
|
3066
3008
|
preprocessor: NullableSavedFunctionId.and(import_v36.z.unknown()).optional(),
|
|
@@ -3069,9 +3011,17 @@ var BatchedFacetData = import_v36.z.object({
|
|
|
3069
3011
|
name: import_v36.z.string(),
|
|
3070
3012
|
prompt: import_v36.z.string(),
|
|
3071
3013
|
model: import_v36.z.string().optional(),
|
|
3014
|
+
embedding_model: import_v36.z.string().optional(),
|
|
3072
3015
|
no_match_pattern: import_v36.z.string().optional()
|
|
3073
3016
|
})
|
|
3074
|
-
)
|
|
3017
|
+
),
|
|
3018
|
+
topic_maps: import_v36.z.record(
|
|
3019
|
+
import_v36.z.object({
|
|
3020
|
+
function_name: import_v36.z.string(),
|
|
3021
|
+
topic_map_id: import_v36.z.string().optional(),
|
|
3022
|
+
topic_map_data: TopicMapData
|
|
3023
|
+
})
|
|
3024
|
+
).optional()
|
|
3075
3025
|
});
|
|
3076
3026
|
var BraintrustModelParams = import_v36.z.object({
|
|
3077
3027
|
use_cache: import_v36.z.boolean(),
|
|
@@ -3282,6 +3232,18 @@ var ObjectReferenceNullish = import_v36.z.union([
|
|
|
3282
3232
|
}),
|
|
3283
3233
|
import_v36.z.null()
|
|
3284
3234
|
]);
|
|
3235
|
+
var SavedFunctionId = import_v36.z.union([
|
|
3236
|
+
import_v36.z.object({
|
|
3237
|
+
type: import_v36.z.literal("function"),
|
|
3238
|
+
id: import_v36.z.string(),
|
|
3239
|
+
version: import_v36.z.string().optional()
|
|
3240
|
+
}),
|
|
3241
|
+
import_v36.z.object({
|
|
3242
|
+
type: import_v36.z.literal("global"),
|
|
3243
|
+
name: import_v36.z.string(),
|
|
3244
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
3245
|
+
})
|
|
3246
|
+
]);
|
|
3285
3247
|
var DatasetEvent = import_v36.z.object({
|
|
3286
3248
|
id: import_v36.z.string(),
|
|
3287
3249
|
_xact_id: import_v36.z.string(),
|
|
@@ -3301,7 +3263,36 @@ var DatasetEvent = import_v36.z.object({
|
|
|
3301
3263
|
is_root: import_v36.z.union([import_v36.z.boolean(), import_v36.z.null()]).optional(),
|
|
3302
3264
|
origin: ObjectReferenceNullish.optional(),
|
|
3303
3265
|
comments: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
|
|
3304
|
-
audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional()
|
|
3266
|
+
audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
|
|
3267
|
+
facets: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
|
|
3268
|
+
classifications: import_v36.z.union([
|
|
3269
|
+
import_v36.z.record(
|
|
3270
|
+
import_v36.z.array(
|
|
3271
|
+
import_v36.z.object({
|
|
3272
|
+
id: import_v36.z.string(),
|
|
3273
|
+
label: import_v36.z.string().optional(),
|
|
3274
|
+
confidence: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
3275
|
+
metadata: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
|
|
3276
|
+
source: SavedFunctionId.and(
|
|
3277
|
+
import_v36.z.union([
|
|
3278
|
+
import_v36.z.object({
|
|
3279
|
+
type: import_v36.z.literal("function"),
|
|
3280
|
+
id: import_v36.z.string(),
|
|
3281
|
+
version: import_v36.z.string().optional()
|
|
3282
|
+
}),
|
|
3283
|
+
import_v36.z.object({
|
|
3284
|
+
type: import_v36.z.literal("global"),
|
|
3285
|
+
name: import_v36.z.string(),
|
|
3286
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
3287
|
+
}),
|
|
3288
|
+
import_v36.z.null()
|
|
3289
|
+
])
|
|
3290
|
+
).optional()
|
|
3291
|
+
})
|
|
3292
|
+
)
|
|
3293
|
+
),
|
|
3294
|
+
import_v36.z.null()
|
|
3295
|
+
]).optional()
|
|
3305
3296
|
});
|
|
3306
3297
|
var EnvVar = import_v36.z.object({
|
|
3307
3298
|
id: import_v36.z.string().uuid(),
|
|
@@ -3378,7 +3369,8 @@ var SpanType = import_v36.z.union([
|
|
|
3378
3369
|
"automation",
|
|
3379
3370
|
"facet",
|
|
3380
3371
|
"preprocessor",
|
|
3381
|
-
"classifier"
|
|
3372
|
+
"classifier",
|
|
3373
|
+
"review"
|
|
3382
3374
|
]),
|
|
3383
3375
|
import_v36.z.null()
|
|
3384
3376
|
]);
|
|
@@ -3419,10 +3411,43 @@ var ExperimentEvent = import_v36.z.object({
|
|
|
3419
3411
|
is_root: import_v36.z.union([import_v36.z.boolean(), import_v36.z.null()]).optional(),
|
|
3420
3412
|
origin: ObjectReferenceNullish.optional(),
|
|
3421
3413
|
comments: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
|
|
3422
|
-
audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional()
|
|
3414
|
+
audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
|
|
3415
|
+
facets: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
|
|
3416
|
+
classifications: import_v36.z.union([
|
|
3417
|
+
import_v36.z.record(
|
|
3418
|
+
import_v36.z.array(
|
|
3419
|
+
import_v36.z.object({
|
|
3420
|
+
id: import_v36.z.string(),
|
|
3421
|
+
label: import_v36.z.string().optional(),
|
|
3422
|
+
confidence: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
3423
|
+
metadata: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
|
|
3424
|
+
source: SavedFunctionId.and(
|
|
3425
|
+
import_v36.z.union([
|
|
3426
|
+
import_v36.z.object({
|
|
3427
|
+
type: import_v36.z.literal("function"),
|
|
3428
|
+
id: import_v36.z.string(),
|
|
3429
|
+
version: import_v36.z.string().optional()
|
|
3430
|
+
}),
|
|
3431
|
+
import_v36.z.object({
|
|
3432
|
+
type: import_v36.z.literal("global"),
|
|
3433
|
+
name: import_v36.z.string(),
|
|
3434
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
3435
|
+
}),
|
|
3436
|
+
import_v36.z.null()
|
|
3437
|
+
])
|
|
3438
|
+
).optional()
|
|
3439
|
+
})
|
|
3440
|
+
)
|
|
3441
|
+
),
|
|
3442
|
+
import_v36.z.null()
|
|
3443
|
+
]).optional()
|
|
3423
3444
|
});
|
|
3424
3445
|
var ExtendedSavedFunctionId = import_v36.z.union([
|
|
3425
|
-
import_v36.z.object({
|
|
3446
|
+
import_v36.z.object({
|
|
3447
|
+
type: import_v36.z.literal("function"),
|
|
3448
|
+
id: import_v36.z.string(),
|
|
3449
|
+
version: import_v36.z.string().optional()
|
|
3450
|
+
}),
|
|
3426
3451
|
import_v36.z.object({
|
|
3427
3452
|
type: import_v36.z.literal("global"),
|
|
3428
3453
|
name: import_v36.z.string(),
|
|
@@ -3439,6 +3464,7 @@ var FacetData = import_v36.z.object({
|
|
|
3439
3464
|
preprocessor: NullableSavedFunctionId.and(import_v36.z.unknown()).optional(),
|
|
3440
3465
|
prompt: import_v36.z.string(),
|
|
3441
3466
|
model: import_v36.z.string().optional(),
|
|
3467
|
+
embedding_model: import_v36.z.string().optional(),
|
|
3442
3468
|
no_match_pattern: import_v36.z.string().optional()
|
|
3443
3469
|
});
|
|
3444
3470
|
var PromptBlockDataNullish = import_v36.z.union([
|
|
@@ -3528,14 +3554,6 @@ var PromptParserNullish = import_v36.z.union([
|
|
|
3528
3554
|
}),
|
|
3529
3555
|
import_v36.z.null()
|
|
3530
3556
|
]);
|
|
3531
|
-
var SavedFunctionId = import_v36.z.union([
|
|
3532
|
-
import_v36.z.object({ type: import_v36.z.literal("function"), id: import_v36.z.string() }),
|
|
3533
|
-
import_v36.z.object({
|
|
3534
|
-
type: import_v36.z.literal("global"),
|
|
3535
|
-
name: import_v36.z.string(),
|
|
3536
|
-
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
3537
|
-
})
|
|
3538
|
-
]);
|
|
3539
3557
|
var PromptDataNullish = import_v36.z.union([
|
|
3540
3558
|
import_v36.z.object({
|
|
3541
3559
|
prompt: PromptBlockDataNullish,
|
|
@@ -3586,7 +3604,8 @@ var FunctionTypeEnumNullish = import_v36.z.union([
|
|
|
3586
3604
|
"preprocessor",
|
|
3587
3605
|
"facet",
|
|
3588
3606
|
"classifier",
|
|
3589
|
-
"tag"
|
|
3607
|
+
"tag",
|
|
3608
|
+
"parameters"
|
|
3590
3609
|
]),
|
|
3591
3610
|
import_v36.z.null()
|
|
3592
3611
|
]);
|
|
@@ -3678,7 +3697,8 @@ var FunctionData = import_v36.z.union([
|
|
|
3678
3697
|
type: import_v36.z.literal("remote_eval"),
|
|
3679
3698
|
endpoint: import_v36.z.string(),
|
|
3680
3699
|
eval_name: import_v36.z.string(),
|
|
3681
|
-
parameters: import_v36.z.object({}).partial().passthrough()
|
|
3700
|
+
parameters: import_v36.z.object({}).partial().passthrough(),
|
|
3701
|
+
parameters_version: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional()
|
|
3682
3702
|
}),
|
|
3683
3703
|
import_v36.z.object({
|
|
3684
3704
|
type: import_v36.z.literal("global"),
|
|
@@ -3687,7 +3707,18 @@ var FunctionData = import_v36.z.union([
|
|
|
3687
3707
|
config: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional()
|
|
3688
3708
|
}),
|
|
3689
3709
|
FacetData,
|
|
3690
|
-
BatchedFacetData
|
|
3710
|
+
BatchedFacetData,
|
|
3711
|
+
import_v36.z.object({
|
|
3712
|
+
type: import_v36.z.literal("parameters"),
|
|
3713
|
+
data: import_v36.z.object({}).partial().passthrough(),
|
|
3714
|
+
__schema: import_v36.z.object({
|
|
3715
|
+
type: import_v36.z.literal("object"),
|
|
3716
|
+
properties: import_v36.z.record(import_v36.z.object({}).partial().passthrough()),
|
|
3717
|
+
required: import_v36.z.array(import_v36.z.string()).optional(),
|
|
3718
|
+
additionalProperties: import_v36.z.boolean().optional()
|
|
3719
|
+
})
|
|
3720
|
+
}),
|
|
3721
|
+
TopicMapData.and(import_v36.z.unknown())
|
|
3691
3722
|
]);
|
|
3692
3723
|
var Function2 = import_v36.z.object({
|
|
3693
3724
|
id: import_v36.z.string().uuid(),
|
|
@@ -3717,7 +3748,13 @@ var Function2 = import_v36.z.object({
|
|
|
3717
3748
|
import_v36.z.null()
|
|
3718
3749
|
]).optional()
|
|
3719
3750
|
});
|
|
3720
|
-
var FunctionFormat = import_v36.z.enum([
|
|
3751
|
+
var FunctionFormat = import_v36.z.enum([
|
|
3752
|
+
"llm",
|
|
3753
|
+
"code",
|
|
3754
|
+
"global",
|
|
3755
|
+
"graph",
|
|
3756
|
+
"topic_map"
|
|
3757
|
+
]);
|
|
3721
3758
|
var PromptData = import_v36.z.object({
|
|
3722
3759
|
prompt: PromptBlockDataNullish,
|
|
3723
3760
|
options: PromptOptionsNullish,
|
|
@@ -3800,13 +3837,14 @@ var FunctionObjectType = import_v36.z.enum([
|
|
|
3800
3837
|
"custom_view",
|
|
3801
3838
|
"preprocessor",
|
|
3802
3839
|
"facet",
|
|
3803
|
-
"classifier"
|
|
3840
|
+
"classifier",
|
|
3841
|
+
"parameters"
|
|
3804
3842
|
]);
|
|
3805
3843
|
var FunctionOutputType = import_v36.z.enum([
|
|
3806
3844
|
"completion",
|
|
3807
3845
|
"score",
|
|
3808
3846
|
"facet",
|
|
3809
|
-
"
|
|
3847
|
+
"classification",
|
|
3810
3848
|
"any"
|
|
3811
3849
|
]);
|
|
3812
3850
|
var GitMetadataSettings = import_v36.z.object({
|
|
@@ -3842,6 +3880,10 @@ var GroupScope = import_v36.z.object({
|
|
|
3842
3880
|
idle_seconds: import_v36.z.number().optional()
|
|
3843
3881
|
});
|
|
3844
3882
|
var IfExists = import_v36.z.enum(["error", "ignore", "replace"]);
|
|
3883
|
+
var ImageRenderingMode = import_v36.z.union([
|
|
3884
|
+
import_v36.z.enum(["auto", "click_to_load", "blocked"]),
|
|
3885
|
+
import_v36.z.null()
|
|
3886
|
+
]);
|
|
3845
3887
|
var InvokeParent = import_v36.z.union([
|
|
3846
3888
|
import_v36.z.object({
|
|
3847
3889
|
object_type: import_v36.z.enum(["project_logs", "experiment", "playground_logs"]),
|
|
@@ -3934,7 +3976,8 @@ var Organization = import_v36.z.object({
|
|
|
3934
3976
|
is_universal_api: import_v36.z.union([import_v36.z.boolean(), import_v36.z.null()]).optional(),
|
|
3935
3977
|
proxy_url: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
|
|
3936
3978
|
realtime_url: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
|
|
3937
|
-
created: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional()
|
|
3979
|
+
created: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
|
|
3980
|
+
image_rendering_mode: ImageRenderingMode.optional()
|
|
3938
3981
|
});
|
|
3939
3982
|
var ProjectSettings = import_v36.z.union([
|
|
3940
3983
|
import_v36.z.object({
|
|
@@ -4075,7 +4118,36 @@ var ProjectLogsEvent = import_v36.z.object({
|
|
|
4075
4118
|
origin: ObjectReferenceNullish.optional(),
|
|
4076
4119
|
comments: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
|
|
4077
4120
|
audit_data: import_v36.z.union([import_v36.z.array(import_v36.z.unknown()), import_v36.z.null()]).optional(),
|
|
4078
|
-
_async_scoring_state: import_v36.z.unknown().optional()
|
|
4121
|
+
_async_scoring_state: import_v36.z.unknown().optional(),
|
|
4122
|
+
facets: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
|
|
4123
|
+
classifications: import_v36.z.union([
|
|
4124
|
+
import_v36.z.record(
|
|
4125
|
+
import_v36.z.array(
|
|
4126
|
+
import_v36.z.object({
|
|
4127
|
+
id: import_v36.z.string(),
|
|
4128
|
+
label: import_v36.z.string().optional(),
|
|
4129
|
+
confidence: import_v36.z.union([import_v36.z.number(), import_v36.z.null()]).optional(),
|
|
4130
|
+
metadata: import_v36.z.union([import_v36.z.object({}).partial().passthrough(), import_v36.z.null()]).optional(),
|
|
4131
|
+
source: SavedFunctionId.and(
|
|
4132
|
+
import_v36.z.union([
|
|
4133
|
+
import_v36.z.object({
|
|
4134
|
+
type: import_v36.z.literal("function"),
|
|
4135
|
+
id: import_v36.z.string(),
|
|
4136
|
+
version: import_v36.z.string().optional()
|
|
4137
|
+
}),
|
|
4138
|
+
import_v36.z.object({
|
|
4139
|
+
type: import_v36.z.literal("global"),
|
|
4140
|
+
name: import_v36.z.string(),
|
|
4141
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
4142
|
+
}),
|
|
4143
|
+
import_v36.z.null()
|
|
4144
|
+
])
|
|
4145
|
+
).optional()
|
|
4146
|
+
})
|
|
4147
|
+
)
|
|
4148
|
+
),
|
|
4149
|
+
import_v36.z.null()
|
|
4150
|
+
]).optional()
|
|
4079
4151
|
});
|
|
4080
4152
|
var ProjectScoreType = import_v36.z.enum([
|
|
4081
4153
|
"slider",
|
|
@@ -4377,12 +4449,15 @@ var View = import_v36.z.object({
|
|
|
4377
4449
|
"datasets",
|
|
4378
4450
|
"dataset",
|
|
4379
4451
|
"prompts",
|
|
4452
|
+
"parameters",
|
|
4380
4453
|
"tools",
|
|
4381
4454
|
"scorers",
|
|
4382
4455
|
"classifiers",
|
|
4383
4456
|
"logs",
|
|
4384
4457
|
"monitor",
|
|
4385
|
-
"
|
|
4458
|
+
"for_review_project_log",
|
|
4459
|
+
"for_review_experiments",
|
|
4460
|
+
"for_review_datasets"
|
|
4386
4461
|
]),
|
|
4387
4462
|
name: import_v36.z.string(),
|
|
4388
4463
|
created: import_v36.z.union([import_v36.z.string(), import_v36.z.null()]).optional(),
|
|
@@ -5139,6 +5214,52 @@ var PromptCache = class {
|
|
|
5139
5214
|
}
|
|
5140
5215
|
};
|
|
5141
5216
|
|
|
5217
|
+
// src/prompt-cache/parameters-cache.ts
|
|
5218
|
+
function createCacheKey2(key) {
|
|
5219
|
+
if (key.id) {
|
|
5220
|
+
return `parameters:id:${key.id}`;
|
|
5221
|
+
}
|
|
5222
|
+
const prefix = key.projectId ?? key.projectName;
|
|
5223
|
+
if (!prefix) {
|
|
5224
|
+
throw new Error("Either projectId or projectName must be provided");
|
|
5225
|
+
}
|
|
5226
|
+
if (!key.slug) {
|
|
5227
|
+
throw new Error("Slug must be provided when not using ID");
|
|
5228
|
+
}
|
|
5229
|
+
return `parameters:${prefix}:${key.slug}:${key.version ?? "latest"}`;
|
|
5230
|
+
}
|
|
5231
|
+
var ParametersCache = class {
|
|
5232
|
+
memoryCache;
|
|
5233
|
+
diskCache;
|
|
5234
|
+
constructor(options) {
|
|
5235
|
+
this.memoryCache = options.memoryCache;
|
|
5236
|
+
this.diskCache = options.diskCache;
|
|
5237
|
+
}
|
|
5238
|
+
async get(key) {
|
|
5239
|
+
const cacheKey = createCacheKey2(key);
|
|
5240
|
+
const memoryParams = this.memoryCache.get(cacheKey);
|
|
5241
|
+
if (memoryParams !== void 0) {
|
|
5242
|
+
return memoryParams;
|
|
5243
|
+
}
|
|
5244
|
+
if (this.diskCache) {
|
|
5245
|
+
const diskParams = await this.diskCache.get(cacheKey);
|
|
5246
|
+
if (!diskParams) {
|
|
5247
|
+
return void 0;
|
|
5248
|
+
}
|
|
5249
|
+
this.memoryCache.set(cacheKey, diskParams);
|
|
5250
|
+
return diskParams;
|
|
5251
|
+
}
|
|
5252
|
+
return void 0;
|
|
5253
|
+
}
|
|
5254
|
+
async set(key, value) {
|
|
5255
|
+
const cacheKey = createCacheKey2(key);
|
|
5256
|
+
this.memoryCache.set(cacheKey, value);
|
|
5257
|
+
if (this.diskCache) {
|
|
5258
|
+
await this.diskCache.set(cacheKey, value);
|
|
5259
|
+
}
|
|
5260
|
+
}
|
|
5261
|
+
};
|
|
5262
|
+
|
|
5142
5263
|
// src/span-cache.ts
|
|
5143
5264
|
var activeCaches = /* @__PURE__ */ new Set();
|
|
5144
5265
|
var exitHandlersRegistered = false;
|
|
@@ -5429,7 +5550,24 @@ var SpanCache = class {
|
|
|
5429
5550
|
// src/logger.ts
|
|
5430
5551
|
var BRAINTRUST_ATTACHMENT = BraintrustAttachmentReference.shape.type.value;
|
|
5431
5552
|
var EXTERNAL_ATTACHMENT = ExternalAttachmentReference.shape.type.value;
|
|
5553
|
+
var LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow";
|
|
5432
5554
|
var BRAINTRUST_PARAMS = Object.keys(BraintrustModelParams.shape);
|
|
5555
|
+
var DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024;
|
|
5556
|
+
var parametersRowSchema = import_v38.z.object({
|
|
5557
|
+
id: import_v38.z.string().uuid(),
|
|
5558
|
+
_xact_id: import_v38.z.string(),
|
|
5559
|
+
project_id: import_v38.z.string().uuid(),
|
|
5560
|
+
name: import_v38.z.string(),
|
|
5561
|
+
slug: import_v38.z.string(),
|
|
5562
|
+
description: import_v38.z.union([import_v38.z.string(), import_v38.z.null()]).optional(),
|
|
5563
|
+
function_type: import_v38.z.literal("parameters"),
|
|
5564
|
+
function_data: import_v38.z.object({
|
|
5565
|
+
type: import_v38.z.literal("parameters"),
|
|
5566
|
+
data: import_v38.z.record(import_v38.z.unknown()).optional(),
|
|
5567
|
+
__schema: import_v38.z.record(import_v38.z.unknown())
|
|
5568
|
+
}),
|
|
5569
|
+
metadata: import_v38.z.union([import_v38.z.object({}).partial().passthrough(), import_v38.z.null()]).optional()
|
|
5570
|
+
});
|
|
5433
5571
|
var LoginInvalidOrgError = class extends Error {
|
|
5434
5572
|
constructor(message) {
|
|
5435
5573
|
super(message);
|
|
@@ -5606,6 +5744,17 @@ var BraintrustState = class _BraintrustState {
|
|
|
5606
5744
|
max: Number(isomorph_default.getEnv("BRAINTRUST_PROMPT_CACHE_DISK_MAX")) ?? 1 << 20
|
|
5607
5745
|
}) : void 0;
|
|
5608
5746
|
this.promptCache = new PromptCache({ memoryCache, diskCache });
|
|
5747
|
+
const parametersMemoryCache = new LRUCache({
|
|
5748
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_MEMORY_MAX")) ?? 1 << 10
|
|
5749
|
+
});
|
|
5750
|
+
const parametersDiskCache = canUseDiskCache() ? new DiskCache({
|
|
5751
|
+
cacheDir: isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DIR") ?? `${isomorph_default.getEnv("HOME") ?? isomorph_default.homedir()}/.braintrust/parameters_cache`,
|
|
5752
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DISK_MAX")) ?? 1 << 20
|
|
5753
|
+
}) : void 0;
|
|
5754
|
+
this.parametersCache = new ParametersCache({
|
|
5755
|
+
memoryCache: parametersMemoryCache,
|
|
5756
|
+
diskCache: parametersDiskCache
|
|
5757
|
+
});
|
|
5609
5758
|
this.spanCache = new SpanCache({ disabled: loginParams.disableSpanCache });
|
|
5610
5759
|
}
|
|
5611
5760
|
id;
|
|
@@ -5635,6 +5784,7 @@ var BraintrustState = class _BraintrustState {
|
|
|
5635
5784
|
_apiConn = null;
|
|
5636
5785
|
_proxyConn = null;
|
|
5637
5786
|
promptCache;
|
|
5787
|
+
parametersCache;
|
|
5638
5788
|
spanCache;
|
|
5639
5789
|
_idGenerator = null;
|
|
5640
5790
|
_contextManager = null;
|
|
@@ -6736,8 +6886,100 @@ function castLogger(logger, asyncFlush) {
|
|
|
6736
6886
|
}
|
|
6737
6887
|
return logger;
|
|
6738
6888
|
}
|
|
6889
|
+
var logs3OverflowUploadSchema = import_v38.z.object({
|
|
6890
|
+
method: import_v38.z.enum(["PUT", "POST"]),
|
|
6891
|
+
signedUrl: import_v38.z.string().url(),
|
|
6892
|
+
headers: import_v38.z.record(import_v38.z.string()).optional(),
|
|
6893
|
+
fields: import_v38.z.record(import_v38.z.string()).optional(),
|
|
6894
|
+
key: import_v38.z.string().min(1)
|
|
6895
|
+
});
|
|
6739
6896
|
/**
 * Build the logs3 request body from rows that were already serialized.
 * Each item contributes its `str` field; the rows are joined into a JSON
 * array by `constructJsonArray`.
 */
function constructLogs3Data(items) {
  const serializedRows = items.map((item) => item.str);
  const rowsJson = constructJsonArray(serializedRows);
  return `{"rows": ${rowsJson}, "api_version": 2}`;
}
|
|
6899
|
+
/**
 * Build the logs3 request object that points at an overflow payload by
 * `key` instead of carrying the rows inline.
 */
function constructLogs3OverflowRequest(key) {
  const overflowReference = { type: LOGS3_OVERFLOW_REFERENCE_TYPE, key };
  return { rows: overflowReference, api_version: 2 };
}
|
|
6908
|
+
/**
 * Copy onto a fresh object every key listed in OBJECT_ID_KEYS that is
 * present (`in`) on `row`.
 */
function pickLogs3OverflowObjectIds(row) {
  const picked = {};
  for (const idKey of OBJECT_ID_KEYS) {
    if (!(idKey in row)) {
      continue;
    }
    picked[idKey] = row[idKey];
  }
  return picked;
}
|
|
6917
|
+
/**
 * Upload an overflow payload to the signed URL described by `upload`.
 *
 * - `upload.method === "POST"`: sends a multipart form made of
 *   `upload.fields` plus a `file` part holding `payload`. Any
 *   `content-type` entry in `upload.headers` is dropped before sending.
 * - otherwise: sends `payload` as the body of a PUT request, after
 *   `addAzureBlobHeaders` has been applied to a copy of `upload.headers`.
 *
 * @param upload   upload descriptor (method, signedUrl, headers, fields)
 * @param payload  serialized request body to upload
 * @param fetchFn  fetch implementation; defaults to the global `fetch`
 * @throws Error when POST fields are missing, FormData/Blob are not
 *         available, or the response is not ok
 */
async function uploadLogs3OverflowPayload(upload, payload, fetchFn = fetch) {
  // Shared failure check for both upload paths.
  const ensureUploaded = async (res) => {
    if (res.ok) {
      return;
    }
    const responseText = await res.text().catch(() => "");
    throw new Error(
      `Failed to upload logs3 overflow payload: ${res.status} ${responseText}`
    );
  };

  if (upload.method !== "POST") {
    const putHeaders = { ...upload.headers ?? {} };
    addAzureBlobHeaders(putHeaders, upload.signedUrl);
    const putResponse = await fetchFn(upload.signedUrl, {
      method: "PUT",
      headers: putHeaders,
      body: payload
    });
    await ensureUploaded(putResponse);
    return;
  }

  if (!upload.fields) {
    throw new Error("Missing logs3 overflow upload fields");
  }
  if (typeof FormData === "undefined" || typeof Blob === "undefined") {
    throw new Error("FormData is not available for logs3 overflow upload");
  }

  const form = new FormData();
  Object.entries(upload.fields).forEach(([fieldName, fieldValue]) => {
    form.append(fieldName, fieldValue);
  });
  const contentType = upload.fields["Content-Type"] ?? "application/json";
  form.append("file", new Blob([payload], { type: contentType }));

  // The content-type header is filtered out of the caller-supplied headers.
  const postHeaders = Object.fromEntries(
    Object.entries(upload.headers ?? {}).filter(
      ([headerName]) => headerName.toLowerCase() !== "content-type"
    )
  );
  const postResponse = await fetchFn(upload.signedUrl, {
    method: "POST",
    headers: postHeaders,
    body: form
  });
  await ensureUploaded(postResponse);
}
|
|
6964
|
+
/**
 * Serialize one row and collect the metadata sent with an overflow
 * upload request: the row's object ids, whether it is a delete row, and
 * the UTF-8 byte size of the serialized form.
 *
 * @returns {{ str: string, overflowMeta: object }}
 */
function stringifyWithOverflowMeta(item) {
  const serialized = JSON.stringify(item);
  const overflowMeta = {
    object_ids: pickLogs3OverflowObjectIds(item),
    is_delete: item[OBJECT_DELETE_FIELD] === true,
    input_row: { byte_size: utf8ByteLength(serialized) }
  };
  return { str: serialized, overflowMeta };
}
|
|
6978
|
+
/**
 * Return the number of bytes `value` occupies when encoded as UTF-8.
 *
 * Uses `TextEncoder` when the runtime provides it. Otherwise the size is
 * computed per code point; `value.length` counts UTF-16 code units and
 * would under-report any non-ASCII text.
 *
 * @param {string} value
 * @returns {number} UTF-8 byte length
 */
function utf8ByteLength(value) {
  if (typeof TextEncoder !== "undefined") {
    return new TextEncoder().encode(value).length;
  }
  let byteCount = 0;
  // String iteration yields whole code points (surrogate pairs joined).
  // A lone surrogate falls in the 3-byte range, matching the U+FFFD
  // replacement that TextEncoder emits for it.
  for (const char of value) {
    const codePoint = char.codePointAt(0);
    if (codePoint <= 0x7f) {
      byteCount += 1;
    } else if (codePoint <= 0x7ff) {
      byteCount += 2;
    } else if (codePoint <= 0xffff) {
      byteCount += 3;
    } else {
      byteCount += 4;
    }
  }
  return byteCount;
}
|
|
6742
6984
|
function now() {
|
|
6743
6985
|
return (/* @__PURE__ */ new Date()).getTime();
|
|
@@ -6752,8 +6994,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
6752
6994
|
onFlushError;
|
|
6753
6995
|
maskingFunction = null;
|
|
6754
6996
|
syncFlush = false;
|
|
6755
|
-
|
|
6756
|
-
|
|
6997
|
+
maxRequestSizeOverride = null;
|
|
6998
|
+
_maxRequestSizePromise = null;
|
|
6757
6999
|
defaultBatchSize = 100;
|
|
6758
7000
|
numTries = 3;
|
|
6759
7001
|
queueDropExceedingMaxsize = DEFAULT_QUEUE_SIZE;
|
|
@@ -6781,7 +7023,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
6781
7023
|
}
|
|
6782
7024
|
const maxRequestSizeEnv = Number(isomorph_default.getEnv("BRAINTRUST_MAX_REQUEST_SIZE"));
|
|
6783
7025
|
if (!isNaN(maxRequestSizeEnv)) {
|
|
6784
|
-
this.
|
|
7026
|
+
this.maxRequestSizeOverride = maxRequestSizeEnv;
|
|
6785
7027
|
}
|
|
6786
7028
|
const numTriesEnv = Number(isomorph_default.getEnv("BRAINTRUST_NUM_RETRIES"));
|
|
6787
7029
|
if (!isNaN(numTriesEnv)) {
|
|
@@ -6843,6 +7085,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
6843
7085
|
}
|
|
6844
7086
|
}
|
|
6845
7087
|
}
|
|
7088
|
+
getMaxRequestSize() {
|
|
7089
|
+
if (!this._maxRequestSizePromise) {
|
|
7090
|
+
this._maxRequestSizePromise = (async () => {
|
|
7091
|
+
let serverLimit = null;
|
|
7092
|
+
try {
|
|
7093
|
+
const conn = await this.apiConn.get();
|
|
7094
|
+
const versionInfo = await conn.get_json("version");
|
|
7095
|
+
serverLimit = import_v38.z.object({ logs3_payload_max_bytes: import_v38.z.number().nullish() }).parse(versionInfo).logs3_payload_max_bytes ?? null;
|
|
7096
|
+
} catch (e) {
|
|
7097
|
+
console.warn("Failed to fetch version info for payload limit:", e);
|
|
7098
|
+
}
|
|
7099
|
+
const validServerLimit = serverLimit !== null && serverLimit > 0 ? serverLimit : null;
|
|
7100
|
+
const canUseOverflow = validServerLimit !== null;
|
|
7101
|
+
let maxRequestSize = DEFAULT_MAX_REQUEST_SIZE;
|
|
7102
|
+
if (this.maxRequestSizeOverride !== null) {
|
|
7103
|
+
maxRequestSize = validServerLimit !== null ? Math.min(this.maxRequestSizeOverride, validServerLimit) : this.maxRequestSizeOverride;
|
|
7104
|
+
} else if (validServerLimit !== null) {
|
|
7105
|
+
maxRequestSize = validServerLimit;
|
|
7106
|
+
}
|
|
7107
|
+
return { maxRequestSize, canUseOverflow };
|
|
7108
|
+
})();
|
|
7109
|
+
}
|
|
7110
|
+
return this._maxRequestSizePromise;
|
|
7111
|
+
}
|
|
6846
7112
|
async flush() {
|
|
6847
7113
|
if (this.syncFlush) {
|
|
6848
7114
|
this.triggerActiveFlush();
|
|
@@ -6886,33 +7152,33 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
6886
7152
|
if (allItems.length === 0) {
|
|
6887
7153
|
return;
|
|
6888
7154
|
}
|
|
6889
|
-
const
|
|
6890
|
-
(
|
|
7155
|
+
const allItemsWithMeta = allItems.map(
|
|
7156
|
+
(item) => stringifyWithOverflowMeta(item)
|
|
6891
7157
|
);
|
|
6892
|
-
const
|
|
6893
|
-
|
|
7158
|
+
const maxRequestSizeResult = await this.getMaxRequestSize();
|
|
7159
|
+
const batches = batchItems({
|
|
7160
|
+
items: allItemsWithMeta,
|
|
6894
7161
|
batchMaxNumItems: batchSize,
|
|
6895
|
-
batchMaxNumBytes:
|
|
7162
|
+
batchMaxNumBytes: maxRequestSizeResult.maxRequestSize / 2,
|
|
7163
|
+
getByteSize: (item) => item.str.length
|
|
6896
7164
|
});
|
|
6897
|
-
|
|
6898
|
-
|
|
6899
|
-
|
|
6900
|
-
|
|
6901
|
-
|
|
6902
|
-
|
|
6903
|
-
|
|
6904
|
-
|
|
6905
|
-
|
|
6906
|
-
|
|
7165
|
+
const postPromises = batches.map(
|
|
7166
|
+
(batch) => (async () => {
|
|
7167
|
+
try {
|
|
7168
|
+
await this.submitLogsRequest(batch, maxRequestSizeResult);
|
|
7169
|
+
return { type: "success" };
|
|
7170
|
+
} catch (e) {
|
|
7171
|
+
return { type: "error", value: e };
|
|
7172
|
+
}
|
|
7173
|
+
})()
|
|
7174
|
+
);
|
|
7175
|
+
const results = await Promise.all(postPromises);
|
|
7176
|
+
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
7177
|
+
if (failingResultErrors.length) {
|
|
7178
|
+
throw new AggregateError(
|
|
7179
|
+
failingResultErrors,
|
|
7180
|
+
`Encountered the following errors while logging:`
|
|
6907
7181
|
);
|
|
6908
|
-
const results = await Promise.all(postPromises);
|
|
6909
|
-
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
6910
|
-
if (failingResultErrors.length) {
|
|
6911
|
-
throw new AggregateError(
|
|
6912
|
-
failingResultErrors,
|
|
6913
|
-
`Encountered the following errors while logging:`
|
|
6914
|
-
);
|
|
6915
|
-
}
|
|
6916
7182
|
}
|
|
6917
7183
|
const attachmentErrors = [];
|
|
6918
7184
|
for (const attachment of attachments) {
|
|
@@ -6942,32 +7208,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
6942
7208
|
items.forEach((item) => extractAttachments(item, attachments));
|
|
6943
7209
|
let mergedItems = mergeRowBatch(items);
|
|
6944
7210
|
if (this.maskingFunction) {
|
|
6945
|
-
mergedItems = mergedItems.map(
|
|
6946
|
-
|
|
6947
|
-
|
|
6948
|
-
|
|
6949
|
-
|
|
6950
|
-
|
|
6951
|
-
|
|
6952
|
-
|
|
6953
|
-
|
|
6954
|
-
|
|
6955
|
-
|
|
6956
|
-
|
|
6957
|
-
|
|
6958
|
-
|
|
6959
|
-
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
6960
|
-
} else {
|
|
6961
|
-
maskedItem.error = maskedValue.errorMsg;
|
|
6962
|
-
}
|
|
7211
|
+
mergedItems = mergedItems.map((item) => {
|
|
7212
|
+
const maskedItem = { ...item };
|
|
7213
|
+
for (const field of REDACTION_FIELDS) {
|
|
7214
|
+
if (item[field] !== void 0) {
|
|
7215
|
+
const maskedValue = applyMaskingToField(
|
|
7216
|
+
this.maskingFunction,
|
|
7217
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
7218
|
+
item[field],
|
|
7219
|
+
field
|
|
7220
|
+
);
|
|
7221
|
+
if (maskedValue instanceof MaskingError) {
|
|
7222
|
+
delete maskedItem[field];
|
|
7223
|
+
if (maskedItem.error) {
|
|
7224
|
+
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
6963
7225
|
} else {
|
|
6964
|
-
maskedItem
|
|
7226
|
+
maskedItem.error = maskedValue.errorMsg;
|
|
6965
7227
|
}
|
|
7228
|
+
} else {
|
|
7229
|
+
maskedItem[field] = maskedValue;
|
|
6966
7230
|
}
|
|
6967
7231
|
}
|
|
6968
|
-
|
|
6969
|
-
|
|
6970
|
-
);
|
|
7232
|
+
}
|
|
7233
|
+
return maskedItem;
|
|
7234
|
+
});
|
|
6971
7235
|
}
|
|
6972
7236
|
return [mergedItems, attachments];
|
|
6973
7237
|
} catch (e) {
|
|
@@ -6994,20 +7258,73 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
6994
7258
|
}
|
|
6995
7259
|
throw new Error("Impossible");
|
|
6996
7260
|
}
|
|
6997
|
-
async
|
|
7261
|
+
async requestLogs3OverflowUpload(conn, args) {
|
|
7262
|
+
let response;
|
|
7263
|
+
try {
|
|
7264
|
+
response = await conn.post_json("logs3/overflow", {
|
|
7265
|
+
content_type: "application/json",
|
|
7266
|
+
size_bytes: args.sizeBytes,
|
|
7267
|
+
rows: args.rows
|
|
7268
|
+
});
|
|
7269
|
+
} catch (error2) {
|
|
7270
|
+
const errorStr = JSON.stringify(error2);
|
|
7271
|
+
throw new Error(
|
|
7272
|
+
`Failed to request logs3 overflow upload URL: ${errorStr}`
|
|
7273
|
+
);
|
|
7274
|
+
}
|
|
7275
|
+
try {
|
|
7276
|
+
return logs3OverflowUploadSchema.parse(response);
|
|
7277
|
+
} catch (error2) {
|
|
7278
|
+
if (error2 instanceof import_v38.ZodError) {
|
|
7279
|
+
const errorStr = JSON.stringify(error2.flatten());
|
|
7280
|
+
throw new Error(`Invalid response from API server: ${errorStr}`);
|
|
7281
|
+
}
|
|
7282
|
+
throw error2;
|
|
7283
|
+
}
|
|
7284
|
+
}
|
|
7285
|
+
async _uploadLogs3OverflowPayload(conn, upload, payload) {
|
|
7286
|
+
await uploadLogs3OverflowPayload(upload, payload, conn.fetch.bind(conn));
|
|
7287
|
+
}
|
|
7288
|
+
async submitLogsRequest(items, {
|
|
7289
|
+
maxRequestSize,
|
|
7290
|
+
canUseOverflow
|
|
7291
|
+
}) {
|
|
6998
7292
|
const conn = await this.apiConn.get();
|
|
6999
7293
|
const dataStr = constructLogs3Data(items);
|
|
7294
|
+
const payloadBytes = utf8ByteLength(dataStr);
|
|
7295
|
+
const useOverflow = canUseOverflow && payloadBytes > maxRequestSize;
|
|
7000
7296
|
if (this.allPublishPayloadsDir) {
|
|
7001
7297
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
7002
7298
|
payloadDir: this.allPublishPayloadsDir,
|
|
7003
7299
|
payload: dataStr
|
|
7004
7300
|
});
|
|
7005
7301
|
}
|
|
7302
|
+
let overflowUpload = null;
|
|
7303
|
+
const overflowRows = useOverflow ? items.map((item) => item.overflowMeta) : null;
|
|
7006
7304
|
for (let i = 0; i < this.numTries; i++) {
|
|
7007
7305
|
const startTime = now();
|
|
7008
7306
|
let error2 = void 0;
|
|
7009
7307
|
try {
|
|
7010
|
-
|
|
7308
|
+
if (overflowRows) {
|
|
7309
|
+
if (!overflowUpload) {
|
|
7310
|
+
const currentUpload = await this.requestLogs3OverflowUpload(conn, {
|
|
7311
|
+
rows: overflowRows,
|
|
7312
|
+
sizeBytes: payloadBytes
|
|
7313
|
+
});
|
|
7314
|
+
await this._uploadLogs3OverflowPayload(
|
|
7315
|
+
conn,
|
|
7316
|
+
currentUpload,
|
|
7317
|
+
dataStr
|
|
7318
|
+
);
|
|
7319
|
+
overflowUpload = currentUpload;
|
|
7320
|
+
}
|
|
7321
|
+
await conn.post_json(
|
|
7322
|
+
"logs3",
|
|
7323
|
+
constructLogs3OverflowRequest(overflowUpload.key)
|
|
7324
|
+
);
|
|
7325
|
+
} else {
|
|
7326
|
+
await conn.post_json("logs3", dataStr);
|
|
7327
|
+
}
|
|
7011
7328
|
} catch (e) {
|
|
7012
7329
|
error2 = e;
|
|
7013
7330
|
}
|
|
@@ -7023,7 +7340,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
7023
7340
|
return `${error2}`;
|
|
7024
7341
|
}
|
|
7025
7342
|
})();
|
|
7026
|
-
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${
|
|
7343
|
+
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${payloadBytes}.${retryingText}
|
|
7027
7344
|
Error: ${errorText}`;
|
|
7028
7345
|
if (!isRetrying && this.failedPublishPayloadsDir) {
|
|
7029
7346
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
@@ -7077,7 +7394,7 @@ Error: ${errorText}`;
|
|
|
7077
7394
|
try {
|
|
7078
7395
|
const [allItems, allAttachments] = await this.unwrapLazyValues(wrappedItems);
|
|
7079
7396
|
const dataStr = constructLogs3Data(
|
|
7080
|
-
allItems.map((x) =>
|
|
7397
|
+
allItems.map((x) => stringifyWithOverflowMeta(x))
|
|
7081
7398
|
);
|
|
7082
7399
|
const attachmentStr = JSON.stringify(
|
|
7083
7400
|
allAttachments.map((a) => a.debugInfo())
|
|
@@ -9287,22 +9604,71 @@ var Prompt2 = class _Prompt {
|
|
|
9287
9604
|
);
|
|
9288
9605
|
}
|
|
9289
9606
|
};
|
|
9290
|
-
var
|
|
9291
|
-
|
|
9292
|
-
|
|
9293
|
-
var import_chalk = __toESM(require("chalk"));
|
|
9294
|
-
var cliProgress = __toESM(require("cli-progress"));
|
|
9295
|
-
|
|
9296
|
-
// src/reporters/progress.ts
|
|
9297
|
-
var SimpleProgressReporter = class {
|
|
9298
|
-
start(name, _total) {
|
|
9299
|
-
console.log(`Running evaluator ${name}`);
|
|
9607
|
+
/**
 * Wrapper around a saved "parameters" function row. Exposes the row's
 * identifying fields, its JSON schema (`function_data.__schema`) and its
 * stored values (`function_data.data`).
 */
var RemoteEvalParameters = class {
  constructor(metadata) {
    this.metadata = metadata;
  }
  // Marker checked by `isParameters` to recognize instances.
  __braintrust_parameters_marker = true;
  get id() {
    return this.metadata.id;
  }
  get projectId() {
    return this.metadata.project_id;
  }
  get name() {
    return this.metadata.name;
  }
  get slug() {
    return this.metadata.slug;
  }
  get version() {
    return this.metadata[TRANSACTION_ID_FIELD];
  }
  get schema() {
    return this.metadata.function_data.__schema;
  }
  /** Stored parameter values, or an empty object when none are saved. */
  get data() {
    return this.metadata.function_data.data ?? {};
  }
  /**
   * Lightweight presence check: `data` must be a non-null object that
   * contains every key listed in the schema's `required` array.
   *
   * Every required key is checked, including keys that have no entry
   * under `properties` and schemas that have no `properties` at all.
   *
   * @param data candidate parameter values
   * @returns {boolean} false when `data` is not an object or a required
   *          key is missing; true otherwise
   */
  validate(data) {
    if (typeof data !== "object" || data === null) {
      return false;
    }
    const { required } = this.schema;
    if (!Array.isArray(required)) {
      return true;
    }
    return required.every((key) => key in data);
  }
  /** Return true when `x` is an object carrying the parameters marker. */
  static isParameters(x) {
    return typeof x === "object" && x !== null && "__braintrust_parameters_marker" in x && // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    x.__braintrust_parameters_marker === true;
  }
};
|
|
9656
|
+
var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
|
|
9657
|
+
|
|
9658
|
+
// src/cli/reporters/progress.ts
|
|
9659
|
+
var import_chalk = __toESM(require("chalk"));
|
|
9660
|
+
var cliProgress = __toESM(require("cli-progress"));
|
|
9661
|
+
|
|
9662
|
+
// src/reporters/progress.ts
|
|
9663
|
+
/**
 * Minimal progress reporter: logs a line when an evaluator starts and
 * ignores every other progress event.
 */
var SimpleProgressReporter = class {
  /** Log the evaluator name; the total is not used. */
  start(name, _total) {
    const line = `Running evaluator ${name}`;
    console.log(line);
  }
  /** No-op. */
  stop() {}
  /** No-op. */
  increment(_name) {}
  /** No-op. */
  setTotal(_name, _total) {}
};
|
|
9308
9674
|
|
|
@@ -10385,6 +10751,85 @@ function waterfall(tasks, callback) {
|
|
|
10385
10751
|
}
|
|
10386
10752
|
var waterfall$1 = awaitify(waterfall);
|
|
10387
10753
|
|
|
10754
|
+
// src/functions/invoke.ts
|
|
10755
|
+
/**
 * Invoke a function through the proxy's `function/invoke` endpoint.
 *
 * Logs in with the supplied credentials (on `args.state`, or the global
 * state when none is given), resolves the parent span reference,
 * validates the function identifier with `FunctionId`, and posts the
 * request.
 *
 * @returns a `BraintrustStream` when `stream` is truthy; otherwise the
 *          parsed JSON response, passed through `schema.parse` when a
 *          schema is supplied
 * @throws Error when the function identifier arguments are invalid or a
 *         streaming response has no body
 */
async function invoke(args) {
  const {
    orgName,
    apiKey,
    appUrl,
    forceLogin,
    fetch: fetch2,
    input,
    messages,
    parent: parentArg,
    metadata,
    tags,
    state: stateArg,
    stream,
    mode,
    schema,
    strict,
    projectId,
    ...functionIdArgs
  } = args;

  const state = stateArg ?? _internalGetGlobalState();
  await state.login({ orgName, apiKey, appUrl, forceLogin, fetch: fetch2 });

  // A string parent is used verbatim; an object parent is exported; with
  // no parent the current span parent object is exported.
  let parent;
  if (!parentArg) {
    parent = await getSpanParentObject().export();
  } else if (typeof parentArg === "string") {
    parent = parentArg;
  } else {
    parent = await parentArg.export();
  }

  const functionId = FunctionId.safeParse({
    function_id: functionIdArgs.function_id,
    project_name: functionIdArgs.projectName,
    slug: functionIdArgs.slug,
    global_function: functionIdArgs.globalFunction,
    function_type: functionIdArgs.functionType,
    prompt_session_id: functionIdArgs.promptSessionId,
    prompt_session_function_id: functionIdArgs.promptSessionFunctionId,
    version: functionIdArgs.version
  });
  if (!functionId.success) {
    throw new Error(
      `Invalid function ID arguments: ${functionId.error.message}`
    );
  }

  const request = {
    ...functionId.data,
    input,
    messages,
    parent,
    metadata,
    tags,
    stream,
    mode,
    strict
  };
  const headers = {
    Accept: stream ? "text/event-stream" : "application/json",
    ...projectId ? { "x-bt-project-id": projectId } : {},
    ...orgName ? { "x-bt-org-name": orgName } : {}
  };

  const resp = await state.proxyConn().post(`function/invoke`, request, {
    headers
  });
  if (!stream) {
    const data = await resp.json();
    return schema ? schema.parse(data) : data;
  }
  if (!resp.body) {
    throw new Error("Received empty stream body");
  }
  return new BraintrustStream(resp.body);
}
|
|
10832
|
+
|
|
10388
10833
|
// src/trace.ts
|
|
10389
10834
|
var SpanFetcher = class _SpanFetcher extends ObjectFetcher {
|
|
10390
10835
|
constructor(objectType, _objectId, rootSpanId, _state, spanTypeFilter) {
|
|
@@ -10520,6 +10965,7 @@ var LocalTrace = class {
|
|
|
10520
10965
|
spansFlushed = false;
|
|
10521
10966
|
spansFlushPromise = null;
|
|
10522
10967
|
cachedFetcher;
|
|
10968
|
+
threadCache = /* @__PURE__ */ new Map();
|
|
10523
10969
|
constructor({
|
|
10524
10970
|
objectType,
|
|
10525
10971
|
objectId,
|
|
@@ -10590,6 +11036,36 @@ var LocalTrace = class {
|
|
|
10590
11036
|
}
|
|
10591
11037
|
return this.cachedFetcher.getSpans({ spanType });
|
|
10592
11038
|
}
|
|
11039
|
+
/**
|
|
11040
|
+
* Get the thread (preprocessed messages) for this trace.
|
|
11041
|
+
* Calls the API with the project_default preprocessor (which falls back to "thread").
|
|
11042
|
+
*/
|
|
11043
|
+
async getThread(options) {
|
|
11044
|
+
const cacheKey = options?.preprocessor ?? "project_default";
|
|
11045
|
+
if (!this.threadCache.has(cacheKey)) {
|
|
11046
|
+
const promise = this.fetchThread(options);
|
|
11047
|
+
this.threadCache.set(cacheKey, promise);
|
|
11048
|
+
}
|
|
11049
|
+
return this.threadCache.get(cacheKey);
|
|
11050
|
+
}
|
|
11051
|
+
async fetchThread(options) {
|
|
11052
|
+
await this.ensureSpansReady();
|
|
11053
|
+
await this.state.login({});
|
|
11054
|
+
const result = await invoke({
|
|
11055
|
+
globalFunction: options?.preprocessor ?? "project_default",
|
|
11056
|
+
functionType: "preprocessor",
|
|
11057
|
+
input: {
|
|
11058
|
+
trace_ref: {
|
|
11059
|
+
object_type: this.objectType,
|
|
11060
|
+
object_id: this.objectId,
|
|
11061
|
+
root_span_id: this.rootSpanId
|
|
11062
|
+
}
|
|
11063
|
+
},
|
|
11064
|
+
mode: "json",
|
|
11065
|
+
state: this.state
|
|
11066
|
+
});
|
|
11067
|
+
return Array.isArray(result) ? result : [];
|
|
11068
|
+
}
|
|
10593
11069
|
async ensureSpansReady() {
|
|
10594
11070
|
if (this.spansFlushed || !this.ensureSpansFlushed) {
|
|
10595
11071
|
return;
|
|
@@ -10611,660 +11087,383 @@ var LocalTrace = class {
|
|
|
10611
11087
|
|
|
10612
11088
|
// src/eval-parameters.ts
|
|
10613
11089
|
var import_v310 = require("zod/v3");
|
|
11090
|
+
var import_ajv = __toESM(require("ajv"));
|
|
10614
11091
|
|
|
10615
|
-
// src/
|
|
11092
|
+
// src/prompt-schemas.ts
|
|
10616
11093
|
var import_v39 = require("zod/v3");
|
|
10617
|
-
var
|
|
10618
|
-
|
|
10619
|
-
|
|
10620
|
-
|
|
11094
|
+
var promptContentsSchema = import_v39.z.union([
|
|
11095
|
+
import_v39.z.object({
|
|
11096
|
+
prompt: import_v39.z.string()
|
|
11097
|
+
}),
|
|
11098
|
+
import_v39.z.object({
|
|
11099
|
+
messages: import_v39.z.array(ChatCompletionMessageParam)
|
|
11100
|
+
})
|
|
11101
|
+
]);
|
|
11102
|
+
var promptDefinitionSchema = promptContentsSchema.and(
|
|
11103
|
+
import_v39.z.object({
|
|
11104
|
+
model: import_v39.z.string(),
|
|
11105
|
+
params: ModelParams.optional(),
|
|
11106
|
+
templateFormat: import_v39.z.enum(["mustache", "nunjucks", "none"]).optional()
|
|
11107
|
+
})
|
|
11108
|
+
);
|
|
11109
|
+
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
|
|
11110
|
+
import_v39.z.object({
|
|
11111
|
+
tools: import_v39.z.array(ToolFunctionDefinition).optional()
|
|
11112
|
+
})
|
|
11113
|
+
);
|
|
11114
|
+
/**
 * Convert a prompt definition into the prompt-data shape.
 *
 * A definition with a `messages` key becomes a "chat" block, with
 * `rawTools` JSON-encoded into `tools` when the list is non-empty;
 * otherwise it becomes a "completion" block built from `prompt`.
 * `template_format` is included only when `templateFormat` is truthy.
 */
function promptDefinitionToPromptData(promptDefinition, rawTools) {
  let promptBlock;
  if ("messages" in promptDefinition) {
    const hasTools = rawTools && rawTools.length > 0;
    promptBlock = {
      type: "chat",
      messages: promptDefinition.messages,
      tools: hasTools ? JSON.stringify(rawTools) : void 0
    };
  } else {
    promptBlock = {
      type: "completion",
      content: promptDefinition.prompt
    };
  }
  const promptData = {
    prompt: promptBlock,
    options: {
      model: promptDefinition.model,
      params: promptDefinition.params
    }
  };
  if (promptDefinition.templateFormat) {
    promptData.template_format = promptDefinition.templateFormat;
  }
  return promptData;
}
|
|
11132
|
+
|
|
11133
|
+
// src/eval-parameters.ts
|
|
11134
|
+
var evalParametersSchema = import_v310.z.record(
|
|
11135
|
+
import_v310.z.string(),
|
|
11136
|
+
import_v310.z.union([
|
|
11137
|
+
import_v310.z.object({
|
|
11138
|
+
type: import_v310.z.literal("prompt"),
|
|
11139
|
+
default: promptDefinitionWithToolsSchema.optional(),
|
|
11140
|
+
description: import_v310.z.string().optional()
|
|
11141
|
+
}),
|
|
11142
|
+
import_v310.z.instanceof(import_v310.z.ZodType)
|
|
11143
|
+
// For Zod schemas
|
|
11144
|
+
])
|
|
11145
|
+
);
|
|
11146
|
+
/**
 * Validate evaluator parameters against a parameter schema.
 *
 * @param parameters       caller-supplied parameter values (may be nullish)
 * @param parameterSchema  a schema, a promise/thenable of one, or nullish.
 *                         Either a `RemoteEvalParameters` instance
 *                         (validated with its JSON schema) or a record of
 *                         per-parameter schemas (validated via Zod).
 * @returns `parameters` unchanged when there is no schema; otherwise the
 *          validated parameter object
 */
async function validateParameters(parameters, parameterSchema) {
  // `await` resolves any thenable and passes plain values through, so no
  // `instanceof Promise` check is needed.
  const resolvedSchema = await parameterSchema;
  if (resolvedSchema === void 0 || resolvedSchema === null) {
    return parameters;
  }
  if (RemoteEvalParameters.isParameters(resolvedSchema)) {
    // Always merge into a fresh object: validation runs with
    // useDefaults/coerceTypes, which write to the object they validate,
    // and the saved `data` object must not be modified.
    const mergedParameters = {
      ...resolvedSchema.data,
      ...parameters
    };
    return validateParametersWithJsonSchema(
      mergedParameters,
      resolvedSchema.schema
    );
  }
  return validateParametersWithZod(
    parameters,
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    resolvedSchema
  );
}
|
|
11170
|
+
/**
 * Validate `parameters` against a record of per-parameter schemas.
 *
 * Entries shaped `{ type: "prompt", default? }` are parsed as prompt data
 * (falling back to the entry's default) and returned as prompts; every
 * other entry is validated by calling its `parse` method.
 *
 * @param parameters       parameter values; a nullish value is treated
 *                         as "no values supplied"
 * @param parameterSchema  record of parameter name -> schema
 * @returns object with one validated value per schema entry
 * @throws Error naming the first invalid parameter; the underlying
 *         error is attached as `cause`
 */
function validateParametersWithZod(parameters, parameterSchema) {
  return Object.fromEntries(
    Object.entries(parameterSchema).map(([name, schema]) => {
      // Optional chaining keeps a missing parameters object from throwing
      // a bare TypeError before the per-parameter error handling runs.
      const value = parameters?.[name];
      try {
        if ("type" in schema && schema.type === "prompt") {
          const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
            schema.default,
            schema.default.tools
          ) : void 0;
          if (!promptData) {
            throw new Error(`Parameter '${name}' is required`);
          }
          return [name, Prompt2.fromPromptData(name, promptData)];
        } else {
          const schemaCasted = schema;
          return [name, schemaCasted.parse(value)];
        }
      } catch (e) {
        console.error("Error validating parameter", name, e);
        throw new Error(
          `Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`,
          { cause: e }
        );
      }
    })
  );
}
|
|
11197
|
+
/**
 * Validate `parameters` against a JSON schema using Ajv configured with
 * `coerceTypes`, `useDefaults` and non-strict mode.
 *
 * @returns the same `parameters` object when validation succeeds
 * @throws Error listing each validation error as "<path>: <message>",
 *         with "root" standing in for an empty instance path
 */
function validateParametersWithJsonSchema(parameters, schema) {
  const validator = new import_ajv.default({
    coerceTypes: true,
    useDefaults: true,
    strict: false
  }).compile(schema);
  if (validator(parameters)) {
    return parameters;
  }
  const errorMessages = validator.errors?.map(({ instancePath, message }) => `${instancePath || "root"}: ${message}`).join(", ");
  throw Error(`Invalid parameters: ${errorMessages}`);
}
|
|
11209
|
+
|
|
11210
|
+
// src/framework.ts
|
|
11211
|
+
/**
 * Pairs an evaluation summary with its per-case results.
 */
var EvalResultWithSummary = class {
  constructor(summary, results) {
    Object.assign(this, { summary, results });
  }
  /**
   * @deprecated Use `summary` instead.
   */
  toString() {
    const { summary } = this;
    return JSON.stringify(summary);
  }
  /** Short placeholder shown by Node's `util.inspect`. */
  [Symbol.for("nodejs.util.inspect.custom")]() {
    return `EvalResultWithSummary(summary="...", results=[...])`;
  }
  /** Plain-object form used by `JSON.stringify`. */
  toJSON() {
    const { summary, results } = this;
    return { summary, results };
  }
};
|
|
10676
|
-
|
|
10677
|
-
|
|
10678
|
-
|
|
11232
|
+
/**
 * Build the display name of an eval: the project name, followed by
 * " [experimentName=<name>]" when an experiment name is given.
 */
function makeEvalName(projectName, experimentName) {
  if (!experimentName) {
    return projectName;
  }
  return projectName + ` [experimentName=${experimentName}]`;
}
|
|
11239
|
+
/**
 * Initialize an experiment through `init` without making it the current one.
 *
 * @param state - Passed to `init` as `state` (an explicit `options.state`
 *   would override it, since `options` is spread afterwards).
 * @param options - Extra `init` options; `setCurrent` is always forced to
 *   `false` regardless of what the caller supplied.
 * @returns Whatever `init` returns.
 */
function initExperiment(state, options = {}) {
  const initArgs = { state, ...options, setCurrent: false };
  return init(initArgs);
}
|
|
11246
|
+
/**
 * Resolve an evaluator's `data` field and detect a base-experiment marker.
 *
 * @param data - Either the data itself or a zero-argument function that
 *   produces it (the function is invoked exactly once).
 * @returns `{ data, baseExperiment }` where `baseExperiment` is the resolved
 *   value's `name` when its `_type` is `"BaseExperiment"`, otherwise
 *   `undefined`.
 */
function callEvaluatorData(data) {
  let resolved = data;
  if (typeof data === "function") {
    resolved = data();
  }
  const isBaseExperiment = "_type" in resolved && resolved._type === "BaseExperiment";
  return {
    data: resolved,
    baseExperiment: isBaseExperiment ? resolved.name : void 0
  };
}
|
|
11257
|
+
/**
 * Report whether `value` is a non-null object exposing a callable
 * `Symbol.asyncIterator` member.
 */
function isAsyncIterable2(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return typeof value[Symbol.asyncIterator] === "function";
}
|
|
11260
|
+
/**
 * Report whether `value` is a non-null object exposing a callable
 * `Symbol.iterator` member. Primitives such as strings are rejected by the
 * `typeof value === "object"` requirement.
 */
function isIterable(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return typeof value[Symbol.iterator] === "function";
}
|
|
11263
|
+
globalThis._evals = {
|
|
11264
|
+
functions: [],
|
|
11265
|
+
prompts: [],
|
|
11266
|
+
parameters: [],
|
|
11267
|
+
evaluators: {},
|
|
11268
|
+
reporters: {}
|
|
10705
11269
|
};
|
|
10706
|
-
|
|
10707
|
-
|
|
10708
|
-
|
|
11270
|
+
/**
 * Publish the span helpers (`currentSpan`, `withCurrent`, `startSpan`,
 * `NOOP_SPAN`) on `globalThis._spanContext`, replacing any previous value.
 */
function _initializeSpanContext() {
  const spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
  globalThis._spanContext = spanContext;
}
|
|
11273
|
+
/**
 * Register or run an evaluation.
 *
 * When `globalThis._lazy_load` is truthy the evaluator is only recorded in
 * `globalThis._evals.evaluators` and an empty `EvalResultWithSummary` is
 * returned. Otherwise the evaluator is executed through `runEvaluator`, the
 * result is handed to the reporter, and the result is returned.
 *
 * @param name - Project name; also the base of the eval's registry name.
 * @param evaluator - Evaluator definition (`data`, `experimentName`, ...).
 * @param reporterOrOpts - Nothing, a reporter name (string), a reporter
 *   object (anything with a `name` property), or an options object.
 * @returns The evaluation result (a placeholder in lazy-load mode).
 * @throws Error if a reporter *name* is supplied outside lazy-load mode.
 */
async function Eval(name, evaluator, reporterOrOpts) {
  const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
  let evalName = makeEvalName(name, evaluator.experimentName);
  const registeredEvaluators = globalThis._evals.evaluators;
  if (registeredEvaluators[evalName]) {
    // Disambiguate with the number of evaluators registered so far, and keep
    // bumping the suffix until the name is actually free so that repeated
    // duplicates never overwrite one another.
    const baseName = evalName;
    let suffix = Object.keys(registeredEvaluators).length;
    do {
      evalName = `${baseName}_${suffix}`;
      suffix += 1;
    } while (registeredEvaluators[evalName]);
  }
  if (globalThis._lazy_load) {
    registeredEvaluators[evalName] = {
      // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
      evaluator: {
        evalName,
        projectName: name,
        ...evaluator
      },
      reporter: options.reporter
    };
    _initializeSpanContext();
    // Nothing has run yet, so hand back an empty summary with no results.
    return new EvalResultWithSummary(
      {
        scores: {},
        metrics: {},
        projectName: "",
        experimentName: ""
      },
      []
    );
  }
  const progressReporter = options.progress ?? new SimpleProgressReporter();
  const shouldCollectResults = options.returnResults ?? true;
  if (typeof options.reporter === "string") {
    throw new Error(
      "Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
    );
  }
  const resolvedReporter = options.reporter || defaultReporter;
  try {
    const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
      evaluator.data
    );
    // No experiment is created when logging under a parent span or when log
    // sending is disabled.
    const experiment = options.parent || options.noSendLogs ? null : initExperiment(evaluator.state, {
      ...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
      experiment: evaluator.experimentName,
      description: evaluator.description,
      metadata: evaluator.metadata,
      isPublic: evaluator.isPublic,
      update: evaluator.update,
      baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
      baseExperimentId: evaluator.baseExperimentId,
      gitMetadataSettings: evaluator.gitMetadataSettings,
      repoInfo: evaluator.repoInfo,
      dataset: Dataset2.isDataset(data) ? data : void 0
    });
    if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
      await experiment._waitForId();
    }
    if (experiment && options.onStart) {
      const summary = await experiment.summarize({ summarizeScores: false });
      options.onStart(summary);
    }
    try {
      const evalDef = {
        evalName,
        projectName: name,
        ...evaluator,
        data
      };
      const enableCache = options.enableCache ?? true;
      let ret;
      if (options.parent) {
        ret = await withParent(
          options.parent,
          () => runEvaluator(
            null,
            evalDef,
            progressReporter,
            [],
            options.stream,
            options.parameters,
            shouldCollectResults,
            enableCache
          ),
          evaluator.state
        );
      } else {
        ret = await runEvaluator(
          experiment,
          evalDef,
          progressReporter,
          [],
          options.stream,
          options.parameters,
          shouldCollectResults,
          enableCache
        );
      }
      progressReporter.stop();
      // NOTE(review): the reporter's return value is not awaited here;
      // confirm that is intentional for reporters with an async reportEval.
      resolvedReporter.reportEval(evalDef, ret, {
        verbose: true,
        jsonl: false
      });
      return ret;
    } finally {
      // Flush whichever logger was used; flush failures are logged, not thrown.
      if (experiment) {
        await experiment.flush().catch(console.error);
      } else if (options.parent) {
        await flush().catch(console.error);
      }
    }
  } finally {
    progressReporter.stop();
  }
}
|
|
11385
|
+
/**
 * Serialize a value for text matching: strings are returned untouched,
 * everything else goes through `JSON.stringify`.
 */
function serializeJSONWithPlainString(v) {
  return typeof v === "string" ? v : JSON.stringify(v);
}
|
|
11392
|
+
/**
 * Try to parse `s` as JSON.
 *
 * @param s - Text to parse.
 * @returns `{ value, error }`: on success `value` is the parsed JSON and
 *   `error` is `undefined`; on failure `value` is the original input and
 *   `error` is the exception raised by `JSON.parse`.
 */
function deserializePlainStringAsJSON2(s) {
  let parsed;
  try {
    parsed = JSON.parse(s);
  } catch (e) {
    return { value: s, error: e };
  }
  return { value: parsed, error: void 0 };
}
|
|
11399
|
+
/**
 * Parse `path=pattern` filter strings into `{ path, pattern }` records.
 *
 * The text before the first `=` is split on `.` into path segments. The text
 * after it becomes the regular-expression source; if that text is a JSON
 * string literal, the decoded string is used instead of the raw text.
 *
 * @param filters - Iterable of filter strings.
 * @returns Array of `{ path: string[], pattern: RegExp }`.
 * @throws Error (`Invalid filter ...`) when an entry contains no `=`.
 */
function parseFilters(filters) {
  const parsed = [];
  for (const spec of filters) {
    const separatorIdx = spec.indexOf("=");
    if (separatorIdx < 0) {
      throw new Error(`Invalid filter ${spec}`);
    }
    const rawPath = spec.slice(0, separatorIdx);
    const rawPattern = spec.slice(separatorIdx + 1);
    const decoded = deserializePlainStringAsJSON2(rawPattern).value;
    // Only a decoded JSON *string* replaces the raw text; numbers, objects,
    // and unparsable input fall back to the text exactly as written.
    const patternSource = typeof decoded === "string" ? decoded : rawPattern;
    parsed.push({
      path: rawPath.split("."),
      pattern: new RegExp(patternSource)
    });
  }
  return parsed;
}
|
|
10934
|
-
|
|
10935
|
-
|
|
10936
|
-
|
|
10937
|
-
|
|
10938
|
-
|
|
10939
|
-
|
|
10940
|
-
|
|
10941
|
-
|
|
10942
|
-
|
|
10943
|
-
|
|
10944
|
-
|
|
10945
|
-
const projectId = result.project.id;
|
|
10946
|
-
this.nameToId[projectName] = projectId;
|
|
10947
|
-
this.idToName[projectId] = projectName;
|
|
10948
|
-
}
|
|
10949
|
-
return this.nameToId[projectName];
|
|
11418
|
+
/**
 * Test whether the value found at `filter2.path` inside `object` matches
 * `filter2.pattern`.
 *
 * @param object - Root value to walk.
 * @param filter2 - `{ path, pattern }` as produced by `parseFilters`.
 * @returns `false` when the path resolves to `undefined`; otherwise the
 *   result of testing the pattern against the serialized value.
 */
function evaluateFilter(object, filter2) {
  const { path: segments, pattern } = filter2;
  let cursor = object;
  for (const segment of segments) {
    // Stepping into anything that is not a non-null object yields undefined.
    cursor = cursor !== null && typeof cursor === "object" ? cursor[segment] : void 0;
  }
  if (cursor === void 0) {
    return false;
  }
  return pattern.test(serializeJSONWithPlainString(cursor));
}
|
|
11432
|
+
/**
 * Pick a display name for a scorer: its own `name` when truthy, otherwise
 * `scorer_<index>`.
 */
function scorerName(scorer, scorer_idx) {
  const ownName = scorer.name;
  if (ownName) {
    return ownName;
  }
  return `scorer_${scorer_idx}`;
}
|
|
11435
|
+
/**
 * Public entry point that forwards to `runEvaluatorInternal`, applying
 * defaults of `true` for `collectResults` and `enableCache`.
 *
 * @returns The value resolved by `runEvaluatorInternal`.
 */
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
  const outcome = await runEvaluatorInternal(
    experiment,
    evaluator,
    progressReporter,
    filters,
    stream,
    parameters,
    collectResults,
    enableCache
  );
  return outcome;
}
|
|
11447
|
+
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
11448
|
+
if (enableCache) {
|
|
11449
|
+
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
10962
11450
|
}
|
|
10963
|
-
|
|
10964
|
-
if (
|
|
10965
|
-
|
|
11451
|
+
try {
|
|
11452
|
+
if (typeof evaluator.data === "string") {
|
|
11453
|
+
throw new Error("Unimplemented: string data paths");
|
|
10966
11454
|
}
|
|
10967
|
-
|
|
10968
|
-
|
|
10969
|
-
}
|
|
10970
|
-
|
|
10971
|
-
|
|
10972
|
-
|
|
10973
|
-
|
|
10974
|
-
|
|
10975
|
-
|
|
10976
|
-
|
|
10977
|
-
|
|
10978
|
-
|
|
10979
|
-
}),
|
|
10980
|
-
import_v310.z.instanceof(import_v310.z.ZodType)
|
|
10981
|
-
// For Zod schemas
|
|
10982
|
-
])
|
|
10983
|
-
);
|
|
10984
|
-
function validateParameters(parameters, parameterSchema) {
|
|
10985
|
-
return Object.fromEntries(
|
|
10986
|
-
Object.entries(parameterSchema).map(([name, schema]) => {
|
|
10987
|
-
const value = parameters[name];
|
|
10988
|
-
try {
|
|
10989
|
-
if ("type" in schema && schema.type === "prompt") {
|
|
10990
|
-
const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
|
|
10991
|
-
schema.default,
|
|
10992
|
-
schema.default.tools
|
|
10993
|
-
) : void 0;
|
|
10994
|
-
if (!promptData) {
|
|
10995
|
-
throw new Error(`Parameter '${name}' is required`);
|
|
10996
|
-
}
|
|
10997
|
-
return [name, Prompt2.fromPromptData(name, promptData)];
|
|
10998
|
-
} else {
|
|
10999
|
-
const schemaCasted = schema;
|
|
11000
|
-
return [name, schemaCasted.parse(value)];
|
|
11001
|
-
}
|
|
11002
|
-
} catch (e) {
|
|
11003
|
-
console.error("Error validating parameter", name, e);
|
|
11004
|
-
throw Error(
|
|
11005
|
-
`Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
|
|
11006
|
-
);
|
|
11007
|
-
}
|
|
11008
|
-
})
|
|
11009
|
-
);
|
|
11010
|
-
}
|
|
11011
|
-
|
|
11012
|
-
// src/framework.ts
|
|
11013
|
-
var EvalResultWithSummary = class {
|
|
11014
|
-
constructor(summary, results) {
|
|
11015
|
-
this.summary = summary;
|
|
11016
|
-
this.results = results;
|
|
11017
|
-
}
|
|
11018
|
-
/**
|
|
11019
|
-
* @deprecated Use `summary` instead.
|
|
11020
|
-
*/
|
|
11021
|
-
toString() {
|
|
11022
|
-
return JSON.stringify(this.summary);
|
|
11023
|
-
}
|
|
11024
|
-
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
11025
|
-
return `EvalResultWithSummary(summary="...", results=[...])`;
|
|
11026
|
-
}
|
|
11027
|
-
toJSON() {
|
|
11028
|
-
return {
|
|
11029
|
-
summary: this.summary,
|
|
11030
|
-
results: this.results
|
|
11031
|
-
};
|
|
11032
|
-
}
|
|
11033
|
-
};
|
|
11034
|
-
function makeEvalName(projectName, experimentName) {
|
|
11035
|
-
let out = projectName;
|
|
11036
|
-
if (experimentName) {
|
|
11037
|
-
out += ` [experimentName=${experimentName}]`;
|
|
11038
|
-
}
|
|
11039
|
-
return out;
|
|
11040
|
-
}
|
|
11041
|
-
function initExperiment(state, options = {}) {
|
|
11042
|
-
return init({
|
|
11043
|
-
state,
|
|
11044
|
-
...options,
|
|
11045
|
-
setCurrent: false
|
|
11046
|
-
});
|
|
11047
|
-
}
|
|
11048
|
-
function callEvaluatorData(data) {
|
|
11049
|
-
const dataResult = typeof data === "function" ? data() : data;
|
|
11050
|
-
let baseExperiment = void 0;
|
|
11051
|
-
if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
|
|
11052
|
-
baseExperiment = dataResult.name;
|
|
11053
|
-
}
|
|
11054
|
-
return {
|
|
11055
|
-
data: dataResult,
|
|
11056
|
-
baseExperiment
|
|
11057
|
-
};
|
|
11058
|
-
}
|
|
11059
|
-
function isAsyncIterable2(value) {
|
|
11060
|
-
return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
|
|
11061
|
-
}
|
|
11062
|
-
function isIterable(value) {
|
|
11063
|
-
return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
|
|
11064
|
-
}
|
|
11065
|
-
globalThis._evals = {
|
|
11066
|
-
functions: [],
|
|
11067
|
-
prompts: [],
|
|
11068
|
-
evaluators: {},
|
|
11069
|
-
reporters: {}
|
|
11070
|
-
};
|
|
11071
|
-
function _initializeSpanContext() {
|
|
11072
|
-
globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
|
|
11073
|
-
}
|
|
11074
|
-
async function Eval(name, evaluator, reporterOrOpts) {
|
|
11075
|
-
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
11076
|
-
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
11077
|
-
if (globalThis._evals.evaluators[evalName]) {
|
|
11078
|
-
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
11079
|
-
}
|
|
11080
|
-
if (globalThis._lazy_load) {
|
|
11081
|
-
globalThis._evals.evaluators[evalName] = {
|
|
11082
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
11083
|
-
evaluator: {
|
|
11084
|
-
evalName,
|
|
11085
|
-
projectName: name,
|
|
11086
|
-
...evaluator
|
|
11087
|
-
},
|
|
11088
|
-
reporter: options.reporter
|
|
11089
|
-
};
|
|
11090
|
-
_initializeSpanContext();
|
|
11091
|
-
return new EvalResultWithSummary(
|
|
11092
|
-
{
|
|
11093
|
-
scores: {},
|
|
11094
|
-
metrics: {},
|
|
11095
|
-
projectName: "",
|
|
11096
|
-
experimentName: ""
|
|
11097
|
-
},
|
|
11098
|
-
[]
|
|
11099
|
-
);
|
|
11100
|
-
}
|
|
11101
|
-
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
11102
|
-
const shouldCollectResults = options.returnResults ?? true;
|
|
11103
|
-
if (typeof options.reporter === "string") {
|
|
11104
|
-
throw new Error(
|
|
11105
|
-
"Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
|
|
11106
|
-
);
|
|
11107
|
-
}
|
|
11108
|
-
const resolvedReporter = options.reporter || defaultReporter;
|
|
11109
|
-
try {
|
|
11110
|
-
const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
|
|
11111
|
-
evaluator.data
|
|
11112
|
-
);
|
|
11113
|
-
const experiment = options.parent || options.noSendLogs ? null : initExperiment(evaluator.state, {
|
|
11114
|
-
...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
|
|
11115
|
-
experiment: evaluator.experimentName,
|
|
11116
|
-
description: evaluator.description,
|
|
11117
|
-
metadata: evaluator.metadata,
|
|
11118
|
-
isPublic: evaluator.isPublic,
|
|
11119
|
-
update: evaluator.update,
|
|
11120
|
-
baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
|
|
11121
|
-
baseExperimentId: evaluator.baseExperimentId,
|
|
11122
|
-
gitMetadataSettings: evaluator.gitMetadataSettings,
|
|
11123
|
-
repoInfo: evaluator.repoInfo,
|
|
11124
|
-
dataset: Dataset2.isDataset(data) ? data : void 0
|
|
11125
|
-
});
|
|
11126
|
-
if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
|
|
11127
|
-
await experiment._waitForId();
|
|
11128
|
-
}
|
|
11129
|
-
if (experiment && options.onStart) {
|
|
11130
|
-
const summary = await experiment.summarize({ summarizeScores: false });
|
|
11131
|
-
options.onStart(summary);
|
|
11132
|
-
}
|
|
11133
|
-
try {
|
|
11134
|
-
const evalDef = {
|
|
11135
|
-
evalName,
|
|
11136
|
-
projectName: name,
|
|
11137
|
-
...evaluator,
|
|
11138
|
-
data
|
|
11139
|
-
};
|
|
11140
|
-
const enableCache = options.enableCache ?? true;
|
|
11141
|
-
let ret;
|
|
11142
|
-
if (options.parent) {
|
|
11143
|
-
ret = await withParent(
|
|
11144
|
-
options.parent,
|
|
11145
|
-
() => runEvaluator(
|
|
11146
|
-
null,
|
|
11147
|
-
evalDef,
|
|
11148
|
-
progressReporter,
|
|
11149
|
-
[],
|
|
11150
|
-
options.stream,
|
|
11151
|
-
options.parameters,
|
|
11152
|
-
shouldCollectResults,
|
|
11153
|
-
enableCache
|
|
11154
|
-
),
|
|
11155
|
-
evaluator.state
|
|
11156
|
-
);
|
|
11157
|
-
} else {
|
|
11158
|
-
ret = await runEvaluator(
|
|
11159
|
-
experiment,
|
|
11160
|
-
evalDef,
|
|
11161
|
-
progressReporter,
|
|
11162
|
-
[],
|
|
11163
|
-
options.stream,
|
|
11164
|
-
options.parameters,
|
|
11165
|
-
shouldCollectResults,
|
|
11166
|
-
enableCache
|
|
11167
|
-
);
|
|
11168
|
-
}
|
|
11169
|
-
progressReporter.stop();
|
|
11170
|
-
resolvedReporter.reportEval(evalDef, ret, {
|
|
11171
|
-
verbose: true,
|
|
11172
|
-
jsonl: false
|
|
11173
|
-
});
|
|
11174
|
-
return ret;
|
|
11175
|
-
} finally {
|
|
11176
|
-
if (experiment) {
|
|
11177
|
-
await experiment.flush().catch(console.error);
|
|
11178
|
-
} else if (options.parent) {
|
|
11179
|
-
await flush().catch(console.error);
|
|
11180
|
-
}
|
|
11181
|
-
}
|
|
11182
|
-
} finally {
|
|
11183
|
-
progressReporter.stop();
|
|
11184
|
-
}
|
|
11185
|
-
}
|
|
11186
|
-
function serializeJSONWithPlainString(v) {
|
|
11187
|
-
if (typeof v === "string") {
|
|
11188
|
-
return v;
|
|
11189
|
-
} else {
|
|
11190
|
-
return JSON.stringify(v);
|
|
11191
|
-
}
|
|
11192
|
-
}
|
|
11193
|
-
function deserializePlainStringAsJSON2(s) {
|
|
11194
|
-
try {
|
|
11195
|
-
return { value: JSON.parse(s), error: void 0 };
|
|
11196
|
-
} catch (e) {
|
|
11197
|
-
return { value: s, error: e };
|
|
11198
|
-
}
|
|
11199
|
-
}
|
|
11200
|
-
function parseFilters(filters) {
|
|
11201
|
-
const result = [];
|
|
11202
|
-
for (const f of filters) {
|
|
11203
|
-
const equalsIdx = f.indexOf("=");
|
|
11204
|
-
if (equalsIdx === -1) {
|
|
11205
|
-
throw new Error(`Invalid filter ${f}`);
|
|
11206
|
-
}
|
|
11207
|
-
const [path8, value] = [f.slice(0, equalsIdx), f.slice(equalsIdx + 1)];
|
|
11208
|
-
let deserializedValue = deserializePlainStringAsJSON2(value).value;
|
|
11209
|
-
if (typeof deserializedValue !== "string") {
|
|
11210
|
-
deserializedValue = value;
|
|
11211
|
-
}
|
|
11212
|
-
result.push({
|
|
11213
|
-
path: path8.split("."),
|
|
11214
|
-
pattern: new RegExp(deserializedValue)
|
|
11215
|
-
});
|
|
11216
|
-
}
|
|
11217
|
-
return result;
|
|
11218
|
-
}
|
|
11219
|
-
function evaluateFilter(object, filter2) {
|
|
11220
|
-
const { path: path8, pattern } = filter2;
|
|
11221
|
-
const key = path8.reduce(
|
|
11222
|
-
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
11223
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
11224
|
-
acc[p]
|
|
11225
|
-
) : void 0,
|
|
11226
|
-
object
|
|
11227
|
-
);
|
|
11228
|
-
if (key === void 0) {
|
|
11229
|
-
return false;
|
|
11230
|
-
}
|
|
11231
|
-
return pattern.test(serializeJSONWithPlainString(key));
|
|
11232
|
-
}
|
|
11233
|
-
function scorerName(scorer, scorer_idx) {
|
|
11234
|
-
return scorer.name || `scorer_${scorer_idx}`;
|
|
11235
|
-
}
|
|
11236
|
-
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
|
|
11237
|
-
return await runEvaluatorInternal(
|
|
11238
|
-
experiment,
|
|
11239
|
-
evaluator,
|
|
11240
|
-
progressReporter,
|
|
11241
|
-
filters,
|
|
11242
|
-
stream,
|
|
11243
|
-
parameters,
|
|
11244
|
-
collectResults,
|
|
11245
|
-
enableCache
|
|
11246
|
-
);
|
|
11247
|
-
}
|
|
11248
|
-
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
11249
|
-
if (enableCache) {
|
|
11250
|
-
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
11251
|
-
}
|
|
11252
|
-
try {
|
|
11253
|
-
if (typeof evaluator.data === "string") {
|
|
11254
|
-
throw new Error("Unimplemented: string data paths");
|
|
11255
|
-
}
|
|
11256
|
-
let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
|
|
11257
|
-
parameters = validateParameters(
|
|
11258
|
-
parameters ?? {},
|
|
11259
|
-
evaluator.parameters ?? {}
|
|
11260
|
-
);
|
|
11261
|
-
if ("_type" in dataResult) {
|
|
11262
|
-
if (dataResult._type !== "BaseExperiment") {
|
|
11263
|
-
throw new Error("Invalid _type");
|
|
11264
|
-
}
|
|
11265
|
-
if (!experiment) {
|
|
11266
|
-
throw new Error(
|
|
11267
|
-
"Cannot use BaseExperiment() without connecting to Braintrust (you most likely set --no-send-logs)"
|
|
11455
|
+
let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
|
|
11456
|
+
parameters = await validateParameters(
|
|
11457
|
+
parameters ?? {},
|
|
11458
|
+
evaluator.parameters
|
|
11459
|
+
);
|
|
11460
|
+
if ("_type" in dataResult) {
|
|
11461
|
+
if (dataResult._type !== "BaseExperiment") {
|
|
11462
|
+
throw new Error("Invalid _type");
|
|
11463
|
+
}
|
|
11464
|
+
if (!experiment) {
|
|
11465
|
+
throw new Error(
|
|
11466
|
+
"Cannot use BaseExperiment() without connecting to Braintrust (you most likely set --no-send-logs)"
|
|
11268
11467
|
);
|
|
11269
11468
|
}
|
|
11270
11469
|
let name = dataResult.name;
|
|
@@ -11415,6 +11614,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
11415
11614
|
} else {
|
|
11416
11615
|
rootSpan.log({ output, metadata, expected });
|
|
11417
11616
|
}
|
|
11617
|
+
if (evaluator.flushBeforeScoring) {
|
|
11618
|
+
await rootSpan.flush();
|
|
11619
|
+
}
|
|
11418
11620
|
const scoringArgs = {
|
|
11419
11621
|
input: datum.input,
|
|
11420
11622
|
expected: "expected" in datum ? datum.expected : void 0,
|
|
@@ -11654,869 +11856,1331 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
11654
11856
|
summary,
|
|
11655
11857
|
collectResults ? collectedResults : []
|
|
11656
11858
|
);
|
|
11657
|
-
} finally {
|
|
11658
|
-
if (enableCache) {
|
|
11659
|
-
const spanCache = (evaluator.state ?? _internalGetGlobalState())?.spanCache;
|
|
11660
|
-
spanCache?.dispose();
|
|
11661
|
-
spanCache?.stop();
|
|
11662
|
-
}
|
|
11859
|
+
} finally {
|
|
11860
|
+
if (enableCache) {
|
|
11861
|
+
const spanCache = (evaluator.state ?? _internalGetGlobalState())?.spanCache;
|
|
11862
|
+
spanCache?.dispose();
|
|
11863
|
+
spanCache?.stop();
|
|
11864
|
+
}
|
|
11865
|
+
}
|
|
11866
|
+
}
|
|
11867
|
+
// Prefix a message with "Error: " for console output.
var error = (text) => "Error: ".concat(text);
|
|
11868
|
+
// Prefix a message with "Warning: " for console output.
var warning = (text) => "Warning: ".concat(text);
|
|
11869
|
+
/**
 * Write an error to `console.error`.
 *
 * @param e - The error (or any value) to report.
 * @param verbose - When truthy the value is logged as-is; otherwise only its
 *   string form is logged.
 */
function logError2(e, verbose) {
  if (verbose) {
    console.error(e);
    return;
  }
  console.error(`${e}`);
}
|
|
11876
|
+
/**
 * Fold one result's scores into a running `{ total, count }` accumulator.
 *
 * @param accumulator - Object keyed by score name; mutated in place.
 * @param scores - Object mapping score name to a value. Entries whose value
 *   is `null` or `undefined` are skipped.
 */
function accumulateScores(accumulator, scores) {
  for (const [name, score] of Object.entries(scores)) {
    if (score === null || score === void 0) {
      continue;
    }
    // Only trust entries stored on the accumulator itself. A plain lookup
    // would pick up inherited members for names such as "toString" or
    // "constructor" and turn the running total into NaN.
    const hasEntry = Object.prototype.hasOwnProperty.call(accumulator, name);
    const existing = (hasEntry ? accumulator[name] : void 0) ?? { total: 0, count: 0 };
    accumulator[name] = {
      total: existing.total + score,
      count: existing.count + 1
    };
  }
}
|
|
11888
|
+
/**
 * Build a fresh score accumulator by feeding every result's `scores` through
 * `accumulateScores`.
 *
 * @param results - Iterable of results, each carrying a `scores` object.
 * @returns The populated accumulator (an empty object for no results).
 */
function ensureScoreAccumulator(results) {
  const totals = {};
  for (const entry of results) {
    accumulateScores(totals, entry.scores);
  }
  return totals;
}
|
|
11895
|
+
/**
 * Build a summary object from locally accumulated scores.
 *
 * @param evaluator - Supplies `projectName` and `evalName` (the latter is
 *   reported as `experimentName`).
 * @param results - Results to aggregate when no precomputed scores are given.
 * @param precomputedScores - Optional `{ name: { total, count } }` map; when
 *   nullish it is derived from `results` via `ensureScoreAccumulator`.
 * @returns `{ projectName, experimentName, scores }`, where each score entry
 *   holds the mean (`0` when `count` is `0`) and zeroed
 *   `improvements`/`regressions`.
 */
function buildLocalSummary(evaluator, results, precomputedScores) {
  const totalsByName = precomputedScores ?? ensureScoreAccumulator(results);
  const projectName = evaluator.projectName;
  const experimentName = evaluator.evalName;
  const scoreEntries = Object.entries(totalsByName).map(([name, { total, count }]) => {
    const mean = count === 0 ? 0 : total / count;
    return [name, { name, score: mean, improvements: 0, regressions: 0 }];
  });
  return {
    projectName,
    experimentName,
    scores: Object.fromEntries(scoreEntries)
  };
}
|
|
11913
|
+
/**
 * Reports failed evaluation results. Does nothing when there are none.
 *
 * A warning naming the evaluator is written to stderr first. In `jsonl`
 * mode a single JSON object listing the stringified errors (stack for
 * `Error` instances) goes to stdout; otherwise each error is passed to
 * `logError2`, followed by a hint about `--verbose` when not verbose.
 *
 * @param {{evalName: *}} evaluator - The evaluator that produced the results.
 * @param {Array<{error: *}>} failingResults - Results carrying an error.
 * @param {{verbose: boolean, jsonl: boolean}} options - Output options.
 */
function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
  const failureCount = failingResults.length;
  if (!(failureCount > 0)) {
    return;
  }
  const suffix = failureCount === 1 ? "" : "s";
  console.error(
    warning(
      `Evaluator ${evaluator.evalName} failed with ${failureCount} error${suffix}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
    )
  );
  if (jsonl) {
    console.log(
      JSON.stringify({
        evaluatorName: evaluator.evalName,
        errors: failingResults.map(
          ({ error: failure }) => `${failure instanceof Error ? failure.stack : failure}`
        )
      })
    );
    return;
  }
  failingResults.forEach((failed) => {
    logError2(failed.error, verbose);
  });
  if (!verbose) {
    console.error(warning("Add --verbose to see full stack traces."));
  }
}
|
|
11939
|
+
/**
 * Plain-text reporter: prints an experiment summary through
 * `isomorph_default.writeln` and delegates failure output to
 * `reportFailures`.
 */
var defaultReporter = {
  name: "Braintrust default reporter",
  /**
   * Prints the summary for one evaluator run.
   *
   * @param {Object} evaluator - Evaluator descriptor, forwarded to `reportFailures`.
   * @param {{results: Array, summary: Object}} result - Run results and summary.
   * @param {{verbose: boolean, jsonl: boolean}} options - Output options; in
   *   `jsonl` mode the summary is emitted as a single JSON line.
   * @returns {Promise<boolean>} True when no result carried an error.
   */
  async reportEval(evaluator, result, { verbose, jsonl }) {
    const { results, summary } = result;
    const failingResults = results.filter((entry) => entry.error !== void 0);
    if (failingResults.length > 0) {
      reportFailures(evaluator, failingResults, { verbose, jsonl });
    }
    const emit = (line) => isomorph_default.writeln(line);
    if (jsonl) {
      emit(JSON.stringify(summary));
      emit("");
      return failingResults.length === 0;
    }
    emit("Experiment summary");
    emit("==================");
    if (summary.comparisonExperimentName) {
      emit(
        `${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
      );
      emit("");
    }
    const scoreRows = Object.values(summary.scores);
    const metricRows = Object.values(summary.metrics ?? {});
    const hasComparison = !!summary.comparisonExperimentName;
    // Renders a relative change as a signed percentage, or "-" when absent.
    const describeDiff = (diff) => {
      if (isEmpty2(diff)) {
        return "-";
      }
      const percent = (diff * 100).toFixed(2);
      const sign = diff > 0 ? "+" : "";
      return `${sign}${percent}%`;
    };
    // Positive counts are shown as numbers, anything else as "-".
    const describeCount = (count) => count > 0 ? count.toString() : "-";
    const emitRow = (row, value) => {
      if (!hasComparison) {
        emit(`${row.name.padEnd(20)} ${value.padStart(15)}`);
        return;
      }
      const change = describeDiff(row.diff);
      const improved = describeCount(row.improvements);
      const regressed = describeCount(row.regressions);
      emit(
        `${row.name.padEnd(18)} ${value.padStart(10)} ${change.padStart(10)} ${improved.padStart(12)} ${regressed.padStart(11)}`
      );
    };
    if (scoreRows.length > 0 || metricRows.length > 0) {
      if (hasComparison) {
        emit(
          "Name Value Change Improvements Regressions"
        );
        emit(
          "----------------------------------------------------------------"
        );
      }
      for (const score of scoreRows) {
        emitRow(score, `${(score.score * 100).toFixed(2)}%`);
      }
      for (const metric of metricRows) {
        // Whole numbers print without decimals; everything else with two.
        const digits = Number.isInteger(metric.metric) ? 0 : 2;
        const amount = metric.metric.toFixed(digits);
        const metricValue = metric.unit === "$" ? `${metric.unit}${amount}` : `${amount}${metric.unit}`;
        emitRow(metric, metricValue);
      }
    }
    if (summary.experimentUrl) {
      emit("");
      emit(`View results for ${summary.experimentName}`);
      emit(`See results at ${summary.experimentUrl}`);
    }
    emit("");
    return failingResults.length === 0;
  },
  /**
   * @param {Array} evalReports - Per-evaluator outcomes from `reportEval`.
   * @returns {Promise<boolean>} True when every outcome is truthy.
   */
  async reportRun(evalReports) {
    return evalReports.every(Boolean);
  }
};
|
|
12027
|
+
|
|
12028
|
+
// src/cli/reporters/eval.ts
|
|
12029
|
+
var import_chalk2 = __toESM(require("chalk"));
|
|
12030
|
+
var import_termi_link = require("termi-link");
|
|
12031
|
+
var import_boxen = __toESM(require("boxen"));
|
|
12032
|
+
var import_cli_table3 = __toESM(require("cli-table3"));
|
|
12033
|
+
var import_pluralize = __toESM(require("pluralize"));
|
|
12034
|
+
/**
 * Renders an experiment summary as a boxed, colorized table.
 *
 * Scores and metrics share one borderless table; the Change, Improvements
 * and Regressions columns appear only when the summary names a comparison
 * experiment. If drawing the box throws, a plain titled block is returned.
 *
 * @param {Object} summary - Experiment summary (`scores`, optional `metrics`,
 *   `experimentName`, optional `comparisonExperimentName` / `experimentUrl`).
 * @returns {string} Text ready to be written to the terminal.
 */
function formatExperimentSummaryFancy(summary) {
  const chalk = import_chalk2.default;
  const comparisonLine = summary.comparisonExperimentName ? `${summary.comparisonExperimentName} ${chalk.gray("(baseline)")} \u2190 ${summary.experimentName} ${chalk.gray("(comparison)")}\n\n` : "";
  const scoreRows = Object.values(summary.scores);
  const metricRows = Object.values(summary.metrics ?? {});
  const hasComparison = !!summary.comparisonExperimentName;
  let tableText = "";
  if (scoreRows.length > 0 || metricRows.length > 0) {
    const headers = [chalk.gray("Name"), chalk.gray("Value")];
    if (hasComparison) {
      headers.push(
        chalk.gray("Change"),
        chalk.gray("Improvements"),
        chalk.gray("Regressions")
      );
    }
    const table = new import_cli_table3.default({
      head: hasComparison ? headers : [],
      style: { head: [], "padding-left": 0, "padding-right": 0, border: [] },
      chars: {
        top: "",
        "top-mid": "",
        "top-left": "",
        "top-right": "",
        bottom: "",
        "bottom-mid": "",
        "bottom-left": "",
        "bottom-right": "",
        left: "",
        "left-mid": "",
        mid: "",
        "mid-mid": "",
        right: "",
        "right-mid": "",
        middle: " "
      },
      colWidths: hasComparison ? [18, 10, 10, 13, 12] : [20, 15],
      colAligns: hasComparison ? ["left", "right", "right", "right", "right"] : ["left", "right"],
      wordWrap: false
    });
    // Signed percentage, green when positive and red otherwise; gray "-" when absent.
    const describeDiff = (diff) => {
      if (isEmpty2(diff)) {
        return chalk.gray("-");
      }
      const percent = (diff * 100).toFixed(2);
      const sign = diff > 0 ? "+" : "";
      const paint = diff > 0 ? chalk.green : chalk.red;
      return paint(`${sign}${percent}%`);
    };
    const addRow = (bullet, entry, value) => {
      const change = describeDiff(entry.diff);
      const improved = entry.improvements > 0 ? chalk.dim.green(entry.improvements) : chalk.gray("-");
      const regressed = entry.regressions > 0 ? chalk.dim.red(entry.regressions) : chalk.gray("-");
      const cells = [`${bullet} ${entry.name}`, value];
      if (hasComparison) {
        cells.push(change, improved, regressed);
      }
      table.push(cells);
    };
    for (const score of scoreRows) {
      const scoreValue = chalk.white(`${(score.score * 100).toFixed(2)}%`);
      addRow(chalk.blue("\u25EF"), score, scoreValue);
    }
    for (const metric of metricRows) {
      // Whole numbers print without decimals; everything else with two.
      const digits = Number.isInteger(metric.metric) ? 0 : 2;
      const amount = metric.metric.toFixed(digits);
      const metricValue = chalk.white(
        metric.unit === "$" ? `${metric.unit}${amount}` : `${amount}${metric.unit}`
      );
      addRow(chalk.magenta("\u25EF"), metric, metricValue);
    }
    tableText = table.toString();
  }
  const content = [comparisonLine, tableText].filter(Boolean).join("\n");
  const footer = summary.experimentUrl ? (0, import_termi_link.terminalLink)(
    `View results for ${summary.experimentName}`,
    summary.experimentUrl,
    { fallback: () => `See results at ${summary.experimentUrl}` }
  ) : "";
  const boxContent = [content, footer].filter(Boolean).join("\n\n");
  const title = "Experiment summary";
  try {
    return "\n" + (0, import_boxen.default)(boxContent, {
      title: chalk.gray(title),
      titleAlignment: "left",
      padding: 0.5,
      borderColor: "gray",
      borderStyle: "round"
    });
  } catch {
    // Box rendering failed; fall back to an unboxed block with the same title.
    return "\n" + chalk.gray(title) + "\n" + boxContent + "\n";
  }
}
|
|
12144
|
+
var warning2 = import_chalk2.default.yellow;
|
|
12145
|
+
/**
 * Reporter that prints the boxed summary from
 * `formatExperimentSummaryFancy` (or JSON lines in `jsonl` mode).
 */
var fancyReporter = {
  name: "Braintrust fancy reporter",
  /**
   * Reports one evaluator run on stdout/stderr.
   *
   * @param {{evalName: *}} evaluator - Evaluator descriptor.
   * @param {{results: Array, summary: Object}} result - Run results and summary.
   * @param {{verbose: boolean, jsonl: boolean}} options - In `jsonl` mode
   *   failing results and the summary are written as JSON lines; otherwise
   *   failing results are dumped to stderr only when `verbose` is set.
   * @returns {Promise<boolean>} True when no result carried an error.
   */
  async reportEval(evaluator, result, { verbose, jsonl }) {
    const { results, summary } = result;
    const failingResults = results.filter((entry) => entry.error !== void 0);
    const succeeded = failingResults.length === 0;
    if (!succeeded) {
      const countLabel = (0, import_pluralize.default)("error", failingResults.length, true);
      console.error(
        warning2(
          `Evaluator ${evaluator.evalName} failed with ${countLabel}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
        )
      );
      if (jsonl) {
        failingResults.forEach((failed) => {
          process.stdout.write(JSON.stringify(failed));
          process.stdout.write("\n");
        });
      } else if (verbose) {
        failingResults.forEach((failed) => {
          console.error(failed);
        });
      }
    }
    const rendered = jsonl ? JSON.stringify(summary) : formatExperimentSummaryFancy(summary);
    process.stdout.write(rendered);
    process.stdout.write("\n");
    return succeeded;
  },
  /**
   * @param {Array} evalReports - Per-evaluator outcomes from `reportEval`.
   * @returns {Promise<boolean>} True when every outcome is truthy.
   */
  async reportRun(evalReports) {
    return evalReports.every(Boolean);
  }
};
|
|
12179
|
+
|
|
12180
|
+
// src/node.ts
|
|
12181
|
+
var import_node_async_hooks = require("async_hooks");
|
|
12182
|
+
var path = __toESM(require("path"));
|
|
12183
|
+
var fs = __toESM(require("fs/promises"));
|
|
12184
|
+
var os = __toESM(require("os"));
|
|
12185
|
+
var fsSync = __toESM(require("fs"));
|
|
12186
|
+
var crypto = __toESM(require("crypto"));
|
|
12187
|
+
|
|
12188
|
+
// src/gitutil.ts
|
|
12189
|
+
var import_simple_git = require("simple-git");
|
|
12190
|
+
var COMMON_BASE_BRANCHES = ["main", "master", "develop"];
|
|
12191
|
+
/**
 * Opens a simple-git handle and checks whether it is inside a repository.
 *
 * @returns {Promise<Object|null>} The git handle when `checkIsRepo()`
 *   resolves truthy; null when it resolves falsy or anything throws.
 */
async function currentRepo() {
  try {
    const repo = (0, import_simple_git.simpleGit)();
    const insideRepo = await repo.checkIsRepo();
    if (!insideRepo) {
      return null;
    }
    return repo;
  } catch {
    return null;
  }
}
|
|
12203
|
+
var _baseBranch = null;
|
|
12204
|
+
/**
 * Determines the repository's base branch and remote.
 *
 * The answer is stored in the module-level `_baseBranch`; once set, later
 * calls return it as-is without looking at `remote` again.
 *
 * Resolution order: if exactly one of `COMMON_BASE_BRANCHES` exists
 * locally, use it; otherwise parse the "HEAD branch" line of
 * `git remote show <remote>`; if that fails for any reason, use "main".
 *
 * @param {string} [remote] - Remote name; defaults to the first configured remote.
 * @returns {Promise<{remote: string, branch: string}>}
 * @throws {Error} When not inside a git repo or no remote can be determined.
 */
async function getBaseBranch(remote = void 0) {
  if (_baseBranch !== null) {
    return _baseBranch;
  }
  const repo = await currentRepo();
  if (repo === null) {
    throw new Error("Not in a git repo");
  }
  const remoteName = remote ?? (await repo.getRemotes())[0]?.name;
  if (!remoteName) {
    throw new Error("No remote found");
  }
  const localBranches = new Set((await repo.branchLocal()).all);
  const candidates = COMMON_BASE_BRANCHES.filter(
    (name) => localBranches.has(name)
  );
  // Asks the remote which branch its HEAD points at; rejects when unknown.
  const readRemoteHead = async () => {
    const remoteInfo = await repo.remote(["show", remoteName]);
    if (!remoteInfo) {
      throw new Error(`Could not find remote ${remoteName}`);
    }
    const headMatch = remoteInfo.match(/\s*HEAD branch:\s*(.*)$/m);
    if (!headMatch) {
      throw new Error(`Could not find HEAD branch in remote ${remoteName}`);
    }
    return headMatch[1];
  };
  const branch = candidates.length === 1 ? candidates[0] : await readRemoteHead().catch(() => "main");
  _baseBranch = { remote: remoteName, branch };
  return _baseBranch;
}
|
|
12240
|
+
/**
 * Finds the merge base between the working history and the base branch
 * reported by `getBaseBranch`.
 *
 * The comparison starts from "HEAD" when `diffSummary()` lists changed
 * files and from "HEAD^" otherwise.
 *
 * @param {string} [remote] - Remote name forwarded to `getBaseBranch`.
 * @returns {Promise<string|undefined>} Trimmed `git merge-base` output, or
 *   undefined when that command fails.
 * @throws {Error} When not inside a git repo.
 */
async function getBaseBranchAncestor(remote = void 0) {
  const repo = await currentRepo();
  if (repo === null) {
    throw new Error("Not in a git repo");
  }
  const base = await getBaseBranch(remote);
  const pendingChanges = await repo.diffSummary();
  const head = pendingChanges.files.length > 0 ? "HEAD" : "HEAD^";
  try {
    const mergeBase = await repo.raw(["merge-base", head, `${base.remote}/${base.branch}`]);
    return mergeBase.trim();
  } catch {
    return void 0;
  }
}
|
|
12259
|
+
/**
 * Lists commit hashes between the base-branch ancestor and HEAD.
 *
 * @param {number} [n=1000] - Maximum number of hashes to return.
 * @param {string} [remote] - Remote name forwarded to `getBaseBranchAncestor`.
 * @returns {Promise<string[]>} Commit hashes; empty when not in a repo or
 *   when no ancestor could be determined (a warning is logged if the
 *   lookup threw).
 */
async function getPastNAncestors(n = 1e3, remote = void 0) {
  const repo = await currentRepo();
  if (repo === null) {
    return [];
  }
  const ancestor = await getBaseBranchAncestor(remote).catch((e) => {
    console.warn(
      "Skipping git metadata. This is likely because the repository has not been published to a remote yet.",
      `${e}`
    );
    return void 0;
  });
  if (!ancestor) {
    return [];
  }
  const history = await repo.log({ from: ancestor, to: "HEAD", maxCount: n });
  return history.all.slice(0, n).map((entry) => entry.hash);
}
|
|
12279
|
+
/**
 * Best-effort call: runs `fn` and yields its awaited result, or undefined
 * if it throws or rejects. The error is deliberately discarded.
 *
 * @param {Function} fn - Sync or async function taking no arguments.
 * @returns {Promise<*>} The awaited result of `fn`, or undefined on failure.
 */
async function attempt(fn) {
  let outcome;
  try {
    outcome = await fn();
  } catch {
    outcome = void 0;
  }
  return outcome;
}
|
|
12286
|
+
/**
 * Truncates a string so that its UTF-8 encoding fits within `byteLimit`.
 *
 * The cut is moved back to the nearest character boundary, so the result
 * never ends in a partial multi-byte sequence. Cutting mid-character would
 * make the decoder emit U+FFFD, which is itself three bytes and could push
 * the result back over the limit.
 *
 * @param {string} s - Text to truncate.
 * @param {number} [byteLimit=65536] - Maximum size in UTF-8 bytes.
 * @returns {string} `s` unchanged when it already fits, otherwise the
 *   longest prefix of whole characters whose encoding is within the limit.
 */
function truncateToByteLimit(s, byteLimit = 65536) {
  const encoded = new TextEncoder().encode(s);
  if (encoded.length <= byteLimit) {
    return s;
  }
  let end = Math.max(0, Math.floor(byteLimit));
  // Bytes of the form 10xxxxxx are UTF-8 continuation bytes: if the byte at
  // the cut position is one, the cut lies inside a character. Step back until
  // it sits on a lead byte.
  while (end > 0 && (encoded[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  return new TextDecoder().decode(encoded.subarray(0, end));
}
|
|
12294
|
+
/**
 * Collects git metadata, filtered according to `settings`.
 *
 * @param {{collect?: string, fields?: string[]}} [settings] - With
 *   `collect: "none"` nothing is gathered; with `collect: "all"` (or no
 *   settings) the full `repoInfo()` result is returned; otherwise only the
 *   properties named in `fields` are copied.
 * @returns {Promise<Object|undefined>} The metadata, or undefined when
 *   collection is disabled or `repoInfo()` yields nothing.
 */
async function getRepoInfo(settings) {
  if (settings?.collect === "none") {
    return void 0;
  }
  const repo = await repoInfo();
  const wantsEverything = !settings || settings.collect === "all";
  if (!repo || wantsEverything) {
    return repo;
  }
  const picked = settings.fields?.reduce(
    (soFar, field) => ({ ...soFar, [field]: repo[field] }),
    {}
  );
  return picked ?? {};
}
|
|
12308
|
+
/**
 * Gathers metadata about the current git checkout.
 *
 * Every field except `dirty` is fetched through `attempt`, so a failing
 * git command leaves that field undefined instead of aborting. The diff
 * against HEAD is only captured when the tree is dirty, and is passed
 * through `truncateToByteLimit`.
 *
 * @returns {Promise<Object|undefined>} Undefined when not in a git repo,
 *   otherwise an object with commit, branch, tag, dirty, author_name,
 *   author_email, commit_message, commit_time and git_diff.
 */
async function repoInfo() {
  const repo = await currentRepo();
  if (repo === null) {
    return void 0;
  }
  // Runs a raw git command and returns its trimmed output (undefined on failure).
  const rawTrimmed = (args) => attempt(async () => (await repo.raw(args)).trim());
  const dirty = (await repo.diffSummary()).files.length > 0;
  const commit = await attempt(async () => await repo.revparse(["HEAD"]));
  const commit_message = await rawTrimmed(["log", "-1", "--pretty=%B"]);
  const commit_time = await rawTrimmed(["log", "-1", "--pretty=%cI"]);
  const author_name = await rawTrimmed(["log", "-1", "--pretty=%aN"]);
  const author_email = await rawTrimmed(["log", "-1", "--pretty=%aE"]);
  const tag = await rawTrimmed(["describe", "--tags", "--exact-match", "--always"]);
  const branch = await rawTrimmed(["rev-parse", "--abbrev-ref", "HEAD"]);
  const git_diff = dirty ? await attempt(
    async () => truncateToByteLimit(await repo.raw(["--no-ext-diff", "diff", "HEAD"]))
  ) : void 0;
  return {
    commit,
    branch,
    tag,
    dirty,
    author_name,
    author_email,
    commit_message,
    commit_time,
    git_diff
  };
}
|
|
11665
|
-
|
|
11666
|
-
|
|
11667
|
-
function
|
|
11668
|
-
|
|
11669
|
-
|
|
11670
|
-
|
|
11671
|
-
console.error(e);
|
|
12358
|
+
|
|
12359
|
+
// src/stackutil.ts
|
|
12360
|
+
/**
 * Captures the current call stack as structured frames.
 *
 * Only stack lines of the form `at <name> (<file>:<line>:<col>)` are
 * recognized; lines without the parenthesized location are skipped. The
 * first line of the stack (the error header) is ignored.
 *
 * @returns {Array<{functionName: string, fileName: string, lineNo: number}>}
 *   Frames from innermost outward, starting with this function itself;
 *   empty when no string stack is available.
 */
function getStackTrace() {
  const trace = new Error().stack;
  if (typeof trace !== "string") {
    return [];
  }
  const framePattern = /at(.*)\((.*):(\d+):(\d+)\)/;
  const frames = [];
  const [, ...frameLines] = trace.split("\n");
  for (const line of frameLines) {
    const matches = line.match(framePattern);
    if (matches?.length !== 5) {
      continue;
    }
    const [, rawName, fileName, lineText] = matches;
    const lineNo = parseInt(lineText, 10);
    if (isNaN(lineNo)) {
      continue;
    }
    frames.push({ functionName: rawName.trim(), fileName, lineNo });
  }
  return frames;
}
|
|
11686
|
-
function
|
|
11687
|
-
|
|
11688
|
-
|
|
11689
|
-
|
|
11690
|
-
|
|
11691
|
-
|
|
11692
|
-
}
|
|
11693
|
-
function buildLocalSummary(evaluator, results, precomputedScores) {
|
|
11694
|
-
const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
|
|
11695
|
-
return {
|
|
11696
|
-
projectName: evaluator.projectName,
|
|
11697
|
-
experimentName: evaluator.evalName,
|
|
11698
|
-
scores: Object.fromEntries(
|
|
11699
|
-
Object.entries(scoresByName).map(([name, { total, count }]) => [
|
|
11700
|
-
name,
|
|
11701
|
-
{
|
|
11702
|
-
name,
|
|
11703
|
-
score: count === 0 ? 0 : total / count,
|
|
11704
|
-
improvements: 0,
|
|
11705
|
-
regressions: 0
|
|
11706
|
-
}
|
|
11707
|
-
])
|
|
11708
|
-
)
|
|
11709
|
-
};
|
|
11710
|
-
}
|
|
11711
|
-
function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
|
|
11712
|
-
if (failingResults.length > 0) {
|
|
11713
|
-
console.error(
|
|
11714
|
-
warning(
|
|
11715
|
-
`Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
|
|
11716
|
-
)
|
|
11717
|
-
);
|
|
11718
|
-
if (jsonl) {
|
|
11719
|
-
console.log(
|
|
11720
|
-
JSON.stringify({
|
|
11721
|
-
evaluatorName: evaluator.evalName,
|
|
11722
|
-
errors: failingResults.map(
|
|
11723
|
-
(r) => `${r.error instanceof Error ? r.error.stack : r.error}`
|
|
11724
|
-
)
|
|
11725
|
-
})
|
|
11726
|
-
);
|
|
11727
|
-
} else {
|
|
11728
|
-
for (const result of failingResults) {
|
|
11729
|
-
logError2(result.error, verbose);
|
|
11730
|
-
}
|
|
12384
|
+
/**
 * Locates the first stack frame whose file lives in a different directory
 * than the first frame returned by `getStackTrace()`.
 *
 * Directories are compared via `isomorph_default.pathDirname`; when that
 * hook is missing every frame compares equal and nothing is returned.
 *
 * @returns {{caller_functionname: string, caller_filename: string,
 *   caller_lineno: number}|undefined} The matching frame, or undefined.
 */
function getCallerLocation() {
  let ownDir = void 0;
  const outsideFrame = getStackTrace().find((frame) => {
    const frameDir = isomorph_default.pathDirname?.(frame.fileName);
    if (ownDir === void 0) {
      // The first frame with a resolvable directory defines "this" directory.
      ownDir = frameDir;
    }
    return frameDir !== ownDir;
  });
  if (outsideFrame === void 0) {
    return void 0;
  }
  return {
    caller_functionname: outsideFrame.functionName,
    caller_filename: outsideFrame.fileName,
    caller_lineno: outsideFrame.lineNo
  };
}
|
|
11737
|
-
|
|
11738
|
-
|
|
11739
|
-
|
|
11740
|
-
|
|
11741
|
-
|
|
11742
|
-
|
|
11743
|
-
|
|
11744
|
-
|
|
11745
|
-
|
|
11746
|
-
|
|
11747
|
-
|
|
11748
|
-
|
|
11749
|
-
|
|
11750
|
-
|
|
11751
|
-
|
|
11752
|
-
|
|
11753
|
-
|
|
11754
|
-
|
|
11755
|
-
|
|
11756
|
-
|
|
11757
|
-
|
|
11758
|
-
|
|
11759
|
-
|
|
11760
|
-
|
|
11761
|
-
|
|
11762
|
-
|
|
11763
|
-
|
|
11764
|
-
|
|
11765
|
-
|
|
11766
|
-
|
|
11767
|
-
|
|
11768
|
-
|
|
11769
|
-
|
|
11770
|
-
|
|
11771
|
-
|
|
11772
|
-
|
|
11773
|
-
|
|
11774
|
-
|
|
11775
|
-
|
|
11776
|
-
|
|
11777
|
-
|
|
11778
|
-
|
|
11779
|
-
|
|
11780
|
-
|
|
11781
|
-
|
|
11782
|
-
|
|
11783
|
-
|
|
11784
|
-
|
|
11785
|
-
|
|
11786
|
-
|
|
11787
|
-
|
|
11788
|
-
|
|
11789
|
-
|
|
11790
|
-
|
|
11791
|
-
|
|
11792
|
-
|
|
11793
|
-
|
|
11794
|
-
|
|
11795
|
-
|
|
11796
|
-
|
|
11797
|
-
|
|
11798
|
-
|
|
11799
|
-
}
|
|
11800
|
-
const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
|
|
11801
|
-
const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
|
|
11802
|
-
isomorph_default.writeln(
|
|
11803
|
-
`${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
11804
|
-
);
|
|
11805
|
-
} else {
|
|
11806
|
-
isomorph_default.writeln(
|
|
11807
|
-
`${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
|
|
11808
|
-
);
|
|
11809
|
-
}
|
|
11810
|
-
}
|
|
11811
|
-
}
|
|
11812
|
-
if (summary.experimentUrl) {
|
|
11813
|
-
isomorph_default.writeln("");
|
|
11814
|
-
isomorph_default.writeln(`View results for ${summary.experimentName}`);
|
|
11815
|
-
isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
|
|
11816
|
-
}
|
|
12401
|
+
|
|
12402
|
+
// src/node.ts
|
|
12403
|
+
var import_util9 = require("util");
|
|
12404
|
+
var zlib = __toESM(require("zlib"));
|
|
12405
|
+
/**
 * Installs the Node.js implementations of the platform hooks on
 * `isomorph_default` (git metadata, environment access, async-local
 * storage, path/fs/os helpers, gzip and SHA-256 hashing), then calls
 * `_internalSetInitialState()`.
 */
function configureNode() {
  Object.assign(isomorph_default, {
    getRepoInfo,
    getPastNAncestors,
    getEnv: (name) => process.env[name],
    getCallerLocation,
    newAsyncLocalStorage: () => new import_node_async_hooks.AsyncLocalStorage(),
    processOn: (event, handler) => {
      process.on(event, handler);
    },
    basename: path.basename,
    writeln: (text) => process.stdout.write(text + "\n"),
    pathJoin: path.join,
    pathDirname: path.dirname,
    mkdir: fs.mkdir,
    writeFile: fs.writeFile,
    readFile: fs.readFile,
    readdir: fs.readdir,
    stat: fs.stat,
    statSync: fsSync.statSync,
    utimes: fs.utimes,
    unlink: fs.unlink,
    homedir: os.homedir,
    tmpdir: os.tmpdir,
    writeFileSync: fsSync.writeFileSync,
    appendFileSync: fsSync.appendFileSync,
    readFileSync: (filename, encoding) => fsSync.readFileSync(filename, encoding),
    unlinkSync: fsSync.unlinkSync,
    openFile: fs.open,
    gzip: (0, import_util9.promisify)(zlib.gzip),
    gunzip: (0, import_util9.promisify)(zlib.gunzip),
    hash: (data) => crypto.createHash("sha256").update(data).digest("hex")
  });
  _internalSetInitialState();
}
|
|
12438
|
+
|
|
12439
|
+
// src/cli/index.ts
|
|
12440
|
+
var import_env2 = require("@next/env");
|
|
12441
|
+
|
|
12442
|
+
// src/cli/functions/upload.ts
|
|
12443
|
+
var import_fs = __toESM(require("fs"));
|
|
12444
|
+
var import_path3 = __toESM(require("path"));
|
|
12445
|
+
var import_zlib = require("zlib");
|
|
12446
|
+
var import_v312 = require("zod/v3");
|
|
12447
|
+
|
|
12448
|
+
// src/cli/functions/infer-source.ts
|
|
12449
|
+
var import_source_map = require("source-map");
|
|
12450
|
+
var fs2 = __toESM(require("fs/promises"));
|
|
12451
|
+
|
|
12452
|
+
// src/cli/jest/nodeModulesPaths.ts
|
|
12453
|
+
var path2 = __toESM(require("path"));
|
|
12454
|
+
|
|
12455
|
+
// src/cli/jest/tryRealpath.ts
|
|
12456
|
+
var import_graceful_fs = require("graceful-fs");
|
|
12457
|
+
/**
 * Resolves a path with `realpathSync.native`, tolerating two failures.
 *
 * @param {string} path8 - Path to resolve.
 * @returns {string} The resolved path, or `path8` unchanged when resolution
 *   fails with ENOENT or EISDIR.
 * @throws Any other error raised by `realpathSync.native`.
 */
function tryRealpath(path8) {
  try {
    return import_graceful_fs.realpathSync.native(path8);
  } catch (error2) {
    const tolerated = error2.code === "ENOENT" || error2.code === "EISDIR";
    if (!tolerated) {
      throw error2;
    }
  }
  return path8;
}
|
|
11836
12467
|
|
|
11837
|
-
|
|
12468
|
+
// src/cli/jest/nodeModulesPaths.ts
|
|
12469
|
+
function nodeModulesPaths(basedir, options) {
|
|
12470
|
+
const modules = options && options.moduleDirectory ? Array.from(options.moduleDirectory) : ["node_modules"];
|
|
12471
|
+
const basedirAbs = path2.resolve(basedir);
|
|
12472
|
+
let prefix = "/";
|
|
12473
|
+
if (/^([A-Za-z]:)/.test(basedirAbs)) {
|
|
12474
|
+
prefix = "";
|
|
12475
|
+
} else if (/^\\\\/.test(basedirAbs)) {
|
|
12476
|
+
prefix = "\\\\";
|
|
11838
12477
|
}
|
|
11839
|
-
|
|
11840
|
-
|
|
11841
|
-
|
|
11842
|
-
|
|
11843
|
-
|
|
11844
|
-
|
|
11845
|
-
|
|
11846
|
-
|
|
11847
|
-
|
|
11848
|
-
|
|
11849
|
-
|
|
11850
|
-
|
|
11851
|
-
|
|
11852
|
-
const
|
|
11853
|
-
|
|
11854
|
-
|
|
11855
|
-
|
|
11856
|
-
|
|
11857
|
-
"top-mid": "",
|
|
11858
|
-
"top-left": "",
|
|
11859
|
-
"top-right": "",
|
|
11860
|
-
bottom: "",
|
|
11861
|
-
"bottom-mid": "",
|
|
11862
|
-
"bottom-left": "",
|
|
11863
|
-
"bottom-right": "",
|
|
11864
|
-
left: "",
|
|
11865
|
-
"left-mid": "",
|
|
11866
|
-
mid: "",
|
|
11867
|
-
"mid-mid": "",
|
|
11868
|
-
right: "",
|
|
11869
|
-
"right-mid": "",
|
|
11870
|
-
middle: " "
|
|
11871
|
-
},
|
|
11872
|
-
colWidths: hasComparison ? [18, 10, 10, 13, 12] : [20, 15],
|
|
11873
|
-
colAligns: hasComparison ? ["left", "right", "right", "right", "right"] : ["left", "right"],
|
|
11874
|
-
wordWrap: false
|
|
11875
|
-
});
|
|
11876
|
-
const scoreValues = Object.values(summary.scores);
|
|
11877
|
-
for (const score of scoreValues) {
|
|
11878
|
-
const scorePercent = (score.score * 100).toFixed(2);
|
|
11879
|
-
const scoreValue = import_chalk2.default.white(`${scorePercent}%`);
|
|
11880
|
-
let diffString = "";
|
|
11881
|
-
if (!isEmpty2(score.diff)) {
|
|
11882
|
-
const diffPercent = (score.diff * 100).toFixed(2);
|
|
11883
|
-
const diffSign = score.diff > 0 ? "+" : "";
|
|
11884
|
-
const diffColor = score.diff > 0 ? import_chalk2.default.green : import_chalk2.default.red;
|
|
11885
|
-
diffString = diffColor(`${diffSign}${diffPercent}%`);
|
|
11886
|
-
} else {
|
|
11887
|
-
diffString = import_chalk2.default.gray("-");
|
|
11888
|
-
}
|
|
11889
|
-
const improvements = score.improvements > 0 ? import_chalk2.default.dim.green(score.improvements) : import_chalk2.default.gray("-");
|
|
11890
|
-
const regressions = score.regressions > 0 ? import_chalk2.default.dim.red(score.regressions) : import_chalk2.default.gray("-");
|
|
11891
|
-
const row = [`${import_chalk2.default.blue("\u25EF")} ${score.name}`, scoreValue];
|
|
11892
|
-
if (hasComparison) {
|
|
11893
|
-
row.push(diffString, improvements, regressions);
|
|
11894
|
-
}
|
|
11895
|
-
combinedTable.push(row);
|
|
11896
|
-
}
|
|
11897
|
-
const metricValues = Object.values(summary.metrics ?? {});
|
|
11898
|
-
for (const metric of metricValues) {
|
|
11899
|
-
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
11900
|
-
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
11901
|
-
const metricValue = import_chalk2.default.white(
|
|
11902
|
-
metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`
|
|
11903
|
-
);
|
|
11904
|
-
let diffString = "";
|
|
11905
|
-
if (!isEmpty2(metric.diff)) {
|
|
11906
|
-
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
11907
|
-
const diffSign = metric.diff > 0 ? "+" : "";
|
|
11908
|
-
const diffColor = metric.diff > 0 ? import_chalk2.default.green : import_chalk2.default.red;
|
|
11909
|
-
diffString = diffColor(`${diffSign}${diffPercent}%`);
|
|
12478
|
+
let physicalBasedir;
|
|
12479
|
+
try {
|
|
12480
|
+
physicalBasedir = tryRealpath(basedirAbs);
|
|
12481
|
+
} catch {
|
|
12482
|
+
physicalBasedir = basedirAbs;
|
|
12483
|
+
}
|
|
12484
|
+
const paths = [physicalBasedir];
|
|
12485
|
+
let parsed = path2.parse(physicalBasedir);
|
|
12486
|
+
while (parsed.dir !== paths[paths.length - 1]) {
|
|
12487
|
+
paths.push(parsed.dir);
|
|
12488
|
+
parsed = path2.parse(parsed.dir);
|
|
12489
|
+
}
|
|
12490
|
+
const dirs = paths.reduce((dirs2, aPath) => {
|
|
12491
|
+
for (const moduleDir of modules) {
|
|
12492
|
+
if (path2.isAbsolute(moduleDir)) {
|
|
12493
|
+
if (aPath === basedirAbs && moduleDir) {
|
|
12494
|
+
dirs2.push(moduleDir);
|
|
12495
|
+
}
|
|
11910
12496
|
} else {
|
|
11911
|
-
|
|
11912
|
-
}
|
|
11913
|
-
const improvements = metric.improvements > 0 ? import_chalk2.default.dim.green(metric.improvements) : import_chalk2.default.gray("-");
|
|
11914
|
-
const regressions = metric.regressions > 0 ? import_chalk2.default.dim.red(metric.regressions) : import_chalk2.default.gray("-");
|
|
11915
|
-
const row = [`${import_chalk2.default.magenta("\u25EF")} ${metric.name}`, metricValue];
|
|
11916
|
-
if (hasComparison) {
|
|
11917
|
-
row.push(diffString, improvements, regressions);
|
|
12497
|
+
dirs2.push(path2.join(prefix, aPath, moduleDir));
|
|
11918
12498
|
}
|
|
11919
|
-
combinedTable.push(row);
|
|
11920
12499
|
}
|
|
11921
|
-
|
|
12500
|
+
return dirs2;
|
|
12501
|
+
}, []);
|
|
12502
|
+
if (options.paths) {
|
|
12503
|
+
dirs.push(...options.paths);
|
|
11922
12504
|
}
|
|
11923
|
-
|
|
11924
|
-
|
|
11925
|
-
|
|
11926
|
-
|
|
11927
|
-
|
|
11928
|
-
|
|
11929
|
-
|
|
12505
|
+
return dirs;
|
|
12506
|
+
}
|
|
12507
|
+
function findGlobalPaths() {
|
|
12508
|
+
const { root } = path2.parse(process.cwd());
|
|
12509
|
+
const globalPath = path2.join(root, "node_modules");
|
|
12510
|
+
const resolvePaths = require.resolve.paths("/");
|
|
12511
|
+
if (resolvePaths) {
|
|
12512
|
+
const rootIndex = resolvePaths.indexOf(globalPath);
|
|
12513
|
+
return rootIndex > -1 ? resolvePaths.slice(rootIndex + 1) : [];
|
|
12514
|
+
}
|
|
12515
|
+
return [];
|
|
12516
|
+
}
|
|
12517
|
+
var GlobalPaths = findGlobalPaths();
|
|
12518
|
+
|
|
12519
|
+
// src/cli/functions/load-module.ts
|
|
12520
|
+
var import_path = __toESM(require("path"));
|
|
12521
|
+
function evalWithModuleContext(inFile, evalFn) {
|
|
12522
|
+
const modulePaths = [...module.paths];
|
|
11930
12523
|
try {
|
|
11931
|
-
|
|
11932
|
-
|
|
11933
|
-
|
|
11934
|
-
|
|
11935
|
-
borderColor: "gray",
|
|
11936
|
-
borderStyle: "round"
|
|
11937
|
-
});
|
|
11938
|
-
} catch (error2) {
|
|
11939
|
-
return "\n" + import_chalk2.default.gray("Experiment summary") + "\n" + boxContent + "\n";
|
|
12524
|
+
module.paths = nodeModulesPaths(import_path.default.dirname(inFile), {});
|
|
12525
|
+
return evalFn();
|
|
12526
|
+
} finally {
|
|
12527
|
+
module.paths = modulePaths;
|
|
11940
12528
|
}
|
|
11941
12529
|
}
|
|
11942
|
-
|
|
11943
|
-
|
|
11944
|
-
|
|
11945
|
-
|
|
11946
|
-
|
|
11947
|
-
|
|
11948
|
-
|
|
12530
|
+
function loadModule({
|
|
12531
|
+
inFile,
|
|
12532
|
+
moduleText
|
|
12533
|
+
}) {
|
|
12534
|
+
return evalWithModuleContext(inFile, () => {
|
|
12535
|
+
globalThis._evals = {
|
|
12536
|
+
functions: [],
|
|
12537
|
+
prompts: [],
|
|
12538
|
+
parameters: [],
|
|
12539
|
+
evaluators: {},
|
|
12540
|
+
reporters: {}
|
|
12541
|
+
};
|
|
12542
|
+
globalThis._lazy_load = true;
|
|
12543
|
+
globalThis.__inherited_braintrust_state = _internalGetGlobalState();
|
|
12544
|
+
const __filename2 = inFile;
|
|
12545
|
+
const __dirname = (0, import_path.dirname)(__filename2);
|
|
12546
|
+
new Function("require", "module", "__filename", "__dirname", moduleText)(
|
|
12547
|
+
require,
|
|
12548
|
+
module,
|
|
12549
|
+
__filename2,
|
|
12550
|
+
__dirname
|
|
11949
12551
|
);
|
|
11950
|
-
|
|
11951
|
-
|
|
11952
|
-
|
|
11953
|
-
|
|
12552
|
+
return { ...globalThis._evals };
|
|
12553
|
+
});
|
|
12554
|
+
}
|
|
12555
|
+
|
|
12556
|
+
// src/cli/functions/infer-source.ts
|
|
12557
|
+
var import_path2 = __toESM(require("path"));
|
|
12558
|
+
async function makeSourceMapContext({
|
|
12559
|
+
inFile,
|
|
12560
|
+
outFile,
|
|
12561
|
+
sourceMapFile
|
|
12562
|
+
}) {
|
|
12563
|
+
const [inFileContents, outFileContents, sourceMap] = await Promise.all([
|
|
12564
|
+
fs2.readFile(inFile, "utf8"),
|
|
12565
|
+
fs2.readFile(outFile, "utf8"),
|
|
12566
|
+
(async () => {
|
|
12567
|
+
const sourceMap2 = await fs2.readFile(sourceMapFile, "utf8");
|
|
12568
|
+
const sourceMapJSON = JSON.parse(sourceMap2);
|
|
12569
|
+
return new import_source_map.SourceMapConsumer(sourceMapJSON);
|
|
12570
|
+
})()
|
|
12571
|
+
]);
|
|
12572
|
+
return {
|
|
12573
|
+
inFiles: { [inFile]: inFileContents.split("\n") },
|
|
12574
|
+
outFileModule: loadModule({ inFile, moduleText: outFileContents }),
|
|
12575
|
+
outFileLines: outFileContents.split("\n"),
|
|
12576
|
+
sourceMapDir: import_path2.default.dirname(sourceMapFile),
|
|
12577
|
+
sourceMap
|
|
12578
|
+
};
|
|
12579
|
+
}
|
|
12580
|
+
function isNative(fn) {
|
|
12581
|
+
return /\{\s*\[native code\]\s*\}/.test(Function.prototype.toString.call(fn));
|
|
12582
|
+
}
|
|
12583
|
+
function locationToString(location) {
|
|
12584
|
+
if (location.type === "experiment") {
|
|
12585
|
+
return `eval ${location.eval_name} -> ${location.position.type}`;
|
|
12586
|
+
} else {
|
|
12587
|
+
return `task ${location.index}`;
|
|
12588
|
+
}
|
|
12589
|
+
}
|
|
12590
|
+
async function findCodeDefinition({
|
|
12591
|
+
location,
|
|
12592
|
+
ctx: { inFiles, outFileModule, outFileLines, sourceMapDir, sourceMap }
|
|
12593
|
+
}) {
|
|
12594
|
+
let fn = void 0;
|
|
12595
|
+
if (location.type === "experiment") {
|
|
12596
|
+
const evaluator = outFileModule.evaluators[location.eval_name]?.evaluator;
|
|
12597
|
+
if (!evaluator) {
|
|
12598
|
+
console.warn(
|
|
12599
|
+
warning(
|
|
12600
|
+
`Warning: failed to find evaluator for ${location.eval_name}. Will not display preview.`
|
|
11954
12601
|
)
|
|
11955
12602
|
);
|
|
11956
|
-
|
|
11957
|
-
for (const result2 of failingResults) {
|
|
11958
|
-
process.stdout.write(JSON.stringify(result2));
|
|
11959
|
-
process.stdout.write("\n");
|
|
11960
|
-
}
|
|
11961
|
-
} else if (verbose) {
|
|
11962
|
-
for (const result2 of failingResults) {
|
|
11963
|
-
console.error(result2);
|
|
11964
|
-
}
|
|
11965
|
-
}
|
|
11966
|
-
}
|
|
11967
|
-
process.stdout.write(
|
|
11968
|
-
jsonl ? JSON.stringify(summary) : formatExperimentSummaryFancy(summary)
|
|
11969
|
-
);
|
|
11970
|
-
process.stdout.write("\n");
|
|
11971
|
-
return failingResults.length === 0;
|
|
11972
|
-
},
|
|
11973
|
-
async reportRun(evalReports) {
|
|
11974
|
-
return evalReports.every((r) => r);
|
|
11975
|
-
}
|
|
11976
|
-
};
|
|
11977
|
-
|
|
11978
|
-
// src/node.ts
|
|
11979
|
-
var import_node_async_hooks = require("async_hooks");
|
|
11980
|
-
var path = __toESM(require("path"));
|
|
11981
|
-
var fs = __toESM(require("fs/promises"));
|
|
11982
|
-
var os = __toESM(require("os"));
|
|
11983
|
-
var fsSync = __toESM(require("fs"));
|
|
11984
|
-
var crypto = __toESM(require("crypto"));
|
|
11985
|
-
|
|
11986
|
-
// src/gitutil.ts
|
|
11987
|
-
var import_simple_git = require("simple-git");
|
|
11988
|
-
var COMMON_BASE_BRANCHES = ["main", "master", "develop"];
|
|
11989
|
-
async function currentRepo() {
|
|
11990
|
-
try {
|
|
11991
|
-
const git = (0, import_simple_git.simpleGit)();
|
|
11992
|
-
if (await git.checkIsRepo()) {
|
|
11993
|
-
return git;
|
|
11994
|
-
} else {
|
|
11995
|
-
return null;
|
|
12603
|
+
return void 0;
|
|
11996
12604
|
}
|
|
11997
|
-
|
|
11998
|
-
|
|
12605
|
+
fn = location.position.type === "task" ? evaluator.task : evaluator.scores[location.position.index];
|
|
12606
|
+
} else {
|
|
12607
|
+
fn = outFileModule.functions[location.index].handler;
|
|
11999
12608
|
}
|
|
12000
|
-
|
|
12001
|
-
|
|
12002
|
-
|
|
12003
|
-
|
|
12004
|
-
|
|
12005
|
-
if (git === null) {
|
|
12006
|
-
throw new Error("Not in a git repo");
|
|
12007
|
-
}
|
|
12008
|
-
const remoteName = remote ?? (await git.getRemotes())[0]?.name;
|
|
12009
|
-
if (!remoteName) {
|
|
12010
|
-
throw new Error("No remote found");
|
|
12011
|
-
}
|
|
12012
|
-
let branch = null;
|
|
12013
|
-
const repoBranches = new Set((await git.branchLocal()).all);
|
|
12014
|
-
const matchingBaseBranches = COMMON_BASE_BRANCHES.filter(
|
|
12015
|
-
(b) => repoBranches.has(b)
|
|
12609
|
+
if (!fn) {
|
|
12610
|
+
console.warn(
|
|
12611
|
+
warning(
|
|
12612
|
+
`Warning: failed to find ${locationToString(location)}. Will not display preview.`
|
|
12613
|
+
)
|
|
12016
12614
|
);
|
|
12017
|
-
|
|
12018
|
-
branch = matchingBaseBranches[0];
|
|
12019
|
-
} else {
|
|
12020
|
-
try {
|
|
12021
|
-
const remoteInfo = await git.remote(["show", remoteName]);
|
|
12022
|
-
if (!remoteInfo) {
|
|
12023
|
-
throw new Error(`Could not find remote ${remoteName}`);
|
|
12024
|
-
}
|
|
12025
|
-
const match = remoteInfo.match(/\s*HEAD branch:\s*(.*)$/m);
|
|
12026
|
-
if (!match) {
|
|
12027
|
-
throw new Error(`Could not find HEAD branch in remote ${remoteName}`);
|
|
12028
|
-
}
|
|
12029
|
-
branch = match[1];
|
|
12030
|
-
} catch {
|
|
12031
|
-
branch = "main";
|
|
12032
|
-
}
|
|
12033
|
-
}
|
|
12034
|
-
_baseBranch = { remote: remoteName, branch };
|
|
12035
|
-
}
|
|
12036
|
-
return _baseBranch;
|
|
12037
|
-
}
|
|
12038
|
-
async function getBaseBranchAncestor(remote = void 0) {
|
|
12039
|
-
const git = await currentRepo();
|
|
12040
|
-
if (git === null) {
|
|
12041
|
-
throw new Error("Not in a git repo");
|
|
12615
|
+
return void 0;
|
|
12042
12616
|
}
|
|
12043
|
-
const
|
|
12044
|
-
|
|
12045
|
-
const head = isDirty ? "HEAD" : "HEAD^";
|
|
12046
|
-
try {
|
|
12047
|
-
const ancestor = await git.raw([
|
|
12048
|
-
"merge-base",
|
|
12049
|
-
head,
|
|
12050
|
-
`${remoteName}/${baseBranch}`
|
|
12051
|
-
]);
|
|
12052
|
-
return ancestor.trim();
|
|
12053
|
-
} catch (e) {
|
|
12617
|
+
const sourceCode = fn.toString();
|
|
12618
|
+
if (isNative(fn)) {
|
|
12054
12619
|
return void 0;
|
|
12055
12620
|
}
|
|
12056
|
-
|
|
12057
|
-
|
|
12058
|
-
const
|
|
12059
|
-
|
|
12060
|
-
|
|
12621
|
+
let lineNumber = 0;
|
|
12622
|
+
let columnNumber = -1;
|
|
12623
|
+
for (const line of outFileLines) {
|
|
12624
|
+
const sourceDefinition = line.indexOf(sourceCode);
|
|
12625
|
+
if (sourceDefinition !== -1) {
|
|
12626
|
+
columnNumber = sourceDefinition;
|
|
12627
|
+
break;
|
|
12628
|
+
}
|
|
12629
|
+
lineNumber++;
|
|
12061
12630
|
}
|
|
12062
|
-
|
|
12063
|
-
try {
|
|
12064
|
-
ancestor = await getBaseBranchAncestor(remote);
|
|
12065
|
-
} catch (e) {
|
|
12631
|
+
if (columnNumber === -1) {
|
|
12066
12632
|
console.warn(
|
|
12067
|
-
|
|
12068
|
-
|
|
12633
|
+
warning(
|
|
12634
|
+
`Warning: failed to find code definition for ${fn.name}. Will not display preview.`
|
|
12635
|
+
)
|
|
12069
12636
|
);
|
|
12070
|
-
}
|
|
12071
|
-
if (!ancestor) {
|
|
12072
|
-
return [];
|
|
12073
|
-
}
|
|
12074
|
-
const commits = await git.log({ from: ancestor, to: "HEAD", maxCount: n });
|
|
12075
|
-
return commits.all.slice(0, n).map((c) => c.hash);
|
|
12076
|
-
}
|
|
12077
|
-
async function attempt(fn) {
|
|
12078
|
-
try {
|
|
12079
|
-
return await fn();
|
|
12080
|
-
} catch (e) {
|
|
12081
12637
|
return void 0;
|
|
12082
12638
|
}
|
|
12083
|
-
|
|
12084
|
-
|
|
12085
|
-
|
|
12086
|
-
|
|
12087
|
-
|
|
12088
|
-
}
|
|
12089
|
-
const truncated = encoded.subarray(0, byteLimit);
|
|
12090
|
-
return new TextDecoder().decode(truncated);
|
|
12091
|
-
}
|
|
12092
|
-
async function getRepoInfo(settings) {
|
|
12093
|
-
if (settings && settings.collect === "none") {
|
|
12639
|
+
const originalPosition = sourceMap.originalPositionFor({
|
|
12640
|
+
line: lineNumber + 1,
|
|
12641
|
+
column: columnNumber + 1
|
|
12642
|
+
});
|
|
12643
|
+
if (originalPosition.source === null || originalPosition.line === null) {
|
|
12094
12644
|
return void 0;
|
|
12095
12645
|
}
|
|
12096
|
-
|
|
12097
|
-
|
|
12098
|
-
|
|
12646
|
+
if (!inFiles[originalPosition.source]) {
|
|
12647
|
+
const originalFile = import_path2.default.join(sourceMapDir, originalPosition.source);
|
|
12648
|
+
inFiles[originalPosition.source] = (await fs2.readFile(originalFile, "utf-8")).split("\n");
|
|
12099
12649
|
}
|
|
12100
|
-
|
|
12101
|
-
|
|
12102
|
-
|
|
12103
|
-
});
|
|
12104
|
-
return sanitized;
|
|
12105
|
-
}
|
|
12106
|
-
async function repoInfo() {
|
|
12107
|
-
const git = await currentRepo();
|
|
12108
|
-
if (git === null) {
|
|
12650
|
+
const originalLines = inFiles[originalPosition.source];
|
|
12651
|
+
const ts = await getTsModule();
|
|
12652
|
+
if (!ts) {
|
|
12109
12653
|
return void 0;
|
|
12110
12654
|
}
|
|
12111
|
-
|
|
12112
|
-
|
|
12113
|
-
|
|
12114
|
-
|
|
12115
|
-
|
|
12116
|
-
let tag = void 0;
|
|
12117
|
-
let branch = void 0;
|
|
12118
|
-
let git_diff = void 0;
|
|
12119
|
-
const dirty = (await git.diffSummary()).files.length > 0;
|
|
12120
|
-
commit = await attempt(async () => await git.revparse(["HEAD"]));
|
|
12121
|
-
commit_message = await attempt(
|
|
12122
|
-
async () => (await git.raw(["log", "-1", "--pretty=%B"])).trim()
|
|
12123
|
-
);
|
|
12124
|
-
commit_time = await attempt(
|
|
12125
|
-
async () => (await git.raw(["log", "-1", "--pretty=%cI"])).trim()
|
|
12126
|
-
);
|
|
12127
|
-
author_name = await attempt(
|
|
12128
|
-
async () => (await git.raw(["log", "-1", "--pretty=%aN"])).trim()
|
|
12129
|
-
);
|
|
12130
|
-
author_email = await attempt(
|
|
12131
|
-
async () => (await git.raw(["log", "-1", "--pretty=%aE"])).trim()
|
|
12132
|
-
);
|
|
12133
|
-
tag = await attempt(
|
|
12134
|
-
async () => (await git.raw(["describe", "--tags", "--exact-match", "--always"])).trim()
|
|
12655
|
+
const sourceFile = ts.createSourceFile(
|
|
12656
|
+
originalPosition.source,
|
|
12657
|
+
originalLines.join("\n"),
|
|
12658
|
+
ts.ScriptTarget.Latest,
|
|
12659
|
+
true
|
|
12135
12660
|
);
|
|
12136
|
-
|
|
12137
|
-
|
|
12661
|
+
let functionNode = void 0;
|
|
12662
|
+
const targetPosition = ts.getPositionOfLineAndCharacter(
|
|
12663
|
+
sourceFile,
|
|
12664
|
+
originalPosition.line - 1,
|
|
12665
|
+
originalPosition.column || 0
|
|
12138
12666
|
);
|
|
12139
|
-
|
|
12140
|
-
|
|
12141
|
-
|
|
12142
|
-
|
|
12143
|
-
|
|
12144
|
-
|
|
12145
|
-
|
|
12146
|
-
branch,
|
|
12147
|
-
tag,
|
|
12148
|
-
dirty,
|
|
12149
|
-
author_name,
|
|
12150
|
-
author_email,
|
|
12151
|
-
commit_message,
|
|
12152
|
-
commit_time,
|
|
12153
|
-
git_diff
|
|
12154
|
-
};
|
|
12155
|
-
}
|
|
12156
|
-
|
|
12157
|
-
// src/stackutil.ts
|
|
12158
|
-
function getStackTrace() {
|
|
12159
|
-
const trace = new Error().stack;
|
|
12160
|
-
if (typeof trace !== "string") {
|
|
12161
|
-
return [];
|
|
12162
|
-
}
|
|
12163
|
-
const traceLines = trace.split("\n");
|
|
12164
|
-
const out = [];
|
|
12165
|
-
const stackFrameRegex = /at(.*)\((.*):(\d+):(\d+)\)/;
|
|
12166
|
-
for (const traceLine of traceLines.slice(1)) {
|
|
12167
|
-
const matches = traceLine.match(stackFrameRegex);
|
|
12168
|
-
if (matches === null || matches.length !== 5) {
|
|
12169
|
-
continue;
|
|
12170
|
-
}
|
|
12171
|
-
const entry = {
|
|
12172
|
-
functionName: matches[1].trim(),
|
|
12173
|
-
fileName: matches[2],
|
|
12174
|
-
lineNo: parseInt(matches[3])
|
|
12175
|
-
};
|
|
12176
|
-
if (!isNaN(entry.lineNo)) {
|
|
12177
|
-
out.push(entry);
|
|
12667
|
+
ts.forEachChild(sourceFile, function visit(node) {
|
|
12668
|
+
if (node.pos <= targetPosition && targetPosition < node.end) {
|
|
12669
|
+
if (ts.isFunctionDeclaration(node) || ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
|
|
12670
|
+
functionNode = node;
|
|
12671
|
+
} else {
|
|
12672
|
+
ts.forEachChild(node, visit);
|
|
12673
|
+
}
|
|
12178
12674
|
}
|
|
12675
|
+
});
|
|
12676
|
+
if (!functionNode) {
|
|
12677
|
+
return void 0;
|
|
12179
12678
|
}
|
|
12180
|
-
|
|
12679
|
+
const printer = ts.createPrinter();
|
|
12680
|
+
const functionDefinition = printer.printNode(
|
|
12681
|
+
ts.EmitHint.Unspecified,
|
|
12682
|
+
functionNode,
|
|
12683
|
+
sourceFile
|
|
12684
|
+
);
|
|
12685
|
+
return functionDefinition;
|
|
12181
12686
|
}
|
|
12182
|
-
|
|
12183
|
-
|
|
12184
|
-
|
|
12185
|
-
|
|
12186
|
-
|
|
12187
|
-
|
|
12188
|
-
|
|
12189
|
-
|
|
12190
|
-
|
|
12191
|
-
|
|
12192
|
-
|
|
12193
|
-
caller_lineno: frame.lineNo
|
|
12194
|
-
};
|
|
12687
|
+
var tsModule = void 0;
|
|
12688
|
+
async function getTsModule() {
|
|
12689
|
+
if (!tsModule) {
|
|
12690
|
+
try {
|
|
12691
|
+
tsModule = require("typescript");
|
|
12692
|
+
} catch {
|
|
12693
|
+
console.warn(
|
|
12694
|
+
warning(
|
|
12695
|
+
"Failed to load TypeScript module. Will not use TypeScript to derive preview."
|
|
12696
|
+
)
|
|
12697
|
+
);
|
|
12195
12698
|
}
|
|
12196
12699
|
}
|
|
12197
|
-
return
|
|
12700
|
+
return tsModule;
|
|
12198
12701
|
}
|
|
12199
12702
|
|
|
12200
|
-
// src/
|
|
12201
|
-
var
|
|
12202
|
-
var
|
|
12203
|
-
function
|
|
12204
|
-
|
|
12205
|
-
|
|
12206
|
-
|
|
12207
|
-
|
|
12208
|
-
|
|
12209
|
-
|
|
12210
|
-
|
|
12211
|
-
}
|
|
12212
|
-
|
|
12213
|
-
isomorph_default.writeln = (text) => process.stdout.write(text + "\n");
|
|
12214
|
-
isomorph_default.pathJoin = path.join;
|
|
12215
|
-
isomorph_default.pathDirname = path.dirname;
|
|
12216
|
-
isomorph_default.mkdir = fs.mkdir;
|
|
12217
|
-
isomorph_default.writeFile = fs.writeFile;
|
|
12218
|
-
isomorph_default.readFile = fs.readFile;
|
|
12219
|
-
isomorph_default.readdir = fs.readdir;
|
|
12220
|
-
isomorph_default.stat = fs.stat;
|
|
12221
|
-
isomorph_default.statSync = fsSync.statSync;
|
|
12222
|
-
isomorph_default.utimes = fs.utimes;
|
|
12223
|
-
isomorph_default.unlink = fs.unlink;
|
|
12224
|
-
isomorph_default.homedir = os.homedir;
|
|
12225
|
-
isomorph_default.tmpdir = os.tmpdir;
|
|
12226
|
-
isomorph_default.writeFileSync = fsSync.writeFileSync;
|
|
12227
|
-
isomorph_default.appendFileSync = fsSync.appendFileSync;
|
|
12228
|
-
isomorph_default.readFileSync = (filename, encoding) => fsSync.readFileSync(filename, encoding);
|
|
12229
|
-
isomorph_default.unlinkSync = fsSync.unlinkSync;
|
|
12230
|
-
isomorph_default.openFile = fs.open;
|
|
12231
|
-
isomorph_default.gzip = (0, import_util10.promisify)(zlib.gzip);
|
|
12232
|
-
isomorph_default.gunzip = (0, import_util10.promisify)(zlib.gunzip);
|
|
12233
|
-
isomorph_default.hash = (data) => crypto.createHash("sha256").update(data).digest("hex");
|
|
12234
|
-
_internalSetInitialState();
|
|
12703
|
+
// src/zod/utils.ts
|
|
12704
|
+
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
12705
|
+
var z42 = __toESM(require("zod/v4"));
|
|
12706
|
+
function isZodV4(zodObject) {
|
|
12707
|
+
return typeof zodObject === "object" && zodObject !== null && "_zod" in zodObject && zodObject._zod !== void 0;
|
|
12708
|
+
}
|
|
12709
|
+
function zodToJsonSchema(schema) {
|
|
12710
|
+
if (isZodV4(schema)) {
|
|
12711
|
+
return z42.toJSONSchema(schema, {
|
|
12712
|
+
target: "draft-7"
|
|
12713
|
+
});
|
|
12714
|
+
}
|
|
12715
|
+
return (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
12235
12716
|
}
|
|
12236
|
-
|
|
12237
|
-
// src/cli/index.ts
|
|
12238
|
-
var import_env2 = require("@next/env");
|
|
12239
12717
|
|
|
12240
12718
|
// src/cli/functions/upload.ts
|
|
12241
|
-
var
|
|
12242
|
-
var import_path3 = __toESM(require("path"));
|
|
12243
|
-
var import_zlib = require("zlib");
|
|
12244
|
-
var import_v311 = require("zod/v3");
|
|
12245
|
-
|
|
12246
|
-
// src/cli/functions/infer-source.ts
|
|
12247
|
-
var import_source_map = require("source-map");
|
|
12248
|
-
var fs2 = __toESM(require("fs/promises"));
|
|
12249
|
-
|
|
12250
|
-
// src/cli/jest/nodeModulesPaths.ts
|
|
12251
|
-
var path2 = __toESM(require("path"));
|
|
12719
|
+
var import_pluralize2 = __toESM(require("pluralize"));
|
|
12252
12720
|
|
|
12253
|
-
// src/
|
|
12254
|
-
var
|
|
12255
|
-
|
|
12256
|
-
|
|
12257
|
-
|
|
12258
|
-
|
|
12259
|
-
|
|
12260
|
-
|
|
12721
|
+
// src/framework2.ts
|
|
12722
|
+
var import_v311 = require("zod/v3");
|
|
12723
|
+
var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
|
|
12724
|
+
var ProjectBuilder = class {
|
|
12725
|
+
create(opts) {
|
|
12726
|
+
return new Project2(opts);
|
|
12727
|
+
}
|
|
12728
|
+
};
|
|
12729
|
+
var projects = new ProjectBuilder();
|
|
12730
|
+
var Project2 = class {
|
|
12731
|
+
name;
|
|
12732
|
+
id;
|
|
12733
|
+
tools;
|
|
12734
|
+
prompts;
|
|
12735
|
+
parameters;
|
|
12736
|
+
scorers;
|
|
12737
|
+
_publishableCodeFunctions = [];
|
|
12738
|
+
_publishablePrompts = [];
|
|
12739
|
+
_publishableParameters = [];
|
|
12740
|
+
constructor(args) {
|
|
12741
|
+
_initializeSpanContext();
|
|
12742
|
+
this.name = "name" in args ? args.name : void 0;
|
|
12743
|
+
this.id = "id" in args ? args.id : void 0;
|
|
12744
|
+
this.tools = new ToolBuilder(this);
|
|
12745
|
+
this.prompts = new PromptBuilder(this);
|
|
12746
|
+
this.parameters = new ParametersBuilder(this);
|
|
12747
|
+
this.scorers = new ScorerBuilder(this);
|
|
12748
|
+
}
|
|
12749
|
+
addPrompt(prompt) {
|
|
12750
|
+
this._publishablePrompts.push(prompt);
|
|
12751
|
+
if (globalThis._lazy_load) {
|
|
12752
|
+
globalThis._evals.prompts.push(prompt);
|
|
12753
|
+
}
|
|
12754
|
+
}
|
|
12755
|
+
addParameters(parameters) {
|
|
12756
|
+
this._publishableParameters.push(parameters);
|
|
12757
|
+
if (globalThis._lazy_load) {
|
|
12758
|
+
if (globalThis._evals.parameters == null)
|
|
12759
|
+
globalThis._evals.parameters = [];
|
|
12760
|
+
globalThis._evals.parameters.push(parameters);
|
|
12761
|
+
}
|
|
12762
|
+
}
|
|
12763
|
+
addCodeFunction(fn) {
|
|
12764
|
+
this._publishableCodeFunctions.push(fn);
|
|
12765
|
+
if (globalThis._lazy_load) {
|
|
12766
|
+
globalThis._evals.functions.push(fn);
|
|
12767
|
+
}
|
|
12768
|
+
}
|
|
12769
|
+
async publish() {
|
|
12770
|
+
if (globalThis._lazy_load) {
|
|
12771
|
+
console.warn("publish() is a no-op when running `braintrust push`.");
|
|
12772
|
+
return;
|
|
12773
|
+
}
|
|
12774
|
+
await login();
|
|
12775
|
+
const projectMap = new ProjectNameIdMap();
|
|
12776
|
+
const functionDefinitions = [];
|
|
12777
|
+
if (this._publishableCodeFunctions.length > 0) {
|
|
12778
|
+
console.warn(
|
|
12779
|
+
"Code functions cannot be published directly. Use `braintrust push` instead."
|
|
12780
|
+
);
|
|
12781
|
+
}
|
|
12782
|
+
if (this._publishablePrompts.length > 0) {
|
|
12783
|
+
for (const prompt of this._publishablePrompts) {
|
|
12784
|
+
const functionDefinition = await prompt.toFunctionDefinition(projectMap);
|
|
12785
|
+
functionDefinitions.push(functionDefinition);
|
|
12786
|
+
}
|
|
12787
|
+
}
|
|
12788
|
+
await _internalGetGlobalState().apiConn().post_json("insert-functions", {
|
|
12789
|
+
functions: functionDefinitions
|
|
12790
|
+
});
|
|
12791
|
+
}
|
|
12792
|
+
};
|
|
12793
|
+
var ToolBuilder = class {
|
|
12794
|
+
constructor(project) {
|
|
12795
|
+
this.project = project;
|
|
12796
|
+
}
|
|
12797
|
+
taskCounter = 0;
|
|
12798
|
+
// This type definition is just a catch all so that the implementation can be
|
|
12799
|
+
// less specific than the two more specific declarations above.
|
|
12800
|
+
create(opts) {
|
|
12801
|
+
this.taskCounter++;
|
|
12802
|
+
opts = opts ?? {};
|
|
12803
|
+
const { handler, name, slug, parameters, returns, ...rest } = opts;
|
|
12804
|
+
let resolvedName = name ?? handler.name;
|
|
12805
|
+
if (resolvedName.trim().length === 0) {
|
|
12806
|
+
resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
12807
|
+
}
|
|
12808
|
+
const tool = new CodeFunction(this.project, {
|
|
12809
|
+
handler,
|
|
12810
|
+
name: resolvedName,
|
|
12811
|
+
slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
|
|
12812
|
+
type: "tool",
|
|
12813
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
12814
|
+
parameters,
|
|
12815
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
12816
|
+
returns,
|
|
12817
|
+
...rest
|
|
12818
|
+
});
|
|
12819
|
+
this.project.addCodeFunction(tool);
|
|
12820
|
+
return tool;
|
|
12821
|
+
}
|
|
12822
|
+
};
|
|
12823
|
+
var ScorerBuilder = class {
|
|
12824
|
+
constructor(project) {
|
|
12825
|
+
this.project = project;
|
|
12826
|
+
}
|
|
12827
|
+
taskCounter = 0;
|
|
12828
|
+
create(opts) {
|
|
12829
|
+
this.taskCounter++;
|
|
12830
|
+
let resolvedName = opts.name;
|
|
12831
|
+
if (!resolvedName && "handler" in opts) {
|
|
12832
|
+
resolvedName = opts.handler.name;
|
|
12833
|
+
}
|
|
12834
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
12835
|
+
resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
12836
|
+
}
|
|
12837
|
+
const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
|
|
12838
|
+
if ("handler" in opts) {
|
|
12839
|
+
const scorer = new CodeFunction(this.project, {
|
|
12840
|
+
...opts,
|
|
12841
|
+
name: resolvedName,
|
|
12842
|
+
slug,
|
|
12843
|
+
type: "scorer"
|
|
12844
|
+
});
|
|
12845
|
+
this.project.addCodeFunction(scorer);
|
|
12846
|
+
} else {
|
|
12847
|
+
const promptBlock = "messages" in opts ? {
|
|
12848
|
+
type: "chat",
|
|
12849
|
+
messages: opts.messages
|
|
12850
|
+
} : {
|
|
12851
|
+
type: "completion",
|
|
12852
|
+
content: opts.prompt
|
|
12853
|
+
};
|
|
12854
|
+
const promptData = {
|
|
12855
|
+
prompt: promptBlock,
|
|
12856
|
+
options: {
|
|
12857
|
+
model: opts.model,
|
|
12858
|
+
params: opts.params
|
|
12859
|
+
},
|
|
12860
|
+
parser: {
|
|
12861
|
+
type: "llm_classifier",
|
|
12862
|
+
use_cot: opts.useCot,
|
|
12863
|
+
choice_scores: opts.choiceScores
|
|
12864
|
+
}
|
|
12865
|
+
};
|
|
12866
|
+
const codePrompt = new CodePrompt(
|
|
12867
|
+
this.project,
|
|
12868
|
+
promptData,
|
|
12869
|
+
[],
|
|
12870
|
+
{
|
|
12871
|
+
...opts,
|
|
12872
|
+
name: resolvedName,
|
|
12873
|
+
slug
|
|
12874
|
+
},
|
|
12875
|
+
"scorer"
|
|
12876
|
+
);
|
|
12877
|
+
this.project.addPrompt(codePrompt);
|
|
12261
12878
|
}
|
|
12262
12879
|
}
|
|
12263
|
-
|
|
12264
|
-
|
|
12265
|
-
|
|
12266
|
-
|
|
12267
|
-
|
|
12268
|
-
|
|
12269
|
-
|
|
12270
|
-
|
|
12271
|
-
|
|
12272
|
-
|
|
12273
|
-
|
|
12274
|
-
|
|
12275
|
-
|
|
12276
|
-
|
|
12277
|
-
|
|
12278
|
-
physicalBasedir = tryRealpath(basedirAbs);
|
|
12279
|
-
} catch {
|
|
12280
|
-
physicalBasedir = basedirAbs;
|
|
12281
|
-
}
|
|
12282
|
-
const paths = [physicalBasedir];
|
|
12283
|
-
let parsed = path2.parse(physicalBasedir);
|
|
12284
|
-
while (parsed.dir !== paths[paths.length - 1]) {
|
|
12285
|
-
paths.push(parsed.dir);
|
|
12286
|
-
parsed = path2.parse(parsed.dir);
|
|
12287
|
-
}
|
|
12288
|
-
const dirs = paths.reduce((dirs2, aPath) => {
|
|
12289
|
-
for (const moduleDir of modules) {
|
|
12290
|
-
if (path2.isAbsolute(moduleDir)) {
|
|
12291
|
-
if (aPath === basedirAbs && moduleDir) {
|
|
12292
|
-
dirs2.push(moduleDir);
|
|
12293
|
-
}
|
|
12294
|
-
} else {
|
|
12295
|
-
dirs2.push(path2.join(prefix, aPath, moduleDir));
|
|
12296
|
-
}
|
|
12880
|
+
};
|
|
12881
|
+
var CodeFunction = class {
|
|
12882
|
+
constructor(project, opts) {
|
|
12883
|
+
this.project = project;
|
|
12884
|
+
this.handler = opts.handler;
|
|
12885
|
+
this.name = opts.name;
|
|
12886
|
+
this.slug = opts.slug;
|
|
12887
|
+
this.description = opts.description;
|
|
12888
|
+
this.type = opts.type;
|
|
12889
|
+
this.ifExists = opts.ifExists;
|
|
12890
|
+
this.metadata = opts.metadata;
|
|
12891
|
+
this.parameters = opts.parameters;
|
|
12892
|
+
this.returns = opts.returns;
|
|
12893
|
+
if (this.returns && !this.parameters) {
|
|
12894
|
+
throw new Error("parameters are required if return type is defined");
|
|
12297
12895
|
}
|
|
12298
|
-
return dirs2;
|
|
12299
|
-
}, []);
|
|
12300
|
-
if (options.paths) {
|
|
12301
|
-
dirs.push(...options.paths);
|
|
12302
12896
|
}
|
|
12303
|
-
|
|
12304
|
-
|
|
12305
|
-
|
|
12306
|
-
|
|
12307
|
-
|
|
12308
|
-
|
|
12309
|
-
|
|
12310
|
-
|
|
12311
|
-
|
|
12897
|
+
handler;
|
|
12898
|
+
name;
|
|
12899
|
+
slug;
|
|
12900
|
+
type;
|
|
12901
|
+
description;
|
|
12902
|
+
parameters;
|
|
12903
|
+
returns;
|
|
12904
|
+
ifExists;
|
|
12905
|
+
metadata;
|
|
12906
|
+
key() {
|
|
12907
|
+
return JSON.stringify([
|
|
12908
|
+
this.project.id ?? "",
|
|
12909
|
+
this.project.name ?? "",
|
|
12910
|
+
this.slug
|
|
12911
|
+
]);
|
|
12312
12912
|
}
|
|
12313
|
-
|
|
12314
|
-
|
|
12315
|
-
|
|
12316
|
-
|
|
12317
|
-
|
|
12318
|
-
|
|
12319
|
-
|
|
12320
|
-
|
|
12321
|
-
|
|
12322
|
-
|
|
12323
|
-
|
|
12324
|
-
|
|
12325
|
-
|
|
12913
|
+
};
|
|
12914
|
+
/**
 * A prompt declared in code, bundled with the settings needed to turn it
 * into a function-definition payload.
 */
var CodePrompt = class {
  project;
  name;
  slug;
  prompt;
  ifExists;
  description;
  id;
  functionType;
  toolFunctions;
  metadata;
  /**
   * @param project - Project the prompt belongs to.
   * @param prompt - Prompt data; shallow-copied when the definition is built.
   * @param toolFunctions - Tool references. Entries that carry a `slug` key
   *   are rewritten into slug references when the definition is built.
   * @param opts - Supplies `name`, `slug`, `ifExists`, `description`, `id`
   *   and `metadata`.
   * @param functionType - Emitted as `function_type` in the definition.
   */
  constructor(project, prompt, toolFunctions, opts, functionType) {
    const { name, slug, ifExists, description, id, metadata } = opts;
    Object.assign(this, {
      project,
      name,
      slug,
      prompt,
      toolFunctions,
      ifExists,
      description,
      id,
      functionType,
      metadata
    });
  }
  /**
   * Builds the function-definition payload for this prompt.
   *
   * @param projectNameToId - Object exposing an async `resolve(project)`
   *   that yields a project id.
   * @returns The payload, with `function_data.type` set to "prompt".
   */
  async toFunctionDefinition(projectNameToId) {
    // Work on a shallow copy so `this.prompt` never gains `tool_functions`.
    const promptData = { ...this.prompt };
    if (this.toolFunctions.length > 0) {
      const pendingTools = this.toolFunctions.map(async (toolFn) => {
        if (!("slug" in toolFn)) {
          // Not a slug reference: pass it through untouched.
          return toolFn;
        }
        const toolProjectId = await projectNameToId.resolve(toolFn.project);
        return {
          type: "slug",
          project_id: toolProjectId,
          slug: toolFn.slug
        };
      });
      promptData.tool_functions = await Promise.all(pendingTools);
    }
    // The owning project is resolved only after every tool reference is.
    const projectId = await projectNameToId.resolve(this.project);
    return {
      project_id: projectId,
      name: this.name,
      slug: this.slug,
      description: this.description ?? "",
      function_data: {
        type: "prompt"
      },
      function_type: this.functionType,
      prompt_data: promptData,
      if_exists: this.ifExists,
      metadata: this.metadata
    };
  }
};
|
|
12973
|
+
/**
 * Creates prompts for a project and records their code-side definitions on
 * that project.
 */
var PromptBuilder = class {
  /**
   * @param project - Project that receives every prompt created here.
   */
  constructor(project) {
    this.project = project;
  }
  /**
   * Builds a prompt from `opts`, registers a matching `CodePrompt` on the
   * project and returns the prompt object.
   *
   * @param opts - Prompt options. Reads `name`, `slug`, `id`, `version`,
   *   `tools` and `noTrace`; the full object is also forwarded to
   *   `promptDefinitionToPromptData` and `CodePrompt`.
   * @returns The newly constructed prompt.
   */
  create(opts) {
    const toolFunctions = [];
    const rawTools = [];
    for (const tool of opts.tools ?? []) {
      // Code functions, and typed entries without a `function` key, are
      // kept as tool-function references; everything else is a raw tool.
      const isToolFunction = tool instanceof CodeFunction || "type" in tool && !("function" in tool);
      (isToolFunction ? toolFunctions : rawTools).push(tool);
    }
    const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
    const promptData = promptDefinitionToPromptData(opts, rawTools);
    const promptRow = {
      id: opts.id,
      _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
      name: opts.name,
      slug,
      prompt_data: promptData
    };
    if (this.project.id !== void 0) {
      promptRow.project_id = this.project.id;
    }
    // It doesn't make sense to specify defaults here, hence the empty object.
    const noTrace = opts.noTrace ?? false;
    const prompt = new Prompt2(promptRow, {}, noTrace);
    const codePromptOpts = { ...opts, slug };
    this.project.addPrompt(
      new CodePrompt(this.project, promptData, toolFunctions, codePromptOpts)
    );
    return prompt;
  }
};
|
|
13013
|
+
/**
 * A parameters definition declared in code, convertible into a
 * function-definition payload of type "parameters".
 */
var CodeParameters = class {
  project;
  name;
  slug;
  description;
  schema;
  ifExists;
  metadata;
  /**
   * @param project - Project the parameters belong to.
   * @param opts - Supplies `name`, `slug`, `description`, `schema`,
   *   `ifExists` and `metadata`.
   */
  constructor(project, opts) {
    const { name, slug, description, schema, ifExists, metadata } = opts;
    Object.assign(this, {
      project,
      name,
      slug,
      description,
      schema,
      ifExists,
      metadata
    });
  }
  /**
   * Builds the function-definition payload for these parameters.
   *
   * @param projectNameToId - Object exposing an async `resolve(project)`
   *   that yields a project id.
   * @returns The payload; `function_data.__schema` holds the serialized
   *   form of `schema`.
   */
  async toFunctionDefinition(projectNameToId) {
    // The project id is resolved before the schema is serialized.
    const projectId = await projectNameToId.resolve(this.project);
    const functionData = {
      type: "parameters",
      data: {},
      __schema: serializeEvalParameterstoParametersSchema(this.schema)
    };
    return {
      project_id: projectId,
      name: this.name,
      slug: this.slug,
      description: this.description ?? "",
      function_type: "parameters",
      function_data: functionData,
      if_exists: this.ifExists,
      metadata: this.metadata
    };
  }
};
|
|
13047
|
+
/**
 * Registers code-defined parameters on a project.
 */
var ParametersBuilder = class {
  /**
   * @param project - Project that receives every parameters definition
   *   created here.
   */
  constructor(project) {
    this.project = project;
  }
  /**
   * Wraps `opts` in a `CodeParameters`, adds it to the project and hands
   * the schema back to the caller.
   *
   * @param opts - Reads `name`, `slug`, `description`, `schema`,
   *   `ifExists` and `metadata`. When `slug` is nullish it is derived from
   *   `name` via `slugify`.
   * @returns `opts.schema`, unchanged.
   */
  create(opts) {
    const { name, description, schema, ifExists, metadata } = opts;
    const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
    this.project.addParameters(
      new CodeParameters(this.project, {
        name,
        slug,
        description,
        schema,
        ifExists,
        metadata
      })
    );
    return opts.schema;
  }
};
|
|
13065
|
+
/**
 * Converts a map of eval parameter definitions into the static parameters
 * schema: one `{ type, default, description, ... }` entry per parameter.
 *
 * Prompt parameters (`type === "prompt"`) keep their description and get
 * their default converted with `promptDefinitionToPromptData`. Every other
 * value is passed to `zodToJsonSchema` and emitted as a "data" entry.
 *
 * @param parameters - Object keyed by parameter name.
 * @returns A new object with the same keys, in the same order.
 */
function serializeEvalParametersToStaticParametersSchema(parameters) {
  const serialized = [];
  for (const [name, value] of Object.entries(parameters)) {
    if ("type" in value && value.type === "prompt") {
      const promptEntry = {
        type: "prompt",
        default: value.default ? promptDefinitionToPromptData(value.default) : void 0,
        description: value.description
      };
      serialized.push([name, promptEntry]);
      continue;
    }
    const jsonSchema = zodToJsonSchema(value);
    // Default and description are lifted out of the generated JSON schema.
    const dataEntry = {
      type: "data",
      schema: jsonSchema,
      default: jsonSchema.default,
      description: jsonSchema.description
    };
    serialized.push([name, dataEntry]);
  }
  return Object.fromEntries(serialized);
}
|
|
12484
|
-
|
|
12485
|
-
|
|
12486
|
-
|
|
12487
|
-
|
|
12488
|
-
|
|
12489
|
-
|
|
12490
|
-
|
|
12491
|
-
|
|
12492
|
-
|
|
12493
|
-
|
|
12494
|
-
|
|
13092
|
+
/**
 * Converts a map of eval parameter definitions into a single JSON-schema
 * style object (`type: "object"` with `properties` and optional `required`).
 *
 * Prompt parameters become `{ type: "object", "x-bt-type": "prompt" }`
 * properties. Every other value is passed to `zodToJsonSchema` and used
 * as-is. A parameter is listed in `required` when it has no default.
 *
 * @param parameters - Object keyed by parameter name.
 * @returns The schema object; `required` is omitted when nothing is required.
 */
function serializeEvalParameterstoParametersSchema(parameters) {
  const properties = {};
  const required = [];
  for (const [name, value] of Object.entries(parameters)) {
    const isPrompt = "type" in value && value.type === "prompt";
    if (!isPrompt) {
      const jsonSchema = zodToJsonSchema(value);
      properties[name] = jsonSchema;
      if (!("default" in jsonSchema)) {
        required.push(name);
      }
      continue;
    }
    const defaultPromptData = value.default ? promptDefinitionToPromptData(value.default) : void 0;
    const promptProperty = {
      type: "object",
      "x-bt-type": "prompt"
    };
    // Optional keys are added only when present, description before default.
    if (value.description) {
      promptProperty.description = value.description;
    }
    if (defaultPromptData) {
      promptProperty.default = defaultPromptData;
    }
    properties[name] = promptProperty;
    if (!defaultPromptData) {
      required.push(name);
    }
  }
  const schema = {
    type: "object",
    properties
  };
  if (required.length > 0) {
    schema.required = required;
  }
  schema.additionalProperties = true;
  return schema;
}
|
|
12506
|
-
function
|
|
12507
|
-
if (
|
|
12508
|
-
return
|
|
12509
|
-
|
|
12510
|
-
|
|
13122
|
+
/**
 * Wraps eval parameters in a tagged container.
 *
 * Values recognised by `RemoteEvalParameters.isParameters` produce a
 * "braintrust.parameters" container that carries their schema and source
 * identifiers. Anything else is serialized as static parameters with a
 * `null` source.
 *
 * @param parameters - Either a remote parameters object or a plain map of
 *   parameter definitions.
 * @returns `{ type, schema, source }`.
 */
function serializeRemoteEvalParametersContainer(parameters) {
  if (!RemoteEvalParameters.isParameters(parameters)) {
    const staticSchema = serializeEvalParametersToStaticParametersSchema(parameters);
    return {
      type: "braintrust.staticParameters",
      schema: staticSchema,
      source: null
    };
  }
  const schema = parameters.schema;
  const source = {
    parametersId: parameters.id,
    slug: parameters.slug,
    name: parameters.name,
    projectId: parameters.projectId,
    version: parameters.version
  };
  return {
    type: "braintrust.parameters",
    schema,
    source
  };
}
|
|
13143
|
+
/**
 * Two-way cache between project names and project ids, filled lazily from
 * the app API.
 */
var ProjectNameIdMap = class {
  // Prototype-less objects: names and ids come from outside, so values such
  // as "constructor", "toString" or "__proto__" must behave like ordinary
  // keys. On a plain `{}` the `in` checks below would match inherited
  // members and hand back a built-in function instead of a real id or name.
  nameToId = Object.create(null);
  idToName = Object.create(null);
  /**
   * Returns the id for `projectName`. On a cache miss it posts to
   * `api/project/register` and records the mapping in both directions.
   *
   * @param projectName - Project name to look up.
   * @returns The project id.
   * @throws If the request fails or the response does not match
   *   `{ project: Project }`.
   */
  async getId(projectName) {
    if (!(projectName in this.nameToId)) {
      const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
        project_name: projectName
      });
      const result = import_v311.z.object({
        project: Project
      }).parse(response);
      const projectId = result.project.id;
      this.nameToId[projectName] = projectId;
      this.idToName[projectId] = projectName;
    }
    return this.nameToId[projectName];
  }
  /**
   * Returns the name for `projectId`. On a cache miss it posts to
   * `api/project/get` and records the mapping in both directions.
   *
   * @param projectId - Project id to look up.
   * @returns The project name, taken from the first entry of the response.
   * @throws If the request fails or the response is not a non-empty array
   *   of `Project`.
   */
  async getName(projectId) {
    if (!(projectId in this.idToName)) {
      const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
        id: projectId
      });
      const result = import_v311.z.array(Project).nonempty().parse(response);
      const projectName = result[0].name;
      this.idToName[projectId] = projectName;
      this.nameToId[projectName] = projectId;
    }
    return this.idToName[projectId];
  }
  /**
   * Resolves a project reference to an id.
   *
   * @param project - Object with an optional `id` and a `name`.
   * @returns `project.id` when it is truthy, otherwise the id looked up
   *   for `project.name`.
   */
  async resolve(project) {
    if (project.id) {
      return project.id;
    }
    return this.getId(project.name);
  }
};
|
|
12514
13179
|
|
|
12515
13180
|
// src/cli/functions/upload.ts
|
|
12516
|
-
var
|
|
12517
|
-
|
|
12518
|
-
|
|
12519
|
-
bundleId: import_v311.z.string()
|
|
13181
|
+
var pathInfoSchema = import_v312.z.strictObject({
|
|
13182
|
+
url: import_v312.z.string(),
|
|
13183
|
+
bundleId: import_v312.z.string()
|
|
12520
13184
|
}).strip();
|
|
12521
13185
|
async function uploadHandleBundles({
|
|
12522
13186
|
buildResults,
|
|
@@ -12563,6 +13227,11 @@ async function uploadHandleBundles({
|
|
|
12563
13227
|
for (const prompt of result.evaluator.prompts) {
|
|
12564
13228
|
prompts.push(await prompt.toFunctionDefinition(projectNameToId));
|
|
12565
13229
|
}
|
|
13230
|
+
if (result.evaluator.parameters != null) {
|
|
13231
|
+
for (const param of result.evaluator.parameters) {
|
|
13232
|
+
prompts.push(await param.toFunctionDefinition(projectNameToId));
|
|
13233
|
+
}
|
|
13234
|
+
}
|
|
12566
13235
|
}
|
|
12567
13236
|
for (const evaluator of Object.values(result.evaluator.evaluators)) {
|
|
12568
13237
|
const experiment = evalToExperiment?.[sourceFile]?.[evaluator.evaluator.evalName];
|
|
@@ -12858,7 +13527,7 @@ async function bundleCommand(args) {
|
|
|
12858
13527
|
}
|
|
12859
13528
|
|
|
12860
13529
|
// src/cli/util/pull.ts
|
|
12861
|
-
var
|
|
13530
|
+
var import_v313 = require("zod/v3");
|
|
12862
13531
|
var import_promises = __toESM(require("fs/promises"));
|
|
12863
13532
|
var import_util13 = __toESM(require("util"));
|
|
12864
13533
|
var import_path4 = __toESM(require("path"));
|
|
@@ -12873,7 +13542,7 @@ async function pullCommand(args) {
|
|
|
12873
13542
|
...args.id ? { ids: [args.id] } : {},
|
|
12874
13543
|
...args.version ? { version: loadPrettyXact(args.version) } : {}
|
|
12875
13544
|
});
|
|
12876
|
-
const functionObjects =
|
|
13545
|
+
const functionObjects = import_v313.z.object({ objects: import_v313.z.array(import_v313.z.unknown()) }).parse(functions);
|
|
12877
13546
|
const projectNameToFunctions = {};
|
|
12878
13547
|
const projectNameIdMap = new ProjectNameIdMap();
|
|
12879
13548
|
for (const rawFunc of functionObjects.objects) {
|
|
@@ -13036,7 +13705,7 @@ function makeFunctionDefinition({
|
|
|
13036
13705
|
const objectType = "prompt";
|
|
13037
13706
|
const prompt = func.prompt_data.prompt;
|
|
13038
13707
|
const promptContents = prompt.type === "completion" ? `prompt: ${doubleQuote(prompt.content)}` : `messages: ${safeStringify(prompt.messages).trimStart()}`;
|
|
13039
|
-
const rawToolsParsed = prompt.type === "chat" && prompt.tools && prompt.tools.length > 0 ?
|
|
13708
|
+
const rawToolsParsed = prompt.type === "chat" && prompt.tools && prompt.tools.length > 0 ? import_v313.z.array(ToolFunctionDefinition).safeParse(JSON.parse(prompt.tools)) : void 0;
|
|
13040
13709
|
if (rawToolsParsed && !rawToolsParsed.success) {
|
|
13041
13710
|
console.warn(
|
|
13042
13711
|
warning(
|
|
@@ -13148,7 +13817,7 @@ var import_express = __toESM(require("express"));
|
|
|
13148
13817
|
var import_cors = __toESM(require("cors"));
|
|
13149
13818
|
|
|
13150
13819
|
// dev/errorHandler.ts
|
|
13151
|
-
var
|
|
13820
|
+
var import_v314 = require("zod/v3");
|
|
13152
13821
|
var errorHandler = (err, req, res, next) => {
|
|
13153
13822
|
if ("status" in err) {
|
|
13154
13823
|
res.status(err.status).json({
|
|
@@ -13159,7 +13828,7 @@ var errorHandler = (err, req, res, next) => {
|
|
|
13159
13828
|
});
|
|
13160
13829
|
return;
|
|
13161
13830
|
}
|
|
13162
|
-
if (err instanceof
|
|
13831
|
+
if (err instanceof import_v314.z.ZodError) {
|
|
13163
13832
|
res.status(400).json({
|
|
13164
13833
|
error: {
|
|
13165
13834
|
message: "Invalid request",
|
|
@@ -13331,49 +14000,79 @@ function serializeSSEEvent(event) {
|
|
|
13331
14000
|
}
|
|
13332
14001
|
|
|
13333
14002
|
// dev/types.ts
|
|
13334
|
-
var
|
|
13335
|
-
var evalBodySchema =
|
|
13336
|
-
name:
|
|
13337
|
-
parameters:
|
|
14003
|
+
var import_v315 = require("zod/v3");
|
|
14004
|
+
var evalBodySchema = import_v315.z.object({
|
|
14005
|
+
name: import_v315.z.string(),
|
|
14006
|
+
parameters: import_v315.z.record(import_v315.z.string(), import_v315.z.unknown()).nullish(),
|
|
13338
14007
|
data: RunEval.shape.data,
|
|
13339
|
-
scores:
|
|
13340
|
-
|
|
14008
|
+
scores: import_v315.z.array(
|
|
14009
|
+
import_v315.z.object({
|
|
13341
14010
|
function_id: FunctionId,
|
|
13342
|
-
name:
|
|
14011
|
+
name: import_v315.z.string()
|
|
13343
14012
|
})
|
|
13344
14013
|
).nullish(),
|
|
13345
|
-
experiment_name:
|
|
13346
|
-
project_id:
|
|
14014
|
+
experiment_name: import_v315.z.string().nullish(),
|
|
14015
|
+
project_id: import_v315.z.string().nullish(),
|
|
13347
14016
|
parent: InvokeParent.optional(),
|
|
13348
|
-
stream:
|
|
14017
|
+
stream: import_v315.z.boolean().optional()
|
|
13349
14018
|
});
|
|
13350
|
-
var
|
|
13351
|
-
|
|
13352
|
-
|
|
13353
|
-
|
|
13354
|
-
type:
|
|
14019
|
+
var staticParametersSchema = import_v315.z.record(
|
|
14020
|
+
import_v315.z.string(),
|
|
14021
|
+
import_v315.z.union([
|
|
14022
|
+
import_v315.z.object({
|
|
14023
|
+
type: import_v315.z.literal("prompt"),
|
|
13355
14024
|
default: PromptData.optional(),
|
|
13356
|
-
description:
|
|
14025
|
+
description: import_v315.z.string().optional()
|
|
13357
14026
|
}),
|
|
13358
|
-
|
|
13359
|
-
type:
|
|
13360
|
-
schema:
|
|
13361
|
-
|
|
13362
|
-
|
|
13363
|
-
description: import_v314.z.string().optional()
|
|
14027
|
+
import_v315.z.object({
|
|
14028
|
+
type: import_v315.z.literal("data"),
|
|
14029
|
+
schema: import_v315.z.record(import_v315.z.unknown()),
|
|
14030
|
+
default: import_v315.z.unknown().optional(),
|
|
14031
|
+
description: import_v315.z.string().optional()
|
|
13364
14032
|
})
|
|
13365
14033
|
])
|
|
13366
14034
|
);
|
|
13367
|
-
var
|
|
13368
|
-
|
|
14035
|
+
var parametersSchema = import_v315.z.object({
|
|
14036
|
+
type: import_v315.z.literal("object"),
|
|
14037
|
+
properties: import_v315.z.record(import_v315.z.string(), import_v315.z.record(import_v315.z.unknown())),
|
|
14038
|
+
required: import_v315.z.array(import_v315.z.string()).optional(),
|
|
14039
|
+
additionalProperties: import_v315.z.boolean().optional()
|
|
13369
14040
|
});
|
|
13370
|
-
var
|
|
13371
|
-
|
|
14041
|
+
var parametersSourceSchema = import_v315.z.object({
|
|
14042
|
+
parametersId: import_v315.z.string().optional(),
|
|
14043
|
+
slug: import_v315.z.string(),
|
|
14044
|
+
name: import_v315.z.string(),
|
|
14045
|
+
projectId: import_v315.z.string().optional(),
|
|
14046
|
+
version: import_v315.z.string().optional()
|
|
14047
|
+
});
|
|
14048
|
+
var parametersContainerSchema = import_v315.z.object({
|
|
14049
|
+
type: import_v315.z.literal("braintrust.parameters"),
|
|
14050
|
+
schema: parametersSchema,
|
|
14051
|
+
source: parametersSourceSchema
|
|
14052
|
+
});
|
|
14053
|
+
var staticParametersContainerSchema = import_v315.z.object({
|
|
14054
|
+
type: import_v315.z.literal("braintrust.staticParameters"),
|
|
14055
|
+
schema: staticParametersSchema,
|
|
14056
|
+
source: import_v315.z.null()
|
|
14057
|
+
});
|
|
14058
|
+
var serializedParametersContainerSchema = import_v315.z.union([
|
|
14059
|
+
parametersContainerSchema,
|
|
14060
|
+
staticParametersContainerSchema,
|
|
14061
|
+
// keeping this type here since old versions of the SDK will still pass the unwrapped schema and we need to handle this in the app
|
|
14062
|
+
staticParametersSchema
|
|
14063
|
+
]);
|
|
14064
|
+
var evaluatorDefinitionSchema = import_v315.z.object({
|
|
14065
|
+
parameters: serializedParametersContainerSchema.optional(),
|
|
14066
|
+
scores: import_v315.z.array(import_v315.z.object({ name: import_v315.z.string() })).optional()
|
|
14067
|
+
});
|
|
14068
|
+
var evaluatorDefinitionsSchema = import_v315.z.record(
|
|
14069
|
+
import_v315.z.string(),
|
|
13372
14070
|
evaluatorDefinitionSchema
|
|
13373
14071
|
);
|
|
13374
14072
|
|
|
13375
14073
|
// dev/server.ts
|
|
13376
|
-
var
|
|
14074
|
+
var import_v316 = require("zod/v3");
|
|
14075
|
+
var import_ajv2 = require("ajv");
|
|
13377
14076
|
function runDevServer(evaluators, opts) {
|
|
13378
14077
|
const allEvaluators = Object.fromEntries(
|
|
13379
14078
|
evaluators.map((evaluator) => [evaluator.evalName, evaluator])
|
|
@@ -13408,20 +14107,27 @@ function runDevServer(evaluators, opts) {
|
|
|
13408
14107
|
app.get("/", (req, res) => {
|
|
13409
14108
|
res.send("Hello, world!");
|
|
13410
14109
|
});
|
|
13411
|
-
app.get(
|
|
13412
|
-
|
|
13413
|
-
|
|
13414
|
-
|
|
13415
|
-
|
|
13416
|
-
|
|
14110
|
+
app.get(
|
|
14111
|
+
"/list",
|
|
14112
|
+
checkAuthorized,
|
|
14113
|
+
asyncHandler(async (req, res) => {
|
|
14114
|
+
const evalDefs = {};
|
|
14115
|
+
for (const [name, evaluator] of Object.entries(allEvaluators)) {
|
|
14116
|
+
let parameters;
|
|
14117
|
+
if (evaluator.parameters) {
|
|
14118
|
+
const resolvedParams = await Promise.resolve(evaluator.parameters);
|
|
14119
|
+
parameters = serializeRemoteEvalParametersContainer(resolvedParams);
|
|
14120
|
+
}
|
|
14121
|
+
evalDefs[name] = {
|
|
14122
|
+
parameters,
|
|
13417
14123
|
scores: evaluator.scores.map((score, idx) => ({
|
|
13418
14124
|
name: scorerName(score, idx)
|
|
13419
14125
|
}))
|
|
13420
|
-
}
|
|
13421
|
-
|
|
13422
|
-
|
|
13423
|
-
|
|
13424
|
-
|
|
14126
|
+
};
|
|
14127
|
+
}
|
|
14128
|
+
res.json(evalDefs);
|
|
14129
|
+
})
|
|
14130
|
+
);
|
|
13425
14131
|
app.post(
|
|
13426
14132
|
"/eval",
|
|
13427
14133
|
checkAuthorized,
|
|
@@ -13446,18 +14152,12 @@ function runDevServer(evaluators, opts) {
|
|
|
13446
14152
|
res.status(404).json({ error: `Evaluator '${name}' not found` });
|
|
13447
14153
|
return;
|
|
13448
14154
|
}
|
|
13449
|
-
if (evaluator.parameters
|
|
14155
|
+
if (evaluator.parameters) {
|
|
13450
14156
|
try {
|
|
13451
|
-
|
|
13452
|
-
res.status(400).json({
|
|
13453
|
-
error: `Evaluator '${name}' does not accept parameters`
|
|
13454
|
-
});
|
|
13455
|
-
return;
|
|
13456
|
-
}
|
|
13457
|
-
validateParameters(parameters ?? {}, evaluator.parameters);
|
|
14157
|
+
await validateParameters(parameters ?? {}, evaluator.parameters);
|
|
13458
14158
|
} catch (e) {
|
|
13459
14159
|
console.error("Error validating parameters", e);
|
|
13460
|
-
if (e instanceof
|
|
14160
|
+
if (e instanceof import_v316.z.ZodError || e instanceof import_ajv2.ValidationError || e instanceof Error) {
|
|
13461
14161
|
res.status(400).json({
|
|
13462
14162
|
error: e.message
|
|
13463
14163
|
});
|
|
@@ -13606,9 +14306,9 @@ async function getDataset(state, data) {
|
|
|
13606
14306
|
return data.data;
|
|
13607
14307
|
}
|
|
13608
14308
|
}
|
|
13609
|
-
var datasetFetchSchema =
|
|
13610
|
-
project_id:
|
|
13611
|
-
name:
|
|
14309
|
+
var datasetFetchSchema = import_v316.z.object({
|
|
14310
|
+
project_id: import_v316.z.string(),
|
|
14311
|
+
name: import_v316.z.string()
|
|
13612
14312
|
});
|
|
13613
14313
|
async function getDatasetById({
|
|
13614
14314
|
state,
|
|
@@ -13617,7 +14317,7 @@ async function getDatasetById({
|
|
|
13617
14317
|
const dataset = await state.appConn().post_json("api/dataset/get", {
|
|
13618
14318
|
id: datasetId
|
|
13619
14319
|
});
|
|
13620
|
-
const parsed =
|
|
14320
|
+
const parsed = import_v316.z.array(datasetFetchSchema).parse(dataset);
|
|
13621
14321
|
if (parsed.length === 0) {
|
|
13622
14322
|
throw new Error(`Dataset '${datasetId}' not found`);
|
|
13623
14323
|
}
|
|
@@ -13650,33 +14350,6 @@ function makeScorer(state, name, score, projectId) {
|
|
|
13650
14350
|
});
|
|
13651
14351
|
return ret;
|
|
13652
14352
|
}
|
|
13653
|
-
function makeEvalParametersSchema(parameters) {
|
|
13654
|
-
return Object.fromEntries(
|
|
13655
|
-
Object.entries(parameters).map(([name, value]) => {
|
|
13656
|
-
if ("type" in value && value.type === "prompt") {
|
|
13657
|
-
return [
|
|
13658
|
-
name,
|
|
13659
|
-
{
|
|
13660
|
-
type: "prompt",
|
|
13661
|
-
default: value.default ? promptDefinitionToPromptData(value.default) : void 0,
|
|
13662
|
-
description: value.description
|
|
13663
|
-
}
|
|
13664
|
-
];
|
|
13665
|
-
} else {
|
|
13666
|
-
const schemaObj = zodToJsonSchema(value);
|
|
13667
|
-
return [
|
|
13668
|
-
name,
|
|
13669
|
-
{
|
|
13670
|
-
type: "data",
|
|
13671
|
-
schema: schemaObj,
|
|
13672
|
-
default: schemaObj.default,
|
|
13673
|
-
description: schemaObj.description
|
|
13674
|
-
}
|
|
13675
|
-
];
|
|
13676
|
-
}
|
|
13677
|
-
})
|
|
13678
|
-
);
|
|
13679
|
-
}
|
|
13680
14353
|
|
|
13681
14354
|
// src/cli/util/external-packages-plugin.ts
|
|
13682
14355
|
function createMarkKnownPackagesExternalPlugin(additionalPackages = []) {
|
|
@@ -13901,6 +14574,7 @@ async function initFile({
|
|
|
13901
14574
|
const evaluator = evaluateBuildResults(inFile, result) || {
|
|
13902
14575
|
functions: [],
|
|
13903
14576
|
prompts: [],
|
|
14577
|
+
parameters: [],
|
|
13904
14578
|
evaluators: {},
|
|
13905
14579
|
reporters: {}
|
|
13906
14580
|
};
|