braintrust 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +4844 -3703
- package/dev/dist/index.d.ts +4844 -3703
- package/dev/dist/index.js +2068 -1402
- package/dev/dist/index.mjs +1954 -1288
- package/dist/browser.d.mts +16987 -8720
- package/dist/browser.d.ts +16987 -8720
- package/dist/browser.js +1810 -841
- package/dist/browser.mjs +2056 -1087
- package/dist/cli.js +2403 -1729
- package/dist/index.d.mts +16987 -8720
- package/dist/index.d.ts +16987 -8720
- package/dist/index.js +1810 -841
- package/dist/index.mjs +2056 -1087
- package/package.json +2 -1
- package/util/dist/index.d.mts +10 -8
- package/util/dist/index.d.ts +10 -8
- package/util/dist/index.js +27 -142
- package/util/dist/index.mjs +26 -141
package/dev/dist/index.mjs
CHANGED
|
@@ -326,11 +326,19 @@ function getIdGenerator() {
|
|
|
326
326
|
|
|
327
327
|
// util/db_fields.ts
|
|
328
328
|
var TRANSACTION_ID_FIELD = "_xact_id";
|
|
329
|
+
var OBJECT_DELETE_FIELD = "_object_delete";
|
|
329
330
|
var IS_MERGE_FIELD = "_is_merge";
|
|
330
331
|
var AUDIT_SOURCE_FIELD = "_audit_source";
|
|
331
332
|
var AUDIT_METADATA_FIELD = "_audit_metadata";
|
|
332
333
|
var VALID_SOURCES = ["app", "api", "external"];
|
|
333
|
-
var
|
|
334
|
+
var OBJECT_ID_KEYS = [
|
|
335
|
+
"experiment_id",
|
|
336
|
+
"dataset_id",
|
|
337
|
+
"prompt_session_id",
|
|
338
|
+
"project_id",
|
|
339
|
+
"log_id",
|
|
340
|
+
"function_data"
|
|
341
|
+
];
|
|
334
342
|
|
|
335
343
|
// util/span_identifier_v3.ts
|
|
336
344
|
import * as uuid3 from "uuid";
|
|
@@ -1005,13 +1013,6 @@ function mergeDictsWithPathsHelper({
|
|
|
1005
1013
|
function mergeDicts(mergeInto, mergeFrom) {
|
|
1006
1014
|
return mergeDictsWithPaths({ mergeInto, mergeFrom, mergePaths: [] });
|
|
1007
1015
|
}
|
|
1008
|
-
function mapAt(m, k) {
|
|
1009
|
-
const ret = m.get(k);
|
|
1010
|
-
if (ret === void 0) {
|
|
1011
|
-
throw new Error(`Map does not contain key ${k}`);
|
|
1012
|
-
}
|
|
1013
|
-
return ret;
|
|
1014
|
-
}
|
|
1015
1016
|
function recordFind(m, k) {
|
|
1016
1017
|
return m[k];
|
|
1017
1018
|
}
|
|
@@ -1026,72 +1027,8 @@ function getObjValueByPath(row, path2) {
|
|
|
1026
1027
|
return curr;
|
|
1027
1028
|
}
|
|
1028
1029
|
|
|
1029
|
-
// util/graph_util.ts
|
|
1030
|
-
function depthFirstSearch(args) {
|
|
1031
|
-
const { graph, firstVisitF, lastVisitF } = args;
|
|
1032
|
-
for (const vs of graph.values()) {
|
|
1033
|
-
for (const v of vs.values()) {
|
|
1034
|
-
if (!graph.has(v)) {
|
|
1035
|
-
throw new Error(`Outgoing vertex ${v} must be a key in the graph`);
|
|
1036
|
-
}
|
|
1037
|
-
}
|
|
1038
|
-
}
|
|
1039
|
-
const firstVisitedVertices = /* @__PURE__ */ new Set();
|
|
1040
|
-
const visitationOrder = args.visitationOrder ?? [...graph.keys()];
|
|
1041
|
-
const events = visitationOrder.map((vertex) => ({ eventType: "first", vertex, extras: {} })).reverse();
|
|
1042
|
-
while (events.length) {
|
|
1043
|
-
const { eventType, vertex, extras } = events.pop();
|
|
1044
|
-
if (eventType === "last") {
|
|
1045
|
-
lastVisitF?.(vertex);
|
|
1046
|
-
continue;
|
|
1047
|
-
}
|
|
1048
|
-
if (firstVisitedVertices.has(vertex)) {
|
|
1049
|
-
continue;
|
|
1050
|
-
}
|
|
1051
|
-
firstVisitedVertices.add(vertex);
|
|
1052
|
-
firstVisitF?.(vertex, { parentVertex: extras.parentVertex });
|
|
1053
|
-
events.push({ eventType: "last", vertex, extras: {} });
|
|
1054
|
-
mapAt(graph, vertex).forEach((child) => {
|
|
1055
|
-
events.push({
|
|
1056
|
-
eventType: "first",
|
|
1057
|
-
vertex: child,
|
|
1058
|
-
extras: { parentVertex: vertex }
|
|
1059
|
-
});
|
|
1060
|
-
});
|
|
1061
|
-
}
|
|
1062
|
-
}
|
|
1063
|
-
function undirectedConnectedComponents(graph) {
|
|
1064
|
-
const directedGraph = new Map(
|
|
1065
|
-
[...graph.vertices].map((v) => [v, /* @__PURE__ */ new Set()])
|
|
1066
|
-
);
|
|
1067
|
-
for (const [i, j] of graph.edges) {
|
|
1068
|
-
mapAt(directedGraph, i).add(j);
|
|
1069
|
-
mapAt(directedGraph, j).add(i);
|
|
1070
|
-
}
|
|
1071
|
-
let labelCounter = 0;
|
|
1072
|
-
const vertexLabels = /* @__PURE__ */ new Map();
|
|
1073
|
-
const firstVisitF = (vertex, args) => {
|
|
1074
|
-
const label = args?.parentVertex !== void 0 ? mapAt(vertexLabels, args?.parentVertex) : labelCounter++;
|
|
1075
|
-
vertexLabels.set(vertex, label);
|
|
1076
|
-
};
|
|
1077
|
-
depthFirstSearch({ graph: directedGraph, firstVisitF });
|
|
1078
|
-
const output = Array.from({ length: labelCounter }).map(() => []);
|
|
1079
|
-
for (const [vertex, label] of vertexLabels.entries()) {
|
|
1080
|
-
output[label].push(vertex);
|
|
1081
|
-
}
|
|
1082
|
-
return output;
|
|
1083
|
-
}
|
|
1084
|
-
function topologicalSort(graph, visitationOrder) {
|
|
1085
|
-
const reverseOrdering = [];
|
|
1086
|
-
const lastVisitF = (vertex) => {
|
|
1087
|
-
reverseOrdering.push(vertex);
|
|
1088
|
-
};
|
|
1089
|
-
depthFirstSearch({ graph, lastVisitF, visitationOrder });
|
|
1090
|
-
return reverseOrdering.reverse();
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
1030
|
// util/merge_row_batch.ts
|
|
1094
|
-
function generateMergedRowKey(row
|
|
1031
|
+
function generateMergedRowKey(row) {
|
|
1095
1032
|
return JSON.stringify(
|
|
1096
1033
|
[
|
|
1097
1034
|
"org_id",
|
|
@@ -1100,7 +1037,7 @@ function generateMergedRowKey(row, useParentIdForId) {
|
|
|
1100
1037
|
"dataset_id",
|
|
1101
1038
|
"prompt_session_id",
|
|
1102
1039
|
"log_id",
|
|
1103
|
-
|
|
1040
|
+
"id"
|
|
1104
1041
|
].map((k) => row[k])
|
|
1105
1042
|
);
|
|
1106
1043
|
}
|
|
@@ -1154,96 +1091,34 @@ function mergeRowBatch(rows) {
|
|
|
1154
1091
|
rowGroups.set(key, row);
|
|
1155
1092
|
}
|
|
1156
1093
|
}
|
|
1157
|
-
|
|
1158
|
-
const rowToLabel = new Map(
|
|
1159
|
-
merged.map((r, i) => [generateMergedRowKey(r), i])
|
|
1160
|
-
);
|
|
1161
|
-
const graph = new Map(
|
|
1162
|
-
Array.from({ length: merged.length }).map((_, i) => [i, /* @__PURE__ */ new Set()])
|
|
1163
|
-
);
|
|
1164
|
-
merged.forEach((r, i) => {
|
|
1165
|
-
const parentId = r[PARENT_ID_FIELD];
|
|
1166
|
-
if (!parentId) {
|
|
1167
|
-
return;
|
|
1168
|
-
}
|
|
1169
|
-
const parentRowKey = generateMergedRowKey(
|
|
1170
|
-
r,
|
|
1171
|
-
true
|
|
1172
|
-
/* useParentIdForId */
|
|
1173
|
-
);
|
|
1174
|
-
const parentLabel = rowToLabel.get(parentRowKey);
|
|
1175
|
-
if (parentLabel !== void 0) {
|
|
1176
|
-
mapAt(graph, parentLabel).add(i);
|
|
1177
|
-
}
|
|
1178
|
-
});
|
|
1179
|
-
const connectedComponents = undirectedConnectedComponents({
|
|
1180
|
-
vertices: new Set(graph.keys()),
|
|
1181
|
-
edges: new Set(
|
|
1182
|
-
[...graph.entries()].flatMap(
|
|
1183
|
-
([k, vs]) => [...vs].map((v) => {
|
|
1184
|
-
const ret = [k, v];
|
|
1185
|
-
return ret;
|
|
1186
|
-
})
|
|
1187
|
-
)
|
|
1188
|
-
)
|
|
1189
|
-
});
|
|
1190
|
-
const buckets = connectedComponents.map(
|
|
1191
|
-
(cc) => topologicalSort(
|
|
1192
|
-
graph,
|
|
1193
|
-
cc
|
|
1194
|
-
/* visitationOrder */
|
|
1195
|
-
)
|
|
1196
|
-
);
|
|
1197
|
-
return buckets.map((bucket) => bucket.map((i) => merged[i]));
|
|
1094
|
+
return [...rowGroups.values()];
|
|
1198
1095
|
}
|
|
1199
1096
|
function batchItems(args) {
|
|
1200
|
-
|
|
1097
|
+
const { items } = args;
|
|
1201
1098
|
const batchMaxNumItems = args.batchMaxNumItems ?? Number.POSITIVE_INFINITY;
|
|
1202
1099
|
const batchMaxNumBytes = args.batchMaxNumBytes ?? Number.POSITIVE_INFINITY;
|
|
1100
|
+
const getByteSize = args.getByteSize;
|
|
1203
1101
|
const output = [];
|
|
1204
|
-
let nextItems = [];
|
|
1205
|
-
let batchSet = [];
|
|
1206
1102
|
let batch = [];
|
|
1207
1103
|
let batchLen = 0;
|
|
1208
1104
|
function addToBatch(item) {
|
|
1209
1105
|
batch.push(item);
|
|
1210
|
-
batchLen += item
|
|
1106
|
+
batchLen += getByteSize(item);
|
|
1211
1107
|
}
|
|
1212
1108
|
function flushBatch() {
|
|
1213
|
-
|
|
1109
|
+
output.push(batch);
|
|
1214
1110
|
batch = [];
|
|
1215
1111
|
batchLen = 0;
|
|
1216
1112
|
}
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
for (const item of bucket) {
|
|
1221
|
-
if (batch.length === 0 || item.length + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems) {
|
|
1222
|
-
addToBatch(item);
|
|
1223
|
-
} else if (i === 0) {
|
|
1224
|
-
flushBatch();
|
|
1225
|
-
addToBatch(item);
|
|
1226
|
-
} else {
|
|
1227
|
-
break;
|
|
1228
|
-
}
|
|
1229
|
-
++i;
|
|
1230
|
-
}
|
|
1231
|
-
if (i < bucket.length) {
|
|
1232
|
-
nextItems.push(bucket.slice(i));
|
|
1233
|
-
}
|
|
1234
|
-
if (batchLen >= batchMaxNumBytes || batch.length > batchMaxNumItems) {
|
|
1235
|
-
flushBatch();
|
|
1236
|
-
}
|
|
1237
|
-
}
|
|
1238
|
-
if (batch.length) {
|
|
1113
|
+
for (const item of items) {
|
|
1114
|
+
const itemSize = getByteSize(item);
|
|
1115
|
+
if (batch.length > 0 && !(itemSize + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems)) {
|
|
1239
1116
|
flushBatch();
|
|
1240
1117
|
}
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
items = nextItems;
|
|
1246
|
-
nextItems = [];
|
|
1118
|
+
addToBatch(item);
|
|
1119
|
+
}
|
|
1120
|
+
if (batch.length > 0) {
|
|
1121
|
+
flushBatch();
|
|
1247
1122
|
}
|
|
1248
1123
|
return output;
|
|
1249
1124
|
}
|
|
@@ -1841,10 +1716,15 @@ var FunctionTypeEnum = z6.enum([
|
|
|
1841
1716
|
"preprocessor",
|
|
1842
1717
|
"facet",
|
|
1843
1718
|
"classifier",
|
|
1844
|
-
"tag"
|
|
1719
|
+
"tag",
|
|
1720
|
+
"parameters"
|
|
1845
1721
|
]);
|
|
1846
1722
|
var NullableSavedFunctionId = z6.union([
|
|
1847
|
-
z6.object({
|
|
1723
|
+
z6.object({
|
|
1724
|
+
type: z6.literal("function"),
|
|
1725
|
+
id: z6.string(),
|
|
1726
|
+
version: z6.string().optional()
|
|
1727
|
+
}),
|
|
1848
1728
|
z6.object({
|
|
1849
1729
|
type: z6.literal("global"),
|
|
1850
1730
|
name: z6.string(),
|
|
@@ -1852,6 +1732,67 @@ var NullableSavedFunctionId = z6.union([
|
|
|
1852
1732
|
}),
|
|
1853
1733
|
z6.null()
|
|
1854
1734
|
]);
|
|
1735
|
+
var TopicMapReport = z6.object({
|
|
1736
|
+
version: z6.literal(1),
|
|
1737
|
+
created_at: z6.string().optional(),
|
|
1738
|
+
settings: z6.object({
|
|
1739
|
+
algorithm: z6.enum(["hdbscan", "kmeans", "hierarchical"]),
|
|
1740
|
+
dimension_reduction: z6.enum(["umap", "pca", "none"]),
|
|
1741
|
+
vector_field: z6.string(),
|
|
1742
|
+
embedding_model: z6.string(),
|
|
1743
|
+
n_clusters: z6.union([z6.number(), z6.null()]).optional(),
|
|
1744
|
+
umap_dimensions: z6.union([z6.number(), z6.null()]).optional(),
|
|
1745
|
+
min_cluster_size: z6.union([z6.number(), z6.null()]).optional(),
|
|
1746
|
+
min_samples: z6.union([z6.number(), z6.null()]).optional()
|
|
1747
|
+
}),
|
|
1748
|
+
query_settings: z6.object({
|
|
1749
|
+
hierarchy_threshold: z6.union([z6.number(), z6.null()]),
|
|
1750
|
+
auto_naming: z6.boolean(),
|
|
1751
|
+
skip_cache: z6.boolean(),
|
|
1752
|
+
viz_mode: z6.enum(["bar", "scatter"]),
|
|
1753
|
+
naming_model: z6.string()
|
|
1754
|
+
}).partial(),
|
|
1755
|
+
clusters: z6.array(
|
|
1756
|
+
z6.object({
|
|
1757
|
+
cluster_id: z6.number(),
|
|
1758
|
+
parent_cluster_id: z6.union([z6.number(), z6.null()]).optional(),
|
|
1759
|
+
topic_id: z6.string(),
|
|
1760
|
+
count: z6.number(),
|
|
1761
|
+
sample_texts: z6.array(z6.string()),
|
|
1762
|
+
samples: z6.array(
|
|
1763
|
+
z6.object({
|
|
1764
|
+
id: z6.string(),
|
|
1765
|
+
text: z6.string(),
|
|
1766
|
+
root_span_id: z6.string(),
|
|
1767
|
+
span_id: z6.string()
|
|
1768
|
+
})
|
|
1769
|
+
),
|
|
1770
|
+
name: z6.string().optional(),
|
|
1771
|
+
description: z6.string().optional(),
|
|
1772
|
+
keywords: z6.array(z6.string()).optional(),
|
|
1773
|
+
centroid: z6.array(z6.number()).optional(),
|
|
1774
|
+
parent_id: z6.union([z6.number(), z6.null()]).optional(),
|
|
1775
|
+
is_leaf: z6.boolean().optional(),
|
|
1776
|
+
depth: z6.number().optional()
|
|
1777
|
+
})
|
|
1778
|
+
),
|
|
1779
|
+
embedding_points: z6.array(
|
|
1780
|
+
z6.object({
|
|
1781
|
+
x: z6.number(),
|
|
1782
|
+
y: z6.number(),
|
|
1783
|
+
cluster: z6.number(),
|
|
1784
|
+
text: z6.string().optional()
|
|
1785
|
+
})
|
|
1786
|
+
).optional()
|
|
1787
|
+
});
|
|
1788
|
+
var TopicMapData = z6.object({
|
|
1789
|
+
type: z6.literal("topic_map"),
|
|
1790
|
+
source_facet: z6.string(),
|
|
1791
|
+
embedding_model: z6.string(),
|
|
1792
|
+
bundle_key: z6.string(),
|
|
1793
|
+
distance_threshold: z6.number().optional(),
|
|
1794
|
+
report: TopicMapReport.optional()
|
|
1795
|
+
});
|
|
1855
1796
|
var BatchedFacetData = z6.object({
|
|
1856
1797
|
type: z6.literal("batched_facet"),
|
|
1857
1798
|
preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
|
|
@@ -1860,9 +1801,17 @@ var BatchedFacetData = z6.object({
|
|
|
1860
1801
|
name: z6.string(),
|
|
1861
1802
|
prompt: z6.string(),
|
|
1862
1803
|
model: z6.string().optional(),
|
|
1804
|
+
embedding_model: z6.string().optional(),
|
|
1863
1805
|
no_match_pattern: z6.string().optional()
|
|
1864
1806
|
})
|
|
1865
|
-
)
|
|
1807
|
+
),
|
|
1808
|
+
topic_maps: z6.record(
|
|
1809
|
+
z6.object({
|
|
1810
|
+
function_name: z6.string(),
|
|
1811
|
+
topic_map_id: z6.string().optional(),
|
|
1812
|
+
topic_map_data: TopicMapData
|
|
1813
|
+
})
|
|
1814
|
+
).optional()
|
|
1866
1815
|
});
|
|
1867
1816
|
var BraintrustModelParams = z6.object({
|
|
1868
1817
|
use_cache: z6.boolean(),
|
|
@@ -2073,6 +2022,18 @@ var ObjectReferenceNullish = z6.union([
|
|
|
2073
2022
|
}),
|
|
2074
2023
|
z6.null()
|
|
2075
2024
|
]);
|
|
2025
|
+
var SavedFunctionId = z6.union([
|
|
2026
|
+
z6.object({
|
|
2027
|
+
type: z6.literal("function"),
|
|
2028
|
+
id: z6.string(),
|
|
2029
|
+
version: z6.string().optional()
|
|
2030
|
+
}),
|
|
2031
|
+
z6.object({
|
|
2032
|
+
type: z6.literal("global"),
|
|
2033
|
+
name: z6.string(),
|
|
2034
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2035
|
+
})
|
|
2036
|
+
]);
|
|
2076
2037
|
var DatasetEvent = z6.object({
|
|
2077
2038
|
id: z6.string(),
|
|
2078
2039
|
_xact_id: z6.string(),
|
|
@@ -2092,7 +2053,36 @@ var DatasetEvent = z6.object({
|
|
|
2092
2053
|
is_root: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
2093
2054
|
origin: ObjectReferenceNullish.optional(),
|
|
2094
2055
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2095
|
-
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
|
|
2056
|
+
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2057
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2058
|
+
classifications: z6.union([
|
|
2059
|
+
z6.record(
|
|
2060
|
+
z6.array(
|
|
2061
|
+
z6.object({
|
|
2062
|
+
id: z6.string(),
|
|
2063
|
+
label: z6.string().optional(),
|
|
2064
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
2065
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2066
|
+
source: SavedFunctionId.and(
|
|
2067
|
+
z6.union([
|
|
2068
|
+
z6.object({
|
|
2069
|
+
type: z6.literal("function"),
|
|
2070
|
+
id: z6.string(),
|
|
2071
|
+
version: z6.string().optional()
|
|
2072
|
+
}),
|
|
2073
|
+
z6.object({
|
|
2074
|
+
type: z6.literal("global"),
|
|
2075
|
+
name: z6.string(),
|
|
2076
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2077
|
+
}),
|
|
2078
|
+
z6.null()
|
|
2079
|
+
])
|
|
2080
|
+
).optional()
|
|
2081
|
+
})
|
|
2082
|
+
)
|
|
2083
|
+
),
|
|
2084
|
+
z6.null()
|
|
2085
|
+
]).optional()
|
|
2096
2086
|
});
|
|
2097
2087
|
var EnvVar = z6.object({
|
|
2098
2088
|
id: z6.string().uuid(),
|
|
@@ -2169,7 +2159,8 @@ var SpanType = z6.union([
|
|
|
2169
2159
|
"automation",
|
|
2170
2160
|
"facet",
|
|
2171
2161
|
"preprocessor",
|
|
2172
|
-
"classifier"
|
|
2162
|
+
"classifier",
|
|
2163
|
+
"review"
|
|
2173
2164
|
]),
|
|
2174
2165
|
z6.null()
|
|
2175
2166
|
]);
|
|
@@ -2210,10 +2201,43 @@ var ExperimentEvent = z6.object({
|
|
|
2210
2201
|
is_root: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
2211
2202
|
origin: ObjectReferenceNullish.optional(),
|
|
2212
2203
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2213
|
-
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
|
|
2204
|
+
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2205
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2206
|
+
classifications: z6.union([
|
|
2207
|
+
z6.record(
|
|
2208
|
+
z6.array(
|
|
2209
|
+
z6.object({
|
|
2210
|
+
id: z6.string(),
|
|
2211
|
+
label: z6.string().optional(),
|
|
2212
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
2213
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2214
|
+
source: SavedFunctionId.and(
|
|
2215
|
+
z6.union([
|
|
2216
|
+
z6.object({
|
|
2217
|
+
type: z6.literal("function"),
|
|
2218
|
+
id: z6.string(),
|
|
2219
|
+
version: z6.string().optional()
|
|
2220
|
+
}),
|
|
2221
|
+
z6.object({
|
|
2222
|
+
type: z6.literal("global"),
|
|
2223
|
+
name: z6.string(),
|
|
2224
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2225
|
+
}),
|
|
2226
|
+
z6.null()
|
|
2227
|
+
])
|
|
2228
|
+
).optional()
|
|
2229
|
+
})
|
|
2230
|
+
)
|
|
2231
|
+
),
|
|
2232
|
+
z6.null()
|
|
2233
|
+
]).optional()
|
|
2214
2234
|
});
|
|
2215
2235
|
var ExtendedSavedFunctionId = z6.union([
|
|
2216
|
-
z6.object({
|
|
2236
|
+
z6.object({
|
|
2237
|
+
type: z6.literal("function"),
|
|
2238
|
+
id: z6.string(),
|
|
2239
|
+
version: z6.string().optional()
|
|
2240
|
+
}),
|
|
2217
2241
|
z6.object({
|
|
2218
2242
|
type: z6.literal("global"),
|
|
2219
2243
|
name: z6.string(),
|
|
@@ -2230,6 +2254,7 @@ var FacetData = z6.object({
|
|
|
2230
2254
|
preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
|
|
2231
2255
|
prompt: z6.string(),
|
|
2232
2256
|
model: z6.string().optional(),
|
|
2257
|
+
embedding_model: z6.string().optional(),
|
|
2233
2258
|
no_match_pattern: z6.string().optional()
|
|
2234
2259
|
});
|
|
2235
2260
|
var PromptBlockDataNullish = z6.union([
|
|
@@ -2319,14 +2344,6 @@ var PromptParserNullish = z6.union([
|
|
|
2319
2344
|
}),
|
|
2320
2345
|
z6.null()
|
|
2321
2346
|
]);
|
|
2322
|
-
var SavedFunctionId = z6.union([
|
|
2323
|
-
z6.object({ type: z6.literal("function"), id: z6.string() }),
|
|
2324
|
-
z6.object({
|
|
2325
|
-
type: z6.literal("global"),
|
|
2326
|
-
name: z6.string(),
|
|
2327
|
-
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2328
|
-
})
|
|
2329
|
-
]);
|
|
2330
2347
|
var PromptDataNullish = z6.union([
|
|
2331
2348
|
z6.object({
|
|
2332
2349
|
prompt: PromptBlockDataNullish,
|
|
@@ -2377,7 +2394,8 @@ var FunctionTypeEnumNullish = z6.union([
|
|
|
2377
2394
|
"preprocessor",
|
|
2378
2395
|
"facet",
|
|
2379
2396
|
"classifier",
|
|
2380
|
-
"tag"
|
|
2397
|
+
"tag",
|
|
2398
|
+
"parameters"
|
|
2381
2399
|
]),
|
|
2382
2400
|
z6.null()
|
|
2383
2401
|
]);
|
|
@@ -2469,7 +2487,8 @@ var FunctionData = z6.union([
|
|
|
2469
2487
|
type: z6.literal("remote_eval"),
|
|
2470
2488
|
endpoint: z6.string(),
|
|
2471
2489
|
eval_name: z6.string(),
|
|
2472
|
-
parameters: z6.object({}).partial().passthrough()
|
|
2490
|
+
parameters: z6.object({}).partial().passthrough(),
|
|
2491
|
+
parameters_version: z6.union([z6.string(), z6.null()]).optional()
|
|
2473
2492
|
}),
|
|
2474
2493
|
z6.object({
|
|
2475
2494
|
type: z6.literal("global"),
|
|
@@ -2478,7 +2497,18 @@ var FunctionData = z6.union([
|
|
|
2478
2497
|
config: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional()
|
|
2479
2498
|
}),
|
|
2480
2499
|
FacetData,
|
|
2481
|
-
BatchedFacetData
|
|
2500
|
+
BatchedFacetData,
|
|
2501
|
+
z6.object({
|
|
2502
|
+
type: z6.literal("parameters"),
|
|
2503
|
+
data: z6.object({}).partial().passthrough(),
|
|
2504
|
+
__schema: z6.object({
|
|
2505
|
+
type: z6.literal("object"),
|
|
2506
|
+
properties: z6.record(z6.object({}).partial().passthrough()),
|
|
2507
|
+
required: z6.array(z6.string()).optional(),
|
|
2508
|
+
additionalProperties: z6.boolean().optional()
|
|
2509
|
+
})
|
|
2510
|
+
}),
|
|
2511
|
+
TopicMapData.and(z6.unknown())
|
|
2482
2512
|
]);
|
|
2483
2513
|
var Function = z6.object({
|
|
2484
2514
|
id: z6.string().uuid(),
|
|
@@ -2508,7 +2538,13 @@ var Function = z6.object({
|
|
|
2508
2538
|
z6.null()
|
|
2509
2539
|
]).optional()
|
|
2510
2540
|
});
|
|
2511
|
-
var FunctionFormat = z6.enum([
|
|
2541
|
+
var FunctionFormat = z6.enum([
|
|
2542
|
+
"llm",
|
|
2543
|
+
"code",
|
|
2544
|
+
"global",
|
|
2545
|
+
"graph",
|
|
2546
|
+
"topic_map"
|
|
2547
|
+
]);
|
|
2512
2548
|
var PromptData = z6.object({
|
|
2513
2549
|
prompt: PromptBlockDataNullish,
|
|
2514
2550
|
options: PromptOptionsNullish,
|
|
@@ -2591,13 +2627,14 @@ var FunctionObjectType = z6.enum([
|
|
|
2591
2627
|
"custom_view",
|
|
2592
2628
|
"preprocessor",
|
|
2593
2629
|
"facet",
|
|
2594
|
-
"classifier"
|
|
2630
|
+
"classifier",
|
|
2631
|
+
"parameters"
|
|
2595
2632
|
]);
|
|
2596
2633
|
var FunctionOutputType = z6.enum([
|
|
2597
2634
|
"completion",
|
|
2598
2635
|
"score",
|
|
2599
2636
|
"facet",
|
|
2600
|
-
"
|
|
2637
|
+
"classification",
|
|
2601
2638
|
"any"
|
|
2602
2639
|
]);
|
|
2603
2640
|
var GitMetadataSettings = z6.object({
|
|
@@ -2633,6 +2670,10 @@ var GroupScope = z6.object({
|
|
|
2633
2670
|
idle_seconds: z6.number().optional()
|
|
2634
2671
|
});
|
|
2635
2672
|
var IfExists = z6.enum(["error", "ignore", "replace"]);
|
|
2673
|
+
var ImageRenderingMode = z6.union([
|
|
2674
|
+
z6.enum(["auto", "click_to_load", "blocked"]),
|
|
2675
|
+
z6.null()
|
|
2676
|
+
]);
|
|
2636
2677
|
var InvokeParent = z6.union([
|
|
2637
2678
|
z6.object({
|
|
2638
2679
|
object_type: z6.enum(["project_logs", "experiment", "playground_logs"]),
|
|
@@ -2725,7 +2766,8 @@ var Organization = z6.object({
|
|
|
2725
2766
|
is_universal_api: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
2726
2767
|
proxy_url: z6.union([z6.string(), z6.null()]).optional(),
|
|
2727
2768
|
realtime_url: z6.union([z6.string(), z6.null()]).optional(),
|
|
2728
|
-
created: z6.union([z6.string(), z6.null()]).optional()
|
|
2769
|
+
created: z6.union([z6.string(), z6.null()]).optional(),
|
|
2770
|
+
image_rendering_mode: ImageRenderingMode.optional()
|
|
2729
2771
|
});
|
|
2730
2772
|
var ProjectSettings = z6.union([
|
|
2731
2773
|
z6.object({
|
|
@@ -2866,7 +2908,36 @@ var ProjectLogsEvent = z6.object({
|
|
|
2866
2908
|
origin: ObjectReferenceNullish.optional(),
|
|
2867
2909
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2868
2910
|
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2869
|
-
_async_scoring_state: z6.unknown().optional()
|
|
2911
|
+
_async_scoring_state: z6.unknown().optional(),
|
|
2912
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2913
|
+
classifications: z6.union([
|
|
2914
|
+
z6.record(
|
|
2915
|
+
z6.array(
|
|
2916
|
+
z6.object({
|
|
2917
|
+
id: z6.string(),
|
|
2918
|
+
label: z6.string().optional(),
|
|
2919
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
2920
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2921
|
+
source: SavedFunctionId.and(
|
|
2922
|
+
z6.union([
|
|
2923
|
+
z6.object({
|
|
2924
|
+
type: z6.literal("function"),
|
|
2925
|
+
id: z6.string(),
|
|
2926
|
+
version: z6.string().optional()
|
|
2927
|
+
}),
|
|
2928
|
+
z6.object({
|
|
2929
|
+
type: z6.literal("global"),
|
|
2930
|
+
name: z6.string(),
|
|
2931
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2932
|
+
}),
|
|
2933
|
+
z6.null()
|
|
2934
|
+
])
|
|
2935
|
+
).optional()
|
|
2936
|
+
})
|
|
2937
|
+
)
|
|
2938
|
+
),
|
|
2939
|
+
z6.null()
|
|
2940
|
+
]).optional()
|
|
2870
2941
|
});
|
|
2871
2942
|
var ProjectScoreType = z6.enum([
|
|
2872
2943
|
"slider",
|
|
@@ -3168,12 +3239,15 @@ var View = z6.object({
|
|
|
3168
3239
|
"datasets",
|
|
3169
3240
|
"dataset",
|
|
3170
3241
|
"prompts",
|
|
3242
|
+
"parameters",
|
|
3171
3243
|
"tools",
|
|
3172
3244
|
"scorers",
|
|
3173
3245
|
"classifiers",
|
|
3174
3246
|
"logs",
|
|
3175
3247
|
"monitor",
|
|
3176
|
-
"
|
|
3248
|
+
"for_review_project_log",
|
|
3249
|
+
"for_review_experiments",
|
|
3250
|
+
"for_review_datasets"
|
|
3177
3251
|
]),
|
|
3178
3252
|
name: z6.string(),
|
|
3179
3253
|
created: z6.union([z6.string(), z6.null()]).optional(),
|
|
@@ -3906,6 +3980,52 @@ var PromptCache = class {
|
|
|
3906
3980
|
}
|
|
3907
3981
|
};
|
|
3908
3982
|
|
|
3983
|
+
// src/prompt-cache/parameters-cache.ts
|
|
3984
|
+
function createCacheKey2(key) {
|
|
3985
|
+
if (key.id) {
|
|
3986
|
+
return `parameters:id:${key.id}`;
|
|
3987
|
+
}
|
|
3988
|
+
const prefix = key.projectId ?? key.projectName;
|
|
3989
|
+
if (!prefix) {
|
|
3990
|
+
throw new Error("Either projectId or projectName must be provided");
|
|
3991
|
+
}
|
|
3992
|
+
if (!key.slug) {
|
|
3993
|
+
throw new Error("Slug must be provided when not using ID");
|
|
3994
|
+
}
|
|
3995
|
+
return `parameters:${prefix}:${key.slug}:${key.version ?? "latest"}`;
|
|
3996
|
+
}
|
|
3997
|
+
var ParametersCache = class {
|
|
3998
|
+
memoryCache;
|
|
3999
|
+
diskCache;
|
|
4000
|
+
constructor(options) {
|
|
4001
|
+
this.memoryCache = options.memoryCache;
|
|
4002
|
+
this.diskCache = options.diskCache;
|
|
4003
|
+
}
|
|
4004
|
+
async get(key) {
|
|
4005
|
+
const cacheKey = createCacheKey2(key);
|
|
4006
|
+
const memoryParams = this.memoryCache.get(cacheKey);
|
|
4007
|
+
if (memoryParams !== void 0) {
|
|
4008
|
+
return memoryParams;
|
|
4009
|
+
}
|
|
4010
|
+
if (this.diskCache) {
|
|
4011
|
+
const diskParams = await this.diskCache.get(cacheKey);
|
|
4012
|
+
if (!diskParams) {
|
|
4013
|
+
return void 0;
|
|
4014
|
+
}
|
|
4015
|
+
this.memoryCache.set(cacheKey, diskParams);
|
|
4016
|
+
return diskParams;
|
|
4017
|
+
}
|
|
4018
|
+
return void 0;
|
|
4019
|
+
}
|
|
4020
|
+
async set(key, value) {
|
|
4021
|
+
const cacheKey = createCacheKey2(key);
|
|
4022
|
+
this.memoryCache.set(cacheKey, value);
|
|
4023
|
+
if (this.diskCache) {
|
|
4024
|
+
await this.diskCache.set(cacheKey, value);
|
|
4025
|
+
}
|
|
4026
|
+
}
|
|
4027
|
+
};
|
|
4028
|
+
|
|
3909
4029
|
// src/span-cache.ts
|
|
3910
4030
|
var activeCaches = /* @__PURE__ */ new Set();
|
|
3911
4031
|
var exitHandlersRegistered = false;
|
|
@@ -4196,7 +4316,24 @@ var SpanCache = class {
|
|
|
4196
4316
|
// src/logger.ts
|
|
4197
4317
|
var BRAINTRUST_ATTACHMENT = BraintrustAttachmentReference.shape.type.value;
|
|
4198
4318
|
var EXTERNAL_ATTACHMENT = ExternalAttachmentReference.shape.type.value;
|
|
4319
|
+
var LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow";
|
|
4199
4320
|
var BRAINTRUST_PARAMS = Object.keys(BraintrustModelParams.shape);
|
|
4321
|
+
var DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024;
|
|
4322
|
+
var parametersRowSchema = z8.object({
|
|
4323
|
+
id: z8.string().uuid(),
|
|
4324
|
+
_xact_id: z8.string(),
|
|
4325
|
+
project_id: z8.string().uuid(),
|
|
4326
|
+
name: z8.string(),
|
|
4327
|
+
slug: z8.string(),
|
|
4328
|
+
description: z8.union([z8.string(), z8.null()]).optional(),
|
|
4329
|
+
function_type: z8.literal("parameters"),
|
|
4330
|
+
function_data: z8.object({
|
|
4331
|
+
type: z8.literal("parameters"),
|
|
4332
|
+
data: z8.record(z8.unknown()).optional(),
|
|
4333
|
+
__schema: z8.record(z8.unknown())
|
|
4334
|
+
}),
|
|
4335
|
+
metadata: z8.union([z8.object({}).partial().passthrough(), z8.null()]).optional()
|
|
4336
|
+
});
|
|
4200
4337
|
var LoginInvalidOrgError = class extends Error {
|
|
4201
4338
|
constructor(message) {
|
|
4202
4339
|
super(message);
|
|
@@ -4373,6 +4510,17 @@ var BraintrustState = class _BraintrustState {
|
|
|
4373
4510
|
max: Number(isomorph_default.getEnv("BRAINTRUST_PROMPT_CACHE_DISK_MAX")) ?? 1 << 20
|
|
4374
4511
|
}) : void 0;
|
|
4375
4512
|
this.promptCache = new PromptCache({ memoryCache, diskCache });
|
|
4513
|
+
const parametersMemoryCache = new LRUCache({
|
|
4514
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_MEMORY_MAX")) ?? 1 << 10
|
|
4515
|
+
});
|
|
4516
|
+
const parametersDiskCache = canUseDiskCache() ? new DiskCache({
|
|
4517
|
+
cacheDir: isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DIR") ?? `${isomorph_default.getEnv("HOME") ?? isomorph_default.homedir()}/.braintrust/parameters_cache`,
|
|
4518
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DISK_MAX")) ?? 1 << 20
|
|
4519
|
+
}) : void 0;
|
|
4520
|
+
this.parametersCache = new ParametersCache({
|
|
4521
|
+
memoryCache: parametersMemoryCache,
|
|
4522
|
+
diskCache: parametersDiskCache
|
|
4523
|
+
});
|
|
4376
4524
|
this.spanCache = new SpanCache({ disabled: loginParams.disableSpanCache });
|
|
4377
4525
|
}
|
|
4378
4526
|
id;
|
|
@@ -4402,6 +4550,7 @@ var BraintrustState = class _BraintrustState {
|
|
|
4402
4550
|
_apiConn = null;
|
|
4403
4551
|
_proxyConn = null;
|
|
4404
4552
|
promptCache;
|
|
4553
|
+
parametersCache;
|
|
4405
4554
|
spanCache;
|
|
4406
4555
|
_idGenerator = null;
|
|
4407
4556
|
_contextManager = null;
|
|
@@ -5503,8 +5652,100 @@ function castLogger(logger, asyncFlush) {
|
|
|
5503
5652
|
}
|
|
5504
5653
|
return logger;
|
|
5505
5654
|
}
|
|
5655
|
+
var logs3OverflowUploadSchema = z8.object({
|
|
5656
|
+
method: z8.enum(["PUT", "POST"]),
|
|
5657
|
+
signedUrl: z8.string().url(),
|
|
5658
|
+
headers: z8.record(z8.string()).optional(),
|
|
5659
|
+
fields: z8.record(z8.string()).optional(),
|
|
5660
|
+
key: z8.string().min(1)
|
|
5661
|
+
});
|
|
5506
5662
|
function constructLogs3Data(items) {
|
|
5507
|
-
return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
|
|
5663
|
+
return `{"rows": ${constructJsonArray(items.map((i) => i.str))}, "api_version": 2}`;
|
|
5664
|
+
}
|
|
5665
|
+
function constructLogs3OverflowRequest(key) {
|
|
5666
|
+
return {
|
|
5667
|
+
rows: {
|
|
5668
|
+
type: LOGS3_OVERFLOW_REFERENCE_TYPE,
|
|
5669
|
+
key
|
|
5670
|
+
},
|
|
5671
|
+
api_version: 2
|
|
5672
|
+
};
|
|
5673
|
+
}
|
|
5674
|
+
function pickLogs3OverflowObjectIds(row) {
|
|
5675
|
+
const objectIds = {};
|
|
5676
|
+
for (const key of OBJECT_ID_KEYS) {
|
|
5677
|
+
if (key in row) {
|
|
5678
|
+
objectIds[key] = row[key];
|
|
5679
|
+
}
|
|
5680
|
+
}
|
|
5681
|
+
return objectIds;
|
|
5682
|
+
}
|
|
5683
|
+
/**
 * Upload a serialized logs3 payload to the signed URL described by `upload`.
 *
 * When `upload.method` is "POST" the payload is sent as a multipart form: every
 * entry of `upload.fields` becomes a form field and the payload is appended as
 * the "file" part. Any other method results in a plain PUT of the payload.
 *
 * @param upload Upload descriptor (`method`, `signedUrl`, optional `fields` and `headers`).
 * @param payload The request body text to upload.
 * @param fetchFn Fetch implementation to use; defaults to the global `fetch`.
 * @throws Error if POST fields are missing, FormData/Blob are unavailable, or
 *   the response is not ok.
 */
async function uploadLogs3OverflowPayload(upload, payload, fetchFn = fetch) {
  let requestInit;
  if (upload.method === "POST") {
    if (!upload.fields) {
      throw new Error("Missing logs3 overflow upload fields");
    }
    if (typeof FormData === "undefined" || typeof Blob === "undefined") {
      throw new Error("FormData is not available for logs3 overflow upload");
    }
    const form = new FormData();
    Object.entries(upload.fields).forEach(([fieldName, fieldValue]) => {
      form.append(fieldName, fieldValue);
    });
    const fileType = upload.fields["Content-Type"] ?? "application/json";
    form.append("file", new Blob([payload], { type: fileType }));
    // Any caller-supplied Content-Type header is dropped for the form upload.
    const formHeaders = Object.fromEntries(
      Object.entries(upload.headers ?? {}).filter(
        ([headerName]) => headerName.toLowerCase() !== "content-type"
      )
    );
    requestInit = { method: "POST", headers: formHeaders, body: form };
  } else {
    const putHeaders = { ...(upload.headers ?? {}) };
    addAzureBlobHeaders(putHeaders, upload.signedUrl);
    requestInit = { method: "PUT", headers: putHeaders, body: payload };
  }

  const response = await fetchFn(upload.signedUrl, requestInit);
  if (response.ok) {
    return;
  }
  // Reading the error body is best-effort; fall back to an empty string.
  const responseText = await response.text().catch(() => "");
  throw new Error(
    `Failed to upload logs3 overflow payload: ${response.status} ${responseText}`
  );
}
|
|
5730
|
+
/**
 * Serialize a log row and collect the metadata sent along with it when the
 * batch is routed through the overflow upload path.
 *
 * @param item The log row to serialize.
 * @returns `{ str, overflowMeta }` where `str` is the JSON text of the row and
 *   `overflowMeta` holds its object ids, delete flag and serialized byte size.
 */
function stringifyWithOverflowMeta(item) {
  const serialized = JSON.stringify(item);
  const objectIds = pickLogs3OverflowObjectIds(item);
  // Only a literal `true` marks the row as a delete.
  const isDelete = item[OBJECT_DELETE_FIELD] === true;
  const byteSize = utf8ByteLength(serialized);
  return {
    str: serialized,
    overflowMeta: {
      object_ids: objectIds,
      is_delete: isDelete,
      input_row: { byte_size: byteSize }
    }
  };
}
|
|
5744
|
+
/**
 * Return the number of bytes `value` occupies when encoded as UTF-8.
 *
 * Uses `TextEncoder` when the runtime provides it. Otherwise the length is
 * computed from the UTF-16 code units directly, so non-ASCII text is still
 * counted in bytes rather than in characters.
 *
 * @param value The string to measure.
 * @returns The UTF-8 encoded length in bytes.
 */
function utf8ByteLength(value) {
  if (typeof TextEncoder !== "undefined") {
    return new TextEncoder().encode(value).length;
  }
  let byteCount = 0;
  for (let i = 0; i < value.length; i++) {
    const unit = value.charCodeAt(i);
    if (unit < 0x80) {
      byteCount += 1;
    } else if (unit < 0x800) {
      byteCount += 2;
    } else if (unit >= 0xd800 && unit <= 0xdbff && i + 1 < value.length) {
      const next = value.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // A valid surrogate pair encodes one code point as four bytes.
        byteCount += 4;
        i++;
      } else {
        // Unpaired high surrogate: counted as U+FFFD (3 bytes), like TextEncoder.
        byteCount += 3;
      }
    } else {
      // Remaining BMP code points and unpaired surrogates take three bytes.
      byteCount += 3;
    }
  }
  return byteCount;
}
|
|
5509
5750
|
function now() {
|
|
5510
5751
|
return (/* @__PURE__ */ new Date()).getTime();
|
|
@@ -5519,8 +5760,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5519
5760
|
onFlushError;
|
|
5520
5761
|
maskingFunction = null;
|
|
5521
5762
|
syncFlush = false;
|
|
5522
|
-
|
|
5523
|
-
|
|
5763
|
+
maxRequestSizeOverride = null;
|
|
5764
|
+
_maxRequestSizePromise = null;
|
|
5524
5765
|
defaultBatchSize = 100;
|
|
5525
5766
|
numTries = 3;
|
|
5526
5767
|
queueDropExceedingMaxsize = DEFAULT_QUEUE_SIZE;
|
|
@@ -5548,7 +5789,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5548
5789
|
}
|
|
5549
5790
|
const maxRequestSizeEnv = Number(isomorph_default.getEnv("BRAINTRUST_MAX_REQUEST_SIZE"));
|
|
5550
5791
|
if (!isNaN(maxRequestSizeEnv)) {
|
|
5551
|
-
this.
|
|
5792
|
+
this.maxRequestSizeOverride = maxRequestSizeEnv;
|
|
5552
5793
|
}
|
|
5553
5794
|
const numTriesEnv = Number(isomorph_default.getEnv("BRAINTRUST_NUM_RETRIES"));
|
|
5554
5795
|
if (!isNaN(numTriesEnv)) {
|
|
@@ -5610,6 +5851,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5610
5851
|
}
|
|
5611
5852
|
}
|
|
5612
5853
|
}
|
|
5854
|
+
getMaxRequestSize() {
|
|
5855
|
+
if (!this._maxRequestSizePromise) {
|
|
5856
|
+
this._maxRequestSizePromise = (async () => {
|
|
5857
|
+
let serverLimit = null;
|
|
5858
|
+
try {
|
|
5859
|
+
const conn = await this.apiConn.get();
|
|
5860
|
+
const versionInfo = await conn.get_json("version");
|
|
5861
|
+
serverLimit = z8.object({ logs3_payload_max_bytes: z8.number().nullish() }).parse(versionInfo).logs3_payload_max_bytes ?? null;
|
|
5862
|
+
} catch (e) {
|
|
5863
|
+
console.warn("Failed to fetch version info for payload limit:", e);
|
|
5864
|
+
}
|
|
5865
|
+
const validServerLimit = serverLimit !== null && serverLimit > 0 ? serverLimit : null;
|
|
5866
|
+
const canUseOverflow = validServerLimit !== null;
|
|
5867
|
+
let maxRequestSize = DEFAULT_MAX_REQUEST_SIZE;
|
|
5868
|
+
if (this.maxRequestSizeOverride !== null) {
|
|
5869
|
+
maxRequestSize = validServerLimit !== null ? Math.min(this.maxRequestSizeOverride, validServerLimit) : this.maxRequestSizeOverride;
|
|
5870
|
+
} else if (validServerLimit !== null) {
|
|
5871
|
+
maxRequestSize = validServerLimit;
|
|
5872
|
+
}
|
|
5873
|
+
return { maxRequestSize, canUseOverflow };
|
|
5874
|
+
})();
|
|
5875
|
+
}
|
|
5876
|
+
return this._maxRequestSizePromise;
|
|
5877
|
+
}
|
|
5613
5878
|
async flush() {
|
|
5614
5879
|
if (this.syncFlush) {
|
|
5615
5880
|
this.triggerActiveFlush();
|
|
@@ -5653,33 +5918,33 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5653
5918
|
if (allItems.length === 0) {
|
|
5654
5919
|
return;
|
|
5655
5920
|
}
|
|
5656
|
-
const
|
|
5657
|
-
(
|
|
5921
|
+
const allItemsWithMeta = allItems.map(
|
|
5922
|
+
(item) => stringifyWithOverflowMeta(item)
|
|
5658
5923
|
);
|
|
5659
|
-
const
|
|
5660
|
-
|
|
5924
|
+
const maxRequestSizeResult = await this.getMaxRequestSize();
|
|
5925
|
+
const batches = batchItems({
|
|
5926
|
+
items: allItemsWithMeta,
|
|
5661
5927
|
batchMaxNumItems: batchSize,
|
|
5662
|
-
batchMaxNumBytes:
|
|
5928
|
+
batchMaxNumBytes: maxRequestSizeResult.maxRequestSize / 2,
|
|
5929
|
+
getByteSize: (item) => item.str.length
|
|
5663
5930
|
});
|
|
5664
|
-
|
|
5665
|
-
|
|
5666
|
-
|
|
5667
|
-
|
|
5668
|
-
|
|
5669
|
-
|
|
5670
|
-
|
|
5671
|
-
|
|
5672
|
-
|
|
5673
|
-
|
|
5931
|
+
const postPromises = batches.map(
|
|
5932
|
+
(batch) => (async () => {
|
|
5933
|
+
try {
|
|
5934
|
+
await this.submitLogsRequest(batch, maxRequestSizeResult);
|
|
5935
|
+
return { type: "success" };
|
|
5936
|
+
} catch (e) {
|
|
5937
|
+
return { type: "error", value: e };
|
|
5938
|
+
}
|
|
5939
|
+
})()
|
|
5940
|
+
);
|
|
5941
|
+
const results = await Promise.all(postPromises);
|
|
5942
|
+
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
5943
|
+
if (failingResultErrors.length) {
|
|
5944
|
+
throw new AggregateError(
|
|
5945
|
+
failingResultErrors,
|
|
5946
|
+
`Encountered the following errors while logging:`
|
|
5674
5947
|
);
|
|
5675
|
-
const results = await Promise.all(postPromises);
|
|
5676
|
-
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
5677
|
-
if (failingResultErrors.length) {
|
|
5678
|
-
throw new AggregateError(
|
|
5679
|
-
failingResultErrors,
|
|
5680
|
-
`Encountered the following errors while logging:`
|
|
5681
|
-
);
|
|
5682
|
-
}
|
|
5683
5948
|
}
|
|
5684
5949
|
const attachmentErrors = [];
|
|
5685
5950
|
for (const attachment of attachments) {
|
|
@@ -5709,32 +5974,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5709
5974
|
items.forEach((item) => extractAttachments(item, attachments));
|
|
5710
5975
|
let mergedItems = mergeRowBatch(items);
|
|
5711
5976
|
if (this.maskingFunction) {
|
|
5712
|
-
mergedItems = mergedItems.map(
|
|
5713
|
-
|
|
5714
|
-
|
|
5715
|
-
|
|
5716
|
-
|
|
5717
|
-
|
|
5718
|
-
|
|
5719
|
-
|
|
5720
|
-
|
|
5721
|
-
|
|
5722
|
-
|
|
5723
|
-
|
|
5724
|
-
|
|
5725
|
-
|
|
5726
|
-
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
5727
|
-
} else {
|
|
5728
|
-
maskedItem.error = maskedValue.errorMsg;
|
|
5729
|
-
}
|
|
5977
|
+
mergedItems = mergedItems.map((item) => {
|
|
5978
|
+
const maskedItem = { ...item };
|
|
5979
|
+
for (const field of REDACTION_FIELDS) {
|
|
5980
|
+
if (item[field] !== void 0) {
|
|
5981
|
+
const maskedValue = applyMaskingToField(
|
|
5982
|
+
this.maskingFunction,
|
|
5983
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
5984
|
+
item[field],
|
|
5985
|
+
field
|
|
5986
|
+
);
|
|
5987
|
+
if (maskedValue instanceof MaskingError) {
|
|
5988
|
+
delete maskedItem[field];
|
|
5989
|
+
if (maskedItem.error) {
|
|
5990
|
+
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
5730
5991
|
} else {
|
|
5731
|
-
maskedItem
|
|
5992
|
+
maskedItem.error = maskedValue.errorMsg;
|
|
5732
5993
|
}
|
|
5994
|
+
} else {
|
|
5995
|
+
maskedItem[field] = maskedValue;
|
|
5733
5996
|
}
|
|
5734
5997
|
}
|
|
5735
|
-
|
|
5736
|
-
|
|
5737
|
-
);
|
|
5998
|
+
}
|
|
5999
|
+
return maskedItem;
|
|
6000
|
+
});
|
|
5738
6001
|
}
|
|
5739
6002
|
return [mergedItems, attachments];
|
|
5740
6003
|
} catch (e) {
|
|
@@ -5761,20 +6024,73 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5761
6024
|
}
|
|
5762
6025
|
throw new Error("Impossible");
|
|
5763
6026
|
}
|
|
5764
|
-
async
|
|
6027
|
+
async requestLogs3OverflowUpload(conn, args) {
|
|
6028
|
+
let response;
|
|
6029
|
+
try {
|
|
6030
|
+
response = await conn.post_json("logs3/overflow", {
|
|
6031
|
+
content_type: "application/json",
|
|
6032
|
+
size_bytes: args.sizeBytes,
|
|
6033
|
+
rows: args.rows
|
|
6034
|
+
});
|
|
6035
|
+
} catch (error) {
|
|
6036
|
+
const errorStr = JSON.stringify(error);
|
|
6037
|
+
throw new Error(
|
|
6038
|
+
`Failed to request logs3 overflow upload URL: ${errorStr}`
|
|
6039
|
+
);
|
|
6040
|
+
}
|
|
6041
|
+
try {
|
|
6042
|
+
return logs3OverflowUploadSchema.parse(response);
|
|
6043
|
+
} catch (error) {
|
|
6044
|
+
if (error instanceof ZodError) {
|
|
6045
|
+
const errorStr = JSON.stringify(error.flatten());
|
|
6046
|
+
throw new Error(`Invalid response from API server: ${errorStr}`);
|
|
6047
|
+
}
|
|
6048
|
+
throw error;
|
|
6049
|
+
}
|
|
6050
|
+
}
|
|
6051
|
+
async _uploadLogs3OverflowPayload(conn, upload, payload) {
|
|
6052
|
+
await uploadLogs3OverflowPayload(upload, payload, conn.fetch.bind(conn));
|
|
6053
|
+
}
|
|
6054
|
+
async submitLogsRequest(items, {
|
|
6055
|
+
maxRequestSize,
|
|
6056
|
+
canUseOverflow
|
|
6057
|
+
}) {
|
|
5765
6058
|
const conn = await this.apiConn.get();
|
|
5766
6059
|
const dataStr = constructLogs3Data(items);
|
|
6060
|
+
const payloadBytes = utf8ByteLength(dataStr);
|
|
6061
|
+
const useOverflow = canUseOverflow && payloadBytes > maxRequestSize;
|
|
5767
6062
|
if (this.allPublishPayloadsDir) {
|
|
5768
6063
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
5769
6064
|
payloadDir: this.allPublishPayloadsDir,
|
|
5770
6065
|
payload: dataStr
|
|
5771
6066
|
});
|
|
5772
6067
|
}
|
|
6068
|
+
let overflowUpload = null;
|
|
6069
|
+
const overflowRows = useOverflow ? items.map((item) => item.overflowMeta) : null;
|
|
5773
6070
|
for (let i = 0; i < this.numTries; i++) {
|
|
5774
6071
|
const startTime = now();
|
|
5775
6072
|
let error = void 0;
|
|
5776
6073
|
try {
|
|
5777
|
-
|
|
6074
|
+
if (overflowRows) {
|
|
6075
|
+
if (!overflowUpload) {
|
|
6076
|
+
const currentUpload = await this.requestLogs3OverflowUpload(conn, {
|
|
6077
|
+
rows: overflowRows,
|
|
6078
|
+
sizeBytes: payloadBytes
|
|
6079
|
+
});
|
|
6080
|
+
await this._uploadLogs3OverflowPayload(
|
|
6081
|
+
conn,
|
|
6082
|
+
currentUpload,
|
|
6083
|
+
dataStr
|
|
6084
|
+
);
|
|
6085
|
+
overflowUpload = currentUpload;
|
|
6086
|
+
}
|
|
6087
|
+
await conn.post_json(
|
|
6088
|
+
"logs3",
|
|
6089
|
+
constructLogs3OverflowRequest(overflowUpload.key)
|
|
6090
|
+
);
|
|
6091
|
+
} else {
|
|
6092
|
+
await conn.post_json("logs3", dataStr);
|
|
6093
|
+
}
|
|
5778
6094
|
} catch (e) {
|
|
5779
6095
|
error = e;
|
|
5780
6096
|
}
|
|
@@ -5790,7 +6106,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5790
6106
|
return `${error}`;
|
|
5791
6107
|
}
|
|
5792
6108
|
})();
|
|
5793
|
-
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${
|
|
6109
|
+
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${payloadBytes}.${retryingText}
|
|
5794
6110
|
Error: ${errorText}`;
|
|
5795
6111
|
if (!isRetrying && this.failedPublishPayloadsDir) {
|
|
5796
6112
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
@@ -5844,7 +6160,7 @@ Error: ${errorText}`;
|
|
|
5844
6160
|
try {
|
|
5845
6161
|
const [allItems, allAttachments] = await this.unwrapLazyValues(wrappedItems);
|
|
5846
6162
|
const dataStr = constructLogs3Data(
|
|
5847
|
-
allItems.map((x) =>
|
|
6163
|
+
allItems.map((x) => stringifyWithOverflowMeta(x))
|
|
5848
6164
|
);
|
|
5849
6165
|
const attachmentStr = JSON.stringify(
|
|
5850
6166
|
allAttachments.map((a) => a.debugInfo())
|
|
@@ -8054,28 +8370,77 @@ var Prompt2 = class _Prompt {
|
|
|
8054
8370
|
);
|
|
8055
8371
|
}
|
|
8056
8372
|
};
|
|
8057
|
-
var
|
|
8058
|
-
|
|
8059
|
-
|
|
8060
|
-
|
|
8061
|
-
|
|
8062
|
-
|
|
8063
|
-
|
|
8064
|
-
|
|
8065
|
-
|
|
8066
|
-
|
|
8067
|
-
|
|
8068
|
-
|
|
8069
|
-
|
|
8070
|
-
}
|
|
8071
|
-
|
|
8072
|
-
|
|
8073
|
-
|
|
8074
|
-
|
|
8075
|
-
|
|
8076
|
-
|
|
8077
|
-
|
|
8078
|
-
|
|
8373
|
+
/**
 * Wrapper around the metadata record of a stored parameters object.
 *
 * Exposes the record's identifying fields plus the JSON schema and default
 * data kept under `function_data`.
 */
var RemoteEvalParameters = class {
  constructor(metadata) {
    this.metadata = metadata;
  }
  // Marker property checked by `isParameters`.
  __braintrust_parameters_marker = true;
  get id() {
    return this.metadata.id;
  }
  get projectId() {
    return this.metadata.project_id;
  }
  get name() {
    return this.metadata.name;
  }
  get slug() {
    return this.metadata.slug;
  }
  get version() {
    return this.metadata[TRANSACTION_ID_FIELD];
  }
  get schema() {
    return this.metadata.function_data.__schema;
  }
  get data() {
    return this.metadata.function_data.data ?? {};
  }
  /**
   * Shallow presence check: returns false for non-objects, or when a key that
   * is declared in the schema's `properties` and listed in `required` is
   * absent from `data`. Values themselves are not inspected.
   */
  validate(data) {
    if (typeof data !== "object" || data === null) {
      return false;
    }
    const { properties, required } = this.schema;
    if (typeof properties !== "object" || properties === null) {
      return true;
    }
    const requiredKeys = Array.isArray(required) ? required : [];
    return Object.keys(properties).every(
      (key) => key in data || !requiredKeys.includes(key)
    );
  }
  /** True when `x` is an object carrying the parameters marker set to `true`. */
  static isParameters(x) {
    if (typeof x !== "object" || x === null) {
      return false;
    }
    return "__braintrust_parameters_marker" in x && x.__braintrust_parameters_marker === true;
  }
};
|
|
8422
|
+
// Placeholder API key; the literal itself states it is not a real credential.
// NOTE(review): where this value is recognized is not visible from this chunk.
var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
|
|
8423
|
+
|
|
8424
|
+
// src/node.ts
|
|
8425
|
+
import { promisify } from "util";
|
|
8426
|
+
import * as zlib from "zlib";
|
|
8427
|
+
function configureNode() {
|
|
8428
|
+
isomorph_default.getRepoInfo = getRepoInfo;
|
|
8429
|
+
isomorph_default.getPastNAncestors = getPastNAncestors;
|
|
8430
|
+
isomorph_default.getEnv = (name) => process.env[name];
|
|
8431
|
+
isomorph_default.getCallerLocation = getCallerLocation;
|
|
8432
|
+
isomorph_default.newAsyncLocalStorage = () => new AsyncLocalStorage();
|
|
8433
|
+
isomorph_default.processOn = (event, handler) => {
|
|
8434
|
+
process.on(event, handler);
|
|
8435
|
+
};
|
|
8436
|
+
isomorph_default.basename = path.basename;
|
|
8437
|
+
isomorph_default.writeln = (text) => process.stdout.write(text + "\n");
|
|
8438
|
+
isomorph_default.pathJoin = path.join;
|
|
8439
|
+
isomorph_default.pathDirname = path.dirname;
|
|
8440
|
+
isomorph_default.mkdir = fs.mkdir;
|
|
8441
|
+
isomorph_default.writeFile = fs.writeFile;
|
|
8442
|
+
isomorph_default.readFile = fs.readFile;
|
|
8443
|
+
isomorph_default.readdir = fs.readdir;
|
|
8079
8444
|
isomorph_default.stat = fs.stat;
|
|
8080
8445
|
isomorph_default.statSync = fsSync.statSync;
|
|
8081
8446
|
isomorph_default.utimes = fs.utimes;
|
|
@@ -9133,6 +9498,85 @@ function waterfall(tasks, callback) {
|
|
|
9133
9498
|
}
|
|
9134
9499
|
var waterfall$1 = awaitify(waterfall);
|
|
9135
9500
|
|
|
9501
|
+
// src/functions/invoke.ts
|
|
9502
|
+
/**
 * Invoke a function through the proxy connection's "function/invoke" endpoint.
 *
 * Logs in on the given (or global) state, resolves the parent span export,
 * validates the function-identifying arguments with `FunctionId`, and posts
 * the request.
 *
 * @param args Login options (`orgName`, `apiKey`, `appUrl`, `forceLogin`,
 *   `fetch`), call options (`input`, `messages`, `parent`, `metadata`, `tags`,
 *   `state`, `stream`, `mode`, `schema`, `strict`, `projectId`) and the
 *   function-identifying fields.
 * @returns A `BraintrustStream` when `stream` is truthy; otherwise the JSON
 *   response, passed through `schema.parse` when a schema is supplied.
 * @throws Error when the function id arguments are invalid or a streaming
 *   response has no body.
 */
async function invoke(args) {
  const {
    orgName,
    apiKey,
    appUrl,
    forceLogin,
    fetch: fetch2,
    input,
    messages,
    parent: parentArg,
    metadata,
    tags,
    state: stateArg,
    stream,
    mode,
    schema,
    strict,
    projectId,
    ...functionIdArgs
  } = args;

  const state = stateArg ?? _internalGetGlobalState();
  await state.login({ orgName, apiKey, appUrl, forceLogin, fetch: fetch2 });

  // An explicit parent may be an exported string or a span-like object; with
  // no parent given, the current span parent object is exported instead.
  let parent;
  if (!parentArg) {
    parent = await getSpanParentObject().export();
  } else if (typeof parentArg === "string") {
    parent = parentArg;
  } else {
    parent = await parentArg.export();
  }

  const functionId = FunctionId.safeParse({
    function_id: functionIdArgs.function_id,
    project_name: functionIdArgs.projectName,
    slug: functionIdArgs.slug,
    global_function: functionIdArgs.globalFunction,
    function_type: functionIdArgs.functionType,
    prompt_session_id: functionIdArgs.promptSessionId,
    prompt_session_function_id: functionIdArgs.promptSessionFunctionId,
    version: functionIdArgs.version
  });
  if (!functionId.success) {
    throw new Error(
      `Invalid function ID arguments: ${functionId.error.message}`
    );
  }

  const request = {
    ...functionId.data,
    input,
    messages,
    parent,
    metadata,
    tags,
    stream,
    mode,
    strict
  };
  const headers = {
    Accept: stream ? "text/event-stream" : "application/json"
  };
  if (projectId) {
    headers["x-bt-project-id"] = projectId;
  }
  if (orgName) {
    headers["x-bt-org-name"] = orgName;
  }

  const resp = await state.proxyConn().post(`function/invoke`, request, { headers });
  if (!stream) {
    const data = await resp.json();
    return schema ? schema.parse(data) : data;
  }
  if (!resp.body) {
    throw new Error("Received empty stream body");
  }
  return new BraintrustStream(resp.body);
}
|
|
9579
|
+
|
|
9136
9580
|
// src/trace.ts
|
|
9137
9581
|
var SpanFetcher = class _SpanFetcher extends ObjectFetcher {
|
|
9138
9582
|
constructor(objectType, _objectId, rootSpanId, _state, spanTypeFilter) {
|
|
@@ -9268,6 +9712,7 @@ var LocalTrace = class {
|
|
|
9268
9712
|
spansFlushed = false;
|
|
9269
9713
|
spansFlushPromise = null;
|
|
9270
9714
|
cachedFetcher;
|
|
9715
|
+
threadCache = /* @__PURE__ */ new Map();
|
|
9271
9716
|
constructor({
|
|
9272
9717
|
objectType,
|
|
9273
9718
|
objectId,
|
|
@@ -9338,6 +9783,36 @@ var LocalTrace = class {
|
|
|
9338
9783
|
}
|
|
9339
9784
|
return this.cachedFetcher.getSpans({ spanType });
|
|
9340
9785
|
}
|
|
9786
|
+
/**
|
|
9787
|
+
* Get the thread (preprocessed messages) for this trace.
|
|
9788
|
+
* Calls the API with the project_default preprocessor (which falls back to "thread").
|
|
9789
|
+
*/
|
|
9790
|
+
async getThread(options) {
|
|
9791
|
+
const cacheKey = options?.preprocessor ?? "project_default";
|
|
9792
|
+
if (!this.threadCache.has(cacheKey)) {
|
|
9793
|
+
const promise = this.fetchThread(options);
|
|
9794
|
+
this.threadCache.set(cacheKey, promise);
|
|
9795
|
+
}
|
|
9796
|
+
return this.threadCache.get(cacheKey);
|
|
9797
|
+
}
|
|
9798
|
+
async fetchThread(options) {
|
|
9799
|
+
await this.ensureSpansReady();
|
|
9800
|
+
await this.state.login({});
|
|
9801
|
+
const result = await invoke({
|
|
9802
|
+
globalFunction: options?.preprocessor ?? "project_default",
|
|
9803
|
+
functionType: "preprocessor",
|
|
9804
|
+
input: {
|
|
9805
|
+
trace_ref: {
|
|
9806
|
+
object_type: this.objectType,
|
|
9807
|
+
object_id: this.objectId,
|
|
9808
|
+
root_span_id: this.rootSpanId
|
|
9809
|
+
}
|
|
9810
|
+
},
|
|
9811
|
+
mode: "json",
|
|
9812
|
+
state: this.state
|
|
9813
|
+
});
|
|
9814
|
+
return Array.isArray(result) ? result : [];
|
|
9815
|
+
}
|
|
9341
9816
|
async ensureSpansReady() {
|
|
9342
9817
|
if (this.spansFlushed || !this.ensureSpansFlushed) {
|
|
9343
9818
|
return;
|
|
@@ -9372,634 +9847,357 @@ var SimpleProgressReporter = class {
|
|
|
9372
9847
|
|
|
9373
9848
|
// src/eval-parameters.ts
|
|
9374
9849
|
import { z as z10 } from "zod/v3";
|
|
9850
|
+
import Ajv from "ajv";
|
|
9375
9851
|
|
|
9376
|
-
// src/
|
|
9852
|
+
// src/prompt-schemas.ts
|
|
9377
9853
|
import { z as z9 } from "zod/v3";
|
|
9378
|
-
var
|
|
9379
|
-
|
|
9380
|
-
|
|
9381
|
-
|
|
9854
|
+
// Prompt body: either a single `prompt` string or a list of chat `messages`.
var promptContentsSchema = z9.union([
  z9.object({ prompt: z9.string() }),
  z9.object({ messages: z9.array(ChatCompletionMessageParam) })
]);

// Prompt body plus the model to run it with, optional model params and an
// optional template format ("mustache", "nunjucks" or "none").
var promptDefinitionSchema = promptContentsSchema.and(
  z9.object({
    model: z9.string(),
    params: ModelParams.optional(),
    templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
  })
);

// Prompt definition that may additionally carry tool function definitions.
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
  z9.object({ tools: z9.array(ToolFunctionDefinition).optional() })
);
|
|
9874
|
+
/**
 * Convert a prompt definition into the prompt-data shape.
 *
 * A definition with `messages` becomes a "chat" prompt block (with `tools`
 * set to the JSON text of `rawTools` when any are given); otherwise it becomes
 * a "completion" block built from `prompt`.
 *
 * @param promptDefinition Definition with `messages` or `prompt`, plus `model`
 *   and optional `params` / `templateFormat`.
 * @param rawTools Optional array of tool definitions to serialize.
 * @returns `{ prompt, options }`, plus `template_format` when a template
 *   format is set on the definition.
 */
function promptDefinitionToPromptData(promptDefinition, rawTools) {
  let promptBlock;
  if ("messages" in promptDefinition) {
    const hasTools = rawTools && rawTools.length > 0;
    promptBlock = {
      type: "chat",
      messages: promptDefinition.messages,
      // The key is always present; it is undefined when there are no tools.
      tools: hasTools ? JSON.stringify(rawTools) : void 0
    };
  } else {
    promptBlock = {
      type: "completion",
      content: promptDefinition.prompt
    };
  }
  const promptData = {
    prompt: promptBlock,
    options: {
      model: promptDefinition.model,
      params: promptDefinition.params
    }
  };
  if (promptDefinition.templateFormat) {
    promptData.template_format = promptDefinition.templateFormat;
  }
  return promptData;
}
|
|
9892
|
+
|
|
9893
|
+
// src/eval-parameters.ts
|
|
9894
|
+
// Eval parameter declarations, keyed by parameter name. Each entry is either
// a prompt declaration (`type: "prompt"` with optional default and
// description) or a Zod schema instance.
var evalParametersSchema = z10.record(
  z10.string(),
  z10.union([
    z10.object({
      type: z10.literal("prompt"),
      default: promptDefinitionWithToolsSchema.optional(),
      description: z10.string().optional()
    }),
    // For Zod schemas
    z10.instanceof(z10.ZodType)
  ])
);
|
|
9906
|
+
/**
 * Validate evaluator parameters against a parameter schema.
 *
 * - No schema (undefined/null): the parameters are returned untouched.
 * - `RemoteEvalParameters`: the stored defaults are merged with the supplied
 *   parameters (supplied values win) and checked against the JSON schema.
 * - Anything else is treated as a record of Zod/prompt declarations.
 *
 * @param parameters Caller-supplied parameter values (may be undefined).
 * @param parameterSchema The schema, or a Promise resolving to it.
 * @returns The validated parameters.
 */
async function validateParameters(parameters, parameterSchema) {
  // Only real Promise instances are awaited; other schema objects are used as given.
  let resolvedSchema = parameterSchema;
  if (resolvedSchema instanceof Promise) {
    resolvedSchema = await resolvedSchema;
  }
  if (resolvedSchema === void 0 || resolvedSchema === null) {
    return parameters;
  }
  if (RemoteEvalParameters.isParameters(resolvedSchema)) {
    // Always validate a fresh copy: the JSON-schema validator is configured to
    // coerce types and fill in defaults on the object it is given, which would
    // otherwise modify the stored defaults whenever no overrides are passed.
    const mergedParameters = { ...resolvedSchema.data, ...parameters };
    return validateParametersWithJsonSchema(
      mergedParameters,
      resolvedSchema.schema
    );
  }
  return validateParametersWithZod(
    parameters,
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    resolvedSchema
  );
}
|
|
9930
|
+
/**
 * Validate parameters against a record of per-parameter declarations.
 *
 * A declaration with `type: "prompt"` yields a `Prompt2` built from the given
 * value, or from the declaration's default when no value is supplied; any
 * other declaration is used through its `parse` method.
 *
 * @param parameters Parameter values keyed by name.
 * @param parameterSchema Declarations keyed by parameter name.
 * @returns A new object with one validated entry per declared parameter.
 * @throws Error naming the parameter when its value is missing or invalid.
 */
function validateParametersWithZod(parameters, parameterSchema) {
  const validateEntry = ([name, schema]) => {
    const value = parameters[name];
    try {
      if (!("type" in schema && schema.type === "prompt")) {
        const schemaCasted = schema;
        return [name, schemaCasted.parse(value)];
      }
      let promptData;
      if (value) {
        promptData = PromptData.parse(value);
      } else if (schema.default) {
        promptData = promptDefinitionToPromptData(
          schema.default,
          schema.default.tools
        );
      } else {
        promptData = void 0;
      }
      if (!promptData) {
        throw new Error(`Parameter '${name}' is required`);
      }
      return [name, Prompt2.fromPromptData(name, promptData)];
    } catch (e) {
      console.error("Error validating parameter", name, e);
      const reason = e instanceof Error ? e.message : String(e);
      throw Error(`Invalid parameter '${name}': ${reason}`);
    }
  };
  return Object.fromEntries(Object.entries(parameterSchema).map(validateEntry));
}
|
|
9957
|
+
/**
 * Validate parameters against a JSON schema using Ajv.
 *
 * The validator is created with `coerceTypes` and `useDefaults` enabled and
 * strict mode disabled.
 *
 * @param parameters The parameter object to validate.
 * @param schema The JSON schema to compile.
 * @returns The same `parameters` object that was passed in.
 * @throws Error listing each failing path and message when validation fails.
 */
function validateParametersWithJsonSchema(parameters, schema) {
  const validator = new Ajv({
    coerceTypes: true,
    useDefaults: true,
    strict: false
  }).compile(schema);
  if (validator(parameters)) {
    return parameters;
  }
  // An empty instance path refers to the top-level object.
  const errorMessages = validator.errors
    ?.map(({ instancePath, message }) => `${instancePath || "root"}: ${message}`)
    .join(", ");
  throw Error(`Invalid parameters: ${errorMessages}`);
}
|
|
9969
|
+
|
|
9970
|
+
// src/framework.ts
|
|
9971
|
+
/**
 * Pairs an evaluation summary with its per-case results and controls how the
 * pair is rendered as a string, in Node's inspector, and as JSON.
 */
var EvalResultWithSummary = class {
  constructor(summary, results) {
    this.summary = summary;
    this.results = results;
  }
  /**
   * @deprecated Use `summary` instead.
   */
  toString() {
    const { summary } = this;
    return JSON.stringify(summary);
  }
  // Compact placeholder shown by Node's `util.inspect` / `console.log`.
  [Symbol.for("nodejs.util.inspect.custom")]() {
    return `EvalResultWithSummary(summary="...", results=[...])`;
  }
  // JSON form: the summary followed by the results.
  toJSON() {
    const { summary, results } = this;
    return { summary, results };
  }
};
|
|
9437
|
-
|
|
9438
|
-
|
|
9439
|
-
|
|
9992
|
+
/**
 * Build the display name of an evaluator.
 *
 * @param projectName The project name.
 * @param experimentName Optional experiment name.
 * @returns The project name, followed by ` [experimentName=...]` when an
 *   experiment name is given.
 */
function makeEvalName(projectName, experimentName) {
  if (!experimentName) {
    return projectName;
  }
  return projectName + ` [experimentName=${experimentName}]`;
}
|
|
9999
|
+
/**
 * Initialize an experiment without making it the current one.
 *
 * @param state State object forwarded to `init`.
 * @param options Extra options forwarded to `init`; `setCurrent` is always
 *   forced to false.
 * @returns Whatever `init` returns.
 */
function initExperiment(state, options = {}) {
  const initArgs = { state, ...options, setCurrent: false };
  return init(initArgs);
}
|
|
10006
|
+
/**
 * Resolve an evaluator's `data` option.
 *
 * @param data Either the data itself or a function producing it.
 * @returns `{ data, baseExperiment }` where `baseExperiment` is the `name` of
 *   the resolved data when its `_type` is "BaseExperiment", else undefined.
 */
function callEvaluatorData(data) {
  const resolved = typeof data === "function" ? data() : data;
  const isBaseExperiment = "_type" in resolved && resolved._type === "BaseExperiment";
  return {
    data: resolved,
    baseExperiment: isBaseExperiment ? resolved.name : void 0
  };
}
|
|
10017
|
+
/**
 * Check whether a value is a non-null object exposing a
 * `Symbol.asyncIterator` method.
 */
function isAsyncIterable2(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return typeof value[Symbol.asyncIterator] === "function";
}
|
|
10020
|
+
/**
 * Check whether a value is a non-null object exposing a `Symbol.iterator`
 * method. Primitive strings are not objects and therefore return false.
 */
function isIterable(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return typeof value[Symbol.iterator] === "function";
}
|
|
10023
|
+
// Global registry, reset to empty collections when this module is evaluated.
globalThis._evals = { functions: [], prompts: [], parameters: [], evaluators: {}, reporters: {} };
|
|
9467
|
-
|
|
9468
|
-
|
|
9469
|
-
|
|
10030
|
+
/**
 * Publish this module's span helpers (`currentSpan`, `withCurrent`,
 * `startSpan`, `NOOP_SPAN`) on `globalThis._spanContext`.
 */
function _initializeSpanContext() {
  const spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
  globalThis._spanContext = spanContext;
}
|
|
10033
|
+
async function Eval(name, evaluator, reporterOrOpts) {
|
|
10034
|
+
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
10035
|
+
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
10036
|
+
if (globalThis._evals.evaluators[evalName]) {
|
|
10037
|
+
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
9470
10038
|
}
|
|
9471
|
-
|
|
9472
|
-
|
|
9473
|
-
|
|
9474
|
-
|
|
9475
|
-
|
|
9476
|
-
|
|
10039
|
+
if (globalThis._lazy_load) {
|
|
10040
|
+
globalThis._evals.evaluators[evalName] = {
|
|
10041
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
10042
|
+
evaluator: {
|
|
10043
|
+
evalName,
|
|
10044
|
+
projectName: name,
|
|
10045
|
+
...evaluator
|
|
10046
|
+
},
|
|
10047
|
+
reporter: options.reporter
|
|
10048
|
+
};
|
|
10049
|
+
_initializeSpanContext();
|
|
10050
|
+
return new EvalResultWithSummary(
|
|
10051
|
+
{
|
|
10052
|
+
scores: {},
|
|
10053
|
+
metrics: {},
|
|
10054
|
+
projectName: "",
|
|
10055
|
+
experimentName: ""
|
|
10056
|
+
},
|
|
10057
|
+
[]
|
|
10058
|
+
);
|
|
10059
|
+
}
|
|
10060
|
+
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
10061
|
+
const shouldCollectResults = options.returnResults ?? true;
|
|
10062
|
+
if (typeof options.reporter === "string") {
|
|
10063
|
+
throw new Error(
|
|
10064
|
+
"Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
|
|
10065
|
+
);
|
|
10066
|
+
}
|
|
10067
|
+
const resolvedReporter = options.reporter || defaultReporter;
|
|
10068
|
+
try {
|
|
10069
|
+
const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
|
|
10070
|
+
evaluator.data
|
|
10071
|
+
);
|
|
10072
|
+
const experiment = options.parent || options.noSendLogs ? null : initExperiment(evaluator.state, {
|
|
10073
|
+
...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
|
|
10074
|
+
experiment: evaluator.experimentName,
|
|
10075
|
+
description: evaluator.description,
|
|
10076
|
+
metadata: evaluator.metadata,
|
|
10077
|
+
isPublic: evaluator.isPublic,
|
|
10078
|
+
update: evaluator.update,
|
|
10079
|
+
baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
|
|
10080
|
+
baseExperimentId: evaluator.baseExperimentId,
|
|
10081
|
+
gitMetadataSettings: evaluator.gitMetadataSettings,
|
|
10082
|
+
repoInfo: evaluator.repoInfo,
|
|
10083
|
+
dataset: Dataset2.isDataset(data) ? data : void 0
|
|
10084
|
+
});
|
|
10085
|
+
if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
|
|
10086
|
+
await experiment._waitForId();
|
|
9477
10087
|
}
|
|
9478
|
-
if (
|
|
9479
|
-
|
|
10088
|
+
if (experiment && options.onStart) {
|
|
10089
|
+
const summary = await experiment.summarize({ summarizeScores: false });
|
|
10090
|
+
options.onStart(summary);
|
|
9480
10091
|
}
|
|
9481
|
-
|
|
9482
|
-
|
|
9483
|
-
|
|
9484
|
-
|
|
9485
|
-
|
|
9486
|
-
|
|
9487
|
-
type: "scorer"
|
|
9488
|
-
});
|
|
9489
|
-
this.project.addCodeFunction(scorer);
|
|
9490
|
-
} else {
|
|
9491
|
-
const promptBlock = "messages" in opts ? {
|
|
9492
|
-
type: "chat",
|
|
9493
|
-
messages: opts.messages
|
|
9494
|
-
} : {
|
|
9495
|
-
type: "completion",
|
|
9496
|
-
content: opts.prompt
|
|
9497
|
-
};
|
|
9498
|
-
const promptData = {
|
|
9499
|
-
prompt: promptBlock,
|
|
9500
|
-
options: {
|
|
9501
|
-
model: opts.model,
|
|
9502
|
-
params: opts.params
|
|
9503
|
-
},
|
|
9504
|
-
parser: {
|
|
9505
|
-
type: "llm_classifier",
|
|
9506
|
-
use_cot: opts.useCot,
|
|
9507
|
-
choice_scores: opts.choiceScores
|
|
9508
|
-
}
|
|
10092
|
+
try {
|
|
10093
|
+
const evalDef = {
|
|
10094
|
+
evalName,
|
|
10095
|
+
projectName: name,
|
|
10096
|
+
...evaluator,
|
|
10097
|
+
data
|
|
9509
10098
|
};
|
|
9510
|
-
const
|
|
9511
|
-
|
|
9512
|
-
|
|
9513
|
-
|
|
9514
|
-
|
|
9515
|
-
|
|
9516
|
-
|
|
9517
|
-
|
|
9518
|
-
|
|
9519
|
-
|
|
9520
|
-
|
|
9521
|
-
|
|
9522
|
-
|
|
9523
|
-
|
|
9524
|
-
|
|
9525
|
-
|
|
9526
|
-
|
|
9527
|
-
|
|
9528
|
-
|
|
9529
|
-
|
|
9530
|
-
|
|
9531
|
-
|
|
9532
|
-
|
|
9533
|
-
|
|
9534
|
-
|
|
9535
|
-
|
|
9536
|
-
|
|
9537
|
-
|
|
9538
|
-
|
|
10099
|
+
const enableCache = options.enableCache ?? true;
|
|
10100
|
+
let ret;
|
|
10101
|
+
if (options.parent) {
|
|
10102
|
+
ret = await withParent(
|
|
10103
|
+
options.parent,
|
|
10104
|
+
() => runEvaluator(
|
|
10105
|
+
null,
|
|
10106
|
+
evalDef,
|
|
10107
|
+
progressReporter,
|
|
10108
|
+
[],
|
|
10109
|
+
options.stream,
|
|
10110
|
+
options.parameters,
|
|
10111
|
+
shouldCollectResults,
|
|
10112
|
+
enableCache
|
|
10113
|
+
),
|
|
10114
|
+
evaluator.state
|
|
10115
|
+
);
|
|
10116
|
+
} else {
|
|
10117
|
+
ret = await runEvaluator(
|
|
10118
|
+
experiment,
|
|
10119
|
+
evalDef,
|
|
10120
|
+
progressReporter,
|
|
10121
|
+
[],
|
|
10122
|
+
options.stream,
|
|
10123
|
+
options.parameters,
|
|
10124
|
+
shouldCollectResults,
|
|
10125
|
+
enableCache
|
|
10126
|
+
);
|
|
10127
|
+
}
|
|
10128
|
+
progressReporter.stop();
|
|
10129
|
+
resolvedReporter.reportEval(evalDef, ret, {
|
|
10130
|
+
verbose: true,
|
|
10131
|
+
jsonl: false
|
|
10132
|
+
});
|
|
10133
|
+
return ret;
|
|
10134
|
+
} finally {
|
|
10135
|
+
if (experiment) {
|
|
10136
|
+
await experiment.flush().catch(console.error);
|
|
10137
|
+
} else if (options.parent) {
|
|
10138
|
+
await flush().catch(console.error);
|
|
10139
|
+
}
|
|
9539
10140
|
}
|
|
10141
|
+
} finally {
|
|
10142
|
+
progressReporter.stop();
|
|
9540
10143
|
}
|
|
9541
|
-
|
|
9542
|
-
|
|
9543
|
-
|
|
9544
|
-
|
|
9545
|
-
|
|
9546
|
-
|
|
9547
|
-
returns;
|
|
9548
|
-
ifExists;
|
|
9549
|
-
metadata;
|
|
9550
|
-
key() {
|
|
9551
|
-
return JSON.stringify([
|
|
9552
|
-
this.project.id ?? "",
|
|
9553
|
-
this.project.name ?? "",
|
|
9554
|
-
this.slug
|
|
9555
|
-
]);
|
|
10144
|
+
}
|
|
10145
|
+
function serializeJSONWithPlainString(v) {
|
|
10146
|
+
if (typeof v === "string") {
|
|
10147
|
+
return v;
|
|
10148
|
+
} else {
|
|
10149
|
+
return JSON.stringify(v);
|
|
9556
10150
|
}
|
|
9557
|
-
};
|
|
9558
|
-
var CodePrompt = class {
|
|
9559
|
-
project;
|
|
9560
|
-
name;
|
|
9561
|
-
slug;
|
|
9562
|
-
prompt;
|
|
9563
|
-
ifExists;
|
|
9564
|
-
description;
|
|
9565
|
-
id;
|
|
9566
|
-
functionType;
|
|
9567
|
-
toolFunctions;
|
|
9568
|
-
metadata;
|
|
9569
|
-
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
9570
|
-
this.project = project;
|
|
9571
|
-
this.name = opts.name;
|
|
9572
|
-
this.slug = opts.slug;
|
|
9573
|
-
this.prompt = prompt;
|
|
9574
|
-
this.toolFunctions = toolFunctions;
|
|
9575
|
-
this.ifExists = opts.ifExists;
|
|
9576
|
-
this.description = opts.description;
|
|
9577
|
-
this.id = opts.id;
|
|
9578
|
-
this.functionType = functionType;
|
|
9579
|
-
this.metadata = opts.metadata;
|
|
9580
|
-
}
|
|
9581
|
-
async toFunctionDefinition(projectNameToId) {
|
|
9582
|
-
const prompt_data = {
|
|
9583
|
-
...this.prompt
|
|
9584
|
-
};
|
|
9585
|
-
if (this.toolFunctions.length > 0) {
|
|
9586
|
-
const resolvableToolFunctions = await Promise.all(
|
|
9587
|
-
this.toolFunctions.map(async (fn) => {
|
|
9588
|
-
if ("slug" in fn) {
|
|
9589
|
-
return {
|
|
9590
|
-
type: "slug",
|
|
9591
|
-
project_id: await projectNameToId.resolve(fn.project),
|
|
9592
|
-
slug: fn.slug
|
|
9593
|
-
};
|
|
9594
|
-
} else {
|
|
9595
|
-
return fn;
|
|
9596
|
-
}
|
|
9597
|
-
})
|
|
9598
|
-
);
|
|
9599
|
-
prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
9600
|
-
resolvableToolFunctions;
|
|
9601
|
-
}
|
|
9602
|
-
return {
|
|
9603
|
-
project_id: await projectNameToId.resolve(this.project),
|
|
9604
|
-
name: this.name,
|
|
9605
|
-
slug: this.slug,
|
|
9606
|
-
description: this.description ?? "",
|
|
9607
|
-
function_data: {
|
|
9608
|
-
type: "prompt"
|
|
9609
|
-
},
|
|
9610
|
-
function_type: this.functionType,
|
|
9611
|
-
prompt_data,
|
|
9612
|
-
if_exists: this.ifExists,
|
|
9613
|
-
metadata: this.metadata
|
|
9614
|
-
};
|
|
9615
|
-
}
|
|
9616
|
-
};
|
|
9617
|
-
var promptContentsSchema = z9.union([
|
|
9618
|
-
z9.object({
|
|
9619
|
-
prompt: z9.string()
|
|
9620
|
-
}),
|
|
9621
|
-
z9.object({
|
|
9622
|
-
messages: z9.array(ChatCompletionMessageParam)
|
|
9623
|
-
})
|
|
9624
|
-
]);
|
|
9625
|
-
var promptDefinitionSchema = promptContentsSchema.and(
|
|
9626
|
-
z9.object({
|
|
9627
|
-
model: z9.string(),
|
|
9628
|
-
params: ModelParams.optional(),
|
|
9629
|
-
templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
|
|
9630
|
-
})
|
|
9631
|
-
);
|
|
9632
|
-
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
|
|
9633
|
-
z9.object({
|
|
9634
|
-
tools: z9.array(ToolFunctionDefinition).optional()
|
|
9635
|
-
})
|
|
9636
|
-
);
|
|
9637
|
-
var PromptBuilder = class {
|
|
9638
|
-
constructor(project) {
|
|
9639
|
-
this.project = project;
|
|
9640
|
-
}
|
|
9641
|
-
create(opts) {
|
|
9642
|
-
const toolFunctions = [];
|
|
9643
|
-
const rawTools = [];
|
|
9644
|
-
for (const tool of opts.tools ?? []) {
|
|
9645
|
-
if (tool instanceof CodeFunction) {
|
|
9646
|
-
toolFunctions.push(tool);
|
|
9647
|
-
} else if ("type" in tool && !("function" in tool)) {
|
|
9648
|
-
toolFunctions.push(tool);
|
|
9649
|
-
} else {
|
|
9650
|
-
rawTools.push(tool);
|
|
9651
|
-
}
|
|
9652
|
-
}
|
|
9653
|
-
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
9654
|
-
const promptData = promptDefinitionToPromptData(opts, rawTools);
|
|
9655
|
-
const promptRow = {
|
|
9656
|
-
id: opts.id,
|
|
9657
|
-
_xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
|
|
9658
|
-
name: opts.name,
|
|
9659
|
-
slug,
|
|
9660
|
-
prompt_data: promptData,
|
|
9661
|
-
...this.project.id !== void 0 ? { project_id: this.project.id } : {}
|
|
9662
|
-
};
|
|
9663
|
-
const prompt = new Prompt2(
|
|
9664
|
-
promptRow,
|
|
9665
|
-
{},
|
|
9666
|
-
// It doesn't make sense to specify defaults here.
|
|
9667
|
-
opts.noTrace ?? false
|
|
9668
|
-
);
|
|
9669
|
-
const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
|
|
9670
|
-
...opts,
|
|
9671
|
-
slug
|
|
9672
|
-
});
|
|
9673
|
-
this.project.addPrompt(codePrompt);
|
|
9674
|
-
return prompt;
|
|
9675
|
-
}
|
|
9676
|
-
};
|
|
9677
|
-
function promptDefinitionToPromptData(promptDefinition, rawTools) {
|
|
9678
|
-
const promptBlock = "messages" in promptDefinition ? {
|
|
9679
|
-
type: "chat",
|
|
9680
|
-
messages: promptDefinition.messages,
|
|
9681
|
-
tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
|
|
9682
|
-
} : {
|
|
9683
|
-
type: "completion",
|
|
9684
|
-
content: promptDefinition.prompt
|
|
9685
|
-
};
|
|
9686
|
-
return {
|
|
9687
|
-
prompt: promptBlock,
|
|
9688
|
-
options: {
|
|
9689
|
-
model: promptDefinition.model,
|
|
9690
|
-
params: promptDefinition.params
|
|
9691
|
-
},
|
|
9692
|
-
...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
|
|
9693
|
-
};
|
|
9694
10151
|
}
|
|
9695
|
-
|
|
9696
|
-
|
|
9697
|
-
|
|
9698
|
-
|
|
9699
|
-
|
|
9700
|
-
|
|
9701
|
-
|
|
9702
|
-
|
|
9703
|
-
|
|
9704
|
-
|
|
9705
|
-
|
|
9706
|
-
const projectId = result.project.id;
|
|
9707
|
-
this.nameToId[projectName] = projectId;
|
|
9708
|
-
this.idToName[projectId] = projectName;
|
|
9709
|
-
}
|
|
9710
|
-
return this.nameToId[projectName];
|
|
10152
|
+
function evaluateFilter(object, filter2) {
|
|
10153
|
+
const { path: path2, pattern } = filter2;
|
|
10154
|
+
const key = path2.reduce(
|
|
10155
|
+
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
10156
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
10157
|
+
acc[p]
|
|
10158
|
+
) : void 0,
|
|
10159
|
+
object
|
|
10160
|
+
);
|
|
10161
|
+
if (key === void 0) {
|
|
10162
|
+
return false;
|
|
9711
10163
|
}
|
|
9712
|
-
|
|
9713
|
-
|
|
9714
|
-
|
|
9715
|
-
|
|
9716
|
-
|
|
9717
|
-
|
|
9718
|
-
|
|
9719
|
-
|
|
9720
|
-
|
|
9721
|
-
|
|
9722
|
-
|
|
10164
|
+
return pattern.test(serializeJSONWithPlainString(key));
|
|
10165
|
+
}
|
|
10166
|
+
function scorerName(scorer, scorer_idx) {
|
|
10167
|
+
return scorer.name || `scorer_${scorer_idx}`;
|
|
10168
|
+
}
|
|
10169
|
+
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
|
|
10170
|
+
return await runEvaluatorInternal(
|
|
10171
|
+
experiment,
|
|
10172
|
+
evaluator,
|
|
10173
|
+
progressReporter,
|
|
10174
|
+
filters,
|
|
10175
|
+
stream,
|
|
10176
|
+
parameters,
|
|
10177
|
+
collectResults,
|
|
10178
|
+
enableCache
|
|
10179
|
+
);
|
|
10180
|
+
}
|
|
10181
|
+
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
10182
|
+
if (enableCache) {
|
|
10183
|
+
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
9723
10184
|
}
|
|
9724
|
-
|
|
9725
|
-
if (
|
|
9726
|
-
|
|
10185
|
+
try {
|
|
10186
|
+
if (typeof evaluator.data === "string") {
|
|
10187
|
+
throw new Error("Unimplemented: string data paths");
|
|
9727
10188
|
}
|
|
9728
|
-
|
|
9729
|
-
|
|
9730
|
-
}
|
|
9731
|
-
|
|
9732
|
-
|
|
9733
|
-
|
|
9734
|
-
|
|
9735
|
-
|
|
9736
|
-
|
|
9737
|
-
|
|
9738
|
-
|
|
9739
|
-
|
|
9740
|
-
}),
|
|
9741
|
-
z10.instanceof(z10.ZodType)
|
|
9742
|
-
// For Zod schemas
|
|
9743
|
-
])
|
|
9744
|
-
);
|
|
9745
|
-
function validateParameters(parameters, parameterSchema) {
|
|
9746
|
-
return Object.fromEntries(
|
|
9747
|
-
Object.entries(parameterSchema).map(([name, schema]) => {
|
|
9748
|
-
const value = parameters[name];
|
|
9749
|
-
try {
|
|
9750
|
-
if ("type" in schema && schema.type === "prompt") {
|
|
9751
|
-
const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
|
|
9752
|
-
schema.default,
|
|
9753
|
-
schema.default.tools
|
|
9754
|
-
) : void 0;
|
|
9755
|
-
if (!promptData) {
|
|
9756
|
-
throw new Error(`Parameter '${name}' is required`);
|
|
9757
|
-
}
|
|
9758
|
-
return [name, Prompt2.fromPromptData(name, promptData)];
|
|
9759
|
-
} else {
|
|
9760
|
-
const schemaCasted = schema;
|
|
9761
|
-
return [name, schemaCasted.parse(value)];
|
|
9762
|
-
}
|
|
9763
|
-
} catch (e) {
|
|
9764
|
-
console.error("Error validating parameter", name, e);
|
|
9765
|
-
throw Error(
|
|
9766
|
-
`Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
|
|
9767
|
-
);
|
|
9768
|
-
}
|
|
9769
|
-
})
|
|
9770
|
-
);
|
|
9771
|
-
}
|
|
9772
|
-
|
|
9773
|
-
// src/framework.ts
|
|
9774
|
-
var EvalResultWithSummary = class {
|
|
9775
|
-
constructor(summary, results) {
|
|
9776
|
-
this.summary = summary;
|
|
9777
|
-
this.results = results;
|
|
9778
|
-
}
|
|
9779
|
-
/**
|
|
9780
|
-
* @deprecated Use `summary` instead.
|
|
9781
|
-
*/
|
|
9782
|
-
toString() {
|
|
9783
|
-
return JSON.stringify(this.summary);
|
|
9784
|
-
}
|
|
9785
|
-
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
9786
|
-
return `EvalResultWithSummary(summary="...", results=[...])`;
|
|
9787
|
-
}
|
|
9788
|
-
toJSON() {
|
|
9789
|
-
return {
|
|
9790
|
-
summary: this.summary,
|
|
9791
|
-
results: this.results
|
|
9792
|
-
};
|
|
9793
|
-
}
|
|
9794
|
-
};
|
|
9795
|
-
function makeEvalName(projectName, experimentName) {
|
|
9796
|
-
let out = projectName;
|
|
9797
|
-
if (experimentName) {
|
|
9798
|
-
out += ` [experimentName=${experimentName}]`;
|
|
9799
|
-
}
|
|
9800
|
-
return out;
|
|
9801
|
-
}
|
|
9802
|
-
function initExperiment(state, options = {}) {
|
|
9803
|
-
return init({
|
|
9804
|
-
state,
|
|
9805
|
-
...options,
|
|
9806
|
-
setCurrent: false
|
|
9807
|
-
});
|
|
9808
|
-
}
|
|
9809
|
-
function callEvaluatorData(data) {
|
|
9810
|
-
const dataResult = typeof data === "function" ? data() : data;
|
|
9811
|
-
let baseExperiment = void 0;
|
|
9812
|
-
if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
|
|
9813
|
-
baseExperiment = dataResult.name;
|
|
9814
|
-
}
|
|
9815
|
-
return {
|
|
9816
|
-
data: dataResult,
|
|
9817
|
-
baseExperiment
|
|
9818
|
-
};
|
|
9819
|
-
}
|
|
9820
|
-
function isAsyncIterable2(value) {
|
|
9821
|
-
return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
|
|
9822
|
-
}
|
|
9823
|
-
function isIterable(value) {
|
|
9824
|
-
return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
|
|
9825
|
-
}
|
|
9826
|
-
globalThis._evals = {
|
|
9827
|
-
functions: [],
|
|
9828
|
-
prompts: [],
|
|
9829
|
-
evaluators: {},
|
|
9830
|
-
reporters: {}
|
|
9831
|
-
};
|
|
9832
|
-
function _initializeSpanContext() {
|
|
9833
|
-
globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
|
|
9834
|
-
}
|
|
9835
|
-
async function Eval(name, evaluator, reporterOrOpts) {
|
|
9836
|
-
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
9837
|
-
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
9838
|
-
if (globalThis._evals.evaluators[evalName]) {
|
|
9839
|
-
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
9840
|
-
}
|
|
9841
|
-
if (globalThis._lazy_load) {
|
|
9842
|
-
globalThis._evals.evaluators[evalName] = {
|
|
9843
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
9844
|
-
evaluator: {
|
|
9845
|
-
evalName,
|
|
9846
|
-
projectName: name,
|
|
9847
|
-
...evaluator
|
|
9848
|
-
},
|
|
9849
|
-
reporter: options.reporter
|
|
9850
|
-
};
|
|
9851
|
-
_initializeSpanContext();
|
|
9852
|
-
return new EvalResultWithSummary(
|
|
9853
|
-
{
|
|
9854
|
-
scores: {},
|
|
9855
|
-
metrics: {},
|
|
9856
|
-
projectName: "",
|
|
9857
|
-
experimentName: ""
|
|
9858
|
-
},
|
|
9859
|
-
[]
|
|
9860
|
-
);
|
|
9861
|
-
}
|
|
9862
|
-
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
9863
|
-
const shouldCollectResults = options.returnResults ?? true;
|
|
9864
|
-
if (typeof options.reporter === "string") {
|
|
9865
|
-
throw new Error(
|
|
9866
|
-
"Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
|
|
9867
|
-
);
|
|
9868
|
-
}
|
|
9869
|
-
const resolvedReporter = options.reporter || defaultReporter;
|
|
9870
|
-
try {
|
|
9871
|
-
const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
|
|
9872
|
-
evaluator.data
|
|
9873
|
-
);
|
|
9874
|
-
const experiment = options.parent || options.noSendLogs ? null : initExperiment(evaluator.state, {
|
|
9875
|
-
...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
|
|
9876
|
-
experiment: evaluator.experimentName,
|
|
9877
|
-
description: evaluator.description,
|
|
9878
|
-
metadata: evaluator.metadata,
|
|
9879
|
-
isPublic: evaluator.isPublic,
|
|
9880
|
-
update: evaluator.update,
|
|
9881
|
-
baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
|
|
9882
|
-
baseExperimentId: evaluator.baseExperimentId,
|
|
9883
|
-
gitMetadataSettings: evaluator.gitMetadataSettings,
|
|
9884
|
-
repoInfo: evaluator.repoInfo,
|
|
9885
|
-
dataset: Dataset2.isDataset(data) ? data : void 0
|
|
9886
|
-
});
|
|
9887
|
-
if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
|
|
9888
|
-
await experiment._waitForId();
|
|
9889
|
-
}
|
|
9890
|
-
if (experiment && options.onStart) {
|
|
9891
|
-
const summary = await experiment.summarize({ summarizeScores: false });
|
|
9892
|
-
options.onStart(summary);
|
|
9893
|
-
}
|
|
9894
|
-
try {
|
|
9895
|
-
const evalDef = {
|
|
9896
|
-
evalName,
|
|
9897
|
-
projectName: name,
|
|
9898
|
-
...evaluator,
|
|
9899
|
-
data
|
|
9900
|
-
};
|
|
9901
|
-
const enableCache = options.enableCache ?? true;
|
|
9902
|
-
let ret;
|
|
9903
|
-
if (options.parent) {
|
|
9904
|
-
ret = await withParent(
|
|
9905
|
-
options.parent,
|
|
9906
|
-
() => runEvaluator(
|
|
9907
|
-
null,
|
|
9908
|
-
evalDef,
|
|
9909
|
-
progressReporter,
|
|
9910
|
-
[],
|
|
9911
|
-
options.stream,
|
|
9912
|
-
options.parameters,
|
|
9913
|
-
shouldCollectResults,
|
|
9914
|
-
enableCache
|
|
9915
|
-
),
|
|
9916
|
-
evaluator.state
|
|
9917
|
-
);
|
|
9918
|
-
} else {
|
|
9919
|
-
ret = await runEvaluator(
|
|
9920
|
-
experiment,
|
|
9921
|
-
evalDef,
|
|
9922
|
-
progressReporter,
|
|
9923
|
-
[],
|
|
9924
|
-
options.stream,
|
|
9925
|
-
options.parameters,
|
|
9926
|
-
shouldCollectResults,
|
|
9927
|
-
enableCache
|
|
9928
|
-
);
|
|
9929
|
-
}
|
|
9930
|
-
progressReporter.stop();
|
|
9931
|
-
resolvedReporter.reportEval(evalDef, ret, {
|
|
9932
|
-
verbose: true,
|
|
9933
|
-
jsonl: false
|
|
9934
|
-
});
|
|
9935
|
-
return ret;
|
|
9936
|
-
} finally {
|
|
9937
|
-
if (experiment) {
|
|
9938
|
-
await experiment.flush().catch(console.error);
|
|
9939
|
-
} else if (options.parent) {
|
|
9940
|
-
await flush().catch(console.error);
|
|
9941
|
-
}
|
|
9942
|
-
}
|
|
9943
|
-
} finally {
|
|
9944
|
-
progressReporter.stop();
|
|
9945
|
-
}
|
|
9946
|
-
}
|
|
9947
|
-
function serializeJSONWithPlainString(v) {
|
|
9948
|
-
if (typeof v === "string") {
|
|
9949
|
-
return v;
|
|
9950
|
-
} else {
|
|
9951
|
-
return JSON.stringify(v);
|
|
9952
|
-
}
|
|
9953
|
-
}
|
|
9954
|
-
function evaluateFilter(object, filter2) {
|
|
9955
|
-
const { path: path2, pattern } = filter2;
|
|
9956
|
-
const key = path2.reduce(
|
|
9957
|
-
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
9958
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
9959
|
-
acc[p]
|
|
9960
|
-
) : void 0,
|
|
9961
|
-
object
|
|
9962
|
-
);
|
|
9963
|
-
if (key === void 0) {
|
|
9964
|
-
return false;
|
|
9965
|
-
}
|
|
9966
|
-
return pattern.test(serializeJSONWithPlainString(key));
|
|
9967
|
-
}
|
|
9968
|
-
function scorerName(scorer, scorer_idx) {
|
|
9969
|
-
return scorer.name || `scorer_${scorer_idx}`;
|
|
9970
|
-
}
|
|
9971
|
-
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
|
|
9972
|
-
return await runEvaluatorInternal(
|
|
9973
|
-
experiment,
|
|
9974
|
-
evaluator,
|
|
9975
|
-
progressReporter,
|
|
9976
|
-
filters,
|
|
9977
|
-
stream,
|
|
9978
|
-
parameters,
|
|
9979
|
-
collectResults,
|
|
9980
|
-
enableCache
|
|
9981
|
-
);
|
|
9982
|
-
}
|
|
9983
|
-
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
9984
|
-
if (enableCache) {
|
|
9985
|
-
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
9986
|
-
}
|
|
9987
|
-
try {
|
|
9988
|
-
if (typeof evaluator.data === "string") {
|
|
9989
|
-
throw new Error("Unimplemented: string data paths");
|
|
9990
|
-
}
|
|
9991
|
-
let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
|
|
9992
|
-
parameters = validateParameters(
|
|
9993
|
-
parameters ?? {},
|
|
9994
|
-
evaluator.parameters ?? {}
|
|
9995
|
-
);
|
|
9996
|
-
if ("_type" in dataResult) {
|
|
9997
|
-
if (dataResult._type !== "BaseExperiment") {
|
|
9998
|
-
throw new Error("Invalid _type");
|
|
9999
|
-
}
|
|
10000
|
-
if (!experiment) {
|
|
10001
|
-
throw new Error(
|
|
10002
|
-
"Cannot use BaseExperiment() without connecting to Braintrust (you most likely set --no-send-logs)"
|
|
10189
|
+
let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
|
|
10190
|
+
parameters = await validateParameters(
|
|
10191
|
+
parameters ?? {},
|
|
10192
|
+
evaluator.parameters
|
|
10193
|
+
);
|
|
10194
|
+
if ("_type" in dataResult) {
|
|
10195
|
+
if (dataResult._type !== "BaseExperiment") {
|
|
10196
|
+
throw new Error("Invalid _type");
|
|
10197
|
+
}
|
|
10198
|
+
if (!experiment) {
|
|
10199
|
+
throw new Error(
|
|
10200
|
+
"Cannot use BaseExperiment() without connecting to Braintrust (you most likely set --no-send-logs)"
|
|
10003
10201
|
);
|
|
10004
10202
|
}
|
|
10005
10203
|
let name = dataResult.name;
|
|
@@ -10150,6 +10348,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
10150
10348
|
} else {
|
|
10151
10349
|
rootSpan.log({ output, metadata, expected });
|
|
10152
10350
|
}
|
|
10351
|
+
if (evaluator.flushBeforeScoring) {
|
|
10352
|
+
await rootSpan.flush();
|
|
10353
|
+
}
|
|
10153
10354
|
const scoringArgs = {
|
|
10154
10355
|
input: datum.input,
|
|
10155
10356
|
expected: "expected" in datum ? datum.expected : void 0,
|
|
@@ -10378,427 +10579,918 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
10378
10579
|
collectedResults.length = 0;
|
|
10379
10580
|
}
|
|
10380
10581
|
}
|
|
10381
|
-
const summary = experiment ? await experiment.summarize({
|
|
10382
|
-
summarizeScores: evaluator.summarizeScores
|
|
10383
|
-
}) : buildLocalSummary(
|
|
10384
|
-
evaluator,
|
|
10385
|
-
collectResults ? collectedResults : [],
|
|
10386
|
-
localScoreAccumulator ?? void 0
|
|
10387
|
-
);
|
|
10388
|
-
return new EvalResultWithSummary(
|
|
10389
|
-
summary,
|
|
10390
|
-
collectResults ? collectedResults : []
|
|
10391
|
-
);
|
|
10392
|
-
} finally {
|
|
10393
|
-
if (enableCache) {
|
|
10394
|
-
const spanCache = (evaluator.state ?? _internalGetGlobalState())?.spanCache;
|
|
10395
|
-
spanCache?.dispose();
|
|
10396
|
-
spanCache?.stop();
|
|
10582
|
+
const summary = experiment ? await experiment.summarize({
|
|
10583
|
+
summarizeScores: evaluator.summarizeScores
|
|
10584
|
+
}) : buildLocalSummary(
|
|
10585
|
+
evaluator,
|
|
10586
|
+
collectResults ? collectedResults : [],
|
|
10587
|
+
localScoreAccumulator ?? void 0
|
|
10588
|
+
);
|
|
10589
|
+
return new EvalResultWithSummary(
|
|
10590
|
+
summary,
|
|
10591
|
+
collectResults ? collectedResults : []
|
|
10592
|
+
);
|
|
10593
|
+
} finally {
|
|
10594
|
+
if (enableCache) {
|
|
10595
|
+
const spanCache = (evaluator.state ?? _internalGetGlobalState())?.spanCache;
|
|
10596
|
+
spanCache?.dispose();
|
|
10597
|
+
spanCache?.stop();
|
|
10598
|
+
}
|
|
10599
|
+
}
|
|
10600
|
+
}
|
|
10601
|
+
var warning = (text) => `Warning: ${text}`;
|
|
10602
|
+
function logError2(e, verbose) {
|
|
10603
|
+
if (!verbose) {
|
|
10604
|
+
console.error(`${e}`);
|
|
10605
|
+
} else {
|
|
10606
|
+
console.error(e);
|
|
10607
|
+
}
|
|
10608
|
+
}
|
|
10609
|
+
function accumulateScores(accumulator, scores) {
|
|
10610
|
+
for (const [name, score] of Object.entries(scores)) {
|
|
10611
|
+
if (score === null || score === void 0) {
|
|
10612
|
+
continue;
|
|
10613
|
+
}
|
|
10614
|
+
const existing = accumulator[name] ?? { total: 0, count: 0 };
|
|
10615
|
+
accumulator[name] = {
|
|
10616
|
+
total: existing.total + score,
|
|
10617
|
+
count: existing.count + 1
|
|
10618
|
+
};
|
|
10619
|
+
}
|
|
10620
|
+
}
|
|
10621
|
+
function ensureScoreAccumulator(results) {
|
|
10622
|
+
const accumulator = {};
|
|
10623
|
+
for (const result of results) {
|
|
10624
|
+
accumulateScores(accumulator, result.scores);
|
|
10625
|
+
}
|
|
10626
|
+
return accumulator;
|
|
10627
|
+
}
|
|
10628
|
+
function buildLocalSummary(evaluator, results, precomputedScores) {
|
|
10629
|
+
const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
|
|
10630
|
+
return {
|
|
10631
|
+
projectName: evaluator.projectName,
|
|
10632
|
+
experimentName: evaluator.evalName,
|
|
10633
|
+
scores: Object.fromEntries(
|
|
10634
|
+
Object.entries(scoresByName).map(([name, { total, count }]) => [
|
|
10635
|
+
name,
|
|
10636
|
+
{
|
|
10637
|
+
name,
|
|
10638
|
+
score: count === 0 ? 0 : total / count,
|
|
10639
|
+
improvements: 0,
|
|
10640
|
+
regressions: 0
|
|
10641
|
+
}
|
|
10642
|
+
])
|
|
10643
|
+
)
|
|
10644
|
+
};
|
|
10645
|
+
}
|
|
10646
|
+
function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
|
|
10647
|
+
if (failingResults.length > 0) {
|
|
10648
|
+
console.error(
|
|
10649
|
+
warning(
|
|
10650
|
+
`Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
|
|
10651
|
+
)
|
|
10652
|
+
);
|
|
10653
|
+
if (jsonl) {
|
|
10654
|
+
console.log(
|
|
10655
|
+
JSON.stringify({
|
|
10656
|
+
evaluatorName: evaluator.evalName,
|
|
10657
|
+
errors: failingResults.map(
|
|
10658
|
+
(r) => `${r.error instanceof Error ? r.error.stack : r.error}`
|
|
10659
|
+
)
|
|
10660
|
+
})
|
|
10661
|
+
);
|
|
10662
|
+
} else {
|
|
10663
|
+
for (const result of failingResults) {
|
|
10664
|
+
logError2(result.error, verbose);
|
|
10665
|
+
}
|
|
10666
|
+
}
|
|
10667
|
+
if (!verbose && !jsonl) {
|
|
10668
|
+
console.error(warning("Add --verbose to see full stack traces."));
|
|
10669
|
+
}
|
|
10670
|
+
}
|
|
10671
|
+
}
|
|
10672
|
+
var defaultReporter = {
|
|
10673
|
+
name: "Braintrust default reporter",
|
|
10674
|
+
async reportEval(evaluator, result, { verbose, jsonl }) {
|
|
10675
|
+
const { results, summary } = result;
|
|
10676
|
+
const failingResults = results.filter(
|
|
10677
|
+
(r) => r.error !== void 0
|
|
10678
|
+
);
|
|
10679
|
+
if (failingResults.length > 0) {
|
|
10680
|
+
reportFailures(evaluator, failingResults, { verbose, jsonl });
|
|
10681
|
+
}
|
|
10682
|
+
if (jsonl) {
|
|
10683
|
+
isomorph_default.writeln(JSON.stringify(summary));
|
|
10684
|
+
} else {
|
|
10685
|
+
isomorph_default.writeln("Experiment summary");
|
|
10686
|
+
isomorph_default.writeln("==================");
|
|
10687
|
+
if (summary.comparisonExperimentName) {
|
|
10688
|
+
isomorph_default.writeln(
|
|
10689
|
+
`${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
|
|
10690
|
+
);
|
|
10691
|
+
isomorph_default.writeln("");
|
|
10692
|
+
}
|
|
10693
|
+
const hasScores = Object.keys(summary.scores).length > 0;
|
|
10694
|
+
const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
|
|
10695
|
+
const hasComparison = !!summary.comparisonExperimentName;
|
|
10696
|
+
if (hasScores || hasMetrics) {
|
|
10697
|
+
if (hasComparison) {
|
|
10698
|
+
isomorph_default.writeln(
|
|
10699
|
+
"Name Value Change Improvements Regressions"
|
|
10700
|
+
);
|
|
10701
|
+
isomorph_default.writeln(
|
|
10702
|
+
"----------------------------------------------------------------"
|
|
10703
|
+
);
|
|
10704
|
+
}
|
|
10705
|
+
for (const score of Object.values(summary.scores)) {
|
|
10706
|
+
const scorePercent = (score.score * 100).toFixed(2);
|
|
10707
|
+
const scoreValue = `${scorePercent}%`;
|
|
10708
|
+
if (hasComparison) {
|
|
10709
|
+
let diffString = "-";
|
|
10710
|
+
if (!isEmpty2(score.diff)) {
|
|
10711
|
+
const diffPercent = (score.diff * 100).toFixed(2);
|
|
10712
|
+
const diffSign = score.diff > 0 ? "+" : "";
|
|
10713
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
10714
|
+
}
|
|
10715
|
+
const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
|
|
10716
|
+
const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
|
|
10717
|
+
isomorph_default.writeln(
|
|
10718
|
+
`${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
10719
|
+
);
|
|
10720
|
+
} else {
|
|
10721
|
+
isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
|
|
10722
|
+
}
|
|
10723
|
+
}
|
|
10724
|
+
for (const metric of Object.values(summary.metrics ?? {})) {
|
|
10725
|
+
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
10726
|
+
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
10727
|
+
const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
|
|
10728
|
+
if (hasComparison) {
|
|
10729
|
+
let diffString = "-";
|
|
10730
|
+
if (!isEmpty2(metric.diff)) {
|
|
10731
|
+
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
10732
|
+
const diffSign = metric.diff > 0 ? "+" : "";
|
|
10733
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
10734
|
+
}
|
|
10735
|
+
const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
|
|
10736
|
+
const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
|
|
10737
|
+
isomorph_default.writeln(
|
|
10738
|
+
`${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
10739
|
+
);
|
|
10740
|
+
} else {
|
|
10741
|
+
isomorph_default.writeln(
|
|
10742
|
+
`${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
|
|
10743
|
+
);
|
|
10744
|
+
}
|
|
10745
|
+
}
|
|
10746
|
+
}
|
|
10747
|
+
if (summary.experimentUrl) {
|
|
10748
|
+
isomorph_default.writeln("");
|
|
10749
|
+
isomorph_default.writeln(`View results for ${summary.experimentName}`);
|
|
10750
|
+
isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
|
|
10751
|
+
}
|
|
10752
|
+
}
|
|
10753
|
+
isomorph_default.writeln("");
|
|
10754
|
+
return failingResults.length === 0;
|
|
10755
|
+
},
|
|
10756
|
+
async reportRun(evalReports) {
|
|
10757
|
+
return evalReports.every((r) => r);
|
|
10758
|
+
}
|
|
10759
|
+
};
|
|
10760
|
+
|
|
10761
|
+
// dev/errorHandler.ts
|
|
10762
|
+
import { z as z11 } from "zod/v3";
|
|
10763
|
+
var errorHandler = (err, req, res, next) => {
|
|
10764
|
+
if ("status" in err) {
|
|
10765
|
+
res.status(err.status).json({
|
|
10766
|
+
error: {
|
|
10767
|
+
message: err.message,
|
|
10768
|
+
status: err.status
|
|
10769
|
+
}
|
|
10770
|
+
});
|
|
10771
|
+
return;
|
|
10772
|
+
}
|
|
10773
|
+
if (err instanceof z11.ZodError) {
|
|
10774
|
+
res.status(400).json({
|
|
10775
|
+
error: {
|
|
10776
|
+
message: "Invalid request",
|
|
10777
|
+
errors: err.errors
|
|
10778
|
+
}
|
|
10779
|
+
});
|
|
10780
|
+
return;
|
|
10781
|
+
}
|
|
10782
|
+
console.error("Internal server error", err);
|
|
10783
|
+
res.status(500).json({
|
|
10784
|
+
error: {
|
|
10785
|
+
message: "Internal server error",
|
|
10786
|
+
status: 500
|
|
10787
|
+
}
|
|
10788
|
+
});
|
|
10789
|
+
};
|
|
10790
|
+
|
|
10791
|
+
// dev/authorize.ts
|
|
10792
|
+
import createError from "http-errors";
|
|
10793
|
+
function authorizeRequest(req, res, next) {
|
|
10794
|
+
try {
|
|
10795
|
+
const ctx = {
|
|
10796
|
+
appOrigin: extractAllowedOrigin(req.headers[ORIGIN_HEADER]),
|
|
10797
|
+
token: void 0,
|
|
10798
|
+
state: void 0,
|
|
10799
|
+
projectId: parseHeader(req.headers, PROJECT_ID_HEADER)
|
|
10800
|
+
};
|
|
10801
|
+
if (req.headers.authorization || req.headers[BRAINTRUST_AUTH_TOKEN_HEADER]) {
|
|
10802
|
+
const tokenText = parseBraintrustAuthHeader(req.headers);
|
|
10803
|
+
if (!tokenText) {
|
|
10804
|
+
return next(createError(400, "Invalid authorization token format"));
|
|
10805
|
+
}
|
|
10806
|
+
ctx.token = tokenText.toLowerCase() === "null" ? void 0 : tokenText;
|
|
10807
|
+
}
|
|
10808
|
+
req.ctx = ctx;
|
|
10809
|
+
next();
|
|
10810
|
+
} catch (e) {
|
|
10811
|
+
next(e);
|
|
10812
|
+
}
|
|
10813
|
+
}
|
|
10814
|
+
var loginCache = new LRUCache({
|
|
10815
|
+
max: 32
|
|
10816
|
+
// TODO: Make this configurable
|
|
10817
|
+
});
|
|
10818
|
+
async function cachedLogin(options) {
|
|
10819
|
+
const key = JSON.stringify(options);
|
|
10820
|
+
const cached = loginCache.get(key);
|
|
10821
|
+
if (cached) {
|
|
10822
|
+
return cached;
|
|
10823
|
+
}
|
|
10824
|
+
const state = await loginToState(options);
|
|
10825
|
+
loginCache.set(key, state);
|
|
10826
|
+
return state;
|
|
10827
|
+
}
|
|
10828
|
+
function makeCheckAuthorized(allowedOrgName) {
|
|
10829
|
+
return async (req, _res, next) => {
|
|
10830
|
+
if (!req.ctx?.token) {
|
|
10831
|
+
return next(createError(401, "Unauthorized"));
|
|
10832
|
+
}
|
|
10833
|
+
try {
|
|
10834
|
+
const orgName = parseHeader(req.headers, "x-bt-org-name");
|
|
10835
|
+
if (!orgName) {
|
|
10836
|
+
return next(createError(400, "Missing x-bt-org-name header"));
|
|
10837
|
+
}
|
|
10838
|
+
if (allowedOrgName && allowedOrgName !== orgName) {
|
|
10839
|
+
const errorMessage = `Org '${orgName}' is not allowed. Only org '${allowedOrgName}' is allowed.`;
|
|
10840
|
+
return next(createError(403, errorMessage));
|
|
10841
|
+
}
|
|
10842
|
+
const state = await cachedLogin({
|
|
10843
|
+
apiKey: req.ctx?.token,
|
|
10844
|
+
orgName
|
|
10845
|
+
});
|
|
10846
|
+
req.ctx.state = state;
|
|
10847
|
+
next();
|
|
10848
|
+
} catch (e) {
|
|
10849
|
+
console.error("Authorization error:", e);
|
|
10850
|
+
return next(createError(401, "Unauthorized"));
|
|
10851
|
+
}
|
|
10852
|
+
};
|
|
10853
|
+
}
|
|
10854
|
+
function parseBraintrustAuthHeader(headers) {
|
|
10855
|
+
const tokenString = parseHeader(headers, BRAINTRUST_AUTH_TOKEN_HEADER);
|
|
10856
|
+
return tokenString ?? parseAuthHeader(headers) ?? void 0;
|
|
10857
|
+
}
|
|
10858
|
+
function parseHeader(headers, headerName) {
|
|
10859
|
+
const token = headers[headerName];
|
|
10860
|
+
let tokenString;
|
|
10861
|
+
if (typeof token === "string") {
|
|
10862
|
+
tokenString = token;
|
|
10863
|
+
} else if (Array.isArray(token) && token.length > 0) {
|
|
10864
|
+
tokenString = token[0];
|
|
10865
|
+
}
|
|
10866
|
+
return tokenString;
|
|
10867
|
+
}
|
|
10868
|
+
function checkOrigin(requestOrigin, callback) {
|
|
10869
|
+
if (!requestOrigin) {
|
|
10870
|
+
return callback(null, true);
|
|
10871
|
+
}
|
|
10872
|
+
for (const origin of WHITELISTED_ORIGINS || []) {
|
|
10873
|
+
if (origin instanceof RegExp && origin.test(requestOrigin) || origin === requestOrigin) {
|
|
10874
|
+
return callback(null, requestOrigin);
|
|
10875
|
+
}
|
|
10876
|
+
}
|
|
10877
|
+
return callback(null, false);
|
|
10878
|
+
}
|
|
10879
|
+
var BRAINTRUST_AUTH_TOKEN_HEADER = "x-bt-auth-token";
|
|
10880
|
+
var ORIGIN_HEADER = "origin";
|
|
10881
|
+
var PROJECT_ID_HEADER = "x-bt-project-id";
|
|
10882
|
+
function extractAllowedOrigin(originHeader) {
|
|
10883
|
+
let allowedOrigin = MAIN_ORIGIN;
|
|
10884
|
+
checkOrigin(originHeader, (err, origin) => {
|
|
10885
|
+
if (!err && originHeader && origin) {
|
|
10886
|
+
allowedOrigin = originHeader;
|
|
10887
|
+
}
|
|
10888
|
+
});
|
|
10889
|
+
return allowedOrigin;
|
|
10890
|
+
}
|
|
10891
|
+
var MAIN_ORIGIN = "https://www.braintrust.dev";
|
|
10892
|
+
var WHITELISTED_ORIGINS = [
|
|
10893
|
+
MAIN_ORIGIN,
|
|
10894
|
+
"https://www.braintrustdata.com",
|
|
10895
|
+
new RegExp("https://.*.preview.braintrust.dev")
|
|
10896
|
+
].concat(
|
|
10897
|
+
process.env.WHITELISTED_ORIGIN ? [process.env.WHITELISTED_ORIGIN] : []
|
|
10898
|
+
).concat(
|
|
10899
|
+
process.env.BRAINTRUST_APP_URL ? [process.env.BRAINTRUST_APP_URL] : []
|
|
10900
|
+
);
|
|
10901
|
+
function parseAuthHeader(headers) {
|
|
10902
|
+
const authHeader = headers["authorization"];
|
|
10903
|
+
let authValue = null;
|
|
10904
|
+
if (Array.isArray(authHeader)) {
|
|
10905
|
+
authValue = authHeader[authHeader.length - 1];
|
|
10906
|
+
} else {
|
|
10907
|
+
authValue = authHeader;
|
|
10908
|
+
}
|
|
10909
|
+
if (!authValue) {
|
|
10910
|
+
return null;
|
|
10911
|
+
}
|
|
10912
|
+
const parts = authValue.split(" ");
|
|
10913
|
+
if (parts.length !== 2) {
|
|
10914
|
+
return null;
|
|
10915
|
+
}
|
|
10916
|
+
return parts[1];
|
|
10917
|
+
}
|
|
10918
|
+
var baseAllowedHeaders = [
|
|
10919
|
+
"Content-Type",
|
|
10920
|
+
"X-Amz-Date",
|
|
10921
|
+
"Authorization",
|
|
10922
|
+
"X-Api-Key",
|
|
10923
|
+
"X-Amz-Security-Token",
|
|
10924
|
+
"x-bt-auth-token",
|
|
10925
|
+
"x-bt-parent",
|
|
10926
|
+
// These are eval-specific
|
|
10927
|
+
"x-bt-org-name",
|
|
10928
|
+
"x-bt-project-id",
|
|
10929
|
+
"x-bt-stream-fmt",
|
|
10930
|
+
"x-bt-use-cache",
|
|
10931
|
+
"x-stainless-os",
|
|
10932
|
+
"x-stainless-lang",
|
|
10933
|
+
"x-stainless-package-version",
|
|
10934
|
+
"x-stainless-runtime",
|
|
10935
|
+
"x-stainless-runtime-version",
|
|
10936
|
+
"x-stainless-arch"
|
|
10937
|
+
];
|
|
10938
|
+
|
|
10939
|
+
// dev/stream.ts
|
|
10940
|
+
function serializeSSEEvent(event) {
|
|
10941
|
+
return Object.entries(event).filter(([_key, value]) => value !== void 0).map(([key, value]) => `${key}: ${value}`).join("\n") + "\n\n";
|
|
10942
|
+
}
|
|
10943
|
+
|
|
10944
|
+
// dev/types.ts
|
|
10945
|
+
import { z as z12 } from "zod/v3";
|
|
10946
|
+
var evalBodySchema = z12.object({
|
|
10947
|
+
name: z12.string(),
|
|
10948
|
+
parameters: z12.record(z12.string(), z12.unknown()).nullish(),
|
|
10949
|
+
data: RunEval.shape.data,
|
|
10950
|
+
scores: z12.array(
|
|
10951
|
+
z12.object({
|
|
10952
|
+
function_id: FunctionId,
|
|
10953
|
+
name: z12.string()
|
|
10954
|
+
})
|
|
10955
|
+
).nullish(),
|
|
10956
|
+
experiment_name: z12.string().nullish(),
|
|
10957
|
+
project_id: z12.string().nullish(),
|
|
10958
|
+
parent: InvokeParent.optional(),
|
|
10959
|
+
stream: z12.boolean().optional()
|
|
10960
|
+
});
|
|
10961
|
+
var staticParametersSchema = z12.record(
|
|
10962
|
+
z12.string(),
|
|
10963
|
+
z12.union([
|
|
10964
|
+
z12.object({
|
|
10965
|
+
type: z12.literal("prompt"),
|
|
10966
|
+
default: PromptData.optional(),
|
|
10967
|
+
description: z12.string().optional()
|
|
10968
|
+
}),
|
|
10969
|
+
z12.object({
|
|
10970
|
+
type: z12.literal("data"),
|
|
10971
|
+
schema: z12.record(z12.unknown()),
|
|
10972
|
+
default: z12.unknown().optional(),
|
|
10973
|
+
description: z12.string().optional()
|
|
10974
|
+
})
|
|
10975
|
+
])
|
|
10976
|
+
);
|
|
10977
|
+
var parametersSchema = z12.object({
|
|
10978
|
+
type: z12.literal("object"),
|
|
10979
|
+
properties: z12.record(z12.string(), z12.record(z12.unknown())),
|
|
10980
|
+
required: z12.array(z12.string()).optional(),
|
|
10981
|
+
additionalProperties: z12.boolean().optional()
|
|
10982
|
+
});
|
|
10983
|
+
var parametersSourceSchema = z12.object({
|
|
10984
|
+
parametersId: z12.string().optional(),
|
|
10985
|
+
slug: z12.string(),
|
|
10986
|
+
name: z12.string(),
|
|
10987
|
+
projectId: z12.string().optional(),
|
|
10988
|
+
version: z12.string().optional()
|
|
10989
|
+
});
|
|
10990
|
+
var parametersContainerSchema = z12.object({
|
|
10991
|
+
type: z12.literal("braintrust.parameters"),
|
|
10992
|
+
schema: parametersSchema,
|
|
10993
|
+
source: parametersSourceSchema
|
|
10994
|
+
});
|
|
10995
|
+
var staticParametersContainerSchema = z12.object({
|
|
10996
|
+
type: z12.literal("braintrust.staticParameters"),
|
|
10997
|
+
schema: staticParametersSchema,
|
|
10998
|
+
source: z12.null()
|
|
10999
|
+
});
|
|
11000
|
+
var serializedParametersContainerSchema = z12.union([
|
|
11001
|
+
parametersContainerSchema,
|
|
11002
|
+
staticParametersContainerSchema,
|
|
11003
|
+
// keeping this type here since old versions of the SDK will still pass the unwrapped schema and we need to handle this in the app
|
|
11004
|
+
staticParametersSchema
|
|
11005
|
+
]);
|
|
11006
|
+
var evaluatorDefinitionSchema = z12.object({
|
|
11007
|
+
parameters: serializedParametersContainerSchema.optional(),
|
|
11008
|
+
scores: z12.array(z12.object({ name: z12.string() })).optional()
|
|
11009
|
+
});
|
|
11010
|
+
var evaluatorDefinitionsSchema = z12.record(
|
|
11011
|
+
z12.string(),
|
|
11012
|
+
evaluatorDefinitionSchema
|
|
11013
|
+
);
|
|
11014
|
+
|
|
11015
|
+
// dev/server.ts
|
|
11016
|
+
import { z as z14 } from "zod/v3";
|
|
11017
|
+
import { ValidationError } from "ajv";
|
|
11018
|
+
|
|
11019
|
+
// src/framework2.ts
|
|
11020
|
+
import { z as z13 } from "zod/v3";
|
|
11021
|
+
|
|
11022
|
+
// src/zod/utils.ts
|
|
11023
|
+
import { zodToJsonSchema as zodToJsonSchemaV3 } from "zod-to-json-schema";
|
|
11024
|
+
import * as z42 from "zod/v4";
|
|
11025
|
+
function isZodV4(zodObject) {
|
|
11026
|
+
return typeof zodObject === "object" && zodObject !== null && "_zod" in zodObject && zodObject._zod !== void 0;
|
|
11027
|
+
}
|
|
11028
|
+
function zodToJsonSchema(schema) {
|
|
11029
|
+
if (isZodV4(schema)) {
|
|
11030
|
+
return z42.toJSONSchema(schema, {
|
|
11031
|
+
target: "draft-7"
|
|
11032
|
+
});
|
|
11033
|
+
}
|
|
11034
|
+
return zodToJsonSchemaV3(schema);
|
|
11035
|
+
}
|
|
11036
|
+
|
|
11037
|
+
// src/framework2.ts
|
|
11038
|
+
var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
|
|
11039
|
+
var ProjectBuilder = class {
|
|
11040
|
+
create(opts) {
|
|
11041
|
+
return new Project2(opts);
|
|
11042
|
+
}
|
|
11043
|
+
};
|
|
11044
|
+
var projects = new ProjectBuilder();
|
|
11045
|
+
var Project2 = class {
|
|
11046
|
+
name;
|
|
11047
|
+
id;
|
|
11048
|
+
tools;
|
|
11049
|
+
prompts;
|
|
11050
|
+
parameters;
|
|
11051
|
+
scorers;
|
|
11052
|
+
_publishableCodeFunctions = [];
|
|
11053
|
+
_publishablePrompts = [];
|
|
11054
|
+
_publishableParameters = [];
|
|
11055
|
+
constructor(args) {
|
|
11056
|
+
_initializeSpanContext();
|
|
11057
|
+
this.name = "name" in args ? args.name : void 0;
|
|
11058
|
+
this.id = "id" in args ? args.id : void 0;
|
|
11059
|
+
this.tools = new ToolBuilder(this);
|
|
11060
|
+
this.prompts = new PromptBuilder(this);
|
|
11061
|
+
this.parameters = new ParametersBuilder(this);
|
|
11062
|
+
this.scorers = new ScorerBuilder(this);
|
|
11063
|
+
}
|
|
11064
|
+
addPrompt(prompt) {
|
|
11065
|
+
this._publishablePrompts.push(prompt);
|
|
11066
|
+
if (globalThis._lazy_load) {
|
|
11067
|
+
globalThis._evals.prompts.push(prompt);
|
|
11068
|
+
}
|
|
11069
|
+
}
|
|
11070
|
+
addParameters(parameters) {
|
|
11071
|
+
this._publishableParameters.push(parameters);
|
|
11072
|
+
if (globalThis._lazy_load) {
|
|
11073
|
+
if (globalThis._evals.parameters == null)
|
|
11074
|
+
globalThis._evals.parameters = [];
|
|
11075
|
+
globalThis._evals.parameters.push(parameters);
|
|
10397
11076
|
}
|
|
10398
11077
|
}
|
|
10399
|
-
|
|
10400
|
-
|
|
10401
|
-
|
|
10402
|
-
|
|
10403
|
-
|
|
10404
|
-
} else {
|
|
10405
|
-
console.error(e);
|
|
11078
|
+
addCodeFunction(fn) {
|
|
11079
|
+
this._publishableCodeFunctions.push(fn);
|
|
11080
|
+
if (globalThis._lazy_load) {
|
|
11081
|
+
globalThis._evals.functions.push(fn);
|
|
11082
|
+
}
|
|
10406
11083
|
}
|
|
10407
|
-
|
|
10408
|
-
|
|
10409
|
-
|
|
10410
|
-
|
|
10411
|
-
continue;
|
|
11084
|
+
async publish() {
|
|
11085
|
+
if (globalThis._lazy_load) {
|
|
11086
|
+
console.warn("publish() is a no-op when running `braintrust push`.");
|
|
11087
|
+
return;
|
|
10412
11088
|
}
|
|
10413
|
-
|
|
10414
|
-
|
|
10415
|
-
|
|
10416
|
-
|
|
10417
|
-
|
|
11089
|
+
await login();
|
|
11090
|
+
const projectMap = new ProjectNameIdMap();
|
|
11091
|
+
const functionDefinitions = [];
|
|
11092
|
+
if (this._publishableCodeFunctions.length > 0) {
|
|
11093
|
+
console.warn(
|
|
11094
|
+
"Code functions cannot be published directly. Use `braintrust push` instead."
|
|
11095
|
+
);
|
|
11096
|
+
}
|
|
11097
|
+
if (this._publishablePrompts.length > 0) {
|
|
11098
|
+
for (const prompt of this._publishablePrompts) {
|
|
11099
|
+
const functionDefinition = await prompt.toFunctionDefinition(projectMap);
|
|
11100
|
+
functionDefinitions.push(functionDefinition);
|
|
11101
|
+
}
|
|
11102
|
+
}
|
|
11103
|
+
await _internalGetGlobalState().apiConn().post_json("insert-functions", {
|
|
11104
|
+
functions: functionDefinitions
|
|
11105
|
+
});
|
|
10418
11106
|
}
|
|
10419
|
-
}
|
|
10420
|
-
|
|
10421
|
-
|
|
10422
|
-
|
|
10423
|
-
accumulateScores(accumulator, result.scores);
|
|
11107
|
+
};
|
|
11108
|
+
var ToolBuilder = class {
|
|
11109
|
+
constructor(project) {
|
|
11110
|
+
this.project = project;
|
|
10424
11111
|
}
|
|
10425
|
-
|
|
10426
|
-
|
|
10427
|
-
|
|
10428
|
-
|
|
10429
|
-
|
|
10430
|
-
|
|
10431
|
-
|
|
10432
|
-
|
|
10433
|
-
|
|
10434
|
-
|
|
10435
|
-
|
|
10436
|
-
|
|
10437
|
-
|
|
10438
|
-
|
|
10439
|
-
|
|
11112
|
+
taskCounter = 0;
|
|
11113
|
+
// This type definition is just a catch all so that the implementation can be
|
|
11114
|
+
// less specific than the two more specific declarations above.
|
|
11115
|
+
create(opts) {
|
|
11116
|
+
this.taskCounter++;
|
|
11117
|
+
opts = opts ?? {};
|
|
11118
|
+
const { handler, name, slug, parameters, returns, ...rest } = opts;
|
|
11119
|
+
let resolvedName = name ?? handler.name;
|
|
11120
|
+
if (resolvedName.trim().length === 0) {
|
|
11121
|
+
resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
11122
|
+
}
|
|
11123
|
+
const tool = new CodeFunction(this.project, {
|
|
11124
|
+
handler,
|
|
11125
|
+
name: resolvedName,
|
|
11126
|
+
slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
|
|
11127
|
+
type: "tool",
|
|
11128
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
11129
|
+
parameters,
|
|
11130
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
11131
|
+
returns,
|
|
11132
|
+
...rest
|
|
11133
|
+
});
|
|
11134
|
+
this.project.addCodeFunction(tool);
|
|
11135
|
+
return tool;
|
|
11136
|
+
}
|
|
11137
|
+
};
|
|
11138
|
+
var ScorerBuilder = class {
|
|
11139
|
+
constructor(project) {
|
|
11140
|
+
this.project = project;
|
|
11141
|
+
}
|
|
11142
|
+
taskCounter = 0;
|
|
11143
|
+
create(opts) {
|
|
11144
|
+
this.taskCounter++;
|
|
11145
|
+
let resolvedName = opts.name;
|
|
11146
|
+
if (!resolvedName && "handler" in opts) {
|
|
11147
|
+
resolvedName = opts.handler.name;
|
|
11148
|
+
}
|
|
11149
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
11150
|
+
resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
11151
|
+
}
|
|
11152
|
+
const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
|
|
11153
|
+
if ("handler" in opts) {
|
|
11154
|
+
const scorer = new CodeFunction(this.project, {
|
|
11155
|
+
...opts,
|
|
11156
|
+
name: resolvedName,
|
|
11157
|
+
slug,
|
|
11158
|
+
type: "scorer"
|
|
11159
|
+
});
|
|
11160
|
+
this.project.addCodeFunction(scorer);
|
|
11161
|
+
} else {
|
|
11162
|
+
const promptBlock = "messages" in opts ? {
|
|
11163
|
+
type: "chat",
|
|
11164
|
+
messages: opts.messages
|
|
11165
|
+
} : {
|
|
11166
|
+
type: "completion",
|
|
11167
|
+
content: opts.prompt
|
|
11168
|
+
};
|
|
11169
|
+
const promptData = {
|
|
11170
|
+
prompt: promptBlock,
|
|
11171
|
+
options: {
|
|
11172
|
+
model: opts.model,
|
|
11173
|
+
params: opts.params
|
|
11174
|
+
},
|
|
11175
|
+
parser: {
|
|
11176
|
+
type: "llm_classifier",
|
|
11177
|
+
use_cot: opts.useCot,
|
|
11178
|
+
choice_scores: opts.choiceScores
|
|
10440
11179
|
}
|
|
10441
|
-
|
|
10442
|
-
|
|
10443
|
-
|
|
10444
|
-
|
|
10445
|
-
|
|
10446
|
-
|
|
10447
|
-
|
|
10448
|
-
|
|
10449
|
-
|
|
10450
|
-
|
|
10451
|
-
|
|
10452
|
-
if (jsonl) {
|
|
10453
|
-
console.log(
|
|
10454
|
-
JSON.stringify({
|
|
10455
|
-
evaluatorName: evaluator.evalName,
|
|
10456
|
-
errors: failingResults.map(
|
|
10457
|
-
(r) => `${r.error instanceof Error ? r.error.stack : r.error}`
|
|
10458
|
-
)
|
|
10459
|
-
})
|
|
11180
|
+
};
|
|
11181
|
+
const codePrompt = new CodePrompt(
|
|
11182
|
+
this.project,
|
|
11183
|
+
promptData,
|
|
11184
|
+
[],
|
|
11185
|
+
{
|
|
11186
|
+
...opts,
|
|
11187
|
+
name: resolvedName,
|
|
11188
|
+
slug
|
|
11189
|
+
},
|
|
11190
|
+
"scorer"
|
|
10460
11191
|
);
|
|
10461
|
-
|
|
10462
|
-
for (const result of failingResults) {
|
|
10463
|
-
logError2(result.error, verbose);
|
|
10464
|
-
}
|
|
11192
|
+
this.project.addPrompt(codePrompt);
|
|
10465
11193
|
}
|
|
10466
|
-
|
|
10467
|
-
|
|
11194
|
+
}
|
|
11195
|
+
};
|
|
11196
|
+
var CodeFunction = class {
|
|
11197
|
+
constructor(project, opts) {
|
|
11198
|
+
this.project = project;
|
|
11199
|
+
this.handler = opts.handler;
|
|
11200
|
+
this.name = opts.name;
|
|
11201
|
+
this.slug = opts.slug;
|
|
11202
|
+
this.description = opts.description;
|
|
11203
|
+
this.type = opts.type;
|
|
11204
|
+
this.ifExists = opts.ifExists;
|
|
11205
|
+
this.metadata = opts.metadata;
|
|
11206
|
+
this.parameters = opts.parameters;
|
|
11207
|
+
this.returns = opts.returns;
|
|
11208
|
+
if (this.returns && !this.parameters) {
|
|
11209
|
+
throw new Error("parameters are required if return type is defined");
|
|
10468
11210
|
}
|
|
10469
11211
|
}
|
|
10470
|
-
|
|
10471
|
-
|
|
10472
|
-
|
|
10473
|
-
|
|
10474
|
-
|
|
10475
|
-
|
|
10476
|
-
|
|
10477
|
-
|
|
10478
|
-
|
|
10479
|
-
|
|
10480
|
-
|
|
10481
|
-
|
|
10482
|
-
|
|
10483
|
-
|
|
10484
|
-
|
|
10485
|
-
|
|
10486
|
-
|
|
10487
|
-
|
|
10488
|
-
|
|
10489
|
-
|
|
10490
|
-
|
|
10491
|
-
|
|
10492
|
-
|
|
10493
|
-
|
|
10494
|
-
|
|
10495
|
-
|
|
10496
|
-
|
|
10497
|
-
|
|
10498
|
-
|
|
10499
|
-
|
|
10500
|
-
|
|
10501
|
-
|
|
10502
|
-
|
|
10503
|
-
|
|
10504
|
-
|
|
10505
|
-
|
|
10506
|
-
|
|
10507
|
-
|
|
10508
|
-
|
|
10509
|
-
|
|
10510
|
-
|
|
10511
|
-
|
|
10512
|
-
|
|
10513
|
-
|
|
10514
|
-
|
|
10515
|
-
|
|
10516
|
-
|
|
10517
|
-
|
|
10518
|
-
|
|
10519
|
-
|
|
10520
|
-
|
|
10521
|
-
|
|
10522
|
-
|
|
10523
|
-
for (const metric of Object.values(summary.metrics ?? {})) {
|
|
10524
|
-
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
10525
|
-
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
10526
|
-
const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
|
|
10527
|
-
if (hasComparison) {
|
|
10528
|
-
let diffString = "-";
|
|
10529
|
-
if (!isEmpty2(metric.diff)) {
|
|
10530
|
-
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
10531
|
-
const diffSign = metric.diff > 0 ? "+" : "";
|
|
10532
|
-
diffString = `${diffSign}${diffPercent}%`;
|
|
10533
|
-
}
|
|
10534
|
-
const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
|
|
10535
|
-
const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
|
|
10536
|
-
isomorph_default.writeln(
|
|
10537
|
-
`${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
10538
|
-
);
|
|
11212
|
+
handler;
|
|
11213
|
+
name;
|
|
11214
|
+
slug;
|
|
11215
|
+
type;
|
|
11216
|
+
description;
|
|
11217
|
+
parameters;
|
|
11218
|
+
returns;
|
|
11219
|
+
ifExists;
|
|
11220
|
+
metadata;
|
|
11221
|
+
key() {
|
|
11222
|
+
return JSON.stringify([
|
|
11223
|
+
this.project.id ?? "",
|
|
11224
|
+
this.project.name ?? "",
|
|
11225
|
+
this.slug
|
|
11226
|
+
]);
|
|
11227
|
+
}
|
|
11228
|
+
};
|
|
11229
|
+
var CodePrompt = class {
|
|
11230
|
+
project;
|
|
11231
|
+
name;
|
|
11232
|
+
slug;
|
|
11233
|
+
prompt;
|
|
11234
|
+
ifExists;
|
|
11235
|
+
description;
|
|
11236
|
+
id;
|
|
11237
|
+
functionType;
|
|
11238
|
+
toolFunctions;
|
|
11239
|
+
metadata;
|
|
11240
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
11241
|
+
this.project = project;
|
|
11242
|
+
this.name = opts.name;
|
|
11243
|
+
this.slug = opts.slug;
|
|
11244
|
+
this.prompt = prompt;
|
|
11245
|
+
this.toolFunctions = toolFunctions;
|
|
11246
|
+
this.ifExists = opts.ifExists;
|
|
11247
|
+
this.description = opts.description;
|
|
11248
|
+
this.id = opts.id;
|
|
11249
|
+
this.functionType = functionType;
|
|
11250
|
+
this.metadata = opts.metadata;
|
|
11251
|
+
}
|
|
11252
|
+
async toFunctionDefinition(projectNameToId) {
|
|
11253
|
+
const prompt_data = {
|
|
11254
|
+
...this.prompt
|
|
11255
|
+
};
|
|
11256
|
+
if (this.toolFunctions.length > 0) {
|
|
11257
|
+
const resolvableToolFunctions = await Promise.all(
|
|
11258
|
+
this.toolFunctions.map(async (fn) => {
|
|
11259
|
+
if ("slug" in fn) {
|
|
11260
|
+
return {
|
|
11261
|
+
type: "slug",
|
|
11262
|
+
project_id: await projectNameToId.resolve(fn.project),
|
|
11263
|
+
slug: fn.slug
|
|
11264
|
+
};
|
|
10539
11265
|
} else {
|
|
10540
|
-
|
|
10541
|
-
`${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
|
|
10542
|
-
);
|
|
11266
|
+
return fn;
|
|
10543
11267
|
}
|
|
10544
|
-
}
|
|
10545
|
-
|
|
10546
|
-
|
|
10547
|
-
|
|
10548
|
-
isomorph_default.writeln(`View results for ${summary.experimentName}`);
|
|
10549
|
-
isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
|
|
10550
|
-
}
|
|
11268
|
+
})
|
|
11269
|
+
);
|
|
11270
|
+
prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
11271
|
+
resolvableToolFunctions;
|
|
10551
11272
|
}
|
|
10552
|
-
|
|
10553
|
-
|
|
10554
|
-
|
|
10555
|
-
|
|
10556
|
-
|
|
11273
|
+
return {
|
|
11274
|
+
project_id: await projectNameToId.resolve(this.project),
|
|
11275
|
+
name: this.name,
|
|
11276
|
+
slug: this.slug,
|
|
11277
|
+
description: this.description ?? "",
|
|
11278
|
+
function_data: {
|
|
11279
|
+
type: "prompt"
|
|
11280
|
+
},
|
|
11281
|
+
function_type: this.functionType,
|
|
11282
|
+
prompt_data,
|
|
11283
|
+
if_exists: this.ifExists,
|
|
11284
|
+
metadata: this.metadata
|
|
11285
|
+
};
|
|
10557
11286
|
}
|
|
10558
11287
|
};
|
|
10559
|
-
|
|
10560
|
-
|
|
10561
|
-
|
|
10562
|
-
var errorHandler = (err, req, res, next) => {
|
|
10563
|
-
if ("status" in err) {
|
|
10564
|
-
res.status(err.status).json({
|
|
10565
|
-
error: {
|
|
10566
|
-
message: err.message,
|
|
10567
|
-
status: err.status
|
|
10568
|
-
}
|
|
10569
|
-
});
|
|
10570
|
-
return;
|
|
11288
|
+
var PromptBuilder = class {
|
|
11289
|
+
constructor(project) {
|
|
11290
|
+
this.project = project;
|
|
10571
11291
|
}
|
|
10572
|
-
|
|
10573
|
-
|
|
10574
|
-
|
|
10575
|
-
|
|
10576
|
-
|
|
11292
|
+
create(opts) {
|
|
11293
|
+
const toolFunctions = [];
|
|
11294
|
+
const rawTools = [];
|
|
11295
|
+
for (const tool of opts.tools ?? []) {
|
|
11296
|
+
if (tool instanceof CodeFunction) {
|
|
11297
|
+
toolFunctions.push(tool);
|
|
11298
|
+
} else if ("type" in tool && !("function" in tool)) {
|
|
11299
|
+
toolFunctions.push(tool);
|
|
11300
|
+
} else {
|
|
11301
|
+
rawTools.push(tool);
|
|
10577
11302
|
}
|
|
11303
|
+
}
|
|
11304
|
+
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
11305
|
+
const promptData = promptDefinitionToPromptData(opts, rawTools);
|
|
11306
|
+
const promptRow = {
|
|
11307
|
+
id: opts.id,
|
|
11308
|
+
_xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
|
|
11309
|
+
name: opts.name,
|
|
11310
|
+
slug,
|
|
11311
|
+
prompt_data: promptData,
|
|
11312
|
+
...this.project.id !== void 0 ? { project_id: this.project.id } : {}
|
|
11313
|
+
};
|
|
11314
|
+
const prompt = new Prompt2(
|
|
11315
|
+
promptRow,
|
|
11316
|
+
{},
|
|
11317
|
+
// It doesn't make sense to specify defaults here.
|
|
11318
|
+
opts.noTrace ?? false
|
|
11319
|
+
);
|
|
11320
|
+
const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
|
|
11321
|
+
...opts,
|
|
11322
|
+
slug
|
|
10578
11323
|
});
|
|
10579
|
-
|
|
11324
|
+
this.project.addPrompt(codePrompt);
|
|
11325
|
+
return prompt;
|
|
10580
11326
|
}
|
|
10581
|
-
console.error("Internal server error", err);
|
|
10582
|
-
res.status(500).json({
|
|
10583
|
-
error: {
|
|
10584
|
-
message: "Internal server error",
|
|
10585
|
-
status: 500
|
|
10586
|
-
}
|
|
10587
|
-
});
|
|
10588
11327
|
};
|
|
10589
|
-
|
|
10590
|
-
|
|
10591
|
-
|
|
10592
|
-
|
|
10593
|
-
|
|
10594
|
-
|
|
10595
|
-
|
|
10596
|
-
|
|
10597
|
-
|
|
10598
|
-
|
|
11328
|
+
var CodeParameters = class {
|
|
11329
|
+
project;
|
|
11330
|
+
name;
|
|
11331
|
+
slug;
|
|
11332
|
+
description;
|
|
11333
|
+
schema;
|
|
11334
|
+
ifExists;
|
|
11335
|
+
metadata;
|
|
11336
|
+
constructor(project, opts) {
|
|
11337
|
+
this.project = project;
|
|
11338
|
+
this.name = opts.name;
|
|
11339
|
+
this.slug = opts.slug;
|
|
11340
|
+
this.description = opts.description;
|
|
11341
|
+
this.schema = opts.schema;
|
|
11342
|
+
this.ifExists = opts.ifExists;
|
|
11343
|
+
this.metadata = opts.metadata;
|
|
11344
|
+
}
|
|
11345
|
+
async toFunctionDefinition(projectNameToId) {
|
|
11346
|
+
return {
|
|
11347
|
+
project_id: await projectNameToId.resolve(this.project),
|
|
11348
|
+
name: this.name,
|
|
11349
|
+
slug: this.slug,
|
|
11350
|
+
description: this.description ?? "",
|
|
11351
|
+
function_type: "parameters",
|
|
11352
|
+
function_data: {
|
|
11353
|
+
type: "parameters",
|
|
11354
|
+
data: {},
|
|
11355
|
+
__schema: serializeEvalParameterstoParametersSchema(this.schema)
|
|
11356
|
+
},
|
|
11357
|
+
if_exists: this.ifExists,
|
|
11358
|
+
metadata: this.metadata
|
|
10599
11359
|
};
|
|
10600
|
-
if (req.headers.authorization || req.headers[BRAINTRUST_AUTH_TOKEN_HEADER]) {
|
|
10601
|
-
const tokenText = parseBraintrustAuthHeader(req.headers);
|
|
10602
|
-
if (!tokenText) {
|
|
10603
|
-
return next(createError(400, "Invalid authorization token format"));
|
|
10604
|
-
}
|
|
10605
|
-
ctx.token = tokenText.toLowerCase() === "null" ? void 0 : tokenText;
|
|
10606
|
-
}
|
|
10607
|
-
req.ctx = ctx;
|
|
10608
|
-
next();
|
|
10609
|
-
} catch (e) {
|
|
10610
|
-
next(e);
|
|
10611
11360
|
}
|
|
10612
|
-
}
|
|
10613
|
-
var
|
|
10614
|
-
|
|
10615
|
-
|
|
10616
|
-
});
|
|
10617
|
-
async function cachedLogin(options) {
|
|
10618
|
-
const key = JSON.stringify(options);
|
|
10619
|
-
const cached = loginCache.get(key);
|
|
10620
|
-
if (cached) {
|
|
10621
|
-
return cached;
|
|
11361
|
+
};
|
|
11362
|
+
var ParametersBuilder = class {
|
|
11363
|
+
constructor(project) {
|
|
11364
|
+
this.project = project;
|
|
10622
11365
|
}
|
|
10623
|
-
|
|
10624
|
-
|
|
10625
|
-
|
|
11366
|
+
create(opts) {
|
|
11367
|
+
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
11368
|
+
const codeParameters = new CodeParameters(this.project, {
|
|
11369
|
+
name: opts.name,
|
|
11370
|
+
slug,
|
|
11371
|
+
description: opts.description,
|
|
11372
|
+
schema: opts.schema,
|
|
11373
|
+
ifExists: opts.ifExists,
|
|
11374
|
+
metadata: opts.metadata
|
|
11375
|
+
});
|
|
11376
|
+
this.project.addParameters(codeParameters);
|
|
11377
|
+
return opts.schema;
|
|
11378
|
+
}
|
|
11379
|
+
};
|
|
11380
|
+
function serializeEvalParametersToStaticParametersSchema(parameters) {
|
|
11381
|
+
return Object.fromEntries(
|
|
11382
|
+
Object.entries(parameters).map(([name, value]) => {
|
|
11383
|
+
if ("type" in value && value.type === "prompt") {
|
|
11384
|
+
return [
|
|
11385
|
+
name,
|
|
11386
|
+
{
|
|
11387
|
+
type: "prompt",
|
|
11388
|
+
default: value.default ? promptDefinitionToPromptData(value.default) : void 0,
|
|
11389
|
+
description: value.description
|
|
11390
|
+
}
|
|
11391
|
+
];
|
|
11392
|
+
} else {
|
|
11393
|
+
const schemaObj = zodToJsonSchema(value);
|
|
11394
|
+
return [
|
|
11395
|
+
name,
|
|
11396
|
+
{
|
|
11397
|
+
type: "data",
|
|
11398
|
+
schema: schemaObj,
|
|
11399
|
+
default: schemaObj.default,
|
|
11400
|
+
description: schemaObj.description
|
|
11401
|
+
}
|
|
11402
|
+
];
|
|
11403
|
+
}
|
|
11404
|
+
})
|
|
11405
|
+
);
|
|
10626
11406
|
}
|
|
10627
|
-
function
|
|
10628
|
-
|
|
10629
|
-
|
|
10630
|
-
|
|
10631
|
-
|
|
10632
|
-
|
|
10633
|
-
|
|
10634
|
-
|
|
10635
|
-
|
|
11407
|
+
function serializeEvalParameterstoParametersSchema(parameters) {
|
|
11408
|
+
const properties = {};
|
|
11409
|
+
const required = [];
|
|
11410
|
+
for (const [name, value] of Object.entries(parameters)) {
|
|
11411
|
+
if ("type" in value && value.type === "prompt") {
|
|
11412
|
+
const defaultPromptData = value.default ? promptDefinitionToPromptData(value.default) : void 0;
|
|
11413
|
+
properties[name] = {
|
|
11414
|
+
type: "object",
|
|
11415
|
+
"x-bt-type": "prompt",
|
|
11416
|
+
...value.description ? { description: value.description } : {},
|
|
11417
|
+
...defaultPromptData ? { default: defaultPromptData } : {}
|
|
11418
|
+
};
|
|
11419
|
+
if (!defaultPromptData) {
|
|
11420
|
+
required.push(name);
|
|
10636
11421
|
}
|
|
10637
|
-
|
|
10638
|
-
|
|
10639
|
-
|
|
11422
|
+
} else {
|
|
11423
|
+
const schemaObj = zodToJsonSchema(value);
|
|
11424
|
+
properties[name] = schemaObj;
|
|
11425
|
+
if (!("default" in schemaObj)) {
|
|
11426
|
+
required.push(name);
|
|
10640
11427
|
}
|
|
10641
|
-
const state = await cachedLogin({
|
|
10642
|
-
apiKey: req.ctx?.token,
|
|
10643
|
-
orgName
|
|
10644
|
-
});
|
|
10645
|
-
req.ctx.state = state;
|
|
10646
|
-
next();
|
|
10647
|
-
} catch (e) {
|
|
10648
|
-
console.error("Authorization error:", e);
|
|
10649
|
-
return next(createError(401, "Unauthorized"));
|
|
10650
11428
|
}
|
|
11429
|
+
}
|
|
11430
|
+
return {
|
|
11431
|
+
type: "object",
|
|
11432
|
+
properties,
|
|
11433
|
+
...required.length > 0 ? { required } : {},
|
|
11434
|
+
additionalProperties: true
|
|
10651
11435
|
};
|
|
10652
11436
|
}
|
|
10653
|
-
function
|
|
10654
|
-
|
|
10655
|
-
|
|
10656
|
-
|
|
10657
|
-
|
|
10658
|
-
|
|
10659
|
-
|
|
10660
|
-
|
|
10661
|
-
|
|
10662
|
-
|
|
10663
|
-
|
|
11437
|
+
function serializeRemoteEvalParametersContainer(parameters) {
|
|
11438
|
+
if (RemoteEvalParameters.isParameters(parameters)) {
|
|
11439
|
+
return {
|
|
11440
|
+
type: "braintrust.parameters",
|
|
11441
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
11442
|
+
schema: parameters.schema,
|
|
11443
|
+
source: {
|
|
11444
|
+
parametersId: parameters.id,
|
|
11445
|
+
slug: parameters.slug,
|
|
11446
|
+
name: parameters.name,
|
|
11447
|
+
projectId: parameters.projectId,
|
|
11448
|
+
version: parameters.version
|
|
11449
|
+
}
|
|
11450
|
+
};
|
|
10664
11451
|
}
|
|
10665
|
-
return
|
|
11452
|
+
return {
|
|
11453
|
+
type: "braintrust.staticParameters",
|
|
11454
|
+
schema: serializeEvalParametersToStaticParametersSchema(parameters),
|
|
11455
|
+
source: null
|
|
11456
|
+
};
|
|
10666
11457
|
}
|
|
10667
|
-
|
|
10668
|
-
|
|
10669
|
-
|
|
10670
|
-
|
|
10671
|
-
|
|
10672
|
-
|
|
10673
|
-
|
|
11458
|
+
var ProjectNameIdMap = class {
|
|
11459
|
+
nameToId = {};
|
|
11460
|
+
idToName = {};
|
|
11461
|
+
async getId(projectName) {
|
|
11462
|
+
if (!(projectName in this.nameToId)) {
|
|
11463
|
+
const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
|
|
11464
|
+
project_name: projectName
|
|
11465
|
+
});
|
|
11466
|
+
const result = z13.object({
|
|
11467
|
+
project: Project
|
|
11468
|
+
}).parse(response);
|
|
11469
|
+
const projectId = result.project.id;
|
|
11470
|
+
this.nameToId[projectName] = projectId;
|
|
11471
|
+
this.idToName[projectId] = projectName;
|
|
10674
11472
|
}
|
|
11473
|
+
return this.nameToId[projectName];
|
|
10675
11474
|
}
|
|
10676
|
-
|
|
10677
|
-
|
|
10678
|
-
|
|
10679
|
-
|
|
10680
|
-
|
|
10681
|
-
|
|
10682
|
-
|
|
10683
|
-
|
|
10684
|
-
|
|
10685
|
-
allowedOrigin = originHeader;
|
|
11475
|
+
async getName(projectId) {
|
|
11476
|
+
if (!(projectId in this.idToName)) {
|
|
11477
|
+
const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
|
|
11478
|
+
id: projectId
|
|
11479
|
+
});
|
|
11480
|
+
const result = z13.array(Project).nonempty().parse(response);
|
|
11481
|
+
const projectName = result[0].name;
|
|
11482
|
+
this.idToName[projectId] = projectName;
|
|
11483
|
+
this.nameToId[projectName] = projectId;
|
|
10686
11484
|
}
|
|
10687
|
-
|
|
10688
|
-
return allowedOrigin;
|
|
10689
|
-
}
|
|
10690
|
-
var MAIN_ORIGIN = "https://www.braintrust.dev";
|
|
10691
|
-
var WHITELISTED_ORIGINS = [
|
|
10692
|
-
MAIN_ORIGIN,
|
|
10693
|
-
"https://www.braintrustdata.com",
|
|
10694
|
-
new RegExp("https://.*.preview.braintrust.dev")
|
|
10695
|
-
].concat(
|
|
10696
|
-
process.env.WHITELISTED_ORIGIN ? [process.env.WHITELISTED_ORIGIN] : []
|
|
10697
|
-
).concat(
|
|
10698
|
-
process.env.BRAINTRUST_APP_URL ? [process.env.BRAINTRUST_APP_URL] : []
|
|
10699
|
-
);
|
|
10700
|
-
function parseAuthHeader(headers) {
|
|
10701
|
-
const authHeader = headers["authorization"];
|
|
10702
|
-
let authValue = null;
|
|
10703
|
-
if (Array.isArray(authHeader)) {
|
|
10704
|
-
authValue = authHeader[authHeader.length - 1];
|
|
10705
|
-
} else {
|
|
10706
|
-
authValue = authHeader;
|
|
10707
|
-
}
|
|
10708
|
-
if (!authValue) {
|
|
10709
|
-
return null;
|
|
10710
|
-
}
|
|
10711
|
-
const parts = authValue.split(" ");
|
|
10712
|
-
if (parts.length !== 2) {
|
|
10713
|
-
return null;
|
|
11485
|
+
return this.idToName[projectId];
|
|
10714
11486
|
}
|
|
10715
|
-
|
|
10716
|
-
|
|
10717
|
-
|
|
10718
|
-
|
|
10719
|
-
|
|
10720
|
-
"Authorization",
|
|
10721
|
-
"X-Api-Key",
|
|
10722
|
-
"X-Amz-Security-Token",
|
|
10723
|
-
"x-bt-auth-token",
|
|
10724
|
-
"x-bt-parent",
|
|
10725
|
-
// These are eval-specific
|
|
10726
|
-
"x-bt-org-name",
|
|
10727
|
-
"x-bt-project-id",
|
|
10728
|
-
"x-bt-stream-fmt",
|
|
10729
|
-
"x-bt-use-cache",
|
|
10730
|
-
"x-stainless-os",
|
|
10731
|
-
"x-stainless-lang",
|
|
10732
|
-
"x-stainless-package-version",
|
|
10733
|
-
"x-stainless-runtime",
|
|
10734
|
-
"x-stainless-runtime-version",
|
|
10735
|
-
"x-stainless-arch"
|
|
10736
|
-
];
|
|
10737
|
-
|
|
10738
|
-
// dev/stream.ts
|
|
10739
|
-
function serializeSSEEvent(event) {
|
|
10740
|
-
return Object.entries(event).filter(([_key, value]) => value !== void 0).map(([key, value]) => `${key}: ${value}`).join("\n") + "\n\n";
|
|
10741
|
-
}
|
|
10742
|
-
|
|
10743
|
-
// dev/types.ts
|
|
10744
|
-
import { z as z12 } from "zod/v3";
|
|
10745
|
-
var evalBodySchema = z12.object({
|
|
10746
|
-
name: z12.string(),
|
|
10747
|
-
parameters: z12.record(z12.string(), z12.unknown()).nullish(),
|
|
10748
|
-
data: RunEval.shape.data,
|
|
10749
|
-
scores: z12.array(
|
|
10750
|
-
z12.object({
|
|
10751
|
-
function_id: FunctionId,
|
|
10752
|
-
name: z12.string()
|
|
10753
|
-
})
|
|
10754
|
-
).nullish(),
|
|
10755
|
-
experiment_name: z12.string().nullish(),
|
|
10756
|
-
project_id: z12.string().nullish(),
|
|
10757
|
-
parent: InvokeParent.optional(),
|
|
10758
|
-
stream: z12.boolean().optional()
|
|
10759
|
-
});
|
|
10760
|
-
var evalParametersSerializedSchema = z12.record(
|
|
10761
|
-
z12.string(),
|
|
10762
|
-
z12.union([
|
|
10763
|
-
z12.object({
|
|
10764
|
-
type: z12.literal("prompt"),
|
|
10765
|
-
default: PromptData.optional(),
|
|
10766
|
-
description: z12.string().optional()
|
|
10767
|
-
}),
|
|
10768
|
-
z12.object({
|
|
10769
|
-
type: z12.literal("data"),
|
|
10770
|
-
schema: z12.record(z12.unknown()),
|
|
10771
|
-
// JSON Schema
|
|
10772
|
-
default: z12.unknown().optional(),
|
|
10773
|
-
description: z12.string().optional()
|
|
10774
|
-
})
|
|
10775
|
-
])
|
|
10776
|
-
);
|
|
10777
|
-
var evaluatorDefinitionSchema = z12.object({
|
|
10778
|
-
parameters: evalParametersSerializedSchema.optional()
|
|
10779
|
-
});
|
|
10780
|
-
var evaluatorDefinitionsSchema = z12.record(
|
|
10781
|
-
z12.string(),
|
|
10782
|
-
evaluatorDefinitionSchema
|
|
10783
|
-
);
|
|
10784
|
-
|
|
10785
|
-
// dev/server.ts
|
|
10786
|
-
import { z as z13 } from "zod/v3";
|
|
10787
|
-
|
|
10788
|
-
// src/zod/utils.ts
|
|
10789
|
-
import { zodToJsonSchema as zodToJsonSchemaV3 } from "zod-to-json-schema";
|
|
10790
|
-
import * as z42 from "zod/v4";
|
|
10791
|
-
function isZodV4(zodObject) {
|
|
10792
|
-
return typeof zodObject === "object" && zodObject !== null && "_zod" in zodObject && zodObject._zod !== void 0;
|
|
10793
|
-
}
|
|
10794
|
-
function zodToJsonSchema(schema) {
|
|
10795
|
-
if (isZodV4(schema)) {
|
|
10796
|
-
return z42.toJSONSchema(schema, {
|
|
10797
|
-
target: "draft-7"
|
|
10798
|
-
});
|
|
11487
|
+
async resolve(project) {
|
|
11488
|
+
if (project.id) {
|
|
11489
|
+
return project.id;
|
|
11490
|
+
}
|
|
11491
|
+
return this.getId(project.name);
|
|
10799
11492
|
}
|
|
10800
|
-
|
|
10801
|
-
}
|
|
11493
|
+
};
|
|
10802
11494
|
|
|
10803
11495
|
// dev/server.ts
|
|
10804
11496
|
function runDevServer(evaluators, opts) {
|
|
@@ -10835,20 +11527,27 @@ function runDevServer(evaluators, opts) {
|
|
|
10835
11527
|
app.get("/", (req, res) => {
|
|
10836
11528
|
res.send("Hello, world!");
|
|
10837
11529
|
});
|
|
10838
|
-
app.get(
|
|
10839
|
-
|
|
10840
|
-
|
|
10841
|
-
|
|
10842
|
-
|
|
10843
|
-
|
|
11530
|
+
app.get(
|
|
11531
|
+
"/list",
|
|
11532
|
+
checkAuthorized,
|
|
11533
|
+
asyncHandler(async (req, res) => {
|
|
11534
|
+
const evalDefs = {};
|
|
11535
|
+
for (const [name, evaluator] of Object.entries(allEvaluators)) {
|
|
11536
|
+
let parameters;
|
|
11537
|
+
if (evaluator.parameters) {
|
|
11538
|
+
const resolvedParams = await Promise.resolve(evaluator.parameters);
|
|
11539
|
+
parameters = serializeRemoteEvalParametersContainer(resolvedParams);
|
|
11540
|
+
}
|
|
11541
|
+
evalDefs[name] = {
|
|
11542
|
+
parameters,
|
|
10844
11543
|
scores: evaluator.scores.map((score, idx) => ({
|
|
10845
11544
|
name: scorerName(score, idx)
|
|
10846
11545
|
}))
|
|
10847
|
-
}
|
|
10848
|
-
|
|
10849
|
-
|
|
10850
|
-
|
|
10851
|
-
|
|
11546
|
+
};
|
|
11547
|
+
}
|
|
11548
|
+
res.json(evalDefs);
|
|
11549
|
+
})
|
|
11550
|
+
);
|
|
10852
11551
|
app.post(
|
|
10853
11552
|
"/eval",
|
|
10854
11553
|
checkAuthorized,
|
|
@@ -10873,18 +11572,12 @@ function runDevServer(evaluators, opts) {
|
|
|
10873
11572
|
res.status(404).json({ error: `Evaluator '${name}' not found` });
|
|
10874
11573
|
return;
|
|
10875
11574
|
}
|
|
10876
|
-
if (evaluator.parameters
|
|
11575
|
+
if (evaluator.parameters) {
|
|
10877
11576
|
try {
|
|
10878
|
-
|
|
10879
|
-
res.status(400).json({
|
|
10880
|
-
error: `Evaluator '${name}' does not accept parameters`
|
|
10881
|
-
});
|
|
10882
|
-
return;
|
|
10883
|
-
}
|
|
10884
|
-
validateParameters(parameters ?? {}, evaluator.parameters);
|
|
11577
|
+
await validateParameters(parameters ?? {}, evaluator.parameters);
|
|
10885
11578
|
} catch (e) {
|
|
10886
11579
|
console.error("Error validating parameters", e);
|
|
10887
|
-
if (e instanceof
|
|
11580
|
+
if (e instanceof z14.ZodError || e instanceof ValidationError || e instanceof Error) {
|
|
10888
11581
|
res.status(400).json({
|
|
10889
11582
|
error: e.message
|
|
10890
11583
|
});
|
|
@@ -11033,9 +11726,9 @@ async function getDataset(state, data) {
|
|
|
11033
11726
|
return data.data;
|
|
11034
11727
|
}
|
|
11035
11728
|
}
|
|
11036
|
-
var datasetFetchSchema =
|
|
11037
|
-
project_id:
|
|
11038
|
-
name:
|
|
11729
|
+
var datasetFetchSchema = z14.object({
|
|
11730
|
+
project_id: z14.string(),
|
|
11731
|
+
name: z14.string()
|
|
11039
11732
|
});
|
|
11040
11733
|
async function getDatasetById({
|
|
11041
11734
|
state,
|
|
@@ -11044,7 +11737,7 @@ async function getDatasetById({
|
|
|
11044
11737
|
const dataset = await state.appConn().post_json("api/dataset/get", {
|
|
11045
11738
|
id: datasetId
|
|
11046
11739
|
});
|
|
11047
|
-
const parsed =
|
|
11740
|
+
const parsed = z14.array(datasetFetchSchema).parse(dataset);
|
|
11048
11741
|
if (parsed.length === 0) {
|
|
11049
11742
|
throw new Error(`Dataset '${datasetId}' not found`);
|
|
11050
11743
|
}
|
|
@@ -11077,33 +11770,6 @@ function makeScorer(state, name, score, projectId) {
|
|
|
11077
11770
|
});
|
|
11078
11771
|
return ret;
|
|
11079
11772
|
}
|
|
11080
|
-
function makeEvalParametersSchema(parameters) {
|
|
11081
|
-
return Object.fromEntries(
|
|
11082
|
-
Object.entries(parameters).map(([name, value]) => {
|
|
11083
|
-
if ("type" in value && value.type === "prompt") {
|
|
11084
|
-
return [
|
|
11085
|
-
name,
|
|
11086
|
-
{
|
|
11087
|
-
type: "prompt",
|
|
11088
|
-
default: value.default ? promptDefinitionToPromptData(value.default) : void 0,
|
|
11089
|
-
description: value.description
|
|
11090
|
-
}
|
|
11091
|
-
];
|
|
11092
|
-
} else {
|
|
11093
|
-
const schemaObj = zodToJsonSchema(value);
|
|
11094
|
-
return [
|
|
11095
|
-
name,
|
|
11096
|
-
{
|
|
11097
|
-
type: "data",
|
|
11098
|
-
schema: schemaObj,
|
|
11099
|
-
default: schemaObj.default,
|
|
11100
|
-
description: schemaObj.description
|
|
11101
|
-
}
|
|
11102
|
-
];
|
|
11103
|
-
}
|
|
11104
|
-
})
|
|
11105
|
-
);
|
|
11106
|
-
}
|
|
11107
11773
|
|
|
11108
11774
|
// dev/index.ts
|
|
11109
11775
|
configureNode();
|