braintrust 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +4844 -3703
- package/dev/dist/index.d.ts +4844 -3703
- package/dev/dist/index.js +2068 -1402
- package/dev/dist/index.mjs +1954 -1288
- package/dist/browser.d.mts +16987 -8720
- package/dist/browser.d.ts +16987 -8720
- package/dist/browser.js +1810 -841
- package/dist/browser.mjs +2056 -1087
- package/dist/cli.js +2403 -1729
- package/dist/index.d.mts +16987 -8720
- package/dist/index.d.ts +16987 -8720
- package/dist/index.js +1810 -841
- package/dist/index.mjs +2056 -1087
- package/package.json +2 -1
- package/util/dist/index.d.mts +10 -8
- package/util/dist/index.d.ts +10 -8
- package/util/dist/index.js +27 -142
- package/util/dist/index.mjs +26 -141
package/dist/index.mjs
CHANGED
|
@@ -332,11 +332,19 @@ function getIdGenerator() {
|
|
|
332
332
|
|
|
333
333
|
// util/db_fields.ts
|
|
334
334
|
var TRANSACTION_ID_FIELD = "_xact_id";
|
|
335
|
+
var OBJECT_DELETE_FIELD = "_object_delete";
|
|
335
336
|
var IS_MERGE_FIELD = "_is_merge";
|
|
336
337
|
var AUDIT_SOURCE_FIELD = "_audit_source";
|
|
337
338
|
var AUDIT_METADATA_FIELD = "_audit_metadata";
|
|
338
339
|
var VALID_SOURCES = ["app", "api", "external"];
|
|
339
|
-
var
|
|
340
|
+
var OBJECT_ID_KEYS = [
|
|
341
|
+
"experiment_id",
|
|
342
|
+
"dataset_id",
|
|
343
|
+
"prompt_session_id",
|
|
344
|
+
"project_id",
|
|
345
|
+
"log_id",
|
|
346
|
+
"function_data"
|
|
347
|
+
];
|
|
340
348
|
|
|
341
349
|
// util/span_identifier_v3.ts
|
|
342
350
|
import * as uuid3 from "uuid";
|
|
@@ -1007,13 +1015,6 @@ function mergeDictsWithPathsHelper({
|
|
|
1007
1015
|
function mergeDicts(mergeInto, mergeFrom) {
|
|
1008
1016
|
return mergeDictsWithPaths({ mergeInto, mergeFrom, mergePaths: [] });
|
|
1009
1017
|
}
|
|
1010
|
-
function mapAt(m, k) {
|
|
1011
|
-
const ret = m.get(k);
|
|
1012
|
-
if (ret === void 0) {
|
|
1013
|
-
throw new Error(`Map does not contain key ${k}`);
|
|
1014
|
-
}
|
|
1015
|
-
return ret;
|
|
1016
|
-
}
|
|
1017
1018
|
function recordFind(m, k) {
|
|
1018
1019
|
return m[k];
|
|
1019
1020
|
}
|
|
@@ -1028,72 +1029,8 @@ function getObjValueByPath(row, path2) {
|
|
|
1028
1029
|
return curr;
|
|
1029
1030
|
}
|
|
1030
1031
|
|
|
1031
|
-
// util/graph_util.ts
|
|
1032
|
-
function depthFirstSearch(args) {
|
|
1033
|
-
const { graph, firstVisitF, lastVisitF } = args;
|
|
1034
|
-
for (const vs of graph.values()) {
|
|
1035
|
-
for (const v of vs.values()) {
|
|
1036
|
-
if (!graph.has(v)) {
|
|
1037
|
-
throw new Error(`Outgoing vertex ${v} must be a key in the graph`);
|
|
1038
|
-
}
|
|
1039
|
-
}
|
|
1040
|
-
}
|
|
1041
|
-
const firstVisitedVertices = /* @__PURE__ */ new Set();
|
|
1042
|
-
const visitationOrder = args.visitationOrder ?? [...graph.keys()];
|
|
1043
|
-
const events = visitationOrder.map((vertex) => ({ eventType: "first", vertex, extras: {} })).reverse();
|
|
1044
|
-
while (events.length) {
|
|
1045
|
-
const { eventType, vertex, extras } = events.pop();
|
|
1046
|
-
if (eventType === "last") {
|
|
1047
|
-
lastVisitF?.(vertex);
|
|
1048
|
-
continue;
|
|
1049
|
-
}
|
|
1050
|
-
if (firstVisitedVertices.has(vertex)) {
|
|
1051
|
-
continue;
|
|
1052
|
-
}
|
|
1053
|
-
firstVisitedVertices.add(vertex);
|
|
1054
|
-
firstVisitF?.(vertex, { parentVertex: extras.parentVertex });
|
|
1055
|
-
events.push({ eventType: "last", vertex, extras: {} });
|
|
1056
|
-
mapAt(graph, vertex).forEach((child) => {
|
|
1057
|
-
events.push({
|
|
1058
|
-
eventType: "first",
|
|
1059
|
-
vertex: child,
|
|
1060
|
-
extras: { parentVertex: vertex }
|
|
1061
|
-
});
|
|
1062
|
-
});
|
|
1063
|
-
}
|
|
1064
|
-
}
|
|
1065
|
-
function undirectedConnectedComponents(graph) {
|
|
1066
|
-
const directedGraph = new Map(
|
|
1067
|
-
[...graph.vertices].map((v) => [v, /* @__PURE__ */ new Set()])
|
|
1068
|
-
);
|
|
1069
|
-
for (const [i, j] of graph.edges) {
|
|
1070
|
-
mapAt(directedGraph, i).add(j);
|
|
1071
|
-
mapAt(directedGraph, j).add(i);
|
|
1072
|
-
}
|
|
1073
|
-
let labelCounter = 0;
|
|
1074
|
-
const vertexLabels = /* @__PURE__ */ new Map();
|
|
1075
|
-
const firstVisitF = (vertex, args) => {
|
|
1076
|
-
const label = args?.parentVertex !== void 0 ? mapAt(vertexLabels, args?.parentVertex) : labelCounter++;
|
|
1077
|
-
vertexLabels.set(vertex, label);
|
|
1078
|
-
};
|
|
1079
|
-
depthFirstSearch({ graph: directedGraph, firstVisitF });
|
|
1080
|
-
const output = Array.from({ length: labelCounter }).map(() => []);
|
|
1081
|
-
for (const [vertex, label] of vertexLabels.entries()) {
|
|
1082
|
-
output[label].push(vertex);
|
|
1083
|
-
}
|
|
1084
|
-
return output;
|
|
1085
|
-
}
|
|
1086
|
-
function topologicalSort(graph, visitationOrder) {
|
|
1087
|
-
const reverseOrdering = [];
|
|
1088
|
-
const lastVisitF = (vertex) => {
|
|
1089
|
-
reverseOrdering.push(vertex);
|
|
1090
|
-
};
|
|
1091
|
-
depthFirstSearch({ graph, lastVisitF, visitationOrder });
|
|
1092
|
-
return reverseOrdering.reverse();
|
|
1093
|
-
}
|
|
1094
|
-
|
|
1095
1032
|
// util/merge_row_batch.ts
|
|
1096
|
-
function generateMergedRowKey(row
|
|
1033
|
+
function generateMergedRowKey(row) {
|
|
1097
1034
|
return JSON.stringify(
|
|
1098
1035
|
[
|
|
1099
1036
|
"org_id",
|
|
@@ -1102,7 +1039,7 @@ function generateMergedRowKey(row, useParentIdForId) {
|
|
|
1102
1039
|
"dataset_id",
|
|
1103
1040
|
"prompt_session_id",
|
|
1104
1041
|
"log_id",
|
|
1105
|
-
|
|
1042
|
+
"id"
|
|
1106
1043
|
].map((k) => row[k])
|
|
1107
1044
|
);
|
|
1108
1045
|
}
|
|
@@ -1156,96 +1093,34 @@ function mergeRowBatch(rows) {
|
|
|
1156
1093
|
rowGroups.set(key, row);
|
|
1157
1094
|
}
|
|
1158
1095
|
}
|
|
1159
|
-
|
|
1160
|
-
const rowToLabel = new Map(
|
|
1161
|
-
merged.map((r, i) => [generateMergedRowKey(r), i])
|
|
1162
|
-
);
|
|
1163
|
-
const graph = new Map(
|
|
1164
|
-
Array.from({ length: merged.length }).map((_, i) => [i, /* @__PURE__ */ new Set()])
|
|
1165
|
-
);
|
|
1166
|
-
merged.forEach((r, i) => {
|
|
1167
|
-
const parentId = r[PARENT_ID_FIELD];
|
|
1168
|
-
if (!parentId) {
|
|
1169
|
-
return;
|
|
1170
|
-
}
|
|
1171
|
-
const parentRowKey = generateMergedRowKey(
|
|
1172
|
-
r,
|
|
1173
|
-
true
|
|
1174
|
-
/* useParentIdForId */
|
|
1175
|
-
);
|
|
1176
|
-
const parentLabel = rowToLabel.get(parentRowKey);
|
|
1177
|
-
if (parentLabel !== void 0) {
|
|
1178
|
-
mapAt(graph, parentLabel).add(i);
|
|
1179
|
-
}
|
|
1180
|
-
});
|
|
1181
|
-
const connectedComponents = undirectedConnectedComponents({
|
|
1182
|
-
vertices: new Set(graph.keys()),
|
|
1183
|
-
edges: new Set(
|
|
1184
|
-
[...graph.entries()].flatMap(
|
|
1185
|
-
([k, vs]) => [...vs].map((v) => {
|
|
1186
|
-
const ret = [k, v];
|
|
1187
|
-
return ret;
|
|
1188
|
-
})
|
|
1189
|
-
)
|
|
1190
|
-
)
|
|
1191
|
-
});
|
|
1192
|
-
const buckets = connectedComponents.map(
|
|
1193
|
-
(cc) => topologicalSort(
|
|
1194
|
-
graph,
|
|
1195
|
-
cc
|
|
1196
|
-
/* visitationOrder */
|
|
1197
|
-
)
|
|
1198
|
-
);
|
|
1199
|
-
return buckets.map((bucket) => bucket.map((i) => merged[i]));
|
|
1096
|
+
return [...rowGroups.values()];
|
|
1200
1097
|
}
|
|
1201
1098
|
function batchItems(args) {
|
|
1202
|
-
|
|
1099
|
+
const { items } = args;
|
|
1203
1100
|
const batchMaxNumItems = args.batchMaxNumItems ?? Number.POSITIVE_INFINITY;
|
|
1204
1101
|
const batchMaxNumBytes = args.batchMaxNumBytes ?? Number.POSITIVE_INFINITY;
|
|
1102
|
+
const getByteSize = args.getByteSize;
|
|
1205
1103
|
const output = [];
|
|
1206
|
-
let nextItems = [];
|
|
1207
|
-
let batchSet = [];
|
|
1208
1104
|
let batch = [];
|
|
1209
1105
|
let batchLen = 0;
|
|
1210
1106
|
function addToBatch(item) {
|
|
1211
1107
|
batch.push(item);
|
|
1212
|
-
batchLen += item
|
|
1108
|
+
batchLen += getByteSize(item);
|
|
1213
1109
|
}
|
|
1214
1110
|
function flushBatch() {
|
|
1215
|
-
|
|
1111
|
+
output.push(batch);
|
|
1216
1112
|
batch = [];
|
|
1217
1113
|
batchLen = 0;
|
|
1218
1114
|
}
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
for (const item of bucket) {
|
|
1223
|
-
if (batch.length === 0 || item.length + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems) {
|
|
1224
|
-
addToBatch(item);
|
|
1225
|
-
} else if (i === 0) {
|
|
1226
|
-
flushBatch();
|
|
1227
|
-
addToBatch(item);
|
|
1228
|
-
} else {
|
|
1229
|
-
break;
|
|
1230
|
-
}
|
|
1231
|
-
++i;
|
|
1232
|
-
}
|
|
1233
|
-
if (i < bucket.length) {
|
|
1234
|
-
nextItems.push(bucket.slice(i));
|
|
1235
|
-
}
|
|
1236
|
-
if (batchLen >= batchMaxNumBytes || batch.length > batchMaxNumItems) {
|
|
1237
|
-
flushBatch();
|
|
1238
|
-
}
|
|
1239
|
-
}
|
|
1240
|
-
if (batch.length) {
|
|
1115
|
+
for (const item of items) {
|
|
1116
|
+
const itemSize = getByteSize(item);
|
|
1117
|
+
if (batch.length > 0 && !(itemSize + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems)) {
|
|
1241
1118
|
flushBatch();
|
|
1242
1119
|
}
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
items = nextItems;
|
|
1248
|
-
nextItems = [];
|
|
1120
|
+
addToBatch(item);
|
|
1121
|
+
}
|
|
1122
|
+
if (batch.length > 0) {
|
|
1123
|
+
flushBatch();
|
|
1249
1124
|
}
|
|
1250
1125
|
return output;
|
|
1251
1126
|
}
|
|
@@ -1832,10 +1707,15 @@ var FunctionTypeEnum = z6.enum([
|
|
|
1832
1707
|
"preprocessor",
|
|
1833
1708
|
"facet",
|
|
1834
1709
|
"classifier",
|
|
1835
|
-
"tag"
|
|
1710
|
+
"tag",
|
|
1711
|
+
"parameters"
|
|
1836
1712
|
]);
|
|
1837
1713
|
var NullableSavedFunctionId = z6.union([
|
|
1838
|
-
z6.object({
|
|
1714
|
+
z6.object({
|
|
1715
|
+
type: z6.literal("function"),
|
|
1716
|
+
id: z6.string(),
|
|
1717
|
+
version: z6.string().optional()
|
|
1718
|
+
}),
|
|
1839
1719
|
z6.object({
|
|
1840
1720
|
type: z6.literal("global"),
|
|
1841
1721
|
name: z6.string(),
|
|
@@ -1843,6 +1723,67 @@ var NullableSavedFunctionId = z6.union([
|
|
|
1843
1723
|
}),
|
|
1844
1724
|
z6.null()
|
|
1845
1725
|
]);
|
|
1726
|
+
var TopicMapReport = z6.object({
|
|
1727
|
+
version: z6.literal(1),
|
|
1728
|
+
created_at: z6.string().optional(),
|
|
1729
|
+
settings: z6.object({
|
|
1730
|
+
algorithm: z6.enum(["hdbscan", "kmeans", "hierarchical"]),
|
|
1731
|
+
dimension_reduction: z6.enum(["umap", "pca", "none"]),
|
|
1732
|
+
vector_field: z6.string(),
|
|
1733
|
+
embedding_model: z6.string(),
|
|
1734
|
+
n_clusters: z6.union([z6.number(), z6.null()]).optional(),
|
|
1735
|
+
umap_dimensions: z6.union([z6.number(), z6.null()]).optional(),
|
|
1736
|
+
min_cluster_size: z6.union([z6.number(), z6.null()]).optional(),
|
|
1737
|
+
min_samples: z6.union([z6.number(), z6.null()]).optional()
|
|
1738
|
+
}),
|
|
1739
|
+
query_settings: z6.object({
|
|
1740
|
+
hierarchy_threshold: z6.union([z6.number(), z6.null()]),
|
|
1741
|
+
auto_naming: z6.boolean(),
|
|
1742
|
+
skip_cache: z6.boolean(),
|
|
1743
|
+
viz_mode: z6.enum(["bar", "scatter"]),
|
|
1744
|
+
naming_model: z6.string()
|
|
1745
|
+
}).partial(),
|
|
1746
|
+
clusters: z6.array(
|
|
1747
|
+
z6.object({
|
|
1748
|
+
cluster_id: z6.number(),
|
|
1749
|
+
parent_cluster_id: z6.union([z6.number(), z6.null()]).optional(),
|
|
1750
|
+
topic_id: z6.string(),
|
|
1751
|
+
count: z6.number(),
|
|
1752
|
+
sample_texts: z6.array(z6.string()),
|
|
1753
|
+
samples: z6.array(
|
|
1754
|
+
z6.object({
|
|
1755
|
+
id: z6.string(),
|
|
1756
|
+
text: z6.string(),
|
|
1757
|
+
root_span_id: z6.string(),
|
|
1758
|
+
span_id: z6.string()
|
|
1759
|
+
})
|
|
1760
|
+
),
|
|
1761
|
+
name: z6.string().optional(),
|
|
1762
|
+
description: z6.string().optional(),
|
|
1763
|
+
keywords: z6.array(z6.string()).optional(),
|
|
1764
|
+
centroid: z6.array(z6.number()).optional(),
|
|
1765
|
+
parent_id: z6.union([z6.number(), z6.null()]).optional(),
|
|
1766
|
+
is_leaf: z6.boolean().optional(),
|
|
1767
|
+
depth: z6.number().optional()
|
|
1768
|
+
})
|
|
1769
|
+
),
|
|
1770
|
+
embedding_points: z6.array(
|
|
1771
|
+
z6.object({
|
|
1772
|
+
x: z6.number(),
|
|
1773
|
+
y: z6.number(),
|
|
1774
|
+
cluster: z6.number(),
|
|
1775
|
+
text: z6.string().optional()
|
|
1776
|
+
})
|
|
1777
|
+
).optional()
|
|
1778
|
+
});
|
|
1779
|
+
var TopicMapData = z6.object({
|
|
1780
|
+
type: z6.literal("topic_map"),
|
|
1781
|
+
source_facet: z6.string(),
|
|
1782
|
+
embedding_model: z6.string(),
|
|
1783
|
+
bundle_key: z6.string(),
|
|
1784
|
+
distance_threshold: z6.number().optional(),
|
|
1785
|
+
report: TopicMapReport.optional()
|
|
1786
|
+
});
|
|
1846
1787
|
var BatchedFacetData = z6.object({
|
|
1847
1788
|
type: z6.literal("batched_facet"),
|
|
1848
1789
|
preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
|
|
@@ -1851,9 +1792,17 @@ var BatchedFacetData = z6.object({
|
|
|
1851
1792
|
name: z6.string(),
|
|
1852
1793
|
prompt: z6.string(),
|
|
1853
1794
|
model: z6.string().optional(),
|
|
1795
|
+
embedding_model: z6.string().optional(),
|
|
1854
1796
|
no_match_pattern: z6.string().optional()
|
|
1855
1797
|
})
|
|
1856
|
-
)
|
|
1798
|
+
),
|
|
1799
|
+
topic_maps: z6.record(
|
|
1800
|
+
z6.object({
|
|
1801
|
+
function_name: z6.string(),
|
|
1802
|
+
topic_map_id: z6.string().optional(),
|
|
1803
|
+
topic_map_data: TopicMapData
|
|
1804
|
+
})
|
|
1805
|
+
).optional()
|
|
1857
1806
|
});
|
|
1858
1807
|
var BraintrustModelParams = z6.object({
|
|
1859
1808
|
use_cache: z6.boolean(),
|
|
@@ -2064,6 +2013,18 @@ var ObjectReferenceNullish = z6.union([
|
|
|
2064
2013
|
}),
|
|
2065
2014
|
z6.null()
|
|
2066
2015
|
]);
|
|
2016
|
+
var SavedFunctionId = z6.union([
|
|
2017
|
+
z6.object({
|
|
2018
|
+
type: z6.literal("function"),
|
|
2019
|
+
id: z6.string(),
|
|
2020
|
+
version: z6.string().optional()
|
|
2021
|
+
}),
|
|
2022
|
+
z6.object({
|
|
2023
|
+
type: z6.literal("global"),
|
|
2024
|
+
name: z6.string(),
|
|
2025
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2026
|
+
})
|
|
2027
|
+
]);
|
|
2067
2028
|
var DatasetEvent = z6.object({
|
|
2068
2029
|
id: z6.string(),
|
|
2069
2030
|
_xact_id: z6.string(),
|
|
@@ -2083,7 +2044,36 @@ var DatasetEvent = z6.object({
|
|
|
2083
2044
|
is_root: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
2084
2045
|
origin: ObjectReferenceNullish.optional(),
|
|
2085
2046
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2086
|
-
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
|
|
2047
|
+
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2048
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2049
|
+
classifications: z6.union([
|
|
2050
|
+
z6.record(
|
|
2051
|
+
z6.array(
|
|
2052
|
+
z6.object({
|
|
2053
|
+
id: z6.string(),
|
|
2054
|
+
label: z6.string().optional(),
|
|
2055
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
2056
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2057
|
+
source: SavedFunctionId.and(
|
|
2058
|
+
z6.union([
|
|
2059
|
+
z6.object({
|
|
2060
|
+
type: z6.literal("function"),
|
|
2061
|
+
id: z6.string(),
|
|
2062
|
+
version: z6.string().optional()
|
|
2063
|
+
}),
|
|
2064
|
+
z6.object({
|
|
2065
|
+
type: z6.literal("global"),
|
|
2066
|
+
name: z6.string(),
|
|
2067
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2068
|
+
}),
|
|
2069
|
+
z6.null()
|
|
2070
|
+
])
|
|
2071
|
+
).optional()
|
|
2072
|
+
})
|
|
2073
|
+
)
|
|
2074
|
+
),
|
|
2075
|
+
z6.null()
|
|
2076
|
+
]).optional()
|
|
2087
2077
|
});
|
|
2088
2078
|
var EnvVar = z6.object({
|
|
2089
2079
|
id: z6.string().uuid(),
|
|
@@ -2160,7 +2150,8 @@ var SpanType = z6.union([
|
|
|
2160
2150
|
"automation",
|
|
2161
2151
|
"facet",
|
|
2162
2152
|
"preprocessor",
|
|
2163
|
-
"classifier"
|
|
2153
|
+
"classifier",
|
|
2154
|
+
"review"
|
|
2164
2155
|
]),
|
|
2165
2156
|
z6.null()
|
|
2166
2157
|
]);
|
|
@@ -2201,10 +2192,43 @@ var ExperimentEvent = z6.object({
|
|
|
2201
2192
|
is_root: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
2202
2193
|
origin: ObjectReferenceNullish.optional(),
|
|
2203
2194
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2204
|
-
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
|
|
2195
|
+
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2196
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2197
|
+
classifications: z6.union([
|
|
2198
|
+
z6.record(
|
|
2199
|
+
z6.array(
|
|
2200
|
+
z6.object({
|
|
2201
|
+
id: z6.string(),
|
|
2202
|
+
label: z6.string().optional(),
|
|
2203
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
2204
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2205
|
+
source: SavedFunctionId.and(
|
|
2206
|
+
z6.union([
|
|
2207
|
+
z6.object({
|
|
2208
|
+
type: z6.literal("function"),
|
|
2209
|
+
id: z6.string(),
|
|
2210
|
+
version: z6.string().optional()
|
|
2211
|
+
}),
|
|
2212
|
+
z6.object({
|
|
2213
|
+
type: z6.literal("global"),
|
|
2214
|
+
name: z6.string(),
|
|
2215
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2216
|
+
}),
|
|
2217
|
+
z6.null()
|
|
2218
|
+
])
|
|
2219
|
+
).optional()
|
|
2220
|
+
})
|
|
2221
|
+
)
|
|
2222
|
+
),
|
|
2223
|
+
z6.null()
|
|
2224
|
+
]).optional()
|
|
2205
2225
|
});
|
|
2206
2226
|
var ExtendedSavedFunctionId = z6.union([
|
|
2207
|
-
z6.object({
|
|
2227
|
+
z6.object({
|
|
2228
|
+
type: z6.literal("function"),
|
|
2229
|
+
id: z6.string(),
|
|
2230
|
+
version: z6.string().optional()
|
|
2231
|
+
}),
|
|
2208
2232
|
z6.object({
|
|
2209
2233
|
type: z6.literal("global"),
|
|
2210
2234
|
name: z6.string(),
|
|
@@ -2221,6 +2245,7 @@ var FacetData = z6.object({
|
|
|
2221
2245
|
preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
|
|
2222
2246
|
prompt: z6.string(),
|
|
2223
2247
|
model: z6.string().optional(),
|
|
2248
|
+
embedding_model: z6.string().optional(),
|
|
2224
2249
|
no_match_pattern: z6.string().optional()
|
|
2225
2250
|
});
|
|
2226
2251
|
var PromptBlockDataNullish = z6.union([
|
|
@@ -2310,14 +2335,6 @@ var PromptParserNullish = z6.union([
|
|
|
2310
2335
|
}),
|
|
2311
2336
|
z6.null()
|
|
2312
2337
|
]);
|
|
2313
|
-
var SavedFunctionId = z6.union([
|
|
2314
|
-
z6.object({ type: z6.literal("function"), id: z6.string() }),
|
|
2315
|
-
z6.object({
|
|
2316
|
-
type: z6.literal("global"),
|
|
2317
|
-
name: z6.string(),
|
|
2318
|
-
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2319
|
-
})
|
|
2320
|
-
]);
|
|
2321
2338
|
var PromptDataNullish = z6.union([
|
|
2322
2339
|
z6.object({
|
|
2323
2340
|
prompt: PromptBlockDataNullish,
|
|
@@ -2368,7 +2385,8 @@ var FunctionTypeEnumNullish = z6.union([
|
|
|
2368
2385
|
"preprocessor",
|
|
2369
2386
|
"facet",
|
|
2370
2387
|
"classifier",
|
|
2371
|
-
"tag"
|
|
2388
|
+
"tag",
|
|
2389
|
+
"parameters"
|
|
2372
2390
|
]),
|
|
2373
2391
|
z6.null()
|
|
2374
2392
|
]);
|
|
@@ -2460,7 +2478,8 @@ var FunctionData = z6.union([
|
|
|
2460
2478
|
type: z6.literal("remote_eval"),
|
|
2461
2479
|
endpoint: z6.string(),
|
|
2462
2480
|
eval_name: z6.string(),
|
|
2463
|
-
parameters: z6.object({}).partial().passthrough()
|
|
2481
|
+
parameters: z6.object({}).partial().passthrough(),
|
|
2482
|
+
parameters_version: z6.union([z6.string(), z6.null()]).optional()
|
|
2464
2483
|
}),
|
|
2465
2484
|
z6.object({
|
|
2466
2485
|
type: z6.literal("global"),
|
|
@@ -2469,7 +2488,18 @@ var FunctionData = z6.union([
|
|
|
2469
2488
|
config: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional()
|
|
2470
2489
|
}),
|
|
2471
2490
|
FacetData,
|
|
2472
|
-
BatchedFacetData
|
|
2491
|
+
BatchedFacetData,
|
|
2492
|
+
z6.object({
|
|
2493
|
+
type: z6.literal("parameters"),
|
|
2494
|
+
data: z6.object({}).partial().passthrough(),
|
|
2495
|
+
__schema: z6.object({
|
|
2496
|
+
type: z6.literal("object"),
|
|
2497
|
+
properties: z6.record(z6.object({}).partial().passthrough()),
|
|
2498
|
+
required: z6.array(z6.string()).optional(),
|
|
2499
|
+
additionalProperties: z6.boolean().optional()
|
|
2500
|
+
})
|
|
2501
|
+
}),
|
|
2502
|
+
TopicMapData.and(z6.unknown())
|
|
2473
2503
|
]);
|
|
2474
2504
|
var Function = z6.object({
|
|
2475
2505
|
id: z6.string().uuid(),
|
|
@@ -2499,7 +2529,13 @@ var Function = z6.object({
|
|
|
2499
2529
|
z6.null()
|
|
2500
2530
|
]).optional()
|
|
2501
2531
|
});
|
|
2502
|
-
var FunctionFormat = z6.enum([
|
|
2532
|
+
var FunctionFormat = z6.enum([
|
|
2533
|
+
"llm",
|
|
2534
|
+
"code",
|
|
2535
|
+
"global",
|
|
2536
|
+
"graph",
|
|
2537
|
+
"topic_map"
|
|
2538
|
+
]);
|
|
2503
2539
|
var PromptData = z6.object({
|
|
2504
2540
|
prompt: PromptBlockDataNullish,
|
|
2505
2541
|
options: PromptOptionsNullish,
|
|
@@ -2582,13 +2618,14 @@ var FunctionObjectType = z6.enum([
|
|
|
2582
2618
|
"custom_view",
|
|
2583
2619
|
"preprocessor",
|
|
2584
2620
|
"facet",
|
|
2585
|
-
"classifier"
|
|
2621
|
+
"classifier",
|
|
2622
|
+
"parameters"
|
|
2586
2623
|
]);
|
|
2587
2624
|
var FunctionOutputType = z6.enum([
|
|
2588
2625
|
"completion",
|
|
2589
2626
|
"score",
|
|
2590
2627
|
"facet",
|
|
2591
|
-
"
|
|
2628
|
+
"classification",
|
|
2592
2629
|
"any"
|
|
2593
2630
|
]);
|
|
2594
2631
|
var GitMetadataSettings = z6.object({
|
|
@@ -2624,6 +2661,10 @@ var GroupScope = z6.object({
|
|
|
2624
2661
|
idle_seconds: z6.number().optional()
|
|
2625
2662
|
});
|
|
2626
2663
|
var IfExists = z6.enum(["error", "ignore", "replace"]);
|
|
2664
|
+
var ImageRenderingMode = z6.union([
|
|
2665
|
+
z6.enum(["auto", "click_to_load", "blocked"]),
|
|
2666
|
+
z6.null()
|
|
2667
|
+
]);
|
|
2627
2668
|
var InvokeParent = z6.union([
|
|
2628
2669
|
z6.object({
|
|
2629
2670
|
object_type: z6.enum(["project_logs", "experiment", "playground_logs"]),
|
|
@@ -2716,7 +2757,8 @@ var Organization = z6.object({
|
|
|
2716
2757
|
is_universal_api: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
2717
2758
|
proxy_url: z6.union([z6.string(), z6.null()]).optional(),
|
|
2718
2759
|
realtime_url: z6.union([z6.string(), z6.null()]).optional(),
|
|
2719
|
-
created: z6.union([z6.string(), z6.null()]).optional()
|
|
2760
|
+
created: z6.union([z6.string(), z6.null()]).optional(),
|
|
2761
|
+
image_rendering_mode: ImageRenderingMode.optional()
|
|
2720
2762
|
});
|
|
2721
2763
|
var ProjectSettings = z6.union([
|
|
2722
2764
|
z6.object({
|
|
@@ -2857,7 +2899,36 @@ var ProjectLogsEvent = z6.object({
|
|
|
2857
2899
|
origin: ObjectReferenceNullish.optional(),
|
|
2858
2900
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2859
2901
|
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2860
|
-
_async_scoring_state: z6.unknown().optional()
|
|
2902
|
+
_async_scoring_state: z6.unknown().optional(),
|
|
2903
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2904
|
+
classifications: z6.union([
|
|
2905
|
+
z6.record(
|
|
2906
|
+
z6.array(
|
|
2907
|
+
z6.object({
|
|
2908
|
+
id: z6.string(),
|
|
2909
|
+
label: z6.string().optional(),
|
|
2910
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
2911
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2912
|
+
source: SavedFunctionId.and(
|
|
2913
|
+
z6.union([
|
|
2914
|
+
z6.object({
|
|
2915
|
+
type: z6.literal("function"),
|
|
2916
|
+
id: z6.string(),
|
|
2917
|
+
version: z6.string().optional()
|
|
2918
|
+
}),
|
|
2919
|
+
z6.object({
|
|
2920
|
+
type: z6.literal("global"),
|
|
2921
|
+
name: z6.string(),
|
|
2922
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2923
|
+
}),
|
|
2924
|
+
z6.null()
|
|
2925
|
+
])
|
|
2926
|
+
).optional()
|
|
2927
|
+
})
|
|
2928
|
+
)
|
|
2929
|
+
),
|
|
2930
|
+
z6.null()
|
|
2931
|
+
]).optional()
|
|
2861
2932
|
});
|
|
2862
2933
|
var ProjectScoreType = z6.enum([
|
|
2863
2934
|
"slider",
|
|
@@ -3159,12 +3230,15 @@ var View = z6.object({
|
|
|
3159
3230
|
"datasets",
|
|
3160
3231
|
"dataset",
|
|
3161
3232
|
"prompts",
|
|
3233
|
+
"parameters",
|
|
3162
3234
|
"tools",
|
|
3163
3235
|
"scorers",
|
|
3164
3236
|
"classifiers",
|
|
3165
3237
|
"logs",
|
|
3166
3238
|
"monitor",
|
|
3167
|
-
"
|
|
3239
|
+
"for_review_project_log",
|
|
3240
|
+
"for_review_experiments",
|
|
3241
|
+
"for_review_datasets"
|
|
3168
3242
|
]),
|
|
3169
3243
|
name: z6.string(),
|
|
3170
3244
|
created: z6.union([z6.string(), z6.null()]).optional(),
|
|
@@ -3909,6 +3983,52 @@ var PromptCache = class {
|
|
|
3909
3983
|
}
|
|
3910
3984
|
};
|
|
3911
3985
|
|
|
3986
|
+
// src/prompt-cache/parameters-cache.ts
|
|
3987
|
+
function createCacheKey2(key) {
|
|
3988
|
+
if (key.id) {
|
|
3989
|
+
return `parameters:id:${key.id}`;
|
|
3990
|
+
}
|
|
3991
|
+
const prefix = key.projectId ?? key.projectName;
|
|
3992
|
+
if (!prefix) {
|
|
3993
|
+
throw new Error("Either projectId or projectName must be provided");
|
|
3994
|
+
}
|
|
3995
|
+
if (!key.slug) {
|
|
3996
|
+
throw new Error("Slug must be provided when not using ID");
|
|
3997
|
+
}
|
|
3998
|
+
return `parameters:${prefix}:${key.slug}:${key.version ?? "latest"}`;
|
|
3999
|
+
}
|
|
4000
|
+
var ParametersCache = class {
|
|
4001
|
+
memoryCache;
|
|
4002
|
+
diskCache;
|
|
4003
|
+
constructor(options) {
|
|
4004
|
+
this.memoryCache = options.memoryCache;
|
|
4005
|
+
this.diskCache = options.diskCache;
|
|
4006
|
+
}
|
|
4007
|
+
async get(key) {
|
|
4008
|
+
const cacheKey = createCacheKey2(key);
|
|
4009
|
+
const memoryParams = this.memoryCache.get(cacheKey);
|
|
4010
|
+
if (memoryParams !== void 0) {
|
|
4011
|
+
return memoryParams;
|
|
4012
|
+
}
|
|
4013
|
+
if (this.diskCache) {
|
|
4014
|
+
const diskParams = await this.diskCache.get(cacheKey);
|
|
4015
|
+
if (!diskParams) {
|
|
4016
|
+
return void 0;
|
|
4017
|
+
}
|
|
4018
|
+
this.memoryCache.set(cacheKey, diskParams);
|
|
4019
|
+
return diskParams;
|
|
4020
|
+
}
|
|
4021
|
+
return void 0;
|
|
4022
|
+
}
|
|
4023
|
+
async set(key, value) {
|
|
4024
|
+
const cacheKey = createCacheKey2(key);
|
|
4025
|
+
this.memoryCache.set(cacheKey, value);
|
|
4026
|
+
if (this.diskCache) {
|
|
4027
|
+
await this.diskCache.set(cacheKey, value);
|
|
4028
|
+
}
|
|
4029
|
+
}
|
|
4030
|
+
};
|
|
4031
|
+
|
|
3912
4032
|
// src/span-cache.ts
|
|
3913
4033
|
var activeCaches = /* @__PURE__ */ new Set();
|
|
3914
4034
|
var exitHandlersRegistered = false;
|
|
@@ -4199,7 +4319,24 @@ var SpanCache = class {
|
|
|
4199
4319
|
// src/logger.ts
|
|
4200
4320
|
var BRAINTRUST_ATTACHMENT = BraintrustAttachmentReference.shape.type.value;
|
|
4201
4321
|
var EXTERNAL_ATTACHMENT = ExternalAttachmentReference.shape.type.value;
|
|
4322
|
+
var LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow";
|
|
4202
4323
|
var BRAINTRUST_PARAMS = Object.keys(BraintrustModelParams.shape);
|
|
4324
|
+
var DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024;
|
|
4325
|
+
var parametersRowSchema = z8.object({
|
|
4326
|
+
id: z8.string().uuid(),
|
|
4327
|
+
_xact_id: z8.string(),
|
|
4328
|
+
project_id: z8.string().uuid(),
|
|
4329
|
+
name: z8.string(),
|
|
4330
|
+
slug: z8.string(),
|
|
4331
|
+
description: z8.union([z8.string(), z8.null()]).optional(),
|
|
4332
|
+
function_type: z8.literal("parameters"),
|
|
4333
|
+
function_data: z8.object({
|
|
4334
|
+
type: z8.literal("parameters"),
|
|
4335
|
+
data: z8.record(z8.unknown()).optional(),
|
|
4336
|
+
__schema: z8.record(z8.unknown())
|
|
4337
|
+
}),
|
|
4338
|
+
metadata: z8.union([z8.object({}).partial().passthrough(), z8.null()]).optional()
|
|
4339
|
+
});
|
|
4203
4340
|
var LoginInvalidOrgError = class extends Error {
|
|
4204
4341
|
constructor(message) {
|
|
4205
4342
|
super(message);
|
|
@@ -4376,6 +4513,17 @@ var BraintrustState = class _BraintrustState {
|
|
|
4376
4513
|
max: Number(isomorph_default.getEnv("BRAINTRUST_PROMPT_CACHE_DISK_MAX")) ?? 1 << 20
|
|
4377
4514
|
}) : void 0;
|
|
4378
4515
|
this.promptCache = new PromptCache({ memoryCache, diskCache });
|
|
4516
|
+
const parametersMemoryCache = new LRUCache({
|
|
4517
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_MEMORY_MAX")) ?? 1 << 10
|
|
4518
|
+
});
|
|
4519
|
+
const parametersDiskCache = canUseDiskCache() ? new DiskCache({
|
|
4520
|
+
cacheDir: isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DIR") ?? `${isomorph_default.getEnv("HOME") ?? isomorph_default.homedir()}/.braintrust/parameters_cache`,
|
|
4521
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DISK_MAX")) ?? 1 << 20
|
|
4522
|
+
}) : void 0;
|
|
4523
|
+
this.parametersCache = new ParametersCache({
|
|
4524
|
+
memoryCache: parametersMemoryCache,
|
|
4525
|
+
diskCache: parametersDiskCache
|
|
4526
|
+
});
|
|
4379
4527
|
this.spanCache = new SpanCache({ disabled: loginParams.disableSpanCache });
|
|
4380
4528
|
}
|
|
4381
4529
|
id;
|
|
@@ -4405,6 +4553,7 @@ var BraintrustState = class _BraintrustState {
|
|
|
4405
4553
|
_apiConn = null;
|
|
4406
4554
|
_proxyConn = null;
|
|
4407
4555
|
promptCache;
|
|
4556
|
+
parametersCache;
|
|
4408
4557
|
spanCache;
|
|
4409
4558
|
_idGenerator = null;
|
|
4410
4559
|
_contextManager = null;
|
|
@@ -5659,8 +5808,100 @@ function castLogger(logger, asyncFlush) {
|
|
|
5659
5808
|
}
|
|
5660
5809
|
return logger;
|
|
5661
5810
|
}
|
|
5811
|
+
var logs3OverflowUploadSchema = z8.object({
|
|
5812
|
+
method: z8.enum(["PUT", "POST"]),
|
|
5813
|
+
signedUrl: z8.string().url(),
|
|
5814
|
+
headers: z8.record(z8.string()).optional(),
|
|
5815
|
+
fields: z8.record(z8.string()).optional(),
|
|
5816
|
+
key: z8.string().min(1)
|
|
5817
|
+
});
|
|
5662
5818
|
function constructLogs3Data(items) {
|
|
5663
|
-
return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
|
|
5819
|
+
return `{"rows": ${constructJsonArray(items.map((i) => i.str))}, "api_version": 2}`;
|
|
5820
|
+
}
|
|
5821
|
+
function constructLogs3OverflowRequest(key) {
|
|
5822
|
+
return {
|
|
5823
|
+
rows: {
|
|
5824
|
+
type: LOGS3_OVERFLOW_REFERENCE_TYPE,
|
|
5825
|
+
key
|
|
5826
|
+
},
|
|
5827
|
+
api_version: 2
|
|
5828
|
+
};
|
|
5829
|
+
}
|
|
5830
|
+
function pickLogs3OverflowObjectIds(row) {
|
|
5831
|
+
const objectIds = {};
|
|
5832
|
+
for (const key of OBJECT_ID_KEYS) {
|
|
5833
|
+
if (key in row) {
|
|
5834
|
+
objectIds[key] = row[key];
|
|
5835
|
+
}
|
|
5836
|
+
}
|
|
5837
|
+
return objectIds;
|
|
5838
|
+
}
|
|
5839
|
+
/**
 * Rejects when an overflow upload response does not report success.
 *
 * The response body is included in the error message on a best-effort basis:
 * if reading it fails, an empty string is used instead.
 *
 * @param response The fetch response of the upload request.
 * @throws {Error} When `response.ok` is falsy.
 */
async function throwIfLogs3OverflowUploadFailed(response) {
  if (response.ok) {
    return;
  }
  const responseText = await response.text().catch(() => "");
  throw new Error(
    `Failed to upload logs3 overflow payload: ${response.status} ${responseText}`
  );
}
/**
 * Uploads a serialized logs3 payload to the signed URL described by `upload`.
 *
 * When `upload.method` is `"POST"` the payload is sent as a multipart form
 * (the `upload.fields` entries followed by a `file` part). Any other method is
 * handled as a `PUT` of the raw payload.
 *
 * @param upload Upload descriptor; reads `method`, `signedUrl`, `fields` and `headers`.
 * @param payload The payload to upload.
 * @param fetchFn Fetch implementation to use; defaults to the global `fetch`.
 * @throws {Error} When a POST upload has no `fields`, when `FormData`/`Blob`
 *   are unavailable for a POST upload, or when the upload response is not ok.
 */
async function uploadLogs3OverflowPayload(upload, payload, fetchFn = fetch) {
  if (upload.method !== "POST") {
    const headers = { ...upload.headers ?? {} };
    addAzureBlobHeaders(headers, upload.signedUrl);
    const response = await fetchFn(upload.signedUrl, {
      method: "PUT",
      headers,
      body: payload
    });
    await throwIfLogs3OverflowUploadFailed(response);
    return;
  }
  if (!upload.fields) {
    throw new Error("Missing logs3 overflow upload fields");
  }
  if (typeof FormData === "undefined" || typeof Blob === "undefined") {
    throw new Error("FormData is not available for logs3 overflow upload");
  }
  const form = new FormData();
  for (const [key, value] of Object.entries(upload.fields)) {
    form.append(key, value);
  }
  // The file part is appended after all of the provided fields.
  const contentType = upload.fields["Content-Type"] ?? "application/json";
  form.append("file", new Blob([payload], { type: contentType }));
  // Drop any caller-supplied Content-Type header (case-insensitively);
  // presumably so fetch can set the multipart boundary itself.
  const formHeaders = {};
  for (const [key, value] of Object.entries(upload.headers ?? {})) {
    if (key.toLowerCase() !== "content-type") {
      formHeaders[key] = value;
    }
  }
  const formResponse = await fetchFn(upload.signedUrl, {
    method: "POST",
    headers: formHeaders,
    body: form
  });
  await throwIfLogs3OverflowUploadFailed(formResponse);
}
|
|
5886
|
+
/**
 * Serializes a row to JSON and collects the metadata needed for overflow uploads.
 *
 * @param item The row to serialize.
 * @returns `{ str, overflowMeta }` where `str` is `JSON.stringify(item)` and
 *   `overflowMeta` holds the row's object ids, whether its delete flag is
 *   exactly `true`, and the byte size of `str`.
 */
function stringifyWithOverflowMeta(item) {
  const serialized = JSON.stringify(item);
  const objectIds = pickLogs3OverflowObjectIds(item);
  const isDelete = item[OBJECT_DELETE_FIELD] === true;
  const inputRow = { byte_size: utf8ByteLength(serialized) };
  return {
    str: serialized,
    overflowMeta: {
      object_ids: objectIds,
      is_delete: isDelete,
      input_row: inputRow
    }
  };
}
|
|
5900
|
+
/**
 * Returns the number of bytes `value` occupies when encoded as UTF-8.
 *
 * Uses `TextEncoder` when the runtime provides it. Otherwise the size is
 * computed from the UTF-16 code units directly, so the result is still a byte
 * count rather than the string's `length` (which under-counts any non-ASCII
 * text).
 *
 * @param value The string to measure.
 * @returns The UTF-8 encoded size of `value` in bytes.
 */
function utf8ByteLength(value) {
  if (typeof TextEncoder !== "undefined") {
    return new TextEncoder().encode(value).length;
  }
  let byteLength = 0;
  for (let i = 0; i < value.length; i++) {
    const unit = value.charCodeAt(i);
    if (unit < 0x80) {
      byteLength += 1;
    } else if (unit < 0x800) {
      byteLength += 2;
    } else if (unit >= 0xd800 && unit <= 0xdbff && i + 1 < value.length) {
      const next = value.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // A valid surrogate pair is one code point encoded in 4 bytes.
        byteLength += 4;
        i++;
      } else {
        // Unpaired high surrogate: counted as U+FFFD (3 bytes), as TextEncoder does.
        byteLength += 3;
      }
    } else {
      // Remaining BMP code units, including unpaired surrogates, take 3 bytes.
      byteLength += 3;
    }
  }
  return byteLength;
}
|
|
5665
5906
|
function now() {
|
|
5666
5907
|
return (/* @__PURE__ */ new Date()).getTime();
|
|
@@ -5686,10 +5927,9 @@ var TestBackgroundLogger = class {
|
|
|
5686
5927
|
events.push(await event.get());
|
|
5687
5928
|
}
|
|
5688
5929
|
}
|
|
5689
|
-
|
|
5690
|
-
let flatBatch = batch.flat();
|
|
5930
|
+
let batch = mergeRowBatch(events);
|
|
5691
5931
|
if (this.maskingFunction) {
|
|
5692
|
-
|
|
5932
|
+
batch = batch.map((item) => {
|
|
5693
5933
|
const maskedItem = { ...item };
|
|
5694
5934
|
for (const field of REDACTION_FIELDS) {
|
|
5695
5935
|
if (item[field] !== void 0) {
|
|
@@ -5714,7 +5954,7 @@ var TestBackgroundLogger = class {
|
|
|
5714
5954
|
return maskedItem;
|
|
5715
5955
|
});
|
|
5716
5956
|
}
|
|
5717
|
-
return
|
|
5957
|
+
return batch;
|
|
5718
5958
|
}
|
|
5719
5959
|
};
|
|
5720
5960
|
var BACKGROUND_LOGGER_BASE_SLEEP_TIME_S = 1;
|
|
@@ -5727,8 +5967,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5727
5967
|
onFlushError;
|
|
5728
5968
|
maskingFunction = null;
|
|
5729
5969
|
syncFlush = false;
|
|
5730
|
-
|
|
5731
|
-
|
|
5970
|
+
maxRequestSizeOverride = null;
|
|
5971
|
+
_maxRequestSizePromise = null;
|
|
5732
5972
|
defaultBatchSize = 100;
|
|
5733
5973
|
numTries = 3;
|
|
5734
5974
|
queueDropExceedingMaxsize = DEFAULT_QUEUE_SIZE;
|
|
@@ -5756,7 +5996,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5756
5996
|
}
|
|
5757
5997
|
const maxRequestSizeEnv = Number(isomorph_default.getEnv("BRAINTRUST_MAX_REQUEST_SIZE"));
|
|
5758
5998
|
if (!isNaN(maxRequestSizeEnv)) {
|
|
5759
|
-
this.
|
|
5999
|
+
this.maxRequestSizeOverride = maxRequestSizeEnv;
|
|
5760
6000
|
}
|
|
5761
6001
|
const numTriesEnv = Number(isomorph_default.getEnv("BRAINTRUST_NUM_RETRIES"));
|
|
5762
6002
|
if (!isNaN(numTriesEnv)) {
|
|
@@ -5818,6 +6058,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5818
6058
|
}
|
|
5819
6059
|
}
|
|
5820
6060
|
}
|
|
6061
|
+
getMaxRequestSize() {
|
|
6062
|
+
if (!this._maxRequestSizePromise) {
|
|
6063
|
+
this._maxRequestSizePromise = (async () => {
|
|
6064
|
+
let serverLimit = null;
|
|
6065
|
+
try {
|
|
6066
|
+
const conn = await this.apiConn.get();
|
|
6067
|
+
const versionInfo = await conn.get_json("version");
|
|
6068
|
+
serverLimit = z8.object({ logs3_payload_max_bytes: z8.number().nullish() }).parse(versionInfo).logs3_payload_max_bytes ?? null;
|
|
6069
|
+
} catch (e) {
|
|
6070
|
+
console.warn("Failed to fetch version info for payload limit:", e);
|
|
6071
|
+
}
|
|
6072
|
+
const validServerLimit = serverLimit !== null && serverLimit > 0 ? serverLimit : null;
|
|
6073
|
+
const canUseOverflow = validServerLimit !== null;
|
|
6074
|
+
let maxRequestSize = DEFAULT_MAX_REQUEST_SIZE;
|
|
6075
|
+
if (this.maxRequestSizeOverride !== null) {
|
|
6076
|
+
maxRequestSize = validServerLimit !== null ? Math.min(this.maxRequestSizeOverride, validServerLimit) : this.maxRequestSizeOverride;
|
|
6077
|
+
} else if (validServerLimit !== null) {
|
|
6078
|
+
maxRequestSize = validServerLimit;
|
|
6079
|
+
}
|
|
6080
|
+
return { maxRequestSize, canUseOverflow };
|
|
6081
|
+
})();
|
|
6082
|
+
}
|
|
6083
|
+
return this._maxRequestSizePromise;
|
|
6084
|
+
}
|
|
5821
6085
|
async flush() {
|
|
5822
6086
|
if (this.syncFlush) {
|
|
5823
6087
|
this.triggerActiveFlush();
|
|
@@ -5861,33 +6125,33 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5861
6125
|
if (allItems.length === 0) {
|
|
5862
6126
|
return;
|
|
5863
6127
|
}
|
|
5864
|
-
const
|
|
5865
|
-
(
|
|
6128
|
+
const allItemsWithMeta = allItems.map(
|
|
6129
|
+
(item) => stringifyWithOverflowMeta(item)
|
|
5866
6130
|
);
|
|
5867
|
-
const
|
|
5868
|
-
|
|
6131
|
+
const maxRequestSizeResult = await this.getMaxRequestSize();
|
|
6132
|
+
const batches = batchItems({
|
|
6133
|
+
items: allItemsWithMeta,
|
|
5869
6134
|
batchMaxNumItems: batchSize,
|
|
5870
|
-
batchMaxNumBytes:
|
|
6135
|
+
batchMaxNumBytes: maxRequestSizeResult.maxRequestSize / 2,
|
|
6136
|
+
getByteSize: (item) => item.str.length
|
|
5871
6137
|
});
|
|
5872
|
-
|
|
5873
|
-
|
|
5874
|
-
|
|
5875
|
-
|
|
5876
|
-
|
|
5877
|
-
|
|
5878
|
-
|
|
5879
|
-
|
|
5880
|
-
|
|
5881
|
-
|
|
6138
|
+
const postPromises = batches.map(
|
|
6139
|
+
(batch) => (async () => {
|
|
6140
|
+
try {
|
|
6141
|
+
await this.submitLogsRequest(batch, maxRequestSizeResult);
|
|
6142
|
+
return { type: "success" };
|
|
6143
|
+
} catch (e) {
|
|
6144
|
+
return { type: "error", value: e };
|
|
6145
|
+
}
|
|
6146
|
+
})()
|
|
6147
|
+
);
|
|
6148
|
+
const results = await Promise.all(postPromises);
|
|
6149
|
+
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
6150
|
+
if (failingResultErrors.length) {
|
|
6151
|
+
throw new AggregateError(
|
|
6152
|
+
failingResultErrors,
|
|
6153
|
+
`Encountered the following errors while logging:`
|
|
5882
6154
|
);
|
|
5883
|
-
const results = await Promise.all(postPromises);
|
|
5884
|
-
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
5885
|
-
if (failingResultErrors.length) {
|
|
5886
|
-
throw new AggregateError(
|
|
5887
|
-
failingResultErrors,
|
|
5888
|
-
`Encountered the following errors while logging:`
|
|
5889
|
-
);
|
|
5890
|
-
}
|
|
5891
6155
|
}
|
|
5892
6156
|
const attachmentErrors = [];
|
|
5893
6157
|
for (const attachment of attachments) {
|
|
@@ -5917,32 +6181,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5917
6181
|
items.forEach((item) => extractAttachments(item, attachments));
|
|
5918
6182
|
let mergedItems = mergeRowBatch(items);
|
|
5919
6183
|
if (this.maskingFunction) {
|
|
5920
|
-
mergedItems = mergedItems.map(
|
|
5921
|
-
|
|
5922
|
-
|
|
5923
|
-
|
|
5924
|
-
|
|
5925
|
-
|
|
5926
|
-
|
|
5927
|
-
|
|
5928
|
-
|
|
5929
|
-
|
|
5930
|
-
|
|
5931
|
-
|
|
5932
|
-
|
|
5933
|
-
|
|
5934
|
-
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
5935
|
-
} else {
|
|
5936
|
-
maskedItem.error = maskedValue.errorMsg;
|
|
5937
|
-
}
|
|
6184
|
+
mergedItems = mergedItems.map((item) => {
|
|
6185
|
+
const maskedItem = { ...item };
|
|
6186
|
+
for (const field of REDACTION_FIELDS) {
|
|
6187
|
+
if (item[field] !== void 0) {
|
|
6188
|
+
const maskedValue = applyMaskingToField(
|
|
6189
|
+
this.maskingFunction,
|
|
6190
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
6191
|
+
item[field],
|
|
6192
|
+
field
|
|
6193
|
+
);
|
|
6194
|
+
if (maskedValue instanceof MaskingError) {
|
|
6195
|
+
delete maskedItem[field];
|
|
6196
|
+
if (maskedItem.error) {
|
|
6197
|
+
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
5938
6198
|
} else {
|
|
5939
|
-
maskedItem
|
|
6199
|
+
maskedItem.error = maskedValue.errorMsg;
|
|
5940
6200
|
}
|
|
6201
|
+
} else {
|
|
6202
|
+
maskedItem[field] = maskedValue;
|
|
5941
6203
|
}
|
|
5942
6204
|
}
|
|
5943
|
-
|
|
5944
|
-
|
|
5945
|
-
);
|
|
6205
|
+
}
|
|
6206
|
+
return maskedItem;
|
|
6207
|
+
});
|
|
5946
6208
|
}
|
|
5947
6209
|
return [mergedItems, attachments];
|
|
5948
6210
|
} catch (e) {
|
|
@@ -5969,20 +6231,73 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5969
6231
|
}
|
|
5970
6232
|
throw new Error("Impossible");
|
|
5971
6233
|
}
|
|
5972
|
-
async
|
|
6234
|
+
async requestLogs3OverflowUpload(conn, args) {
|
|
6235
|
+
let response;
|
|
6236
|
+
try {
|
|
6237
|
+
response = await conn.post_json("logs3/overflow", {
|
|
6238
|
+
content_type: "application/json",
|
|
6239
|
+
size_bytes: args.sizeBytes,
|
|
6240
|
+
rows: args.rows
|
|
6241
|
+
});
|
|
6242
|
+
} catch (error) {
|
|
6243
|
+
const errorStr = JSON.stringify(error);
|
|
6244
|
+
throw new Error(
|
|
6245
|
+
`Failed to request logs3 overflow upload URL: ${errorStr}`
|
|
6246
|
+
);
|
|
6247
|
+
}
|
|
6248
|
+
try {
|
|
6249
|
+
return logs3OverflowUploadSchema.parse(response);
|
|
6250
|
+
} catch (error) {
|
|
6251
|
+
if (error instanceof ZodError) {
|
|
6252
|
+
const errorStr = JSON.stringify(error.flatten());
|
|
6253
|
+
throw new Error(`Invalid response from API server: ${errorStr}`);
|
|
6254
|
+
}
|
|
6255
|
+
throw error;
|
|
6256
|
+
}
|
|
6257
|
+
}
|
|
6258
|
+
async _uploadLogs3OverflowPayload(conn, upload, payload) {
|
|
6259
|
+
await uploadLogs3OverflowPayload(upload, payload, conn.fetch.bind(conn));
|
|
6260
|
+
}
|
|
6261
|
+
async submitLogsRequest(items, {
|
|
6262
|
+
maxRequestSize,
|
|
6263
|
+
canUseOverflow
|
|
6264
|
+
}) {
|
|
5973
6265
|
const conn = await this.apiConn.get();
|
|
5974
6266
|
const dataStr = constructLogs3Data(items);
|
|
6267
|
+
const payloadBytes = utf8ByteLength(dataStr);
|
|
6268
|
+
const useOverflow = canUseOverflow && payloadBytes > maxRequestSize;
|
|
5975
6269
|
if (this.allPublishPayloadsDir) {
|
|
5976
6270
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
5977
6271
|
payloadDir: this.allPublishPayloadsDir,
|
|
5978
6272
|
payload: dataStr
|
|
5979
6273
|
});
|
|
5980
6274
|
}
|
|
6275
|
+
let overflowUpload = null;
|
|
6276
|
+
const overflowRows = useOverflow ? items.map((item) => item.overflowMeta) : null;
|
|
5981
6277
|
for (let i = 0; i < this.numTries; i++) {
|
|
5982
6278
|
const startTime = now();
|
|
5983
6279
|
let error = void 0;
|
|
5984
6280
|
try {
|
|
5985
|
-
|
|
6281
|
+
if (overflowRows) {
|
|
6282
|
+
if (!overflowUpload) {
|
|
6283
|
+
const currentUpload = await this.requestLogs3OverflowUpload(conn, {
|
|
6284
|
+
rows: overflowRows,
|
|
6285
|
+
sizeBytes: payloadBytes
|
|
6286
|
+
});
|
|
6287
|
+
await this._uploadLogs3OverflowPayload(
|
|
6288
|
+
conn,
|
|
6289
|
+
currentUpload,
|
|
6290
|
+
dataStr
|
|
6291
|
+
);
|
|
6292
|
+
overflowUpload = currentUpload;
|
|
6293
|
+
}
|
|
6294
|
+
await conn.post_json(
|
|
6295
|
+
"logs3",
|
|
6296
|
+
constructLogs3OverflowRequest(overflowUpload.key)
|
|
6297
|
+
);
|
|
6298
|
+
} else {
|
|
6299
|
+
await conn.post_json("logs3", dataStr);
|
|
6300
|
+
}
|
|
5986
6301
|
} catch (e) {
|
|
5987
6302
|
error = e;
|
|
5988
6303
|
}
|
|
@@ -5998,7 +6313,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5998
6313
|
return `${error}`;
|
|
5999
6314
|
}
|
|
6000
6315
|
})();
|
|
6001
|
-
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${
|
|
6316
|
+
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${payloadBytes}.${retryingText}
|
|
6002
6317
|
Error: ${errorText}`;
|
|
6003
6318
|
if (!isRetrying && this.failedPublishPayloadsDir) {
|
|
6004
6319
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
@@ -6052,7 +6367,7 @@ Error: ${errorText}`;
|
|
|
6052
6367
|
try {
|
|
6053
6368
|
const [allItems, allAttachments] = await this.unwrapLazyValues(wrappedItems);
|
|
6054
6369
|
const dataStr = constructLogs3Data(
|
|
6055
|
-
allItems.map((x) =>
|
|
6370
|
+
allItems.map((x) => stringifyWithOverflowMeta(x))
|
|
6056
6371
|
);
|
|
6057
6372
|
const attachmentStr = JSON.stringify(
|
|
6058
6373
|
allAttachments.map((a) => a.debugInfo())
|
|
@@ -6619,23 +6934,144 @@ async function loadPrompt({
|
|
|
6619
6934
|
}
|
|
6620
6935
|
return prompt;
|
|
6621
6936
|
}
|
|
6622
|
-
function
|
|
6623
|
-
|
|
6624
|
-
|
|
6625
|
-
|
|
6626
|
-
|
|
6627
|
-
|
|
6628
|
-
|
|
6629
|
-
|
|
6630
|
-
|
|
6631
|
-
|
|
6632
|
-
|
|
6633
|
-
|
|
6634
|
-
|
|
6635
|
-
|
|
6636
|
-
|
|
6637
|
-
|
|
6638
|
-
"
|
|
6937
|
+
/**
 * Loads a saved parameters object from Braintrust, falling back to the local
 * parameters cache when the server request fails.
 *
 * Lookup is either by `id`, or by `slug` within a project (`projectName` or
 * `projectId`). `version` and `environment` are mutually exclusive. When the
 * request fails and neither `version` nor `environment` was given, the cache is
 * consulted; a successful server load is written back to the cache on a
 * best-effort basis (cache write failures are only logged).
 *
 * @param options Lookup and login options: `projectName`, `projectId`, `slug`,
 *   `version`, `environment`, `id`, `appUrl`, `apiKey`, `orgName`, `fetch`,
 *   `forceLogin`, and `state` (defaults to the global state).
 * @returns The loaded parameters: a `RemoteEvalParameters` on a server hit, or
 *   the cached value on fallback.
 * @throws {Error} On invalid argument combinations, when nothing is found on
 *   the server or in the cache, or when more than one object matches.
 */
async function loadParameters({
  projectName,
  projectId,
  slug,
  version,
  environment,
  id,
  appUrl,
  apiKey,
  orgName,
  fetch: fetch2,
  forceLogin,
  state: stateArg
}) {
  if (version && environment) {
    throw new Error(
      "Cannot specify both 'version' and 'environment' parameters. Please use only one (remove the other)."
    );
  }
  // A lookup by id needs nothing else; otherwise a project and a slug are required.
  if (!id) {
    if (isEmpty2(projectName) && isEmpty2(projectId)) {
      throw new Error("Must specify either projectName or projectId");
    }
    if (isEmpty2(slug)) {
      throw new Error("Must specify slug");
    }
  }
  const state = stateArg ?? _globalState;
  let response;
  try {
    await state.login({
      orgName,
      apiKey,
      appUrl,
      fetch: fetch2,
      forceLogin
    });
    if (id) {
      response = await state.apiConn().get_json(`v1/function/${id}`, {
        ...version && { version },
        ...environment && { environment }
      });
      // Normalize the single-object response to the list shape used below.
      if (response) {
        response = { objects: [response] };
      }
    } else {
      response = await state.apiConn().get_json("v1/function", {
        project_name: projectName,
        project_id: projectId,
        slug,
        version,
        function_type: "parameters",
        ...environment && { environment }
      });
    }
  } catch (e) {
    // A pinned version/environment is never served from the cache.
    if (environment || version) {
      throw new Error(`Parameters not found with specified parameters: ${e}`);
    }
    console.warn(
      "Failed to load parameters, attempting to fall back to cache:",
      e
    );
    let parameters2;
    if (id) {
      parameters2 = await state.parametersCache.get({ id });
      if (!parameters2) {
        throw new Error(
          `Parameters with id ${id} not found (not found on server or in local cache): ${e}`
        );
      }
    } else {
      parameters2 = await state.parametersCache.get({
        slug,
        projectId,
        projectName,
        version: version ?? "latest"
      });
      if (!parameters2) {
        throw new Error(
          `Parameters ${slug} (version ${version ?? "latest"}) not found in ${[
            projectName ?? projectId
          ]} (not found on server or in local cache): ${e}`
        );
      }
    }
    return parameters2;
  }
  // `response` may be null/undefined (e.g. an empty body); treat that like an
  // empty result instead of letting the `in` operator throw a TypeError.
  const objects = response?.objects;
  if (!Array.isArray(objects) || objects.length === 0) {
    if (id) {
      throw new Error(`Parameters with id ${id} not found.`);
    }
    throw new Error(
      `Parameters ${slug} not found in ${[projectName ?? projectId]}`
    );
  }
  if (objects.length > 1) {
    if (id) {
      throw new Error(
        `Multiple parameters found with id ${id}. This should never happen.`
      );
    }
    throw new Error(
      `Multiple parameters found with slug ${slug} in project ${projectName ?? projectId}. This should never happen.`
    );
  }
  const metadata = parametersRowSchema.parse(objects[0]);
  const parameters = new RemoteEvalParameters(metadata);
  try {
    if (id) {
      await state.parametersCache.set({ id }, parameters);
    } else if (slug) {
      await state.parametersCache.set(
        { slug, projectId, projectName, version: version ?? "latest" },
        parameters
      );
    }
  } catch (e) {
    console.warn("Failed to set parameters in cache:", e);
  }
  return parameters;
}
|
|
7058
|
+
/**
 * Forwards `maskingFunction` to `setMaskingFunction` on the global state.
 *
 * @param maskingFunction The value handed to the global state unchanged.
 */
function setMaskingFunction(maskingFunction) {
  const globalState = _globalState;
  globalState.setMaskingFunction(maskingFunction);
}
|
|
7061
|
+
async function login(options = {}) {
|
|
7062
|
+
const { forceLogin = false } = options || {};
|
|
7063
|
+
if (_globalState.loggedIn && !forceLogin) {
|
|
7064
|
+
let checkUpdatedParam2 = function(varname, arg, orig) {
|
|
7065
|
+
if (!isEmpty2(arg) && !isEmpty2(orig) && arg !== orig) {
|
|
7066
|
+
throw new Error(
|
|
7067
|
+
`Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
|
|
7068
|
+
);
|
|
7069
|
+
}
|
|
7070
|
+
};
|
|
7071
|
+
var checkUpdatedParam = checkUpdatedParam2;
|
|
7072
|
+
checkUpdatedParam2("appUrl", options.appUrl, _globalState.appUrl);
|
|
7073
|
+
checkUpdatedParam2(
|
|
7074
|
+
"apiKey",
|
|
6639
7075
|
options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
|
|
6640
7076
|
_globalState.loginToken
|
|
6641
7077
|
);
|
|
@@ -8689,6 +9125,55 @@ var Prompt2 = class _Prompt {
|
|
|
8689
9125
|
);
|
|
8690
9126
|
}
|
|
8691
9127
|
};
|
|
9128
|
+
/**
 * Read-only wrapper around a parameters row.
 *
 * The getters expose fields of the wrapped `metadata` object; `schema` and
 * `data` come from `metadata.function_data`. Instances carry a marker property
 * so they can be recognized with `RemoteEvalParameters.isParameters`.
 */
var RemoteEvalParameters = class {
  /**
   * @param metadata The parameters row to wrap.
   */
  constructor(metadata) {
    this.metadata = metadata;
  }
  // Marker checked by `isParameters`.
  __braintrust_parameters_marker = true;
  get id() {
    return this.metadata.id;
  }
  get projectId() {
    return this.metadata.project_id;
  }
  get name() {
    return this.metadata.name;
  }
  get slug() {
    return this.metadata.slug;
  }
  /** The row's transaction id field, exposed as the version. */
  get version() {
    return this.metadata[TRANSACTION_ID_FIELD];
  }
  get schema() {
    return this.metadata.function_data.__schema;
  }
  /** The stored parameter values, or an empty object when none are stored. */
  get data() {
    return this.metadata.function_data.data ?? {};
  }
  /**
   * Checks that `data` is an object containing every key that the schema both
   * declares under `properties` and lists in `required`.
   *
   * Values are not type-checked. A schema without an object `properties`
   * accepts any object.
   *
   * @param data Candidate parameter values.
   * @returns `false` when `data` is not a non-null object or a declared
   *   required key is missing; `true` otherwise.
   */
  validate(data) {
    if (typeof data !== "object" || data === null) {
      return false;
    }
    const schemaProps = this.schema.properties;
    if (typeof schemaProps !== "object" || schemaProps === null) {
      return true;
    }
    // Built once instead of on every missing key.
    const requiredKeys = new Set(
      Array.isArray(this.schema.required) ? this.schema.required : []
    );
    for (const key of Object.keys(schemaProps)) {
      if (!(key in data) && requiredKeys.has(key)) {
        return false;
      }
    }
    return true;
  }
  /**
   * @param x Any value.
   * @returns `true` when `x` is a non-null object whose marker property is `true`.
   */
  static isParameters(x) {
    return typeof x === "object" && x !== null && "__braintrust_parameters_marker" in x && // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    x.__braintrust_parameters_marker === true;
  }
};
|
|
8692
9177
|
var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
|
|
8693
9178
|
function setInitialTestState() {
|
|
8694
9179
|
if (!_internalGetGlobalState()) {
|
|
@@ -8829,6 +9314,7 @@ __export(exports_exports, {
|
|
|
8829
9314
|
CodePrompt: () => CodePrompt,
|
|
8830
9315
|
ContextManager: () => ContextManager,
|
|
8831
9316
|
DEFAULT_FETCH_BATCH_SIZE: () => DEFAULT_FETCH_BATCH_SIZE,
|
|
9317
|
+
DEFAULT_MAX_REQUEST_SIZE: () => DEFAULT_MAX_REQUEST_SIZE,
|
|
8832
9318
|
Dataset: () => Dataset2,
|
|
8833
9319
|
ERR_PERMALINK: () => ERR_PERMALINK,
|
|
8834
9320
|
Eval: () => Eval,
|
|
@@ -8839,6 +9325,7 @@ __export(exports_exports, {
|
|
|
8839
9325
|
IDGenerator: () => IDGenerator,
|
|
8840
9326
|
JSONAttachment: () => JSONAttachment,
|
|
8841
9327
|
LEGACY_CACHED_HEADER: () => LEGACY_CACHED_HEADER,
|
|
9328
|
+
LOGS3_OVERFLOW_REFERENCE_TYPE: () => LOGS3_OVERFLOW_REFERENCE_TYPE,
|
|
8842
9329
|
LazyValue: () => LazyValue,
|
|
8843
9330
|
Logger: () => Logger,
|
|
8844
9331
|
LoginInvalidOrgError: () => LoginInvalidOrgError,
|
|
@@ -8863,8 +9350,10 @@ __export(exports_exports, {
|
|
|
8863
9350
|
_exportsForTestingOnly: () => _exportsForTestingOnly,
|
|
8864
9351
|
_internalGetGlobalState: () => _internalGetGlobalState,
|
|
8865
9352
|
_internalSetInitialState: () => _internalSetInitialState,
|
|
9353
|
+
addAzureBlobHeaders: () => addAzureBlobHeaders,
|
|
8866
9354
|
braintrustStreamChunkSchema: () => braintrustStreamChunkSchema,
|
|
8867
9355
|
buildLocalSummary: () => buildLocalSummary,
|
|
9356
|
+
constructLogs3OverflowRequest: () => constructLogs3OverflowRequest,
|
|
8868
9357
|
createFinalValuePassThroughStream: () => createFinalValuePassThroughStream,
|
|
8869
9358
|
currentExperiment: () => currentExperiment,
|
|
8870
9359
|
currentLogger: () => currentLogger,
|
|
@@ -8888,15 +9377,18 @@ __export(exports_exports, {
|
|
|
8888
9377
|
initLogger: () => initLogger,
|
|
8889
9378
|
invoke: () => invoke,
|
|
8890
9379
|
isTemplateFormat: () => isTemplateFormat,
|
|
9380
|
+
loadParameters: () => loadParameters,
|
|
8891
9381
|
loadPrompt: () => loadPrompt,
|
|
8892
9382
|
log: () => log,
|
|
8893
9383
|
logError: () => logError,
|
|
8894
9384
|
login: () => login,
|
|
8895
9385
|
loginToState: () => loginToState,
|
|
9386
|
+
logs3OverflowUploadSchema: () => logs3OverflowUploadSchema,
|
|
8896
9387
|
newId: () => newId,
|
|
8897
9388
|
parseCachedHeader: () => parseCachedHeader,
|
|
8898
9389
|
parseTemplateFormat: () => parseTemplateFormat,
|
|
8899
9390
|
permalink: () => permalink,
|
|
9391
|
+
pickLogs3OverflowObjectIds: () => pickLogs3OverflowObjectIds,
|
|
8900
9392
|
projects: () => projects,
|
|
8901
9393
|
promptContentsSchema: () => promptContentsSchema,
|
|
8902
9394
|
promptDefinitionSchema: () => promptDefinitionSchema,
|
|
@@ -8917,6 +9409,8 @@ __export(exports_exports, {
|
|
|
8917
9409
|
traceable: () => traceable,
|
|
8918
9410
|
traced: () => traced,
|
|
8919
9411
|
updateSpan: () => updateSpan,
|
|
9412
|
+
uploadLogs3OverflowPayload: () => uploadLogs3OverflowPayload,
|
|
9413
|
+
utf8ByteLength: () => utf8ByteLength,
|
|
8920
9414
|
withCurrent: () => withCurrent,
|
|
8921
9415
|
withDataset: () => withDataset,
|
|
8922
9416
|
withExperiment: () => withExperiment,
|
|
@@ -12387,6 +12881,189 @@ function wrapMastraAgent(agent, _options) {
|
|
|
12387
12881
|
}
|
|
12388
12882
|
|
|
12389
12883
|
// src/wrappers/claude-agent-sdk/claude-agent-sdk.ts
|
|
12884
|
+
/**
 * Builds span metadata describing the MCP server a tool belongs to.
 *
 * @param serverName Name of the MCP server, if the tool came from one.
 * @param mcpServers Map of server name to server configuration.
 * @returns An object with any of `mcp.type`, `mcp.url`, `mcp.command` and
 *   `mcp.args`; empty when the server is unknown or not given.
 */
function getMcpServerMetadata(serverName, mcpServers) {
  if (!serverName || !mcpServers) {
    return {};
  }
  const serverConfig = mcpServers[serverName];
  if (!serverConfig) {
    return {};
  }
  const metadata = {};
  if (serverConfig.type) {
    metadata["mcp.type"] = serverConfig.type;
  } else if (typeof serverConfig === "object" && "transport" in serverConfig) {
    // A config without an explicit type but with a `transport` is reported as "sdk".
    metadata["mcp.type"] = "sdk";
  }
  if (serverConfig.url) {
    metadata["mcp.url"] = serverConfig.url;
  }
  if (serverConfig.command) {
    metadata["mcp.command"] = serverConfig.command;
    const { args } = serverConfig;
    // Only arrays can be joined; a malformed `args` must not make metadata
    // collection throw.
    if (Array.isArray(args)) {
      metadata["mcp.args"] = args.join(" ");
    } else if (typeof args === "string" && args) {
      metadata["mcp.args"] = args;
    }
  }
  return metadata;
}
|
|
12909
|
+
/**
 * Splits a raw tool name into its display name, tool name and, for names of
 * the form `mcp__<server>__<tool>`, the MCP server name.
 *
 * The server name ends at the first `__` that follows it, so it may contain
 * single underscores (e.g. `mcp__my_server__tool`), and the tool name may
 * itself contain `__`.
 *
 * @param rawToolName The tool name as reported by the SDK.
 * @returns `{ displayName, toolName, rawToolName }`, plus `mcpServer` when the
 *   name matches the MCP form.
 */
function parseToolName(rawToolName) {
  const mcpMatch = rawToolName.match(/^mcp__(.+?)__(.+)$/);
  if (mcpMatch) {
    const [, mcpServer, toolName] = mcpMatch;
    return {
      displayName: `tool: ${mcpServer}/${toolName}`,
      toolName,
      mcpServer,
      rawToolName
    };
  }
  return {
    displayName: `tool: ${rawToolName}`,
    toolName: rawToolName,
    rawToolName
  };
}
|
|
12926
|
+
/**
 * Creates the hook callbacks that trace tool calls as spans.
 *
 * - `preToolUse` starts a tool span (skipping the `Task` tool) and stores it in
 *   `activeToolSpans` under the tool-use id.
 * - `postToolUse` logs the tool response and ends the span. When the tool-use
 *   id belongs to a sub-agent span, that span is logged/ended instead and the
 *   id is recorded in `endedSubAgentSpans`.
 * - `postToolUseFailure` does the same for failures, logging the error.
 *
 * Every callback resolves to `{}` and ignores events of another hook type or
 * without a tool-use id.
 *
 * @param resolveParentSpan Async function mapping a tool-use id to the parent
 *   passed to `startSpan`.
 * @param activeToolSpans Map of tool-use id to the open tool span.
 * @param mcpServers MCP server configurations, used for span metadata.
 * @param subAgentSpans Map of tool-use id to sub-agent span.
 * @param endedSubAgentSpans Set receiving the ids of sub-agent spans ended here.
 * @returns `{ preToolUse, postToolUse, postToolUseFailure }`.
 */
function createToolTracingHooks(resolveParentSpan, activeToolSpans, mcpServers, subAgentSpans, endedSubAgentSpans) {
  const preToolUse = async (input, toolUseID) => {
    if (input.hook_event_name !== "PreToolUse" || !toolUseID) {
      return {};
    }
    // `Task` calls get no tool span here.
    if (input.tool_name === "Task") {
      return {};
    }
    const parsed = parseToolName(input.tool_name);
    const mcpMetadata = getMcpServerMetadata(parsed.mcpServer, mcpServers);
    const parentExport = await resolveParentSpan(toolUseID);
    const toolSpan = startSpan({
      name: parsed.displayName,
      spanAttributes: { type: "tool" /* TOOL */ },
      event: {
        input: input.tool_input,
        metadata: {
          // GenAI semantic conventions
          "gen_ai.tool.name": parsed.toolName,
          "gen_ai.tool.call.id": toolUseID,
          // MCP-specific metadata
          ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
          ...mcpMetadata,
          // Claude SDK metadata
          "claude_agent_sdk.raw_tool_name": parsed.rawToolName,
          "claude_agent_sdk.session_id": input.session_id,
          "claude_agent_sdk.cwd": input.cwd
        }
      },
      parent: parentExport
    });
    activeToolSpans.set(toolUseID, toolSpan);
    return {};
  };
  const postToolUse = async (input, toolUseID) => {
    if (input.hook_event_name !== "PostToolUse" || !toolUseID) {
      return {};
    }
    const subAgentSpan = subAgentSpans.get(toolUseID);
    if (subAgentSpan) {
      try {
        const response = input.tool_response;
        const metadata = {};
        if (response?.status) {
          metadata["claude_agent_sdk.status"] = response.status;
        }
        // Presence check rather than truthiness, so a duration of 0 is kept
        // (matching the tool-use-count check below).
        if (response?.totalDurationMs !== void 0 && response.totalDurationMs !== null) {
          metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs;
        }
        if (response?.totalToolUseCount !== void 0) {
          metadata["claude_agent_sdk.tool_use_count"] = response.totalToolUseCount;
        }
        subAgentSpan.log({
          output: response?.content,
          metadata
        });
      } finally {
        // End the span even when logging throws.
        subAgentSpan.end();
        endedSubAgentSpans.add(toolUseID);
      }
      return {};
    }
    const toolSpan = activeToolSpans.get(toolUseID);
    if (!toolSpan) {
      return {};
    }
    try {
      toolSpan.log({ output: input.tool_response });
    } finally {
      toolSpan.end();
      activeToolSpans.delete(toolUseID);
    }
    return {};
  };
  const postToolUseFailure = async (input, toolUseID) => {
    if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) {
      return {};
    }
    const subAgentSpan = subAgentSpans.get(toolUseID);
    if (subAgentSpan) {
      try {
        subAgentSpan.log({ error: input.error });
      } finally {
        subAgentSpan.end();
        endedSubAgentSpans.add(toolUseID);
      }
      return {};
    }
    const toolSpan = activeToolSpans.get(toolUseID);
    if (!toolSpan) {
      return {};
    }
    const parsed = parseToolName(input.tool_name);
    try {
      toolSpan.log({
        error: input.error,
        metadata: {
          "gen_ai.tool.name": parsed.toolName,
          "gen_ai.tool.call.id": toolUseID,
          ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
          "claude_agent_sdk.is_interrupt": input.is_interrupt,
          "claude_agent_sdk.session_id": input.session_id
        }
      });
    } finally {
      toolSpan.end();
      activeToolSpans.delete(toolUseID);
    }
    return {};
  };
  return { preToolUse, postToolUse, postToolUseFailure };
}
|
|
13038
|
+
/**
 * Returns a copy of `options` whose `hooks` additionally contain the tracing
 * hooks for `PreToolUse`, `PostToolUse` and `PostToolUseFailure`.
 *
 * Hooks already configured under those events are kept and run first in the
 * list; the tracing entry is appended after them. `options` is not mutated.
 *
 * @param options Query options; `mcpServers` and `hooks` are read.
 * @param resolveParentSpan Passed through to `createToolTracingHooks`.
 * @param activeToolSpans Passed through to `createToolTracingHooks`.
 * @param subAgentSpans Passed through to `createToolTracingHooks`.
 * @param endedSubAgentSpans Passed through to `createToolTracingHooks`.
 * @returns The new options object.
 */
function injectTracingHooks(options, resolveParentSpan, activeToolSpans, subAgentSpans, endedSubAgentSpans) {
  const tracingHooks = createToolTracingHooks(
    resolveParentSpan,
    activeToolSpans,
    options.mcpServers,
    subAgentSpans,
    endedSubAgentSpans
  );
  const configuredHooks = options.hooks ?? {};
  // Appends one matcher entry holding `hook` after any configured entries.
  const withTracing = (configured, hook) => [...configured ?? [], { hooks: [hook] }];
  const hooks = {
    ...configuredHooks,
    PreToolUse: withTracing(configuredHooks.PreToolUse, tracingHooks.preToolUse),
    PostToolUse: withTracing(configuredHooks.PostToolUse, tracingHooks.postToolUse),
    PostToolUseFailure: withTracing(
      configuredHooks.PostToolUseFailure,
      tracingHooks.postToolUseFailure
    )
  };
  return { ...options, hooks };
}
|
|
12390
13067
|
function filterSerializableOptions(options) {
|
|
12391
13068
|
const allowedKeys = [
|
|
12392
13069
|
"model",
|
|
@@ -12411,18 +13088,45 @@ function filterSerializableOptions(options) {
|
|
|
12411
13088
|
}
|
|
12412
13089
|
return filtered;
|
|
12413
13090
|
}
|
|
13091
|
+
/**
 * Tells whether `value` exposes a `Symbol.asyncIterator` method.
 *
 * @param value Any value.
 * @returns `true` when `value` is neither `null` nor `undefined` and its
 *   `Symbol.asyncIterator` property is a function.
 */
function isAsyncIterable(value) {
  if (value === null || value === void 0) {
    return false;
  }
  const iteratorFactory = value[Symbol.asyncIterator];
  return typeof iteratorFactory === "function";
}
|
|
12414
13094
|
function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
12415
13095
|
const proxy = new Proxy(queryFn, {
|
|
12416
13096
|
apply(target, thisArg, argArray) {
|
|
12417
13097
|
const params = argArray[0] ?? {};
|
|
12418
13098
|
const { prompt, options = {} } = params;
|
|
13099
|
+
const promptIsAsyncIterable = isAsyncIterable(prompt);
|
|
13100
|
+
let capturedPromptMessages;
|
|
13101
|
+
let promptForQuery = prompt;
|
|
13102
|
+
let promptStarted = false;
|
|
13103
|
+
let resolvePromptDone;
|
|
13104
|
+
const promptDone = new Promise((resolve) => {
|
|
13105
|
+
resolvePromptDone = resolve;
|
|
13106
|
+
});
|
|
13107
|
+
if (promptIsAsyncIterable) {
|
|
13108
|
+
capturedPromptMessages = [];
|
|
13109
|
+
const originalPrompt = prompt;
|
|
13110
|
+
const capturingPrompt = (async function* () {
|
|
13111
|
+
promptStarted = true;
|
|
13112
|
+
try {
|
|
13113
|
+
for await (const msg of originalPrompt) {
|
|
13114
|
+
capturedPromptMessages.push(msg);
|
|
13115
|
+
yield msg;
|
|
13116
|
+
}
|
|
13117
|
+
} finally {
|
|
13118
|
+
resolvePromptDone?.();
|
|
13119
|
+
}
|
|
13120
|
+
})();
|
|
13121
|
+
promptForQuery = capturingPrompt;
|
|
13122
|
+
}
|
|
12419
13123
|
const span = startSpan({
|
|
12420
13124
|
name: "Claude Agent",
|
|
12421
13125
|
spanAttributes: {
|
|
12422
13126
|
type: "task" /* TASK */
|
|
12423
13127
|
},
|
|
12424
13128
|
event: {
|
|
12425
|
-
input: typeof prompt === "string" ? prompt :
|
|
13129
|
+
input: typeof prompt === "string" ? prompt : promptIsAsyncIterable ? void 0 : prompt !== void 0 ? String(prompt) : void 0,
|
|
12426
13130
|
metadata: filterSerializableOptions(options)
|
|
12427
13131
|
}
|
|
12428
13132
|
});
|
|
@@ -12433,13 +13137,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12433
13137
|
let currentMessageStartTime = getCurrentUnixTimestamp();
|
|
12434
13138
|
const currentMessages = [];
|
|
12435
13139
|
const createLLMSpan = async () => {
|
|
13140
|
+
const parentToolUseId = currentMessages[0]?.parent_tool_use_id ?? null;
|
|
13141
|
+
let parentSpanExport;
|
|
13142
|
+
if (parentToolUseId) {
|
|
13143
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
13144
|
+
parentSpanExport = subAgentSpan ? await subAgentSpan.export() : await span.export();
|
|
13145
|
+
} else {
|
|
13146
|
+
parentSpanExport = await span.export();
|
|
13147
|
+
}
|
|
12436
13148
|
const finalMessageContent = await _createLLMSpanForMessages(
|
|
12437
13149
|
currentMessages,
|
|
12438
13150
|
prompt,
|
|
12439
13151
|
finalResults,
|
|
12440
13152
|
options,
|
|
12441
13153
|
currentMessageStartTime,
|
|
12442
|
-
|
|
13154
|
+
capturedPromptMessages,
|
|
13155
|
+
parentSpanExport
|
|
12443
13156
|
);
|
|
12444
13157
|
if (finalMessageContent) {
|
|
12445
13158
|
finalResults.push(finalMessageContent);
|
|
@@ -12452,14 +13165,78 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12452
13165
|
currentMessages.length = 0;
|
|
12453
13166
|
};
|
|
12454
13167
|
const invocationTarget = thisArg === proxy || thisArg === void 0 ? defaultThis ?? thisArg : thisArg;
|
|
13168
|
+
const activeToolSpans = /* @__PURE__ */ new Map();
|
|
13169
|
+
const subAgentSpans = /* @__PURE__ */ new Map();
|
|
13170
|
+
const endedSubAgentSpans = /* @__PURE__ */ new Set();
|
|
13171
|
+
const toolUseToParent = /* @__PURE__ */ new Map();
|
|
13172
|
+
const pendingSubAgentNames = /* @__PURE__ */ new Map();
|
|
13173
|
+
const resolveParentSpan = async (toolUseID) => {
|
|
13174
|
+
const parentToolUseId = toolUseToParent.get(toolUseID);
|
|
13175
|
+
if (parentToolUseId) {
|
|
13176
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
13177
|
+
if (subAgentSpan) {
|
|
13178
|
+
return subAgentSpan.export();
|
|
13179
|
+
}
|
|
13180
|
+
}
|
|
13181
|
+
return span.export();
|
|
13182
|
+
};
|
|
13183
|
+
const optionsWithHooks = injectTracingHooks(
|
|
13184
|
+
options,
|
|
13185
|
+
resolveParentSpan,
|
|
13186
|
+
activeToolSpans,
|
|
13187
|
+
subAgentSpans,
|
|
13188
|
+
endedSubAgentSpans
|
|
13189
|
+
);
|
|
13190
|
+
const modifiedArgArray = [
|
|
13191
|
+
{
|
|
13192
|
+
...params,
|
|
13193
|
+
...promptForQuery !== void 0 ? { prompt: promptForQuery } : {},
|
|
13194
|
+
options: optionsWithHooks
|
|
13195
|
+
}
|
|
13196
|
+
];
|
|
12455
13197
|
const originalGenerator = withCurrent(
|
|
12456
13198
|
span,
|
|
12457
|
-
() => Reflect.apply(target, invocationTarget,
|
|
13199
|
+
() => Reflect.apply(target, invocationTarget, modifiedArgArray)
|
|
12458
13200
|
);
|
|
12459
13201
|
const wrappedGenerator = (async function* () {
|
|
12460
13202
|
try {
|
|
12461
13203
|
for await (const message of originalGenerator) {
|
|
12462
13204
|
const currentTime = getCurrentUnixTimestamp();
|
|
13205
|
+
if (message.type === "assistant" && Array.isArray(message.message?.content)) {
|
|
13206
|
+
const parentToolUseId = message.parent_tool_use_id ?? null;
|
|
13207
|
+
for (const block of message.message.content) {
|
|
13208
|
+
if (block.type === "tool_use" && block.id) {
|
|
13209
|
+
toolUseToParent.set(block.id, parentToolUseId);
|
|
13210
|
+
if (block.name === "Task" && block.input?.subagent_type) {
|
|
13211
|
+
pendingSubAgentNames.set(
|
|
13212
|
+
block.id,
|
|
13213
|
+
block.input.subagent_type
|
|
13214
|
+
);
|
|
13215
|
+
}
|
|
13216
|
+
}
|
|
13217
|
+
}
|
|
13218
|
+
}
|
|
13219
|
+
if ("parent_tool_use_id" in message) {
|
|
13220
|
+
const parentToolUseId = message.parent_tool_use_id;
|
|
13221
|
+
if (parentToolUseId && !subAgentSpans.has(parentToolUseId)) {
|
|
13222
|
+
const agentName = pendingSubAgentNames.get(parentToolUseId);
|
|
13223
|
+
const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent";
|
|
13224
|
+
const parentExport = await span.export();
|
|
13225
|
+
const subAgentSpan = startSpan({
|
|
13226
|
+
name: spanName,
|
|
13227
|
+
spanAttributes: { type: "task" /* TASK */ },
|
|
13228
|
+
event: {
|
|
13229
|
+
metadata: {
|
|
13230
|
+
...agentName && {
|
|
13231
|
+
"claude_agent_sdk.agent_type": agentName
|
|
13232
|
+
}
|
|
13233
|
+
}
|
|
13234
|
+
},
|
|
13235
|
+
parent: parentExport
|
|
13236
|
+
});
|
|
13237
|
+
subAgentSpans.set(parentToolUseId, subAgentSpan);
|
|
13238
|
+
}
|
|
13239
|
+
}
|
|
12463
13240
|
const messageId = message.message?.id;
|
|
12464
13241
|
if (messageId && messageId !== currentMessageId) {
|
|
12465
13242
|
await createLLMSpan();
|
|
@@ -12505,6 +13282,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12505
13282
|
});
|
|
12506
13283
|
throw error;
|
|
12507
13284
|
} finally {
|
|
13285
|
+
for (const [id, subSpan] of subAgentSpans) {
|
|
13286
|
+
if (!endedSubAgentSpans.has(id)) {
|
|
13287
|
+
subSpan.end();
|
|
13288
|
+
}
|
|
13289
|
+
}
|
|
13290
|
+
subAgentSpans.clear();
|
|
13291
|
+
if (capturedPromptMessages) {
|
|
13292
|
+
if (promptStarted) {
|
|
13293
|
+
await promptDone;
|
|
13294
|
+
}
|
|
13295
|
+
if (capturedPromptMessages.length > 0) {
|
|
13296
|
+
span.log({
|
|
13297
|
+
input: _formatCapturedMessages(capturedPromptMessages)
|
|
13298
|
+
});
|
|
13299
|
+
}
|
|
13300
|
+
}
|
|
12508
13301
|
span.end();
|
|
12509
13302
|
}
|
|
12510
13303
|
})();
|
|
@@ -12532,43 +13325,25 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12532
13325
|
});
|
|
12533
13326
|
return proxy;
|
|
12534
13327
|
}
|
|
12535
|
-
function
|
|
12536
|
-
const
|
|
12537
|
-
|
|
12538
|
-
|
|
12539
|
-
|
|
12540
|
-
|
|
12541
|
-
|
|
12542
|
-
|
|
12543
|
-
|
|
12544
|
-
}
|
|
12545
|
-
});
|
|
12546
|
-
const result = await originalHandler(args, extra);
|
|
12547
|
-
span.log({
|
|
12548
|
-
output: result
|
|
12549
|
-
});
|
|
12550
|
-
return result;
|
|
12551
|
-
},
|
|
12552
|
-
{
|
|
12553
|
-
name: `${toolDef.name}`,
|
|
12554
|
-
spanAttributes: {
|
|
12555
|
-
type: "tool" /* TOOL */
|
|
13328
|
+
function _buildLLMInput(prompt, conversationHistory, capturedPromptMessages) {
|
|
13329
|
+
const promptMessages = [];
|
|
13330
|
+
if (typeof prompt === "string") {
|
|
13331
|
+
promptMessages.push({ content: prompt, role: "user" });
|
|
13332
|
+
} else if (capturedPromptMessages && capturedPromptMessages.length > 0) {
|
|
13333
|
+
for (const msg of capturedPromptMessages) {
|
|
13334
|
+
const role = msg.message?.role;
|
|
13335
|
+
const content = msg.message?.content;
|
|
13336
|
+
if (role && content !== void 0) {
|
|
13337
|
+
promptMessages.push({ content, role });
|
|
12556
13338
|
}
|
|
12557
13339
|
}
|
|
12558
|
-
|
|
12559
|
-
|
|
12560
|
-
...toolDef,
|
|
12561
|
-
handler: wrappedHandler
|
|
12562
|
-
};
|
|
12563
|
-
}
|
|
12564
|
-
function _buildLLMInput(prompt, conversationHistory) {
|
|
12565
|
-
const promptMessage = typeof prompt === "string" ? { content: prompt, role: "user" } : void 0;
|
|
12566
|
-
const inputParts = [
|
|
12567
|
-
...promptMessage ? [promptMessage] : [],
|
|
12568
|
-
...conversationHistory
|
|
12569
|
-
];
|
|
13340
|
+
}
|
|
13341
|
+
const inputParts = [...promptMessages, ...conversationHistory];
|
|
12570
13342
|
return inputParts.length > 0 ? inputParts : void 0;
|
|
12571
13343
|
}
|
|
13344
|
+
function _formatCapturedMessages(messages) {
|
|
13345
|
+
return messages.length > 0 ? messages : [];
|
|
13346
|
+
}
|
|
12572
13347
|
function _extractUsageFromMessage(message) {
|
|
12573
13348
|
const metrics = {};
|
|
12574
13349
|
let usage;
|
|
@@ -12602,7 +13377,7 @@ function _extractUsageFromMessage(message) {
|
|
|
12602
13377
|
}
|
|
12603
13378
|
return metrics;
|
|
12604
13379
|
}
|
|
12605
|
-
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, parentSpan) {
|
|
13380
|
+
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, capturedPromptMessages, parentSpan) {
|
|
12606
13381
|
if (messages.length === 0) return void 0;
|
|
12607
13382
|
const lastMessage = messages[messages.length - 1];
|
|
12608
13383
|
if (lastMessage.type !== "assistant" || !lastMessage.message?.usage) {
|
|
@@ -12610,7 +13385,11 @@ async function _createLLMSpanForMessages(messages, prompt, conversationHistory,
|
|
|
12610
13385
|
}
|
|
12611
13386
|
const model = lastMessage.message.model || options.model;
|
|
12612
13387
|
const usage = _extractUsageFromMessage(lastMessage);
|
|
12613
|
-
const input = _buildLLMInput(
|
|
13388
|
+
const input = _buildLLMInput(
|
|
13389
|
+
prompt,
|
|
13390
|
+
conversationHistory,
|
|
13391
|
+
capturedPromptMessages
|
|
13392
|
+
);
|
|
12614
13393
|
const outputs = messages.map(
|
|
12615
13394
|
(m) => m.message?.content && m.message?.role ? { content: m.message.content, role: m.message.role } : void 0
|
|
12616
13395
|
).filter((c) => c !== void 0);
|
|
@@ -12651,25 +13430,9 @@ function wrapClaudeAgentSDK(sdk) {
|
|
|
12651
13430
|
return wrappedQuery;
|
|
12652
13431
|
}
|
|
12653
13432
|
if (prop === "tool" && typeof value === "function") {
|
|
12654
|
-
const
|
|
12655
|
-
|
|
12656
|
-
|
|
12657
|
-
const invocationTarget = thisArg === receiver || thisArg === void 0 ? target : thisArg;
|
|
12658
|
-
const toolDef = Reflect.apply(
|
|
12659
|
-
toolTarget,
|
|
12660
|
-
invocationTarget,
|
|
12661
|
-
argArray
|
|
12662
|
-
);
|
|
12663
|
-
if (toolDef && typeof toolDef === "object" && "handler" in toolDef) {
|
|
12664
|
-
return wrapClaudeAgentTool(
|
|
12665
|
-
toolDef
|
|
12666
|
-
);
|
|
12667
|
-
}
|
|
12668
|
-
return toolDef;
|
|
12669
|
-
}
|
|
12670
|
-
});
|
|
12671
|
-
cache.set(prop, wrappedToolFactory);
|
|
12672
|
-
return wrappedToolFactory;
|
|
13433
|
+
const bound = value.bind(target);
|
|
13434
|
+
cache.set(prop, bound);
|
|
13435
|
+
return bound;
|
|
12673
13436
|
}
|
|
12674
13437
|
if (typeof value === "function") {
|
|
12675
13438
|
const bound = value.bind(target);
|
|
@@ -13454,7 +14217,7 @@ function isAsync(fn) {
|
|
|
13454
14217
|
function isAsyncGenerator2(fn) {
|
|
13455
14218
|
return fn[Symbol.toStringTag] === "AsyncGenerator";
|
|
13456
14219
|
}
|
|
13457
|
-
function
|
|
14220
|
+
function isAsyncIterable2(obj) {
|
|
13458
14221
|
return typeof obj[Symbol.asyncIterator] === "function";
|
|
13459
14222
|
}
|
|
13460
14223
|
function wrapAsync(asyncFn) {
|
|
@@ -13625,7 +14388,7 @@ var eachOfLimit$2 = (limit) => {
|
|
|
13625
14388
|
if (isAsyncGenerator2(obj)) {
|
|
13626
14389
|
return asyncEachOfLimit(obj, limit, iteratee, callback);
|
|
13627
14390
|
}
|
|
13628
|
-
if (
|
|
14391
|
+
if (isAsyncIterable2(obj)) {
|
|
13629
14392
|
return asyncEachOfLimit(obj[Symbol.asyncIterator](), limit, iteratee, callback);
|
|
13630
14393
|
}
|
|
13631
14394
|
var nextElem = createIterator(obj);
|
|
@@ -14554,6 +15317,7 @@ var LocalTrace = class {
|
|
|
14554
15317
|
spansFlushed = false;
|
|
14555
15318
|
spansFlushPromise = null;
|
|
14556
15319
|
cachedFetcher;
|
|
15320
|
+
threadCache = /* @__PURE__ */ new Map();
|
|
14557
15321
|
constructor({
|
|
14558
15322
|
objectType,
|
|
14559
15323
|
objectId,
|
|
@@ -14624,6 +15388,36 @@ var LocalTrace = class {
|
|
|
14624
15388
|
}
|
|
14625
15389
|
return this.cachedFetcher.getSpans({ spanType });
|
|
14626
15390
|
}
|
|
15391
|
+
/**
|
|
15392
|
+
* Get the thread (preprocessed messages) for this trace.
|
|
15393
|
+
* Calls the API with the project_default preprocessor (which falls back to "thread").
|
|
15394
|
+
*/
|
|
15395
|
+
async getThread(options) {
|
|
15396
|
+
const cacheKey = options?.preprocessor ?? "project_default";
|
|
15397
|
+
if (!this.threadCache.has(cacheKey)) {
|
|
15398
|
+
const promise = this.fetchThread(options);
|
|
15399
|
+
this.threadCache.set(cacheKey, promise);
|
|
15400
|
+
}
|
|
15401
|
+
return this.threadCache.get(cacheKey);
|
|
15402
|
+
}
|
|
15403
|
+
async fetchThread(options) {
|
|
15404
|
+
await this.ensureSpansReady();
|
|
15405
|
+
await this.state.login({});
|
|
15406
|
+
const result = await invoke({
|
|
15407
|
+
globalFunction: options?.preprocessor ?? "project_default",
|
|
15408
|
+
functionType: "preprocessor",
|
|
15409
|
+
input: {
|
|
15410
|
+
trace_ref: {
|
|
15411
|
+
object_type: this.objectType,
|
|
15412
|
+
object_id: this.objectId,
|
|
15413
|
+
root_span_id: this.rootSpanId
|
|
15414
|
+
}
|
|
15415
|
+
},
|
|
15416
|
+
mode: "json",
|
|
15417
|
+
state: this.state
|
|
15418
|
+
});
|
|
15419
|
+
return Array.isArray(result) ? result : [];
|
|
15420
|
+
}
|
|
14627
15421
|
async ensureSpansReady() {
|
|
14628
15422
|
if (this.spansFlushed || !this.ensureSpansFlushed) {
|
|
14629
15423
|
return;
|
|
@@ -14658,648 +15452,371 @@ var SimpleProgressReporter = class {
|
|
|
14658
15452
|
|
|
14659
15453
|
// src/eval-parameters.ts
|
|
14660
15454
|
import { z as z10 } from "zod/v3";
|
|
15455
|
+
import Ajv from "ajv";
|
|
14661
15456
|
|
|
14662
|
-
// src/
|
|
15457
|
+
// src/prompt-schemas.ts
|
|
14663
15458
|
import { z as z9 } from "zod/v3";
|
|
14664
|
-
var
|
|
14665
|
-
|
|
14666
|
-
|
|
14667
|
-
|
|
15459
|
+
var promptContentsSchema = z9.union([
|
|
15460
|
+
z9.object({
|
|
15461
|
+
prompt: z9.string()
|
|
15462
|
+
}),
|
|
15463
|
+
z9.object({
|
|
15464
|
+
messages: z9.array(ChatCompletionMessageParam)
|
|
15465
|
+
})
|
|
15466
|
+
]);
|
|
15467
|
+
var promptDefinitionSchema = promptContentsSchema.and(
|
|
15468
|
+
z9.object({
|
|
15469
|
+
model: z9.string(),
|
|
15470
|
+
params: ModelParams.optional(),
|
|
15471
|
+
templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
|
|
15472
|
+
})
|
|
15473
|
+
);
|
|
15474
|
+
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
|
|
15475
|
+
z9.object({
|
|
15476
|
+
tools: z9.array(ToolFunctionDefinition).optional()
|
|
15477
|
+
})
|
|
15478
|
+
);
|
|
15479
|
+
function promptDefinitionToPromptData(promptDefinition, rawTools) {
|
|
15480
|
+
const promptBlock = "messages" in promptDefinition ? {
|
|
15481
|
+
type: "chat",
|
|
15482
|
+
messages: promptDefinition.messages,
|
|
15483
|
+
tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
|
|
15484
|
+
} : {
|
|
15485
|
+
type: "completion",
|
|
15486
|
+
content: promptDefinition.prompt
|
|
15487
|
+
};
|
|
15488
|
+
return {
|
|
15489
|
+
prompt: promptBlock,
|
|
15490
|
+
options: {
|
|
15491
|
+
model: promptDefinition.model,
|
|
15492
|
+
params: promptDefinition.params
|
|
15493
|
+
},
|
|
15494
|
+
...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
|
|
15495
|
+
};
|
|
15496
|
+
}
|
|
15497
|
+
|
|
15498
|
+
// src/eval-parameters.ts
|
|
15499
|
+
var evalParametersSchema = z10.record(
|
|
15500
|
+
z10.string(),
|
|
15501
|
+
z10.union([
|
|
15502
|
+
z10.object({
|
|
15503
|
+
type: z10.literal("prompt"),
|
|
15504
|
+
default: promptDefinitionWithToolsSchema.optional(),
|
|
15505
|
+
description: z10.string().optional()
|
|
15506
|
+
}),
|
|
15507
|
+
z10.instanceof(z10.ZodType)
|
|
15508
|
+
// For Zod schemas
|
|
15509
|
+
])
|
|
15510
|
+
);
|
|
15511
|
+
async function validateParameters(parameters, parameterSchema) {
|
|
15512
|
+
let resolvedSchema = parameterSchema;
|
|
15513
|
+
if (resolvedSchema instanceof Promise) {
|
|
15514
|
+
resolvedSchema = await resolvedSchema;
|
|
15515
|
+
}
|
|
15516
|
+
if (resolvedSchema === void 0 || resolvedSchema === null) {
|
|
15517
|
+
return parameters;
|
|
15518
|
+
}
|
|
15519
|
+
if (RemoteEvalParameters.isParameters(resolvedSchema)) {
|
|
15520
|
+
const mergedParameters = parameters && Object.keys(parameters).length > 0 ? {
|
|
15521
|
+
...resolvedSchema.data,
|
|
15522
|
+
...parameters
|
|
15523
|
+
} : resolvedSchema.data;
|
|
15524
|
+
return validateParametersWithJsonSchema(
|
|
15525
|
+
mergedParameters,
|
|
15526
|
+
resolvedSchema.schema
|
|
15527
|
+
);
|
|
14668
15528
|
}
|
|
14669
|
-
|
|
14670
|
-
|
|
14671
|
-
|
|
14672
|
-
|
|
14673
|
-
|
|
14674
|
-
|
|
14675
|
-
|
|
14676
|
-
|
|
14677
|
-
|
|
14678
|
-
|
|
14679
|
-
|
|
14680
|
-
|
|
14681
|
-
|
|
14682
|
-
|
|
14683
|
-
|
|
14684
|
-
|
|
14685
|
-
|
|
15529
|
+
return validateParametersWithZod(
|
|
15530
|
+
parameters,
|
|
15531
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15532
|
+
resolvedSchema
|
|
15533
|
+
);
|
|
15534
|
+
}
|
|
15535
|
+
function validateParametersWithZod(parameters, parameterSchema) {
|
|
15536
|
+
return Object.fromEntries(
|
|
15537
|
+
Object.entries(parameterSchema).map(([name, schema]) => {
|
|
15538
|
+
const value = parameters[name];
|
|
15539
|
+
try {
|
|
15540
|
+
if ("type" in schema && schema.type === "prompt") {
|
|
15541
|
+
const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
|
|
15542
|
+
schema.default,
|
|
15543
|
+
schema.default.tools
|
|
15544
|
+
) : void 0;
|
|
15545
|
+
if (!promptData) {
|
|
15546
|
+
throw new Error(`Parameter '${name}' is required`);
|
|
15547
|
+
}
|
|
15548
|
+
return [name, Prompt2.fromPromptData(name, promptData)];
|
|
15549
|
+
} else {
|
|
15550
|
+
const schemaCasted = schema;
|
|
15551
|
+
return [name, schemaCasted.parse(value)];
|
|
15552
|
+
}
|
|
15553
|
+
} catch (e) {
|
|
15554
|
+
console.error("Error validating parameter", name, e);
|
|
15555
|
+
throw Error(
|
|
15556
|
+
`Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
|
|
15557
|
+
);
|
|
15558
|
+
}
|
|
15559
|
+
})
|
|
15560
|
+
);
|
|
15561
|
+
}
|
|
15562
|
+
function validateParametersWithJsonSchema(parameters, schema) {
|
|
15563
|
+
const ajv = new Ajv({ coerceTypes: true, useDefaults: true, strict: false });
|
|
15564
|
+
const validate = ajv.compile(schema);
|
|
15565
|
+
if (!validate(parameters)) {
|
|
15566
|
+
const errorMessages = validate.errors?.map((err) => {
|
|
15567
|
+
const path2 = err.instancePath || "root";
|
|
15568
|
+
return `${path2}: ${err.message}`;
|
|
15569
|
+
}).join(", ");
|
|
15570
|
+
throw Error(`Invalid parameters: ${errorMessages}`);
|
|
14686
15571
|
}
|
|
14687
|
-
|
|
14688
|
-
|
|
14689
|
-
|
|
14690
|
-
|
|
14691
|
-
|
|
15572
|
+
return parameters;
|
|
15573
|
+
}
|
|
15574
|
+
|
|
15575
|
+
// src/framework.ts
|
|
15576
|
+
function BaseExperiment(options = {}) {
|
|
15577
|
+
return { _type: "BaseExperiment", ...options };
|
|
15578
|
+
}
|
|
15579
|
+
var EvalResultWithSummary = class {
|
|
15580
|
+
constructor(summary, results) {
|
|
15581
|
+
this.summary = summary;
|
|
15582
|
+
this.results = results;
|
|
14692
15583
|
}
|
|
14693
|
-
|
|
14694
|
-
|
|
14695
|
-
|
|
14696
|
-
|
|
14697
|
-
|
|
15584
|
+
/**
|
|
15585
|
+
* @deprecated Use `summary` instead.
|
|
15586
|
+
*/
|
|
15587
|
+
toString() {
|
|
15588
|
+
return JSON.stringify(this.summary);
|
|
14698
15589
|
}
|
|
14699
|
-
|
|
14700
|
-
|
|
14701
|
-
|
|
14702
|
-
|
|
14703
|
-
|
|
14704
|
-
|
|
14705
|
-
|
|
14706
|
-
|
|
14707
|
-
if (this._publishableCodeFunctions.length > 0) {
|
|
14708
|
-
console.warn(
|
|
14709
|
-
"Code functions cannot be published directly. Use `braintrust push` instead."
|
|
14710
|
-
);
|
|
14711
|
-
}
|
|
14712
|
-
if (this._publishablePrompts.length > 0) {
|
|
14713
|
-
for (const prompt of this._publishablePrompts) {
|
|
14714
|
-
const functionDefinition = await prompt.toFunctionDefinition(projectMap);
|
|
14715
|
-
functionDefinitions.push(functionDefinition);
|
|
14716
|
-
}
|
|
14717
|
-
}
|
|
14718
|
-
await _internalGetGlobalState().apiConn().post_json("insert-functions", {
|
|
14719
|
-
functions: functionDefinitions
|
|
14720
|
-
});
|
|
15590
|
+
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
15591
|
+
return `EvalResultWithSummary(summary="...", results=[...])`;
|
|
15592
|
+
}
|
|
15593
|
+
toJSON() {
|
|
15594
|
+
return {
|
|
15595
|
+
summary: this.summary,
|
|
15596
|
+
results: this.results
|
|
15597
|
+
};
|
|
14721
15598
|
}
|
|
14722
15599
|
};
|
|
14723
|
-
|
|
14724
|
-
|
|
14725
|
-
|
|
15600
|
+
function makeEvalName(projectName, experimentName) {
|
|
15601
|
+
let out = projectName;
|
|
15602
|
+
if (experimentName) {
|
|
15603
|
+
out += ` [experimentName=${experimentName}]`;
|
|
14726
15604
|
}
|
|
14727
|
-
|
|
14728
|
-
|
|
14729
|
-
|
|
14730
|
-
|
|
14731
|
-
|
|
14732
|
-
|
|
14733
|
-
|
|
14734
|
-
|
|
14735
|
-
|
|
14736
|
-
|
|
14737
|
-
|
|
14738
|
-
|
|
14739
|
-
|
|
14740
|
-
|
|
14741
|
-
slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
|
|
14742
|
-
type: "tool",
|
|
14743
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
14744
|
-
parameters,
|
|
14745
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
14746
|
-
returns,
|
|
14747
|
-
...rest
|
|
14748
|
-
});
|
|
14749
|
-
this.project.addCodeFunction(tool);
|
|
14750
|
-
return tool;
|
|
15605
|
+
return out;
|
|
15606
|
+
}
|
|
15607
|
+
function initExperiment2(state, options = {}) {
|
|
15608
|
+
return init({
|
|
15609
|
+
state,
|
|
15610
|
+
...options,
|
|
15611
|
+
setCurrent: false
|
|
15612
|
+
});
|
|
15613
|
+
}
|
|
15614
|
+
function callEvaluatorData(data) {
|
|
15615
|
+
const dataResult = typeof data === "function" ? data() : data;
|
|
15616
|
+
let baseExperiment = void 0;
|
|
15617
|
+
if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
|
|
15618
|
+
baseExperiment = dataResult.name;
|
|
14751
15619
|
}
|
|
15620
|
+
return {
|
|
15621
|
+
data: dataResult,
|
|
15622
|
+
baseExperiment
|
|
15623
|
+
};
|
|
15624
|
+
}
|
|
15625
|
+
function isAsyncIterable3(value) {
|
|
15626
|
+
return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
|
|
15627
|
+
}
|
|
15628
|
+
function isIterable(value) {
|
|
15629
|
+
return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
|
|
15630
|
+
}
|
|
15631
|
+
globalThis._evals = {
|
|
15632
|
+
functions: [],
|
|
15633
|
+
prompts: [],
|
|
15634
|
+
parameters: [],
|
|
15635
|
+
evaluators: {},
|
|
15636
|
+
reporters: {}
|
|
14752
15637
|
};
|
|
14753
|
-
|
|
14754
|
-
|
|
14755
|
-
|
|
15638
|
+
function _initializeSpanContext() {
|
|
15639
|
+
globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
|
|
15640
|
+
}
|
|
15641
|
+
async function Eval(name, evaluator, reporterOrOpts) {
|
|
15642
|
+
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
15643
|
+
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
15644
|
+
if (globalThis._evals.evaluators[evalName]) {
|
|
15645
|
+
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
14756
15646
|
}
|
|
14757
|
-
|
|
14758
|
-
|
|
14759
|
-
|
|
14760
|
-
|
|
14761
|
-
|
|
14762
|
-
|
|
15647
|
+
if (globalThis._lazy_load) {
|
|
15648
|
+
globalThis._evals.evaluators[evalName] = {
|
|
15649
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15650
|
+
evaluator: {
|
|
15651
|
+
evalName,
|
|
15652
|
+
projectName: name,
|
|
15653
|
+
...evaluator
|
|
15654
|
+
},
|
|
15655
|
+
reporter: options.reporter
|
|
15656
|
+
};
|
|
15657
|
+
_initializeSpanContext();
|
|
15658
|
+
return new EvalResultWithSummary(
|
|
15659
|
+
{
|
|
15660
|
+
scores: {},
|
|
15661
|
+
metrics: {},
|
|
15662
|
+
projectName: "",
|
|
15663
|
+
experimentName: ""
|
|
15664
|
+
},
|
|
15665
|
+
[]
|
|
15666
|
+
);
|
|
15667
|
+
}
|
|
15668
|
+
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
15669
|
+
const shouldCollectResults = options.returnResults ?? true;
|
|
15670
|
+
if (typeof options.reporter === "string") {
|
|
15671
|
+
throw new Error(
|
|
15672
|
+
"Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
|
|
15673
|
+
);
|
|
15674
|
+
}
|
|
15675
|
+
const resolvedReporter = options.reporter || defaultReporter;
|
|
15676
|
+
try {
|
|
15677
|
+
const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
|
|
15678
|
+
evaluator.data
|
|
15679
|
+
);
|
|
15680
|
+
const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
|
|
15681
|
+
...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
|
|
15682
|
+
experiment: evaluator.experimentName,
|
|
15683
|
+
description: evaluator.description,
|
|
15684
|
+
metadata: evaluator.metadata,
|
|
15685
|
+
isPublic: evaluator.isPublic,
|
|
15686
|
+
update: evaluator.update,
|
|
15687
|
+
baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
|
|
15688
|
+
baseExperimentId: evaluator.baseExperimentId,
|
|
15689
|
+
gitMetadataSettings: evaluator.gitMetadataSettings,
|
|
15690
|
+
repoInfo: evaluator.repoInfo,
|
|
15691
|
+
dataset: Dataset2.isDataset(data) ? data : void 0
|
|
15692
|
+
});
|
|
15693
|
+
if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
|
|
15694
|
+
await experiment._waitForId();
|
|
14763
15695
|
}
|
|
14764
|
-
if (
|
|
14765
|
-
|
|
15696
|
+
if (experiment && options.onStart) {
|
|
15697
|
+
const summary = await experiment.summarize({ summarizeScores: false });
|
|
15698
|
+
options.onStart(summary);
|
|
14766
15699
|
}
|
|
14767
|
-
|
|
14768
|
-
|
|
14769
|
-
|
|
14770
|
-
|
|
14771
|
-
|
|
14772
|
-
|
|
14773
|
-
type: "scorer"
|
|
14774
|
-
});
|
|
14775
|
-
this.project.addCodeFunction(scorer);
|
|
14776
|
-
} else {
|
|
14777
|
-
const promptBlock = "messages" in opts ? {
|
|
14778
|
-
type: "chat",
|
|
14779
|
-
messages: opts.messages
|
|
14780
|
-
} : {
|
|
14781
|
-
type: "completion",
|
|
14782
|
-
content: opts.prompt
|
|
14783
|
-
};
|
|
14784
|
-
const promptData = {
|
|
14785
|
-
prompt: promptBlock,
|
|
14786
|
-
options: {
|
|
14787
|
-
model: opts.model,
|
|
14788
|
-
params: opts.params
|
|
14789
|
-
},
|
|
14790
|
-
parser: {
|
|
14791
|
-
type: "llm_classifier",
|
|
14792
|
-
use_cot: opts.useCot,
|
|
14793
|
-
choice_scores: opts.choiceScores
|
|
14794
|
-
}
|
|
15700
|
+
try {
|
|
15701
|
+
const evalDef = {
|
|
15702
|
+
evalName,
|
|
15703
|
+
projectName: name,
|
|
15704
|
+
...evaluator,
|
|
15705
|
+
data
|
|
14795
15706
|
};
|
|
14796
|
-
const
|
|
14797
|
-
|
|
14798
|
-
|
|
14799
|
-
|
|
14800
|
-
|
|
14801
|
-
|
|
14802
|
-
|
|
14803
|
-
|
|
14804
|
-
|
|
14805
|
-
|
|
14806
|
-
|
|
14807
|
-
|
|
14808
|
-
|
|
14809
|
-
|
|
14810
|
-
|
|
14811
|
-
|
|
14812
|
-
|
|
14813
|
-
|
|
14814
|
-
|
|
14815
|
-
|
|
14816
|
-
|
|
14817
|
-
|
|
14818
|
-
|
|
14819
|
-
|
|
14820
|
-
|
|
14821
|
-
|
|
14822
|
-
|
|
14823
|
-
|
|
14824
|
-
|
|
15707
|
+
const enableCache = options.enableCache ?? true;
|
|
15708
|
+
let ret;
|
|
15709
|
+
if (options.parent) {
|
|
15710
|
+
ret = await withParent(
|
|
15711
|
+
options.parent,
|
|
15712
|
+
() => runEvaluator(
|
|
15713
|
+
null,
|
|
15714
|
+
evalDef,
|
|
15715
|
+
progressReporter,
|
|
15716
|
+
[],
|
|
15717
|
+
options.stream,
|
|
15718
|
+
options.parameters,
|
|
15719
|
+
shouldCollectResults,
|
|
15720
|
+
enableCache
|
|
15721
|
+
),
|
|
15722
|
+
evaluator.state
|
|
15723
|
+
);
|
|
15724
|
+
} else {
|
|
15725
|
+
ret = await runEvaluator(
|
|
15726
|
+
experiment,
|
|
15727
|
+
evalDef,
|
|
15728
|
+
progressReporter,
|
|
15729
|
+
[],
|
|
15730
|
+
options.stream,
|
|
15731
|
+
options.parameters,
|
|
15732
|
+
shouldCollectResults,
|
|
15733
|
+
enableCache
|
|
15734
|
+
);
|
|
15735
|
+
}
|
|
15736
|
+
progressReporter.stop();
|
|
15737
|
+
resolvedReporter.reportEval(evalDef, ret, {
|
|
15738
|
+
verbose: true,
|
|
15739
|
+
jsonl: false
|
|
15740
|
+
});
|
|
15741
|
+
return ret;
|
|
15742
|
+
} finally {
|
|
15743
|
+
if (experiment) {
|
|
15744
|
+
await experiment.flush().catch(console.error);
|
|
15745
|
+
} else if (options.parent) {
|
|
15746
|
+
await flush().catch(console.error);
|
|
15747
|
+
}
|
|
14825
15748
|
}
|
|
15749
|
+
} finally {
|
|
15750
|
+
progressReporter.stop();
|
|
14826
15751
|
}
|
|
14827
|
-
|
|
14828
|
-
|
|
14829
|
-
|
|
14830
|
-
|
|
14831
|
-
|
|
14832
|
-
parameters;
|
|
14833
|
-
returns;
|
|
14834
|
-
ifExists;
|
|
14835
|
-
metadata;
|
|
14836
|
-
key() {
|
|
14837
|
-
return JSON.stringify([
|
|
14838
|
-
this.project.id ?? "",
|
|
14839
|
-
this.project.name ?? "",
|
|
14840
|
-
this.slug
|
|
14841
|
-
]);
|
|
15752
|
+
}
|
|
15753
|
+
function Reporter(name, reporter) {
|
|
15754
|
+
const ret = { name, ...reporter };
|
|
15755
|
+
if (_evals.reporters[name]) {
|
|
15756
|
+
throw new Error(`Reporter ${name} already exists`);
|
|
14842
15757
|
}
|
|
14843
|
-
|
|
14844
|
-
|
|
14845
|
-
project;
|
|
14846
|
-
name;
|
|
14847
|
-
slug;
|
|
14848
|
-
prompt;
|
|
14849
|
-
ifExists;
|
|
14850
|
-
description;
|
|
14851
|
-
id;
|
|
14852
|
-
functionType;
|
|
14853
|
-
toolFunctions;
|
|
14854
|
-
metadata;
|
|
14855
|
-
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
14856
|
-
this.project = project;
|
|
14857
|
-
this.name = opts.name;
|
|
14858
|
-
this.slug = opts.slug;
|
|
14859
|
-
this.prompt = prompt;
|
|
14860
|
-
this.toolFunctions = toolFunctions;
|
|
14861
|
-
this.ifExists = opts.ifExists;
|
|
14862
|
-
this.description = opts.description;
|
|
14863
|
-
this.id = opts.id;
|
|
14864
|
-
this.functionType = functionType;
|
|
14865
|
-
this.metadata = opts.metadata;
|
|
15758
|
+
if (globalThis._lazy_load) {
|
|
15759
|
+
_evals.reporters[name] = ret;
|
|
14866
15760
|
}
|
|
14867
|
-
|
|
14868
|
-
|
|
14869
|
-
|
|
14870
|
-
|
|
14871
|
-
|
|
14872
|
-
|
|
14873
|
-
|
|
14874
|
-
if ("slug" in fn) {
|
|
14875
|
-
return {
|
|
14876
|
-
type: "slug",
|
|
14877
|
-
project_id: await projectNameToId.resolve(fn.project),
|
|
14878
|
-
slug: fn.slug
|
|
14879
|
-
};
|
|
14880
|
-
} else {
|
|
14881
|
-
return fn;
|
|
14882
|
-
}
|
|
14883
|
-
})
|
|
14884
|
-
);
|
|
14885
|
-
prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
14886
|
-
resolvableToolFunctions;
|
|
14887
|
-
}
|
|
14888
|
-
return {
|
|
14889
|
-
project_id: await projectNameToId.resolve(this.project),
|
|
14890
|
-
name: this.name,
|
|
14891
|
-
slug: this.slug,
|
|
14892
|
-
description: this.description ?? "",
|
|
14893
|
-
function_data: {
|
|
14894
|
-
type: "prompt"
|
|
14895
|
-
},
|
|
14896
|
-
function_type: this.functionType,
|
|
14897
|
-
prompt_data,
|
|
14898
|
-
if_exists: this.ifExists,
|
|
14899
|
-
metadata: this.metadata
|
|
14900
|
-
};
|
|
15761
|
+
return ret;
|
|
15762
|
+
}
|
|
15763
|
+
function serializeJSONWithPlainString(v) {
|
|
15764
|
+
if (typeof v === "string") {
|
|
15765
|
+
return v;
|
|
15766
|
+
} else {
|
|
15767
|
+
return JSON.stringify(v);
|
|
14901
15768
|
}
|
|
14902
|
-
};
|
|
14903
|
-
var promptContentsSchema = z9.union([
|
|
14904
|
-
z9.object({
|
|
14905
|
-
prompt: z9.string()
|
|
14906
|
-
}),
|
|
14907
|
-
z9.object({
|
|
14908
|
-
messages: z9.array(ChatCompletionMessageParam)
|
|
14909
|
-
})
|
|
14910
|
-
]);
|
|
14911
|
-
var promptDefinitionSchema = promptContentsSchema.and(
|
|
14912
|
-
z9.object({
|
|
14913
|
-
model: z9.string(),
|
|
14914
|
-
params: ModelParams.optional(),
|
|
14915
|
-
templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
|
|
14916
|
-
})
|
|
14917
|
-
);
|
|
14918
|
-
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
|
|
14919
|
-
z9.object({
|
|
14920
|
-
tools: z9.array(ToolFunctionDefinition).optional()
|
|
14921
|
-
})
|
|
14922
|
-
);
|
|
14923
|
-
var PromptBuilder = class {
|
|
14924
|
-
constructor(project) {
|
|
14925
|
-
this.project = project;
|
|
14926
|
-
}
|
|
14927
|
-
create(opts) {
|
|
14928
|
-
const toolFunctions = [];
|
|
14929
|
-
const rawTools = [];
|
|
14930
|
-
for (const tool of opts.tools ?? []) {
|
|
14931
|
-
if (tool instanceof CodeFunction) {
|
|
14932
|
-
toolFunctions.push(tool);
|
|
14933
|
-
} else if ("type" in tool && !("function" in tool)) {
|
|
14934
|
-
toolFunctions.push(tool);
|
|
14935
|
-
} else {
|
|
14936
|
-
rawTools.push(tool);
|
|
14937
|
-
}
|
|
14938
|
-
}
|
|
14939
|
-
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
14940
|
-
const promptData = promptDefinitionToPromptData(opts, rawTools);
|
|
14941
|
-
const promptRow = {
|
|
14942
|
-
id: opts.id,
|
|
14943
|
-
_xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
|
|
14944
|
-
name: opts.name,
|
|
14945
|
-
slug,
|
|
14946
|
-
prompt_data: promptData,
|
|
14947
|
-
...this.project.id !== void 0 ? { project_id: this.project.id } : {}
|
|
14948
|
-
};
|
|
14949
|
-
const prompt = new Prompt2(
|
|
14950
|
-
promptRow,
|
|
14951
|
-
{},
|
|
14952
|
-
// It doesn't make sense to specify defaults here.
|
|
14953
|
-
opts.noTrace ?? false
|
|
14954
|
-
);
|
|
14955
|
-
const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
|
|
14956
|
-
...opts,
|
|
14957
|
-
slug
|
|
14958
|
-
});
|
|
14959
|
-
this.project.addPrompt(codePrompt);
|
|
14960
|
-
return prompt;
|
|
14961
|
-
}
|
|
14962
|
-
};
|
|
14963
|
-
function promptDefinitionToPromptData(promptDefinition, rawTools) {
|
|
14964
|
-
const promptBlock = "messages" in promptDefinition ? {
|
|
14965
|
-
type: "chat",
|
|
14966
|
-
messages: promptDefinition.messages,
|
|
14967
|
-
tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
|
|
14968
|
-
} : {
|
|
14969
|
-
type: "completion",
|
|
14970
|
-
content: promptDefinition.prompt
|
|
14971
|
-
};
|
|
14972
|
-
return {
|
|
14973
|
-
prompt: promptBlock,
|
|
14974
|
-
options: {
|
|
14975
|
-
model: promptDefinition.model,
|
|
14976
|
-
params: promptDefinition.params
|
|
14977
|
-
},
|
|
14978
|
-
...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
|
|
14979
|
-
};
|
|
14980
15769
|
}
|
|
14981
|
-
|
|
14982
|
-
|
|
14983
|
-
|
|
14984
|
-
|
|
14985
|
-
|
|
14986
|
-
|
|
14987
|
-
|
|
14988
|
-
|
|
14989
|
-
const result = z9.object({
|
|
14990
|
-
project: Project
|
|
14991
|
-
}).parse(response);
|
|
14992
|
-
const projectId = result.project.id;
|
|
14993
|
-
this.nameToId[projectName] = projectId;
|
|
14994
|
-
this.idToName[projectId] = projectName;
|
|
14995
|
-
}
|
|
14996
|
-
return this.nameToId[projectName];
|
|
14997
|
-
}
|
|
14998
|
-
async getName(projectId) {
|
|
14999
|
-
if (!(projectId in this.idToName)) {
|
|
15000
|
-
const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
|
|
15001
|
-
id: projectId
|
|
15002
|
-
});
|
|
15003
|
-
const result = z9.array(Project).nonempty().parse(response);
|
|
15004
|
-
const projectName = result[0].name;
|
|
15005
|
-
this.idToName[projectId] = projectName;
|
|
15006
|
-
this.nameToId[projectName] = projectId;
|
|
15007
|
-
}
|
|
15008
|
-
return this.idToName[projectId];
|
|
15009
|
-
}
|
|
15010
|
-
async resolve(project) {
|
|
15011
|
-
if (project.id) {
|
|
15012
|
-
return project.id;
|
|
15013
|
-
}
|
|
15014
|
-
return this.getId(project.name);
|
|
15015
|
-
}
|
|
15016
|
-
};
|
|
15017
|
-
|
|
15018
|
-
// src/eval-parameters.ts
|
|
15019
|
-
var evalParametersSchema = z10.record(
|
|
15020
|
-
z10.string(),
|
|
15021
|
-
z10.union([
|
|
15022
|
-
z10.object({
|
|
15023
|
-
type: z10.literal("prompt"),
|
|
15024
|
-
default: promptDefinitionWithToolsSchema.optional(),
|
|
15025
|
-
description: z10.string().optional()
|
|
15026
|
-
}),
|
|
15027
|
-
z10.instanceof(z10.ZodType)
|
|
15028
|
-
// For Zod schemas
|
|
15029
|
-
])
|
|
15030
|
-
);
|
|
15031
|
-
function validateParameters(parameters, parameterSchema) {
|
|
15032
|
-
return Object.fromEntries(
|
|
15033
|
-
Object.entries(parameterSchema).map(([name, schema]) => {
|
|
15034
|
-
const value = parameters[name];
|
|
15035
|
-
try {
|
|
15036
|
-
if ("type" in schema && schema.type === "prompt") {
|
|
15037
|
-
const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
|
|
15038
|
-
schema.default,
|
|
15039
|
-
schema.default.tools
|
|
15040
|
-
) : void 0;
|
|
15041
|
-
if (!promptData) {
|
|
15042
|
-
throw new Error(`Parameter '${name}' is required`);
|
|
15043
|
-
}
|
|
15044
|
-
return [name, Prompt2.fromPromptData(name, promptData)];
|
|
15045
|
-
} else {
|
|
15046
|
-
const schemaCasted = schema;
|
|
15047
|
-
return [name, schemaCasted.parse(value)];
|
|
15048
|
-
}
|
|
15049
|
-
} catch (e) {
|
|
15050
|
-
console.error("Error validating parameter", name, e);
|
|
15051
|
-
throw Error(
|
|
15052
|
-
`Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
|
|
15053
|
-
);
|
|
15054
|
-
}
|
|
15055
|
-
})
|
|
15770
|
+
function evaluateFilter(object, filter2) {
|
|
15771
|
+
const { path: path2, pattern } = filter2;
|
|
15772
|
+
const key = path2.reduce(
|
|
15773
|
+
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
15774
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15775
|
+
acc[p]
|
|
15776
|
+
) : void 0,
|
|
15777
|
+
object
|
|
15056
15778
|
);
|
|
15057
|
-
|
|
15058
|
-
|
|
15059
|
-
// src/framework.ts
|
|
15060
|
-
function BaseExperiment(options = {}) {
|
|
15061
|
-
return { _type: "BaseExperiment", ...options };
|
|
15062
|
-
}
|
|
15063
|
-
var EvalResultWithSummary = class {
|
|
15064
|
-
constructor(summary, results) {
|
|
15065
|
-
this.summary = summary;
|
|
15066
|
-
this.results = results;
|
|
15067
|
-
}
|
|
15068
|
-
/**
|
|
15069
|
-
* @deprecated Use `summary` instead.
|
|
15070
|
-
*/
|
|
15071
|
-
toString() {
|
|
15072
|
-
return JSON.stringify(this.summary);
|
|
15073
|
-
}
|
|
15074
|
-
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
15075
|
-
return `EvalResultWithSummary(summary="...", results=[...])`;
|
|
15076
|
-
}
|
|
15077
|
-
toJSON() {
|
|
15078
|
-
return {
|
|
15079
|
-
summary: this.summary,
|
|
15080
|
-
results: this.results
|
|
15081
|
-
};
|
|
15082
|
-
}
|
|
15083
|
-
};
|
|
15084
|
-
function makeEvalName(projectName, experimentName) {
|
|
15085
|
-
let out = projectName;
|
|
15086
|
-
if (experimentName) {
|
|
15087
|
-
out += ` [experimentName=${experimentName}]`;
|
|
15088
|
-
}
|
|
15089
|
-
return out;
|
|
15090
|
-
}
|
|
15091
|
-
function initExperiment2(state, options = {}) {
|
|
15092
|
-
return init({
|
|
15093
|
-
state,
|
|
15094
|
-
...options,
|
|
15095
|
-
setCurrent: false
|
|
15096
|
-
});
|
|
15097
|
-
}
|
|
15098
|
-
function callEvaluatorData(data) {
|
|
15099
|
-
const dataResult = typeof data === "function" ? data() : data;
|
|
15100
|
-
let baseExperiment = void 0;
|
|
15101
|
-
if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
|
|
15102
|
-
baseExperiment = dataResult.name;
|
|
15779
|
+
if (key === void 0) {
|
|
15780
|
+
return false;
|
|
15103
15781
|
}
|
|
15104
|
-
return
|
|
15105
|
-
data: dataResult,
|
|
15106
|
-
baseExperiment
|
|
15107
|
-
};
|
|
15782
|
+
return pattern.test(serializeJSONWithPlainString(key));
|
|
15108
15783
|
}
|
|
15109
|
-
function
|
|
15110
|
-
return
|
|
15784
|
+
function scorerName(scorer, scorer_idx) {
|
|
15785
|
+
return scorer.name || `scorer_${scorer_idx}`;
|
|
15111
15786
|
}
|
|
15112
|
-
function
|
|
15113
|
-
return
|
|
15787
|
+
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
|
|
15788
|
+
return await runEvaluatorInternal(
|
|
15789
|
+
experiment,
|
|
15790
|
+
evaluator,
|
|
15791
|
+
progressReporter,
|
|
15792
|
+
filters,
|
|
15793
|
+
stream,
|
|
15794
|
+
parameters,
|
|
15795
|
+
collectResults,
|
|
15796
|
+
enableCache
|
|
15797
|
+
);
|
|
15114
15798
|
}
|
|
15115
|
-
|
|
15116
|
-
|
|
15117
|
-
|
|
15118
|
-
|
|
15119
|
-
|
|
15799
|
+
var defaultErrorScoreHandler = ({
|
|
15800
|
+
rootSpan,
|
|
15801
|
+
data: _,
|
|
15802
|
+
unhandledScores
|
|
15803
|
+
}) => {
|
|
15804
|
+
const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
|
|
15805
|
+
rootSpan.log({ scores });
|
|
15806
|
+
return scores;
|
|
15120
15807
|
};
|
|
15121
|
-
function
|
|
15122
|
-
|
|
15123
|
-
|
|
15124
|
-
async function Eval(name, evaluator, reporterOrOpts) {
|
|
15125
|
-
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
15126
|
-
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
15127
|
-
if (globalThis._evals.evaluators[evalName]) {
|
|
15128
|
-
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
15129
|
-
}
|
|
15130
|
-
if (globalThis._lazy_load) {
|
|
15131
|
-
globalThis._evals.evaluators[evalName] = {
|
|
15132
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15133
|
-
evaluator: {
|
|
15134
|
-
evalName,
|
|
15135
|
-
projectName: name,
|
|
15136
|
-
...evaluator
|
|
15137
|
-
},
|
|
15138
|
-
reporter: options.reporter
|
|
15139
|
-
};
|
|
15140
|
-
_initializeSpanContext();
|
|
15141
|
-
return new EvalResultWithSummary(
|
|
15142
|
-
{
|
|
15143
|
-
scores: {},
|
|
15144
|
-
metrics: {},
|
|
15145
|
-
projectName: "",
|
|
15146
|
-
experimentName: ""
|
|
15147
|
-
},
|
|
15148
|
-
[]
|
|
15149
|
-
);
|
|
15150
|
-
}
|
|
15151
|
-
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
15152
|
-
const shouldCollectResults = options.returnResults ?? true;
|
|
15153
|
-
if (typeof options.reporter === "string") {
|
|
15154
|
-
throw new Error(
|
|
15155
|
-
"Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
|
|
15156
|
-
);
|
|
15157
|
-
}
|
|
15158
|
-
const resolvedReporter = options.reporter || defaultReporter;
|
|
15159
|
-
try {
|
|
15160
|
-
const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
|
|
15161
|
-
evaluator.data
|
|
15162
|
-
);
|
|
15163
|
-
const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
|
|
15164
|
-
...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
|
|
15165
|
-
experiment: evaluator.experimentName,
|
|
15166
|
-
description: evaluator.description,
|
|
15167
|
-
metadata: evaluator.metadata,
|
|
15168
|
-
isPublic: evaluator.isPublic,
|
|
15169
|
-
update: evaluator.update,
|
|
15170
|
-
baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
|
|
15171
|
-
baseExperimentId: evaluator.baseExperimentId,
|
|
15172
|
-
gitMetadataSettings: evaluator.gitMetadataSettings,
|
|
15173
|
-
repoInfo: evaluator.repoInfo,
|
|
15174
|
-
dataset: Dataset2.isDataset(data) ? data : void 0
|
|
15175
|
-
});
|
|
15176
|
-
if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
|
|
15177
|
-
await experiment._waitForId();
|
|
15178
|
-
}
|
|
15179
|
-
if (experiment && options.onStart) {
|
|
15180
|
-
const summary = await experiment.summarize({ summarizeScores: false });
|
|
15181
|
-
options.onStart(summary);
|
|
15182
|
-
}
|
|
15183
|
-
try {
|
|
15184
|
-
const evalDef = {
|
|
15185
|
-
evalName,
|
|
15186
|
-
projectName: name,
|
|
15187
|
-
...evaluator,
|
|
15188
|
-
data
|
|
15189
|
-
};
|
|
15190
|
-
const enableCache = options.enableCache ?? true;
|
|
15191
|
-
let ret;
|
|
15192
|
-
if (options.parent) {
|
|
15193
|
-
ret = await withParent(
|
|
15194
|
-
options.parent,
|
|
15195
|
-
() => runEvaluator(
|
|
15196
|
-
null,
|
|
15197
|
-
evalDef,
|
|
15198
|
-
progressReporter,
|
|
15199
|
-
[],
|
|
15200
|
-
options.stream,
|
|
15201
|
-
options.parameters,
|
|
15202
|
-
shouldCollectResults,
|
|
15203
|
-
enableCache
|
|
15204
|
-
),
|
|
15205
|
-
evaluator.state
|
|
15206
|
-
);
|
|
15207
|
-
} else {
|
|
15208
|
-
ret = await runEvaluator(
|
|
15209
|
-
experiment,
|
|
15210
|
-
evalDef,
|
|
15211
|
-
progressReporter,
|
|
15212
|
-
[],
|
|
15213
|
-
options.stream,
|
|
15214
|
-
options.parameters,
|
|
15215
|
-
shouldCollectResults,
|
|
15216
|
-
enableCache
|
|
15217
|
-
);
|
|
15218
|
-
}
|
|
15219
|
-
progressReporter.stop();
|
|
15220
|
-
resolvedReporter.reportEval(evalDef, ret, {
|
|
15221
|
-
verbose: true,
|
|
15222
|
-
jsonl: false
|
|
15223
|
-
});
|
|
15224
|
-
return ret;
|
|
15225
|
-
} finally {
|
|
15226
|
-
if (experiment) {
|
|
15227
|
-
await experiment.flush().catch(console.error);
|
|
15228
|
-
} else if (options.parent) {
|
|
15229
|
-
await flush().catch(console.error);
|
|
15230
|
-
}
|
|
15231
|
-
}
|
|
15232
|
-
} finally {
|
|
15233
|
-
progressReporter.stop();
|
|
15234
|
-
}
|
|
15235
|
-
}
|
|
15236
|
-
function Reporter(name, reporter) {
|
|
15237
|
-
const ret = { name, ...reporter };
|
|
15238
|
-
if (_evals.reporters[name]) {
|
|
15239
|
-
throw new Error(`Reporter ${name} already exists`);
|
|
15240
|
-
}
|
|
15241
|
-
if (globalThis._lazy_load) {
|
|
15242
|
-
_evals.reporters[name] = ret;
|
|
15243
|
-
}
|
|
15244
|
-
return ret;
|
|
15245
|
-
}
|
|
15246
|
-
function serializeJSONWithPlainString(v) {
|
|
15247
|
-
if (typeof v === "string") {
|
|
15248
|
-
return v;
|
|
15249
|
-
} else {
|
|
15250
|
-
return JSON.stringify(v);
|
|
15251
|
-
}
|
|
15252
|
-
}
|
|
15253
|
-
function evaluateFilter(object, filter2) {
|
|
15254
|
-
const { path: path2, pattern } = filter2;
|
|
15255
|
-
const key = path2.reduce(
|
|
15256
|
-
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
15257
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15258
|
-
acc[p]
|
|
15259
|
-
) : void 0,
|
|
15260
|
-
object
|
|
15261
|
-
);
|
|
15262
|
-
if (key === void 0) {
|
|
15263
|
-
return false;
|
|
15264
|
-
}
|
|
15265
|
-
return pattern.test(serializeJSONWithPlainString(key));
|
|
15266
|
-
}
|
|
15267
|
-
function scorerName(scorer, scorer_idx) {
|
|
15268
|
-
return scorer.name || `scorer_${scorer_idx}`;
|
|
15269
|
-
}
|
|
15270
|
-
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
|
|
15271
|
-
return await runEvaluatorInternal(
|
|
15272
|
-
experiment,
|
|
15273
|
-
evaluator,
|
|
15274
|
-
progressReporter,
|
|
15275
|
-
filters,
|
|
15276
|
-
stream,
|
|
15277
|
-
parameters,
|
|
15278
|
-
collectResults,
|
|
15279
|
-
enableCache
|
|
15280
|
-
);
|
|
15281
|
-
}
|
|
15282
|
-
var defaultErrorScoreHandler = ({
|
|
15283
|
-
rootSpan,
|
|
15284
|
-
data: _,
|
|
15285
|
-
unhandledScores
|
|
15286
|
-
}) => {
|
|
15287
|
-
const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
|
|
15288
|
-
rootSpan.log({ scores });
|
|
15289
|
-
return scores;
|
|
15290
|
-
};
|
|
15291
|
-
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
15292
|
-
if (enableCache) {
|
|
15293
|
-
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
15808
|
+
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
15809
|
+
if (enableCache) {
|
|
15810
|
+
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
15294
15811
|
}
|
|
15295
15812
|
try {
|
|
15296
15813
|
if (typeof evaluator.data === "string") {
|
|
15297
15814
|
throw new Error("Unimplemented: string data paths");
|
|
15298
15815
|
}
|
|
15299
15816
|
let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
|
|
15300
|
-
parameters = validateParameters(
|
|
15817
|
+
parameters = await validateParameters(
|
|
15301
15818
|
parameters ?? {},
|
|
15302
|
-
evaluator.parameters
|
|
15819
|
+
evaluator.parameters
|
|
15303
15820
|
);
|
|
15304
15821
|
if ("_type" in dataResult) {
|
|
15305
15822
|
if (dataResult._type !== "BaseExperiment") {
|
|
@@ -15326,7 +15843,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15326
15843
|
}
|
|
15327
15844
|
const resolvedDataResult = dataResult instanceof Promise ? await dataResult : dataResult;
|
|
15328
15845
|
const dataIterable = (() => {
|
|
15329
|
-
if (
|
|
15846
|
+
if (isAsyncIterable3(resolvedDataResult)) {
|
|
15330
15847
|
return resolvedDataResult;
|
|
15331
15848
|
}
|
|
15332
15849
|
if (Array.isArray(resolvedDataResult) || isIterable(resolvedDataResult)) {
|
|
@@ -15458,6 +15975,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15458
15975
|
} else {
|
|
15459
15976
|
rootSpan.log({ output, metadata, expected });
|
|
15460
15977
|
}
|
|
15978
|
+
if (evaluator.flushBeforeScoring) {
|
|
15979
|
+
await rootSpan.flush();
|
|
15980
|
+
}
|
|
15461
15981
|
const scoringArgs = {
|
|
15462
15982
|
input: datum.input,
|
|
15463
15983
|
expected: "expected" in datum ? datum.expected : void 0,
|
|
@@ -15704,206 +16224,646 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15704
16224
|
spanCache?.stop();
|
|
15705
16225
|
}
|
|
15706
16226
|
}
|
|
15707
|
-
}
|
|
15708
|
-
var warning = (text) => `Warning: ${text}`;
|
|
15709
|
-
function logError2(e, verbose) {
|
|
15710
|
-
if (!verbose) {
|
|
15711
|
-
console.error(`${e}`);
|
|
15712
|
-
} else {
|
|
15713
|
-
console.error(e);
|
|
16227
|
+
}
|
|
16228
|
+
var warning = (text) => `Warning: ${text}`;
|
|
16229
|
+
function logError2(e, verbose) {
|
|
16230
|
+
if (!verbose) {
|
|
16231
|
+
console.error(`${e}`);
|
|
16232
|
+
} else {
|
|
16233
|
+
console.error(e);
|
|
16234
|
+
}
|
|
16235
|
+
}
|
|
16236
|
+
function accumulateScores(accumulator, scores) {
|
|
16237
|
+
for (const [name, score] of Object.entries(scores)) {
|
|
16238
|
+
if (score === null || score === void 0) {
|
|
16239
|
+
continue;
|
|
16240
|
+
}
|
|
16241
|
+
const existing = accumulator[name] ?? { total: 0, count: 0 };
|
|
16242
|
+
accumulator[name] = {
|
|
16243
|
+
total: existing.total + score,
|
|
16244
|
+
count: existing.count + 1
|
|
16245
|
+
};
|
|
16246
|
+
}
|
|
16247
|
+
}
|
|
16248
|
+
function ensureScoreAccumulator(results) {
|
|
16249
|
+
const accumulator = {};
|
|
16250
|
+
for (const result of results) {
|
|
16251
|
+
accumulateScores(accumulator, result.scores);
|
|
16252
|
+
}
|
|
16253
|
+
return accumulator;
|
|
16254
|
+
}
|
|
16255
|
+
function buildLocalSummary(evaluator, results, precomputedScores) {
|
|
16256
|
+
const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
|
|
16257
|
+
return {
|
|
16258
|
+
projectName: evaluator.projectName,
|
|
16259
|
+
experimentName: evaluator.evalName,
|
|
16260
|
+
scores: Object.fromEntries(
|
|
16261
|
+
Object.entries(scoresByName).map(([name, { total, count }]) => [
|
|
16262
|
+
name,
|
|
16263
|
+
{
|
|
16264
|
+
name,
|
|
16265
|
+
score: count === 0 ? 0 : total / count,
|
|
16266
|
+
improvements: 0,
|
|
16267
|
+
regressions: 0
|
|
16268
|
+
}
|
|
16269
|
+
])
|
|
16270
|
+
)
|
|
16271
|
+
};
|
|
16272
|
+
}
|
|
16273
|
+
function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
|
|
16274
|
+
if (failingResults.length > 0) {
|
|
16275
|
+
console.error(
|
|
16276
|
+
warning(
|
|
16277
|
+
`Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
|
|
16278
|
+
)
|
|
16279
|
+
);
|
|
16280
|
+
if (jsonl) {
|
|
16281
|
+
console.log(
|
|
16282
|
+
JSON.stringify({
|
|
16283
|
+
evaluatorName: evaluator.evalName,
|
|
16284
|
+
errors: failingResults.map(
|
|
16285
|
+
(r) => `${r.error instanceof Error ? r.error.stack : r.error}`
|
|
16286
|
+
)
|
|
16287
|
+
})
|
|
16288
|
+
);
|
|
16289
|
+
} else {
|
|
16290
|
+
for (const result of failingResults) {
|
|
16291
|
+
logError2(result.error, verbose);
|
|
16292
|
+
}
|
|
16293
|
+
}
|
|
16294
|
+
if (!verbose && !jsonl) {
|
|
16295
|
+
console.error(warning("Add --verbose to see full stack traces."));
|
|
16296
|
+
}
|
|
16297
|
+
}
|
|
16298
|
+
}
|
|
16299
|
+
var defaultReporter = {
|
|
16300
|
+
name: "Braintrust default reporter",
|
|
16301
|
+
async reportEval(evaluator, result, { verbose, jsonl }) {
|
|
16302
|
+
const { results, summary } = result;
|
|
16303
|
+
const failingResults = results.filter(
|
|
16304
|
+
(r) => r.error !== void 0
|
|
16305
|
+
);
|
|
16306
|
+
if (failingResults.length > 0) {
|
|
16307
|
+
reportFailures(evaluator, failingResults, { verbose, jsonl });
|
|
16308
|
+
}
|
|
16309
|
+
if (jsonl) {
|
|
16310
|
+
isomorph_default.writeln(JSON.stringify(summary));
|
|
16311
|
+
} else {
|
|
16312
|
+
isomorph_default.writeln("Experiment summary");
|
|
16313
|
+
isomorph_default.writeln("==================");
|
|
16314
|
+
if (summary.comparisonExperimentName) {
|
|
16315
|
+
isomorph_default.writeln(
|
|
16316
|
+
`${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
|
|
16317
|
+
);
|
|
16318
|
+
isomorph_default.writeln("");
|
|
16319
|
+
}
|
|
16320
|
+
const hasScores = Object.keys(summary.scores).length > 0;
|
|
16321
|
+
const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
|
|
16322
|
+
const hasComparison = !!summary.comparisonExperimentName;
|
|
16323
|
+
if (hasScores || hasMetrics) {
|
|
16324
|
+
if (hasComparison) {
|
|
16325
|
+
isomorph_default.writeln(
|
|
16326
|
+
"Name Value Change Improvements Regressions"
|
|
16327
|
+
);
|
|
16328
|
+
isomorph_default.writeln(
|
|
16329
|
+
"----------------------------------------------------------------"
|
|
16330
|
+
);
|
|
16331
|
+
}
|
|
16332
|
+
for (const score of Object.values(summary.scores)) {
|
|
16333
|
+
const scorePercent = (score.score * 100).toFixed(2);
|
|
16334
|
+
const scoreValue = `${scorePercent}%`;
|
|
16335
|
+
if (hasComparison) {
|
|
16336
|
+
let diffString = "-";
|
|
16337
|
+
if (!isEmpty2(score.diff)) {
|
|
16338
|
+
const diffPercent = (score.diff * 100).toFixed(2);
|
|
16339
|
+
const diffSign = score.diff > 0 ? "+" : "";
|
|
16340
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
16341
|
+
}
|
|
16342
|
+
const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
|
|
16343
|
+
const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
|
|
16344
|
+
isomorph_default.writeln(
|
|
16345
|
+
`${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
16346
|
+
);
|
|
16347
|
+
} else {
|
|
16348
|
+
isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
|
|
16349
|
+
}
|
|
16350
|
+
}
|
|
16351
|
+
for (const metric of Object.values(summary.metrics ?? {})) {
|
|
16352
|
+
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
16353
|
+
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
16354
|
+
const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
|
|
16355
|
+
if (hasComparison) {
|
|
16356
|
+
let diffString = "-";
|
|
16357
|
+
if (!isEmpty2(metric.diff)) {
|
|
16358
|
+
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
16359
|
+
const diffSign = metric.diff > 0 ? "+" : "";
|
|
16360
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
16361
|
+
}
|
|
16362
|
+
const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
|
|
16363
|
+
const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
|
|
16364
|
+
isomorph_default.writeln(
|
|
16365
|
+
`${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
16366
|
+
);
|
|
16367
|
+
} else {
|
|
16368
|
+
isomorph_default.writeln(
|
|
16369
|
+
`${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
|
|
16370
|
+
);
|
|
16371
|
+
}
|
|
16372
|
+
}
|
|
16373
|
+
}
|
|
16374
|
+
if (summary.experimentUrl) {
|
|
16375
|
+
isomorph_default.writeln("");
|
|
16376
|
+
isomorph_default.writeln(`View results for ${summary.experimentName}`);
|
|
16377
|
+
isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
|
|
16378
|
+
}
|
|
16379
|
+
}
|
|
16380
|
+
isomorph_default.writeln("");
|
|
16381
|
+
return failingResults.length === 0;
|
|
16382
|
+
},
|
|
16383
|
+
async reportRun(evalReports) {
|
|
16384
|
+
return evalReports.every((r) => r);
|
|
16385
|
+
}
|
|
16386
|
+
};
|
|
16387
|
+
|
|
16388
|
+
// src/framework2.ts
|
|
16389
|
+
import { z as z11 } from "zod/v3";
|
|
16390
|
+
var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
|
|
16391
|
+
var ProjectBuilder = class {
|
|
16392
|
+
create(opts) {
|
|
16393
|
+
return new Project2(opts);
|
|
16394
|
+
}
|
|
16395
|
+
};
|
|
16396
|
+
var projects = new ProjectBuilder();
|
|
16397
|
+
var Project2 = class {
|
|
16398
|
+
name;
|
|
16399
|
+
id;
|
|
16400
|
+
tools;
|
|
16401
|
+
prompts;
|
|
16402
|
+
parameters;
|
|
16403
|
+
scorers;
|
|
16404
|
+
_publishableCodeFunctions = [];
|
|
16405
|
+
_publishablePrompts = [];
|
|
16406
|
+
_publishableParameters = [];
|
|
16407
|
+
constructor(args) {
|
|
16408
|
+
_initializeSpanContext();
|
|
16409
|
+
this.name = "name" in args ? args.name : void 0;
|
|
16410
|
+
this.id = "id" in args ? args.id : void 0;
|
|
16411
|
+
this.tools = new ToolBuilder(this);
|
|
16412
|
+
this.prompts = new PromptBuilder(this);
|
|
16413
|
+
this.parameters = new ParametersBuilder(this);
|
|
16414
|
+
this.scorers = new ScorerBuilder(this);
|
|
16415
|
+
}
|
|
16416
|
+
addPrompt(prompt) {
|
|
16417
|
+
this._publishablePrompts.push(prompt);
|
|
16418
|
+
if (globalThis._lazy_load) {
|
|
16419
|
+
globalThis._evals.prompts.push(prompt);
|
|
16420
|
+
}
|
|
16421
|
+
}
|
|
16422
|
+
addParameters(parameters) {
|
|
16423
|
+
this._publishableParameters.push(parameters);
|
|
16424
|
+
if (globalThis._lazy_load) {
|
|
16425
|
+
if (globalThis._evals.parameters == null)
|
|
16426
|
+
globalThis._evals.parameters = [];
|
|
16427
|
+
globalThis._evals.parameters.push(parameters);
|
|
16428
|
+
}
|
|
16429
|
+
}
|
|
16430
|
+
addCodeFunction(fn) {
|
|
16431
|
+
this._publishableCodeFunctions.push(fn);
|
|
16432
|
+
if (globalThis._lazy_load) {
|
|
16433
|
+
globalThis._evals.functions.push(fn);
|
|
16434
|
+
}
|
|
16435
|
+
}
|
|
16436
|
+
async publish() {
|
|
16437
|
+
if (globalThis._lazy_load) {
|
|
16438
|
+
console.warn("publish() is a no-op when running `braintrust push`.");
|
|
16439
|
+
return;
|
|
16440
|
+
}
|
|
16441
|
+
await login();
|
|
16442
|
+
const projectMap = new ProjectNameIdMap();
|
|
16443
|
+
const functionDefinitions = [];
|
|
16444
|
+
if (this._publishableCodeFunctions.length > 0) {
|
|
16445
|
+
console.warn(
|
|
16446
|
+
"Code functions cannot be published directly. Use `braintrust push` instead."
|
|
16447
|
+
);
|
|
16448
|
+
}
|
|
16449
|
+
if (this._publishablePrompts.length > 0) {
|
|
16450
|
+
for (const prompt of this._publishablePrompts) {
|
|
16451
|
+
const functionDefinition = await prompt.toFunctionDefinition(projectMap);
|
|
16452
|
+
functionDefinitions.push(functionDefinition);
|
|
16453
|
+
}
|
|
16454
|
+
}
|
|
16455
|
+
await _internalGetGlobalState().apiConn().post_json("insert-functions", {
|
|
16456
|
+
functions: functionDefinitions
|
|
16457
|
+
});
|
|
16458
|
+
}
|
|
16459
|
+
};
|
|
16460
|
+
var ToolBuilder = class {
|
|
16461
|
+
constructor(project) {
|
|
16462
|
+
this.project = project;
|
|
16463
|
+
}
|
|
16464
|
+
taskCounter = 0;
|
|
16465
|
+
// This type definition is just a catch all so that the implementation can be
|
|
16466
|
+
// less specific than the two more specific declarations above.
|
|
16467
|
+
create(opts) {
|
|
16468
|
+
this.taskCounter++;
|
|
16469
|
+
opts = opts ?? {};
|
|
16470
|
+
const { handler, name, slug, parameters, returns, ...rest } = opts;
|
|
16471
|
+
let resolvedName = name ?? handler.name;
|
|
16472
|
+
if (resolvedName.trim().length === 0) {
|
|
16473
|
+
resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
16474
|
+
}
|
|
16475
|
+
const tool = new CodeFunction(this.project, {
|
|
16476
|
+
handler,
|
|
16477
|
+
name: resolvedName,
|
|
16478
|
+
slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
|
|
16479
|
+
type: "tool",
|
|
16480
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
16481
|
+
parameters,
|
|
16482
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
16483
|
+
returns,
|
|
16484
|
+
...rest
|
|
16485
|
+
});
|
|
16486
|
+
this.project.addCodeFunction(tool);
|
|
16487
|
+
return tool;
|
|
16488
|
+
}
|
|
16489
|
+
};
|
|
16490
|
+
var ScorerBuilder = class {
|
|
16491
|
+
constructor(project) {
|
|
16492
|
+
this.project = project;
|
|
16493
|
+
}
|
|
16494
|
+
taskCounter = 0;
|
|
16495
|
+
create(opts) {
|
|
16496
|
+
this.taskCounter++;
|
|
16497
|
+
let resolvedName = opts.name;
|
|
16498
|
+
if (!resolvedName && "handler" in opts) {
|
|
16499
|
+
resolvedName = opts.handler.name;
|
|
16500
|
+
}
|
|
16501
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
16502
|
+
resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
16503
|
+
}
|
|
16504
|
+
const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
|
|
16505
|
+
if ("handler" in opts) {
|
|
16506
|
+
const scorer = new CodeFunction(this.project, {
|
|
16507
|
+
...opts,
|
|
16508
|
+
name: resolvedName,
|
|
16509
|
+
slug,
|
|
16510
|
+
type: "scorer"
|
|
16511
|
+
});
|
|
16512
|
+
this.project.addCodeFunction(scorer);
|
|
16513
|
+
} else {
|
|
16514
|
+
const promptBlock = "messages" in opts ? {
|
|
16515
|
+
type: "chat",
|
|
16516
|
+
messages: opts.messages
|
|
16517
|
+
} : {
|
|
16518
|
+
type: "completion",
|
|
16519
|
+
content: opts.prompt
|
|
16520
|
+
};
|
|
16521
|
+
const promptData = {
|
|
16522
|
+
prompt: promptBlock,
|
|
16523
|
+
options: {
|
|
16524
|
+
model: opts.model,
|
|
16525
|
+
params: opts.params
|
|
16526
|
+
},
|
|
16527
|
+
parser: {
|
|
16528
|
+
type: "llm_classifier",
|
|
16529
|
+
use_cot: opts.useCot,
|
|
16530
|
+
choice_scores: opts.choiceScores
|
|
16531
|
+
}
|
|
16532
|
+
};
|
|
16533
|
+
const codePrompt = new CodePrompt(
|
|
16534
|
+
this.project,
|
|
16535
|
+
promptData,
|
|
16536
|
+
[],
|
|
16537
|
+
{
|
|
16538
|
+
...opts,
|
|
16539
|
+
name: resolvedName,
|
|
16540
|
+
slug
|
|
16541
|
+
},
|
|
16542
|
+
"scorer"
|
|
16543
|
+
);
|
|
16544
|
+
this.project.addPrompt(codePrompt);
|
|
16545
|
+
}
|
|
16546
|
+
}
|
|
16547
|
+
};
|
|
16548
|
+
var CodeFunction = class {
|
|
16549
|
+
constructor(project, opts) {
|
|
16550
|
+
this.project = project;
|
|
16551
|
+
this.handler = opts.handler;
|
|
16552
|
+
this.name = opts.name;
|
|
16553
|
+
this.slug = opts.slug;
|
|
16554
|
+
this.description = opts.description;
|
|
16555
|
+
this.type = opts.type;
|
|
16556
|
+
this.ifExists = opts.ifExists;
|
|
16557
|
+
this.metadata = opts.metadata;
|
|
16558
|
+
this.parameters = opts.parameters;
|
|
16559
|
+
this.returns = opts.returns;
|
|
16560
|
+
if (this.returns && !this.parameters) {
|
|
16561
|
+
throw new Error("parameters are required if return type is defined");
|
|
16562
|
+
}
|
|
16563
|
+
}
|
|
16564
|
+
handler;
|
|
16565
|
+
name;
|
|
16566
|
+
slug;
|
|
16567
|
+
type;
|
|
16568
|
+
description;
|
|
16569
|
+
parameters;
|
|
16570
|
+
returns;
|
|
16571
|
+
ifExists;
|
|
16572
|
+
metadata;
|
|
16573
|
+
key() {
|
|
16574
|
+
return JSON.stringify([
|
|
16575
|
+
this.project.id ?? "",
|
|
16576
|
+
this.project.name ?? "",
|
|
16577
|
+
this.slug
|
|
16578
|
+
]);
|
|
16579
|
+
}
|
|
16580
|
+
};
|
|
16581
|
+
var CodePrompt = class {
|
|
16582
|
+
project;
|
|
16583
|
+
name;
|
|
16584
|
+
slug;
|
|
16585
|
+
prompt;
|
|
16586
|
+
ifExists;
|
|
16587
|
+
description;
|
|
16588
|
+
id;
|
|
16589
|
+
functionType;
|
|
16590
|
+
toolFunctions;
|
|
16591
|
+
metadata;
|
|
16592
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
16593
|
+
this.project = project;
|
|
16594
|
+
this.name = opts.name;
|
|
16595
|
+
this.slug = opts.slug;
|
|
16596
|
+
this.prompt = prompt;
|
|
16597
|
+
this.toolFunctions = toolFunctions;
|
|
16598
|
+
this.ifExists = opts.ifExists;
|
|
16599
|
+
this.description = opts.description;
|
|
16600
|
+
this.id = opts.id;
|
|
16601
|
+
this.functionType = functionType;
|
|
16602
|
+
this.metadata = opts.metadata;
|
|
16603
|
+
}
|
|
16604
|
+
async toFunctionDefinition(projectNameToId) {
|
|
16605
|
+
const prompt_data = {
|
|
16606
|
+
...this.prompt
|
|
16607
|
+
};
|
|
16608
|
+
if (this.toolFunctions.length > 0) {
|
|
16609
|
+
const resolvableToolFunctions = await Promise.all(
|
|
16610
|
+
this.toolFunctions.map(async (fn) => {
|
|
16611
|
+
if ("slug" in fn) {
|
|
16612
|
+
return {
|
|
16613
|
+
type: "slug",
|
|
16614
|
+
project_id: await projectNameToId.resolve(fn.project),
|
|
16615
|
+
slug: fn.slug
|
|
16616
|
+
};
|
|
16617
|
+
} else {
|
|
16618
|
+
return fn;
|
|
16619
|
+
}
|
|
16620
|
+
})
|
|
16621
|
+
);
|
|
16622
|
+
prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
16623
|
+
resolvableToolFunctions;
|
|
16624
|
+
}
|
|
16625
|
+
return {
|
|
16626
|
+
project_id: await projectNameToId.resolve(this.project),
|
|
16627
|
+
name: this.name,
|
|
16628
|
+
slug: this.slug,
|
|
16629
|
+
description: this.description ?? "",
|
|
16630
|
+
function_data: {
|
|
16631
|
+
type: "prompt"
|
|
16632
|
+
},
|
|
16633
|
+
function_type: this.functionType,
|
|
16634
|
+
prompt_data,
|
|
16635
|
+
if_exists: this.ifExists,
|
|
16636
|
+
metadata: this.metadata
|
|
16637
|
+
};
|
|
16638
|
+
}
|
|
16639
|
+
};
|
|
16640
|
+
var PromptBuilder = class {
|
|
16641
|
+
constructor(project) {
|
|
16642
|
+
this.project = project;
|
|
16643
|
+
}
|
|
16644
|
+
create(opts) {
|
|
16645
|
+
const toolFunctions = [];
|
|
16646
|
+
const rawTools = [];
|
|
16647
|
+
for (const tool of opts.tools ?? []) {
|
|
16648
|
+
if (tool instanceof CodeFunction) {
|
|
16649
|
+
toolFunctions.push(tool);
|
|
16650
|
+
} else if ("type" in tool && !("function" in tool)) {
|
|
16651
|
+
toolFunctions.push(tool);
|
|
16652
|
+
} else {
|
|
16653
|
+
rawTools.push(tool);
|
|
16654
|
+
}
|
|
16655
|
+
}
|
|
16656
|
+
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
16657
|
+
const promptData = promptDefinitionToPromptData(opts, rawTools);
|
|
16658
|
+
const promptRow = {
|
|
16659
|
+
id: opts.id,
|
|
16660
|
+
_xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
|
|
16661
|
+
name: opts.name,
|
|
16662
|
+
slug,
|
|
16663
|
+
prompt_data: promptData,
|
|
16664
|
+
...this.project.id !== void 0 ? { project_id: this.project.id } : {}
|
|
16665
|
+
};
|
|
16666
|
+
const prompt = new Prompt2(
|
|
16667
|
+
promptRow,
|
|
16668
|
+
{},
|
|
16669
|
+
// It doesn't make sense to specify defaults here.
|
|
16670
|
+
opts.noTrace ?? false
|
|
16671
|
+
);
|
|
16672
|
+
const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
|
|
16673
|
+
...opts,
|
|
16674
|
+
slug
|
|
16675
|
+
});
|
|
16676
|
+
this.project.addPrompt(codePrompt);
|
|
16677
|
+
return prompt;
|
|
16678
|
+
}
|
|
16679
|
+
};
|
|
16680
|
+
var CodeParameters = class {
|
|
16681
|
+
project;
|
|
16682
|
+
name;
|
|
16683
|
+
slug;
|
|
16684
|
+
description;
|
|
16685
|
+
schema;
|
|
16686
|
+
ifExists;
|
|
16687
|
+
metadata;
|
|
16688
|
+
constructor(project, opts) {
|
|
16689
|
+
this.project = project;
|
|
16690
|
+
this.name = opts.name;
|
|
16691
|
+
this.slug = opts.slug;
|
|
16692
|
+
this.description = opts.description;
|
|
16693
|
+
this.schema = opts.schema;
|
|
16694
|
+
this.ifExists = opts.ifExists;
|
|
16695
|
+
this.metadata = opts.metadata;
|
|
15714
16696
|
}
|
|
15715
|
-
|
|
15716
|
-
|
|
15717
|
-
|
|
15718
|
-
|
|
15719
|
-
|
|
15720
|
-
|
|
15721
|
-
|
|
15722
|
-
|
|
15723
|
-
|
|
15724
|
-
|
|
16697
|
+
async toFunctionDefinition(projectNameToId) {
|
|
16698
|
+
return {
|
|
16699
|
+
project_id: await projectNameToId.resolve(this.project),
|
|
16700
|
+
name: this.name,
|
|
16701
|
+
slug: this.slug,
|
|
16702
|
+
description: this.description ?? "",
|
|
16703
|
+
function_type: "parameters",
|
|
16704
|
+
function_data: {
|
|
16705
|
+
type: "parameters",
|
|
16706
|
+
data: {},
|
|
16707
|
+
__schema: serializeEvalParameterstoParametersSchema(this.schema)
|
|
16708
|
+
},
|
|
16709
|
+
if_exists: this.ifExists,
|
|
16710
|
+
metadata: this.metadata
|
|
15725
16711
|
};
|
|
15726
16712
|
}
|
|
15727
|
-
}
|
|
15728
|
-
|
|
15729
|
-
|
|
15730
|
-
|
|
15731
|
-
accumulateScores(accumulator, result.scores);
|
|
16713
|
+
};
|
|
16714
|
+
var ParametersBuilder = class {
|
|
16715
|
+
constructor(project) {
|
|
16716
|
+
this.project = project;
|
|
15732
16717
|
}
|
|
15733
|
-
|
|
15734
|
-
}
|
|
15735
|
-
|
|
15736
|
-
|
|
15737
|
-
|
|
15738
|
-
|
|
15739
|
-
|
|
15740
|
-
|
|
15741
|
-
|
|
15742
|
-
|
|
15743
|
-
|
|
15744
|
-
|
|
15745
|
-
|
|
15746
|
-
|
|
15747
|
-
|
|
15748
|
-
|
|
15749
|
-
|
|
15750
|
-
|
|
15751
|
-
|
|
15752
|
-
|
|
15753
|
-
|
|
15754
|
-
|
|
15755
|
-
|
|
15756
|
-
|
|
15757
|
-
|
|
15758
|
-
|
|
15759
|
-
|
|
15760
|
-
|
|
15761
|
-
|
|
15762
|
-
JSON.stringify({
|
|
15763
|
-
evaluatorName: evaluator.evalName,
|
|
15764
|
-
errors: failingResults.map(
|
|
15765
|
-
(r) => `${r.error instanceof Error ? r.error.stack : r.error}`
|
|
15766
|
-
)
|
|
15767
|
-
})
|
|
15768
|
-
);
|
|
16718
|
+
create(opts) {
|
|
16719
|
+
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
16720
|
+
const codeParameters = new CodeParameters(this.project, {
|
|
16721
|
+
name: opts.name,
|
|
16722
|
+
slug,
|
|
16723
|
+
description: opts.description,
|
|
16724
|
+
schema: opts.schema,
|
|
16725
|
+
ifExists: opts.ifExists,
|
|
16726
|
+
metadata: opts.metadata
|
|
16727
|
+
});
|
|
16728
|
+
this.project.addParameters(codeParameters);
|
|
16729
|
+
return opts.schema;
|
|
16730
|
+
}
|
|
16731
|
+
};
|
|
16732
|
+
function serializeEvalParameterstoParametersSchema(parameters) {
|
|
16733
|
+
const properties = {};
|
|
16734
|
+
const required = [];
|
|
16735
|
+
for (const [name, value] of Object.entries(parameters)) {
|
|
16736
|
+
if ("type" in value && value.type === "prompt") {
|
|
16737
|
+
const defaultPromptData = value.default ? promptDefinitionToPromptData(value.default) : void 0;
|
|
16738
|
+
properties[name] = {
|
|
16739
|
+
type: "object",
|
|
16740
|
+
"x-bt-type": "prompt",
|
|
16741
|
+
...value.description ? { description: value.description } : {},
|
|
16742
|
+
...defaultPromptData ? { default: defaultPromptData } : {}
|
|
16743
|
+
};
|
|
16744
|
+
if (!defaultPromptData) {
|
|
16745
|
+
required.push(name);
|
|
16746
|
+
}
|
|
15769
16747
|
} else {
|
|
15770
|
-
|
|
15771
|
-
|
|
16748
|
+
const schemaObj = zodToJsonSchema(value);
|
|
16749
|
+
properties[name] = schemaObj;
|
|
16750
|
+
if (!("default" in schemaObj)) {
|
|
16751
|
+
required.push(name);
|
|
15772
16752
|
}
|
|
15773
16753
|
}
|
|
15774
|
-
if (!verbose && !jsonl) {
|
|
15775
|
-
console.error(warning("Add --verbose to see full stack traces."));
|
|
15776
|
-
}
|
|
15777
16754
|
}
|
|
16755
|
+
return {
|
|
16756
|
+
type: "object",
|
|
16757
|
+
properties,
|
|
16758
|
+
...required.length > 0 ? { required } : {},
|
|
16759
|
+
additionalProperties: true
|
|
16760
|
+
};
|
|
15778
16761
|
}
|
|
15779
|
-
var
|
|
15780
|
-
|
|
15781
|
-
|
|
15782
|
-
|
|
15783
|
-
|
|
15784
|
-
|
|
15785
|
-
|
|
15786
|
-
|
|
15787
|
-
|
|
16762
|
+
var ProjectNameIdMap = class {
|
|
16763
|
+
nameToId = {};
|
|
16764
|
+
idToName = {};
|
|
16765
|
+
async getId(projectName) {
|
|
16766
|
+
if (!(projectName in this.nameToId)) {
|
|
16767
|
+
const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
|
|
16768
|
+
project_name: projectName
|
|
16769
|
+
});
|
|
16770
|
+
const result = z11.object({
|
|
16771
|
+
project: Project
|
|
16772
|
+
}).parse(response);
|
|
16773
|
+
const projectId = result.project.id;
|
|
16774
|
+
this.nameToId[projectName] = projectId;
|
|
16775
|
+
this.idToName[projectId] = projectName;
|
|
15788
16776
|
}
|
|
15789
|
-
|
|
15790
|
-
|
|
15791
|
-
|
|
15792
|
-
|
|
15793
|
-
|
|
15794
|
-
|
|
15795
|
-
|
|
15796
|
-
|
|
15797
|
-
|
|
15798
|
-
|
|
15799
|
-
|
|
15800
|
-
const hasScores = Object.keys(summary.scores).length > 0;
|
|
15801
|
-
const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
|
|
15802
|
-
const hasComparison = !!summary.comparisonExperimentName;
|
|
15803
|
-
if (hasScores || hasMetrics) {
|
|
15804
|
-
if (hasComparison) {
|
|
15805
|
-
isomorph_default.writeln(
|
|
15806
|
-
"Name Value Change Improvements Regressions"
|
|
15807
|
-
);
|
|
15808
|
-
isomorph_default.writeln(
|
|
15809
|
-
"----------------------------------------------------------------"
|
|
15810
|
-
);
|
|
15811
|
-
}
|
|
15812
|
-
for (const score of Object.values(summary.scores)) {
|
|
15813
|
-
const scorePercent = (score.score * 100).toFixed(2);
|
|
15814
|
-
const scoreValue = `${scorePercent}%`;
|
|
15815
|
-
if (hasComparison) {
|
|
15816
|
-
let diffString = "-";
|
|
15817
|
-
if (!isEmpty2(score.diff)) {
|
|
15818
|
-
const diffPercent = (score.diff * 100).toFixed(2);
|
|
15819
|
-
const diffSign = score.diff > 0 ? "+" : "";
|
|
15820
|
-
diffString = `${diffSign}${diffPercent}%`;
|
|
15821
|
-
}
|
|
15822
|
-
const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
|
|
15823
|
-
const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
|
|
15824
|
-
isomorph_default.writeln(
|
|
15825
|
-
`${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
15826
|
-
);
|
|
15827
|
-
} else {
|
|
15828
|
-
isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
|
|
15829
|
-
}
|
|
15830
|
-
}
|
|
15831
|
-
for (const metric of Object.values(summary.metrics ?? {})) {
|
|
15832
|
-
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
15833
|
-
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
15834
|
-
const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
|
|
15835
|
-
if (hasComparison) {
|
|
15836
|
-
let diffString = "-";
|
|
15837
|
-
if (!isEmpty2(metric.diff)) {
|
|
15838
|
-
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
15839
|
-
const diffSign = metric.diff > 0 ? "+" : "";
|
|
15840
|
-
diffString = `${diffSign}${diffPercent}%`;
|
|
15841
|
-
}
|
|
15842
|
-
const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
|
|
15843
|
-
const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
|
|
15844
|
-
isomorph_default.writeln(
|
|
15845
|
-
`${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
15846
|
-
);
|
|
15847
|
-
} else {
|
|
15848
|
-
isomorph_default.writeln(
|
|
15849
|
-
`${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
|
|
15850
|
-
);
|
|
15851
|
-
}
|
|
15852
|
-
}
|
|
15853
|
-
}
|
|
15854
|
-
if (summary.experimentUrl) {
|
|
15855
|
-
isomorph_default.writeln("");
|
|
15856
|
-
isomorph_default.writeln(`View results for ${summary.experimentName}`);
|
|
15857
|
-
isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
|
|
15858
|
-
}
|
|
16777
|
+
return this.nameToId[projectName];
|
|
16778
|
+
}
|
|
16779
|
+
async getName(projectId) {
|
|
16780
|
+
if (!(projectId in this.idToName)) {
|
|
16781
|
+
const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
|
|
16782
|
+
id: projectId
|
|
16783
|
+
});
|
|
16784
|
+
const result = z11.array(Project).nonempty().parse(response);
|
|
16785
|
+
const projectName = result[0].name;
|
|
16786
|
+
this.idToName[projectId] = projectName;
|
|
16787
|
+
this.nameToId[projectName] = projectId;
|
|
15859
16788
|
}
|
|
15860
|
-
|
|
15861
|
-
|
|
15862
|
-
|
|
15863
|
-
|
|
15864
|
-
|
|
16789
|
+
return this.idToName[projectId];
|
|
16790
|
+
}
|
|
16791
|
+
async resolve(project) {
|
|
16792
|
+
if (project.id) {
|
|
16793
|
+
return project.id;
|
|
16794
|
+
}
|
|
16795
|
+
return this.getId(project.name);
|
|
15865
16796
|
}
|
|
15866
16797
|
};
|
|
15867
16798
|
|
|
15868
16799
|
// dev/types.ts
|
|
15869
|
-
import { z as
|
|
15870
|
-
var evalBodySchema =
|
|
15871
|
-
name:
|
|
15872
|
-
parameters:
|
|
16800
|
+
import { z as z12 } from "zod/v3";
|
|
16801
|
+
var evalBodySchema = z12.object({
|
|
16802
|
+
name: z12.string(),
|
|
16803
|
+
parameters: z12.record(z12.string(), z12.unknown()).nullish(),
|
|
15873
16804
|
data: RunEval.shape.data,
|
|
15874
|
-
scores:
|
|
15875
|
-
|
|
16805
|
+
scores: z12.array(
|
|
16806
|
+
z12.object({
|
|
15876
16807
|
function_id: FunctionId,
|
|
15877
|
-
name:
|
|
16808
|
+
name: z12.string()
|
|
15878
16809
|
})
|
|
15879
16810
|
).nullish(),
|
|
15880
|
-
experiment_name:
|
|
15881
|
-
project_id:
|
|
16811
|
+
experiment_name: z12.string().nullish(),
|
|
16812
|
+
project_id: z12.string().nullish(),
|
|
15882
16813
|
parent: InvokeParent.optional(),
|
|
15883
|
-
stream:
|
|
16814
|
+
stream: z12.boolean().optional()
|
|
15884
16815
|
});
|
|
15885
|
-
var
|
|
15886
|
-
|
|
15887
|
-
|
|
15888
|
-
|
|
15889
|
-
type:
|
|
16816
|
+
var staticParametersSchema = z12.record(
|
|
16817
|
+
z12.string(),
|
|
16818
|
+
z12.union([
|
|
16819
|
+
z12.object({
|
|
16820
|
+
type: z12.literal("prompt"),
|
|
15890
16821
|
default: PromptData.optional(),
|
|
15891
|
-
description:
|
|
16822
|
+
description: z12.string().optional()
|
|
15892
16823
|
}),
|
|
15893
|
-
|
|
15894
|
-
type:
|
|
15895
|
-
schema:
|
|
15896
|
-
|
|
15897
|
-
|
|
15898
|
-
description: z11.string().optional()
|
|
16824
|
+
z12.object({
|
|
16825
|
+
type: z12.literal("data"),
|
|
16826
|
+
schema: z12.record(z12.unknown()),
|
|
16827
|
+
default: z12.unknown().optional(),
|
|
16828
|
+
description: z12.string().optional()
|
|
15899
16829
|
})
|
|
15900
16830
|
])
|
|
15901
16831
|
);
|
|
15902
|
-
var
|
|
15903
|
-
|
|
16832
|
+
var parametersSchema = z12.object({
|
|
16833
|
+
type: z12.literal("object"),
|
|
16834
|
+
properties: z12.record(z12.string(), z12.record(z12.unknown())),
|
|
16835
|
+
required: z12.array(z12.string()).optional(),
|
|
16836
|
+
additionalProperties: z12.boolean().optional()
|
|
16837
|
+
});
|
|
16838
|
+
var parametersSourceSchema = z12.object({
|
|
16839
|
+
parametersId: z12.string().optional(),
|
|
16840
|
+
slug: z12.string(),
|
|
16841
|
+
name: z12.string(),
|
|
16842
|
+
projectId: z12.string().optional(),
|
|
16843
|
+
version: z12.string().optional()
|
|
16844
|
+
});
|
|
16845
|
+
var parametersContainerSchema = z12.object({
|
|
16846
|
+
type: z12.literal("braintrust.parameters"),
|
|
16847
|
+
schema: parametersSchema,
|
|
16848
|
+
source: parametersSourceSchema
|
|
16849
|
+
});
|
|
16850
|
+
var staticParametersContainerSchema = z12.object({
|
|
16851
|
+
type: z12.literal("braintrust.staticParameters"),
|
|
16852
|
+
schema: staticParametersSchema,
|
|
16853
|
+
source: z12.null()
|
|
16854
|
+
});
|
|
16855
|
+
var serializedParametersContainerSchema = z12.union([
|
|
16856
|
+
parametersContainerSchema,
|
|
16857
|
+
staticParametersContainerSchema,
|
|
16858
|
+
// keeping this type here since old versions of the SDK will still pass the unwrapped schema and we need to handle this in the app
|
|
16859
|
+
staticParametersSchema
|
|
16860
|
+
]);
|
|
16861
|
+
var evaluatorDefinitionSchema = z12.object({
|
|
16862
|
+
parameters: serializedParametersContainerSchema.optional(),
|
|
16863
|
+
scores: z12.array(z12.object({ name: z12.string() })).optional()
|
|
15904
16864
|
});
|
|
15905
|
-
var evaluatorDefinitionsSchema =
|
|
15906
|
-
|
|
16865
|
+
var evaluatorDefinitionsSchema = z12.record(
|
|
16866
|
+
z12.string(),
|
|
15907
16867
|
evaluatorDefinitionSchema
|
|
15908
16868
|
);
|
|
15909
16869
|
|
|
@@ -15922,6 +16882,7 @@ export {
|
|
|
15922
16882
|
CodePrompt,
|
|
15923
16883
|
ContextManager,
|
|
15924
16884
|
DEFAULT_FETCH_BATCH_SIZE,
|
|
16885
|
+
DEFAULT_MAX_REQUEST_SIZE,
|
|
15925
16886
|
Dataset2 as Dataset,
|
|
15926
16887
|
ERR_PERMALINK,
|
|
15927
16888
|
Eval,
|
|
@@ -15932,6 +16893,7 @@ export {
|
|
|
15932
16893
|
IDGenerator,
|
|
15933
16894
|
JSONAttachment,
|
|
15934
16895
|
LEGACY_CACHED_HEADER,
|
|
16896
|
+
LOGS3_OVERFLOW_REFERENCE_TYPE,
|
|
15935
16897
|
LazyValue,
|
|
15936
16898
|
Logger,
|
|
15937
16899
|
LoginInvalidOrgError,
|
|
@@ -15956,8 +16918,10 @@ export {
|
|
|
15956
16918
|
_exportsForTestingOnly,
|
|
15957
16919
|
_internalGetGlobalState,
|
|
15958
16920
|
_internalSetInitialState,
|
|
16921
|
+
addAzureBlobHeaders,
|
|
15959
16922
|
braintrustStreamChunkSchema,
|
|
15960
16923
|
buildLocalSummary,
|
|
16924
|
+
constructLogs3OverflowRequest,
|
|
15961
16925
|
createFinalValuePassThroughStream,
|
|
15962
16926
|
currentExperiment,
|
|
15963
16927
|
currentLogger,
|
|
@@ -15982,15 +16946,18 @@ export {
|
|
|
15982
16946
|
initLogger,
|
|
15983
16947
|
invoke,
|
|
15984
16948
|
isTemplateFormat,
|
|
16949
|
+
loadParameters,
|
|
15985
16950
|
loadPrompt,
|
|
15986
16951
|
log,
|
|
15987
16952
|
logError,
|
|
15988
16953
|
login,
|
|
15989
16954
|
loginToState,
|
|
16955
|
+
logs3OverflowUploadSchema,
|
|
15990
16956
|
newId,
|
|
15991
16957
|
parseCachedHeader,
|
|
15992
16958
|
parseTemplateFormat,
|
|
15993
16959
|
permalink,
|
|
16960
|
+
pickLogs3OverflowObjectIds,
|
|
15994
16961
|
projects,
|
|
15995
16962
|
promptContentsSchema,
|
|
15996
16963
|
promptDefinitionSchema,
|
|
@@ -16011,6 +16978,8 @@ export {
|
|
|
16011
16978
|
traceable,
|
|
16012
16979
|
traced,
|
|
16013
16980
|
updateSpan,
|
|
16981
|
+
uploadLogs3OverflowPayload,
|
|
16982
|
+
utf8ByteLength,
|
|
16014
16983
|
withCurrent,
|
|
16015
16984
|
withDataset,
|
|
16016
16985
|
withExperiment,
|