braintrust 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +4844 -3703
- package/dev/dist/index.d.ts +4844 -3703
- package/dev/dist/index.js +2068 -1402
- package/dev/dist/index.mjs +1954 -1288
- package/dist/browser.d.mts +16987 -8720
- package/dist/browser.d.ts +16987 -8720
- package/dist/browser.js +1810 -841
- package/dist/browser.mjs +2056 -1087
- package/dist/cli.js +2403 -1729
- package/dist/index.d.mts +16987 -8720
- package/dist/index.d.ts +16987 -8720
- package/dist/index.js +1810 -841
- package/dist/index.mjs +2056 -1087
- package/package.json +2 -1
- package/util/dist/index.d.mts +10 -8
- package/util/dist/index.d.ts +10 -8
- package/util/dist/index.js +27 -142
- package/util/dist/index.mjs +26 -141
package/dist/browser.mjs
CHANGED
|
@@ -110,11 +110,19 @@ function getIdGenerator() {
|
|
|
110
110
|
|
|
111
111
|
// util/db_fields.ts
|
|
112
112
|
var TRANSACTION_ID_FIELD = "_xact_id";
|
|
113
|
+
var OBJECT_DELETE_FIELD = "_object_delete";
|
|
113
114
|
var IS_MERGE_FIELD = "_is_merge";
|
|
114
115
|
var AUDIT_SOURCE_FIELD = "_audit_source";
|
|
115
116
|
var AUDIT_METADATA_FIELD = "_audit_metadata";
|
|
116
117
|
var VALID_SOURCES = ["app", "api", "external"];
|
|
117
|
-
var
|
|
118
|
+
var OBJECT_ID_KEYS = [
|
|
119
|
+
"experiment_id",
|
|
120
|
+
"dataset_id",
|
|
121
|
+
"prompt_session_id",
|
|
122
|
+
"project_id",
|
|
123
|
+
"log_id",
|
|
124
|
+
"function_data"
|
|
125
|
+
];
|
|
118
126
|
|
|
119
127
|
// util/span_identifier_v3.ts
|
|
120
128
|
import * as uuid3 from "uuid";
|
|
@@ -785,13 +793,6 @@ function mergeDictsWithPathsHelper({
|
|
|
785
793
|
function mergeDicts(mergeInto, mergeFrom) {
|
|
786
794
|
return mergeDictsWithPaths({ mergeInto, mergeFrom, mergePaths: [] });
|
|
787
795
|
}
|
|
788
|
-
function mapAt(m, k) {
|
|
789
|
-
const ret = m.get(k);
|
|
790
|
-
if (ret === void 0) {
|
|
791
|
-
throw new Error(`Map does not contain key ${k}`);
|
|
792
|
-
}
|
|
793
|
-
return ret;
|
|
794
|
-
}
|
|
795
796
|
function recordFind(m, k) {
|
|
796
797
|
return m[k];
|
|
797
798
|
}
|
|
@@ -806,72 +807,8 @@ function getObjValueByPath(row, path) {
|
|
|
806
807
|
return curr;
|
|
807
808
|
}
|
|
808
809
|
|
|
809
|
-
// util/graph_util.ts
|
|
810
|
-
function depthFirstSearch(args) {
|
|
811
|
-
const { graph, firstVisitF, lastVisitF } = args;
|
|
812
|
-
for (const vs of graph.values()) {
|
|
813
|
-
for (const v of vs.values()) {
|
|
814
|
-
if (!graph.has(v)) {
|
|
815
|
-
throw new Error(`Outgoing vertex ${v} must be a key in the graph`);
|
|
816
|
-
}
|
|
817
|
-
}
|
|
818
|
-
}
|
|
819
|
-
const firstVisitedVertices = /* @__PURE__ */ new Set();
|
|
820
|
-
const visitationOrder = args.visitationOrder ?? [...graph.keys()];
|
|
821
|
-
const events = visitationOrder.map((vertex) => ({ eventType: "first", vertex, extras: {} })).reverse();
|
|
822
|
-
while (events.length) {
|
|
823
|
-
const { eventType, vertex, extras } = events.pop();
|
|
824
|
-
if (eventType === "last") {
|
|
825
|
-
lastVisitF?.(vertex);
|
|
826
|
-
continue;
|
|
827
|
-
}
|
|
828
|
-
if (firstVisitedVertices.has(vertex)) {
|
|
829
|
-
continue;
|
|
830
|
-
}
|
|
831
|
-
firstVisitedVertices.add(vertex);
|
|
832
|
-
firstVisitF?.(vertex, { parentVertex: extras.parentVertex });
|
|
833
|
-
events.push({ eventType: "last", vertex, extras: {} });
|
|
834
|
-
mapAt(graph, vertex).forEach((child) => {
|
|
835
|
-
events.push({
|
|
836
|
-
eventType: "first",
|
|
837
|
-
vertex: child,
|
|
838
|
-
extras: { parentVertex: vertex }
|
|
839
|
-
});
|
|
840
|
-
});
|
|
841
|
-
}
|
|
842
|
-
}
|
|
843
|
-
function undirectedConnectedComponents(graph) {
|
|
844
|
-
const directedGraph = new Map(
|
|
845
|
-
[...graph.vertices].map((v) => [v, /* @__PURE__ */ new Set()])
|
|
846
|
-
);
|
|
847
|
-
for (const [i, j] of graph.edges) {
|
|
848
|
-
mapAt(directedGraph, i).add(j);
|
|
849
|
-
mapAt(directedGraph, j).add(i);
|
|
850
|
-
}
|
|
851
|
-
let labelCounter = 0;
|
|
852
|
-
const vertexLabels = /* @__PURE__ */ new Map();
|
|
853
|
-
const firstVisitF = (vertex, args) => {
|
|
854
|
-
const label = args?.parentVertex !== void 0 ? mapAt(vertexLabels, args?.parentVertex) : labelCounter++;
|
|
855
|
-
vertexLabels.set(vertex, label);
|
|
856
|
-
};
|
|
857
|
-
depthFirstSearch({ graph: directedGraph, firstVisitF });
|
|
858
|
-
const output = Array.from({ length: labelCounter }).map(() => []);
|
|
859
|
-
for (const [vertex, label] of vertexLabels.entries()) {
|
|
860
|
-
output[label].push(vertex);
|
|
861
|
-
}
|
|
862
|
-
return output;
|
|
863
|
-
}
|
|
864
|
-
function topologicalSort(graph, visitationOrder) {
|
|
865
|
-
const reverseOrdering = [];
|
|
866
|
-
const lastVisitF = (vertex) => {
|
|
867
|
-
reverseOrdering.push(vertex);
|
|
868
|
-
};
|
|
869
|
-
depthFirstSearch({ graph, lastVisitF, visitationOrder });
|
|
870
|
-
return reverseOrdering.reverse();
|
|
871
|
-
}
|
|
872
|
-
|
|
873
810
|
// util/merge_row_batch.ts
|
|
874
|
-
function generateMergedRowKey(row
|
|
811
|
+
function generateMergedRowKey(row) {
|
|
875
812
|
return JSON.stringify(
|
|
876
813
|
[
|
|
877
814
|
"org_id",
|
|
@@ -880,7 +817,7 @@ function generateMergedRowKey(row, useParentIdForId) {
|
|
|
880
817
|
"dataset_id",
|
|
881
818
|
"prompt_session_id",
|
|
882
819
|
"log_id",
|
|
883
|
-
|
|
820
|
+
"id"
|
|
884
821
|
].map((k) => row[k])
|
|
885
822
|
);
|
|
886
823
|
}
|
|
@@ -934,96 +871,34 @@ function mergeRowBatch(rows) {
|
|
|
934
871
|
rowGroups.set(key, row);
|
|
935
872
|
}
|
|
936
873
|
}
|
|
937
|
-
|
|
938
|
-
const rowToLabel = new Map(
|
|
939
|
-
merged.map((r, i) => [generateMergedRowKey(r), i])
|
|
940
|
-
);
|
|
941
|
-
const graph = new Map(
|
|
942
|
-
Array.from({ length: merged.length }).map((_, i) => [i, /* @__PURE__ */ new Set()])
|
|
943
|
-
);
|
|
944
|
-
merged.forEach((r, i) => {
|
|
945
|
-
const parentId = r[PARENT_ID_FIELD];
|
|
946
|
-
if (!parentId) {
|
|
947
|
-
return;
|
|
948
|
-
}
|
|
949
|
-
const parentRowKey = generateMergedRowKey(
|
|
950
|
-
r,
|
|
951
|
-
true
|
|
952
|
-
/* useParentIdForId */
|
|
953
|
-
);
|
|
954
|
-
const parentLabel = rowToLabel.get(parentRowKey);
|
|
955
|
-
if (parentLabel !== void 0) {
|
|
956
|
-
mapAt(graph, parentLabel).add(i);
|
|
957
|
-
}
|
|
958
|
-
});
|
|
959
|
-
const connectedComponents = undirectedConnectedComponents({
|
|
960
|
-
vertices: new Set(graph.keys()),
|
|
961
|
-
edges: new Set(
|
|
962
|
-
[...graph.entries()].flatMap(
|
|
963
|
-
([k, vs]) => [...vs].map((v) => {
|
|
964
|
-
const ret = [k, v];
|
|
965
|
-
return ret;
|
|
966
|
-
})
|
|
967
|
-
)
|
|
968
|
-
)
|
|
969
|
-
});
|
|
970
|
-
const buckets = connectedComponents.map(
|
|
971
|
-
(cc) => topologicalSort(
|
|
972
|
-
graph,
|
|
973
|
-
cc
|
|
974
|
-
/* visitationOrder */
|
|
975
|
-
)
|
|
976
|
-
);
|
|
977
|
-
return buckets.map((bucket) => bucket.map((i) => merged[i]));
|
|
874
|
+
return [...rowGroups.values()];
|
|
978
875
|
}
|
|
979
876
|
function batchItems(args) {
|
|
980
|
-
|
|
877
|
+
const { items } = args;
|
|
981
878
|
const batchMaxNumItems = args.batchMaxNumItems ?? Number.POSITIVE_INFINITY;
|
|
982
879
|
const batchMaxNumBytes = args.batchMaxNumBytes ?? Number.POSITIVE_INFINITY;
|
|
880
|
+
const getByteSize = args.getByteSize;
|
|
983
881
|
const output = [];
|
|
984
|
-
let nextItems = [];
|
|
985
|
-
let batchSet = [];
|
|
986
882
|
let batch = [];
|
|
987
883
|
let batchLen = 0;
|
|
988
884
|
function addToBatch(item) {
|
|
989
885
|
batch.push(item);
|
|
990
|
-
batchLen += item
|
|
886
|
+
batchLen += getByteSize(item);
|
|
991
887
|
}
|
|
992
888
|
function flushBatch() {
|
|
993
|
-
|
|
889
|
+
output.push(batch);
|
|
994
890
|
batch = [];
|
|
995
891
|
batchLen = 0;
|
|
996
892
|
}
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
for (const item of bucket) {
|
|
1001
|
-
if (batch.length === 0 || item.length + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems) {
|
|
1002
|
-
addToBatch(item);
|
|
1003
|
-
} else if (i === 0) {
|
|
1004
|
-
flushBatch();
|
|
1005
|
-
addToBatch(item);
|
|
1006
|
-
} else {
|
|
1007
|
-
break;
|
|
1008
|
-
}
|
|
1009
|
-
++i;
|
|
1010
|
-
}
|
|
1011
|
-
if (i < bucket.length) {
|
|
1012
|
-
nextItems.push(bucket.slice(i));
|
|
1013
|
-
}
|
|
1014
|
-
if (batchLen >= batchMaxNumBytes || batch.length > batchMaxNumItems) {
|
|
1015
|
-
flushBatch();
|
|
1016
|
-
}
|
|
1017
|
-
}
|
|
1018
|
-
if (batch.length) {
|
|
893
|
+
for (const item of items) {
|
|
894
|
+
const itemSize = getByteSize(item);
|
|
895
|
+
if (batch.length > 0 && !(itemSize + batchLen < batchMaxNumBytes && batch.length < batchMaxNumItems)) {
|
|
1019
896
|
flushBatch();
|
|
1020
897
|
}
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
items = nextItems;
|
|
1026
|
-
nextItems = [];
|
|
898
|
+
addToBatch(item);
|
|
899
|
+
}
|
|
900
|
+
if (batch.length > 0) {
|
|
901
|
+
flushBatch();
|
|
1027
902
|
}
|
|
1028
903
|
return output;
|
|
1029
904
|
}
|
|
@@ -1610,10 +1485,15 @@ var FunctionTypeEnum = z6.enum([
|
|
|
1610
1485
|
"preprocessor",
|
|
1611
1486
|
"facet",
|
|
1612
1487
|
"classifier",
|
|
1613
|
-
"tag"
|
|
1488
|
+
"tag",
|
|
1489
|
+
"parameters"
|
|
1614
1490
|
]);
|
|
1615
1491
|
var NullableSavedFunctionId = z6.union([
|
|
1616
|
-
z6.object({
|
|
1492
|
+
z6.object({
|
|
1493
|
+
type: z6.literal("function"),
|
|
1494
|
+
id: z6.string(),
|
|
1495
|
+
version: z6.string().optional()
|
|
1496
|
+
}),
|
|
1617
1497
|
z6.object({
|
|
1618
1498
|
type: z6.literal("global"),
|
|
1619
1499
|
name: z6.string(),
|
|
@@ -1621,6 +1501,67 @@ var NullableSavedFunctionId = z6.union([
|
|
|
1621
1501
|
}),
|
|
1622
1502
|
z6.null()
|
|
1623
1503
|
]);
|
|
1504
|
+
var TopicMapReport = z6.object({
|
|
1505
|
+
version: z6.literal(1),
|
|
1506
|
+
created_at: z6.string().optional(),
|
|
1507
|
+
settings: z6.object({
|
|
1508
|
+
algorithm: z6.enum(["hdbscan", "kmeans", "hierarchical"]),
|
|
1509
|
+
dimension_reduction: z6.enum(["umap", "pca", "none"]),
|
|
1510
|
+
vector_field: z6.string(),
|
|
1511
|
+
embedding_model: z6.string(),
|
|
1512
|
+
n_clusters: z6.union([z6.number(), z6.null()]).optional(),
|
|
1513
|
+
umap_dimensions: z6.union([z6.number(), z6.null()]).optional(),
|
|
1514
|
+
min_cluster_size: z6.union([z6.number(), z6.null()]).optional(),
|
|
1515
|
+
min_samples: z6.union([z6.number(), z6.null()]).optional()
|
|
1516
|
+
}),
|
|
1517
|
+
query_settings: z6.object({
|
|
1518
|
+
hierarchy_threshold: z6.union([z6.number(), z6.null()]),
|
|
1519
|
+
auto_naming: z6.boolean(),
|
|
1520
|
+
skip_cache: z6.boolean(),
|
|
1521
|
+
viz_mode: z6.enum(["bar", "scatter"]),
|
|
1522
|
+
naming_model: z6.string()
|
|
1523
|
+
}).partial(),
|
|
1524
|
+
clusters: z6.array(
|
|
1525
|
+
z6.object({
|
|
1526
|
+
cluster_id: z6.number(),
|
|
1527
|
+
parent_cluster_id: z6.union([z6.number(), z6.null()]).optional(),
|
|
1528
|
+
topic_id: z6.string(),
|
|
1529
|
+
count: z6.number(),
|
|
1530
|
+
sample_texts: z6.array(z6.string()),
|
|
1531
|
+
samples: z6.array(
|
|
1532
|
+
z6.object({
|
|
1533
|
+
id: z6.string(),
|
|
1534
|
+
text: z6.string(),
|
|
1535
|
+
root_span_id: z6.string(),
|
|
1536
|
+
span_id: z6.string()
|
|
1537
|
+
})
|
|
1538
|
+
),
|
|
1539
|
+
name: z6.string().optional(),
|
|
1540
|
+
description: z6.string().optional(),
|
|
1541
|
+
keywords: z6.array(z6.string()).optional(),
|
|
1542
|
+
centroid: z6.array(z6.number()).optional(),
|
|
1543
|
+
parent_id: z6.union([z6.number(), z6.null()]).optional(),
|
|
1544
|
+
is_leaf: z6.boolean().optional(),
|
|
1545
|
+
depth: z6.number().optional()
|
|
1546
|
+
})
|
|
1547
|
+
),
|
|
1548
|
+
embedding_points: z6.array(
|
|
1549
|
+
z6.object({
|
|
1550
|
+
x: z6.number(),
|
|
1551
|
+
y: z6.number(),
|
|
1552
|
+
cluster: z6.number(),
|
|
1553
|
+
text: z6.string().optional()
|
|
1554
|
+
})
|
|
1555
|
+
).optional()
|
|
1556
|
+
});
|
|
1557
|
+
var TopicMapData = z6.object({
|
|
1558
|
+
type: z6.literal("topic_map"),
|
|
1559
|
+
source_facet: z6.string(),
|
|
1560
|
+
embedding_model: z6.string(),
|
|
1561
|
+
bundle_key: z6.string(),
|
|
1562
|
+
distance_threshold: z6.number().optional(),
|
|
1563
|
+
report: TopicMapReport.optional()
|
|
1564
|
+
});
|
|
1624
1565
|
var BatchedFacetData = z6.object({
|
|
1625
1566
|
type: z6.literal("batched_facet"),
|
|
1626
1567
|
preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
|
|
@@ -1629,9 +1570,17 @@ var BatchedFacetData = z6.object({
|
|
|
1629
1570
|
name: z6.string(),
|
|
1630
1571
|
prompt: z6.string(),
|
|
1631
1572
|
model: z6.string().optional(),
|
|
1573
|
+
embedding_model: z6.string().optional(),
|
|
1632
1574
|
no_match_pattern: z6.string().optional()
|
|
1633
1575
|
})
|
|
1634
|
-
)
|
|
1576
|
+
),
|
|
1577
|
+
topic_maps: z6.record(
|
|
1578
|
+
z6.object({
|
|
1579
|
+
function_name: z6.string(),
|
|
1580
|
+
topic_map_id: z6.string().optional(),
|
|
1581
|
+
topic_map_data: TopicMapData
|
|
1582
|
+
})
|
|
1583
|
+
).optional()
|
|
1635
1584
|
});
|
|
1636
1585
|
var BraintrustModelParams = z6.object({
|
|
1637
1586
|
use_cache: z6.boolean(),
|
|
@@ -1842,6 +1791,18 @@ var ObjectReferenceNullish = z6.union([
|
|
|
1842
1791
|
}),
|
|
1843
1792
|
z6.null()
|
|
1844
1793
|
]);
|
|
1794
|
+
var SavedFunctionId = z6.union([
|
|
1795
|
+
z6.object({
|
|
1796
|
+
type: z6.literal("function"),
|
|
1797
|
+
id: z6.string(),
|
|
1798
|
+
version: z6.string().optional()
|
|
1799
|
+
}),
|
|
1800
|
+
z6.object({
|
|
1801
|
+
type: z6.literal("global"),
|
|
1802
|
+
name: z6.string(),
|
|
1803
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
1804
|
+
})
|
|
1805
|
+
]);
|
|
1845
1806
|
var DatasetEvent = z6.object({
|
|
1846
1807
|
id: z6.string(),
|
|
1847
1808
|
_xact_id: z6.string(),
|
|
@@ -1861,7 +1822,36 @@ var DatasetEvent = z6.object({
|
|
|
1861
1822
|
is_root: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
1862
1823
|
origin: ObjectReferenceNullish.optional(),
|
|
1863
1824
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
1864
|
-
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
|
|
1825
|
+
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
1826
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
1827
|
+
classifications: z6.union([
|
|
1828
|
+
z6.record(
|
|
1829
|
+
z6.array(
|
|
1830
|
+
z6.object({
|
|
1831
|
+
id: z6.string(),
|
|
1832
|
+
label: z6.string().optional(),
|
|
1833
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
1834
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
1835
|
+
source: SavedFunctionId.and(
|
|
1836
|
+
z6.union([
|
|
1837
|
+
z6.object({
|
|
1838
|
+
type: z6.literal("function"),
|
|
1839
|
+
id: z6.string(),
|
|
1840
|
+
version: z6.string().optional()
|
|
1841
|
+
}),
|
|
1842
|
+
z6.object({
|
|
1843
|
+
type: z6.literal("global"),
|
|
1844
|
+
name: z6.string(),
|
|
1845
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
1846
|
+
}),
|
|
1847
|
+
z6.null()
|
|
1848
|
+
])
|
|
1849
|
+
).optional()
|
|
1850
|
+
})
|
|
1851
|
+
)
|
|
1852
|
+
),
|
|
1853
|
+
z6.null()
|
|
1854
|
+
]).optional()
|
|
1865
1855
|
});
|
|
1866
1856
|
var EnvVar = z6.object({
|
|
1867
1857
|
id: z6.string().uuid(),
|
|
@@ -1938,7 +1928,8 @@ var SpanType = z6.union([
|
|
|
1938
1928
|
"automation",
|
|
1939
1929
|
"facet",
|
|
1940
1930
|
"preprocessor",
|
|
1941
|
-
"classifier"
|
|
1931
|
+
"classifier",
|
|
1932
|
+
"review"
|
|
1942
1933
|
]),
|
|
1943
1934
|
z6.null()
|
|
1944
1935
|
]);
|
|
@@ -1979,10 +1970,43 @@ var ExperimentEvent = z6.object({
|
|
|
1979
1970
|
is_root: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
1980
1971
|
origin: ObjectReferenceNullish.optional(),
|
|
1981
1972
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
1982
|
-
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional()
|
|
1973
|
+
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
1974
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
1975
|
+
classifications: z6.union([
|
|
1976
|
+
z6.record(
|
|
1977
|
+
z6.array(
|
|
1978
|
+
z6.object({
|
|
1979
|
+
id: z6.string(),
|
|
1980
|
+
label: z6.string().optional(),
|
|
1981
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
1982
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
1983
|
+
source: SavedFunctionId.and(
|
|
1984
|
+
z6.union([
|
|
1985
|
+
z6.object({
|
|
1986
|
+
type: z6.literal("function"),
|
|
1987
|
+
id: z6.string(),
|
|
1988
|
+
version: z6.string().optional()
|
|
1989
|
+
}),
|
|
1990
|
+
z6.object({
|
|
1991
|
+
type: z6.literal("global"),
|
|
1992
|
+
name: z6.string(),
|
|
1993
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
1994
|
+
}),
|
|
1995
|
+
z6.null()
|
|
1996
|
+
])
|
|
1997
|
+
).optional()
|
|
1998
|
+
})
|
|
1999
|
+
)
|
|
2000
|
+
),
|
|
2001
|
+
z6.null()
|
|
2002
|
+
]).optional()
|
|
1983
2003
|
});
|
|
1984
2004
|
var ExtendedSavedFunctionId = z6.union([
|
|
1985
|
-
z6.object({
|
|
2005
|
+
z6.object({
|
|
2006
|
+
type: z6.literal("function"),
|
|
2007
|
+
id: z6.string(),
|
|
2008
|
+
version: z6.string().optional()
|
|
2009
|
+
}),
|
|
1986
2010
|
z6.object({
|
|
1987
2011
|
type: z6.literal("global"),
|
|
1988
2012
|
name: z6.string(),
|
|
@@ -1999,6 +2023,7 @@ var FacetData = z6.object({
|
|
|
1999
2023
|
preprocessor: NullableSavedFunctionId.and(z6.unknown()).optional(),
|
|
2000
2024
|
prompt: z6.string(),
|
|
2001
2025
|
model: z6.string().optional(),
|
|
2026
|
+
embedding_model: z6.string().optional(),
|
|
2002
2027
|
no_match_pattern: z6.string().optional()
|
|
2003
2028
|
});
|
|
2004
2029
|
var PromptBlockDataNullish = z6.union([
|
|
@@ -2088,14 +2113,6 @@ var PromptParserNullish = z6.union([
|
|
|
2088
2113
|
}),
|
|
2089
2114
|
z6.null()
|
|
2090
2115
|
]);
|
|
2091
|
-
var SavedFunctionId = z6.union([
|
|
2092
|
-
z6.object({ type: z6.literal("function"), id: z6.string() }),
|
|
2093
|
-
z6.object({
|
|
2094
|
-
type: z6.literal("global"),
|
|
2095
|
-
name: z6.string(),
|
|
2096
|
-
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2097
|
-
})
|
|
2098
|
-
]);
|
|
2099
2116
|
var PromptDataNullish = z6.union([
|
|
2100
2117
|
z6.object({
|
|
2101
2118
|
prompt: PromptBlockDataNullish,
|
|
@@ -2146,7 +2163,8 @@ var FunctionTypeEnumNullish = z6.union([
|
|
|
2146
2163
|
"preprocessor",
|
|
2147
2164
|
"facet",
|
|
2148
2165
|
"classifier",
|
|
2149
|
-
"tag"
|
|
2166
|
+
"tag",
|
|
2167
|
+
"parameters"
|
|
2150
2168
|
]),
|
|
2151
2169
|
z6.null()
|
|
2152
2170
|
]);
|
|
@@ -2238,7 +2256,8 @@ var FunctionData = z6.union([
|
|
|
2238
2256
|
type: z6.literal("remote_eval"),
|
|
2239
2257
|
endpoint: z6.string(),
|
|
2240
2258
|
eval_name: z6.string(),
|
|
2241
|
-
parameters: z6.object({}).partial().passthrough()
|
|
2259
|
+
parameters: z6.object({}).partial().passthrough(),
|
|
2260
|
+
parameters_version: z6.union([z6.string(), z6.null()]).optional()
|
|
2242
2261
|
}),
|
|
2243
2262
|
z6.object({
|
|
2244
2263
|
type: z6.literal("global"),
|
|
@@ -2247,7 +2266,18 @@ var FunctionData = z6.union([
|
|
|
2247
2266
|
config: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional()
|
|
2248
2267
|
}),
|
|
2249
2268
|
FacetData,
|
|
2250
|
-
BatchedFacetData
|
|
2269
|
+
BatchedFacetData,
|
|
2270
|
+
z6.object({
|
|
2271
|
+
type: z6.literal("parameters"),
|
|
2272
|
+
data: z6.object({}).partial().passthrough(),
|
|
2273
|
+
__schema: z6.object({
|
|
2274
|
+
type: z6.literal("object"),
|
|
2275
|
+
properties: z6.record(z6.object({}).partial().passthrough()),
|
|
2276
|
+
required: z6.array(z6.string()).optional(),
|
|
2277
|
+
additionalProperties: z6.boolean().optional()
|
|
2278
|
+
})
|
|
2279
|
+
}),
|
|
2280
|
+
TopicMapData.and(z6.unknown())
|
|
2251
2281
|
]);
|
|
2252
2282
|
var Function = z6.object({
|
|
2253
2283
|
id: z6.string().uuid(),
|
|
@@ -2277,7 +2307,13 @@ var Function = z6.object({
|
|
|
2277
2307
|
z6.null()
|
|
2278
2308
|
]).optional()
|
|
2279
2309
|
});
|
|
2280
|
-
var FunctionFormat = z6.enum([
|
|
2310
|
+
var FunctionFormat = z6.enum([
|
|
2311
|
+
"llm",
|
|
2312
|
+
"code",
|
|
2313
|
+
"global",
|
|
2314
|
+
"graph",
|
|
2315
|
+
"topic_map"
|
|
2316
|
+
]);
|
|
2281
2317
|
var PromptData = z6.object({
|
|
2282
2318
|
prompt: PromptBlockDataNullish,
|
|
2283
2319
|
options: PromptOptionsNullish,
|
|
@@ -2360,13 +2396,14 @@ var FunctionObjectType = z6.enum([
|
|
|
2360
2396
|
"custom_view",
|
|
2361
2397
|
"preprocessor",
|
|
2362
2398
|
"facet",
|
|
2363
|
-
"classifier"
|
|
2399
|
+
"classifier",
|
|
2400
|
+
"parameters"
|
|
2364
2401
|
]);
|
|
2365
2402
|
var FunctionOutputType = z6.enum([
|
|
2366
2403
|
"completion",
|
|
2367
2404
|
"score",
|
|
2368
2405
|
"facet",
|
|
2369
|
-
"
|
|
2406
|
+
"classification",
|
|
2370
2407
|
"any"
|
|
2371
2408
|
]);
|
|
2372
2409
|
var GitMetadataSettings = z6.object({
|
|
@@ -2402,6 +2439,10 @@ var GroupScope = z6.object({
|
|
|
2402
2439
|
idle_seconds: z6.number().optional()
|
|
2403
2440
|
});
|
|
2404
2441
|
var IfExists = z6.enum(["error", "ignore", "replace"]);
|
|
2442
|
+
var ImageRenderingMode = z6.union([
|
|
2443
|
+
z6.enum(["auto", "click_to_load", "blocked"]),
|
|
2444
|
+
z6.null()
|
|
2445
|
+
]);
|
|
2405
2446
|
var InvokeParent = z6.union([
|
|
2406
2447
|
z6.object({
|
|
2407
2448
|
object_type: z6.enum(["project_logs", "experiment", "playground_logs"]),
|
|
@@ -2494,7 +2535,8 @@ var Organization = z6.object({
|
|
|
2494
2535
|
is_universal_api: z6.union([z6.boolean(), z6.null()]).optional(),
|
|
2495
2536
|
proxy_url: z6.union([z6.string(), z6.null()]).optional(),
|
|
2496
2537
|
realtime_url: z6.union([z6.string(), z6.null()]).optional(),
|
|
2497
|
-
created: z6.union([z6.string(), z6.null()]).optional()
|
|
2538
|
+
created: z6.union([z6.string(), z6.null()]).optional(),
|
|
2539
|
+
image_rendering_mode: ImageRenderingMode.optional()
|
|
2498
2540
|
});
|
|
2499
2541
|
var ProjectSettings = z6.union([
|
|
2500
2542
|
z6.object({
|
|
@@ -2635,7 +2677,36 @@ var ProjectLogsEvent = z6.object({
|
|
|
2635
2677
|
origin: ObjectReferenceNullish.optional(),
|
|
2636
2678
|
comments: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2637
2679
|
audit_data: z6.union([z6.array(z6.unknown()), z6.null()]).optional(),
|
|
2638
|
-
_async_scoring_state: z6.unknown().optional()
|
|
2680
|
+
_async_scoring_state: z6.unknown().optional(),
|
|
2681
|
+
facets: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2682
|
+
classifications: z6.union([
|
|
2683
|
+
z6.record(
|
|
2684
|
+
z6.array(
|
|
2685
|
+
z6.object({
|
|
2686
|
+
id: z6.string(),
|
|
2687
|
+
label: z6.string().optional(),
|
|
2688
|
+
confidence: z6.union([z6.number(), z6.null()]).optional(),
|
|
2689
|
+
metadata: z6.union([z6.object({}).partial().passthrough(), z6.null()]).optional(),
|
|
2690
|
+
source: SavedFunctionId.and(
|
|
2691
|
+
z6.union([
|
|
2692
|
+
z6.object({
|
|
2693
|
+
type: z6.literal("function"),
|
|
2694
|
+
id: z6.string(),
|
|
2695
|
+
version: z6.string().optional()
|
|
2696
|
+
}),
|
|
2697
|
+
z6.object({
|
|
2698
|
+
type: z6.literal("global"),
|
|
2699
|
+
name: z6.string(),
|
|
2700
|
+
function_type: FunctionTypeEnum.optional().default("scorer")
|
|
2701
|
+
}),
|
|
2702
|
+
z6.null()
|
|
2703
|
+
])
|
|
2704
|
+
).optional()
|
|
2705
|
+
})
|
|
2706
|
+
)
|
|
2707
|
+
),
|
|
2708
|
+
z6.null()
|
|
2709
|
+
]).optional()
|
|
2639
2710
|
});
|
|
2640
2711
|
var ProjectScoreType = z6.enum([
|
|
2641
2712
|
"slider",
|
|
@@ -2937,12 +3008,15 @@ var View = z6.object({
|
|
|
2937
3008
|
"datasets",
|
|
2938
3009
|
"dataset",
|
|
2939
3010
|
"prompts",
|
|
3011
|
+
"parameters",
|
|
2940
3012
|
"tools",
|
|
2941
3013
|
"scorers",
|
|
2942
3014
|
"classifiers",
|
|
2943
3015
|
"logs",
|
|
2944
3016
|
"monitor",
|
|
2945
|
-
"
|
|
3017
|
+
"for_review_project_log",
|
|
3018
|
+
"for_review_experiments",
|
|
3019
|
+
"for_review_datasets"
|
|
2946
3020
|
]),
|
|
2947
3021
|
name: z6.string(),
|
|
2948
3022
|
created: z6.union([z6.string(), z6.null()]).optional(),
|
|
@@ -3687,6 +3761,52 @@ var PromptCache = class {
|
|
|
3687
3761
|
}
|
|
3688
3762
|
};
|
|
3689
3763
|
|
|
3764
|
+
// src/prompt-cache/parameters-cache.ts
|
|
3765
|
+
function createCacheKey2(key) {
|
|
3766
|
+
if (key.id) {
|
|
3767
|
+
return `parameters:id:${key.id}`;
|
|
3768
|
+
}
|
|
3769
|
+
const prefix = key.projectId ?? key.projectName;
|
|
3770
|
+
if (!prefix) {
|
|
3771
|
+
throw new Error("Either projectId or projectName must be provided");
|
|
3772
|
+
}
|
|
3773
|
+
if (!key.slug) {
|
|
3774
|
+
throw new Error("Slug must be provided when not using ID");
|
|
3775
|
+
}
|
|
3776
|
+
return `parameters:${prefix}:${key.slug}:${key.version ?? "latest"}`;
|
|
3777
|
+
}
|
|
3778
|
+
var ParametersCache = class {
|
|
3779
|
+
memoryCache;
|
|
3780
|
+
diskCache;
|
|
3781
|
+
constructor(options) {
|
|
3782
|
+
this.memoryCache = options.memoryCache;
|
|
3783
|
+
this.diskCache = options.diskCache;
|
|
3784
|
+
}
|
|
3785
|
+
async get(key) {
|
|
3786
|
+
const cacheKey = createCacheKey2(key);
|
|
3787
|
+
const memoryParams = this.memoryCache.get(cacheKey);
|
|
3788
|
+
if (memoryParams !== void 0) {
|
|
3789
|
+
return memoryParams;
|
|
3790
|
+
}
|
|
3791
|
+
if (this.diskCache) {
|
|
3792
|
+
const diskParams = await this.diskCache.get(cacheKey);
|
|
3793
|
+
if (!diskParams) {
|
|
3794
|
+
return void 0;
|
|
3795
|
+
}
|
|
3796
|
+
this.memoryCache.set(cacheKey, diskParams);
|
|
3797
|
+
return diskParams;
|
|
3798
|
+
}
|
|
3799
|
+
return void 0;
|
|
3800
|
+
}
|
|
3801
|
+
async set(key, value) {
|
|
3802
|
+
const cacheKey = createCacheKey2(key);
|
|
3803
|
+
this.memoryCache.set(cacheKey, value);
|
|
3804
|
+
if (this.diskCache) {
|
|
3805
|
+
await this.diskCache.set(cacheKey, value);
|
|
3806
|
+
}
|
|
3807
|
+
}
|
|
3808
|
+
};
|
|
3809
|
+
|
|
3690
3810
|
// src/span-cache.ts
|
|
3691
3811
|
var activeCaches = /* @__PURE__ */ new Set();
|
|
3692
3812
|
var exitHandlersRegistered = false;
|
|
@@ -3977,7 +4097,24 @@ var SpanCache = class {
|
|
|
3977
4097
|
// src/logger.ts
|
|
3978
4098
|
var BRAINTRUST_ATTACHMENT = BraintrustAttachmentReference.shape.type.value;
|
|
3979
4099
|
var EXTERNAL_ATTACHMENT = ExternalAttachmentReference.shape.type.value;
|
|
4100
|
+
var LOGS3_OVERFLOW_REFERENCE_TYPE = "logs3_overflow";
|
|
3980
4101
|
var BRAINTRUST_PARAMS = Object.keys(BraintrustModelParams.shape);
|
|
4102
|
+
var DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024;
|
|
4103
|
+
var parametersRowSchema = z8.object({
|
|
4104
|
+
id: z8.string().uuid(),
|
|
4105
|
+
_xact_id: z8.string(),
|
|
4106
|
+
project_id: z8.string().uuid(),
|
|
4107
|
+
name: z8.string(),
|
|
4108
|
+
slug: z8.string(),
|
|
4109
|
+
description: z8.union([z8.string(), z8.null()]).optional(),
|
|
4110
|
+
function_type: z8.literal("parameters"),
|
|
4111
|
+
function_data: z8.object({
|
|
4112
|
+
type: z8.literal("parameters"),
|
|
4113
|
+
data: z8.record(z8.unknown()).optional(),
|
|
4114
|
+
__schema: z8.record(z8.unknown())
|
|
4115
|
+
}),
|
|
4116
|
+
metadata: z8.union([z8.object({}).partial().passthrough(), z8.null()]).optional()
|
|
4117
|
+
});
|
|
3981
4118
|
var LoginInvalidOrgError = class extends Error {
|
|
3982
4119
|
constructor(message) {
|
|
3983
4120
|
super(message);
|
|
@@ -4154,6 +4291,17 @@ var BraintrustState = class _BraintrustState {
|
|
|
4154
4291
|
max: Number(isomorph_default.getEnv("BRAINTRUST_PROMPT_CACHE_DISK_MAX")) ?? 1 << 20
|
|
4155
4292
|
}) : void 0;
|
|
4156
4293
|
this.promptCache = new PromptCache({ memoryCache, diskCache });
|
|
4294
|
+
const parametersMemoryCache = new LRUCache({
|
|
4295
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_MEMORY_MAX")) ?? 1 << 10
|
|
4296
|
+
});
|
|
4297
|
+
const parametersDiskCache = canUseDiskCache() ? new DiskCache({
|
|
4298
|
+
cacheDir: isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DIR") ?? `${isomorph_default.getEnv("HOME") ?? isomorph_default.homedir()}/.braintrust/parameters_cache`,
|
|
4299
|
+
max: Number(isomorph_default.getEnv("BRAINTRUST_PARAMETERS_CACHE_DISK_MAX")) ?? 1 << 20
|
|
4300
|
+
}) : void 0;
|
|
4301
|
+
this.parametersCache = new ParametersCache({
|
|
4302
|
+
memoryCache: parametersMemoryCache,
|
|
4303
|
+
diskCache: parametersDiskCache
|
|
4304
|
+
});
|
|
4157
4305
|
this.spanCache = new SpanCache({ disabled: loginParams.disableSpanCache });
|
|
4158
4306
|
}
|
|
4159
4307
|
id;
|
|
@@ -4183,6 +4331,7 @@ var BraintrustState = class _BraintrustState {
|
|
|
4183
4331
|
_apiConn = null;
|
|
4184
4332
|
_proxyConn = null;
|
|
4185
4333
|
promptCache;
|
|
4334
|
+
parametersCache;
|
|
4186
4335
|
spanCache;
|
|
4187
4336
|
_idGenerator = null;
|
|
4188
4337
|
_contextManager = null;
|
|
@@ -5437,8 +5586,100 @@ function castLogger(logger, asyncFlush) {
|
|
|
5437
5586
|
}
|
|
5438
5587
|
return logger;
|
|
5439
5588
|
}
|
|
5589
|
+
var logs3OverflowUploadSchema = z8.object({
|
|
5590
|
+
method: z8.enum(["PUT", "POST"]),
|
|
5591
|
+
signedUrl: z8.string().url(),
|
|
5592
|
+
headers: z8.record(z8.string()).optional(),
|
|
5593
|
+
fields: z8.record(z8.string()).optional(),
|
|
5594
|
+
key: z8.string().min(1)
|
|
5595
|
+
});
|
|
5440
5596
|
function constructLogs3Data(items) {
|
|
5441
|
-
return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
|
|
5597
|
+
return `{"rows": ${constructJsonArray(items.map((i) => i.str))}, "api_version": 2}`;
|
|
5598
|
+
}
|
|
5599
|
+
function constructLogs3OverflowRequest(key) {
|
|
5600
|
+
return {
|
|
5601
|
+
rows: {
|
|
5602
|
+
type: LOGS3_OVERFLOW_REFERENCE_TYPE,
|
|
5603
|
+
key
|
|
5604
|
+
},
|
|
5605
|
+
api_version: 2
|
|
5606
|
+
};
|
|
5607
|
+
}
|
|
5608
|
+
function pickLogs3OverflowObjectIds(row) {
|
|
5609
|
+
const objectIds = {};
|
|
5610
|
+
for (const key of OBJECT_ID_KEYS) {
|
|
5611
|
+
if (key in row) {
|
|
5612
|
+
objectIds[key] = row[key];
|
|
5613
|
+
}
|
|
5614
|
+
}
|
|
5615
|
+
return objectIds;
|
|
5616
|
+
}
|
|
5617
|
+
/**
 * Sends `payload` to the signed URL described by `upload`.
 *
 * When `upload.method` is "POST" the payload is sent as the "file" part of a
 * multipart form that also carries every entry of `upload.fields`; otherwise
 * it is sent as the body of a PUT request.
 *
 * @param {object} upload - Upload descriptor (`method`, `signedUrl`, optional `headers` and `fields`).
 * @param {string} payload - The payload to upload.
 * @param {Function} [fetchFn=fetch] - Fetch implementation used for the request.
 * @returns {Promise<void>}
 * @throws {Error} If POST fields are missing, FormData/Blob are unavailable,
 *   or the response is not OK.
 */
async function uploadLogs3OverflowPayload(upload, payload, fetchFn = fetch) {
  // Shared check for both request shapes: surface status and body on failure.
  const ensureOk = async (res) => {
    if (res.ok) {
      return;
    }
    const bodyText = await res.text().catch(() => "");
    throw new Error(
      `Failed to upload logs3 overflow payload: ${res.status} ${bodyText}`
    );
  };

  if (upload.method !== "POST") {
    const putHeaders = { ...upload.headers ?? {} };
    addAzureBlobHeaders(putHeaders, upload.signedUrl);
    const putResponse = await fetchFn(upload.signedUrl, {
      method: "PUT",
      headers: putHeaders,
      body: payload
    });
    await ensureOk(putResponse);
    return;
  }

  if (!upload.fields) {
    throw new Error("Missing logs3 overflow upload fields");
  }
  if (typeof FormData === "undefined" || typeof Blob === "undefined") {
    throw new Error("FormData is not available for logs3 overflow upload");
  }

  const formData = new FormData();
  for (const [fieldName, fieldValue] of Object.entries(upload.fields)) {
    formData.append(fieldName, fieldValue);
  }
  const fileType = upload.fields["Content-Type"] ?? "application/json";
  formData.append("file", new Blob([payload], { type: fileType }));

  // Any caller-supplied Content-Type header is left out of the form request.
  const postHeaders = {};
  for (const [headerName, headerValue] of Object.entries(upload.headers ?? {})) {
    if (headerName.toLowerCase() === "content-type") {
      continue;
    }
    postHeaders[headerName] = headerValue;
  }

  const postResponse = await fetchFn(upload.signedUrl, {
    method: "POST",
    headers: postHeaders,
    body: formData
  });
  await ensureOk(postResponse);
}
|
|
5664
|
+
/**
 * Serializes `item` to JSON and derives its overflow metadata.
 *
 * @param {object} item - The row to serialize.
 * @returns {{str: string, overflowMeta: object}} The JSON string plus the
 *   row's object ids, delete flag, and serialized byte size.
 */
function stringifyWithOverflowMeta(item) {
  const str = JSON.stringify(item);
  const overflowMeta = {
    object_ids: pickLogs3OverflowObjectIds(item),
    // Only a literal `true` marks the row as a delete.
    is_delete: item[OBJECT_DELETE_FIELD] === true,
    input_row: { byte_size: utf8ByteLength(str) }
  };
  return { str, overflowMeta };
}
|
|
5678
|
+
/**
 * Returns the number of bytes `value` occupies when encoded as UTF-8.
 *
 * Uses TextEncoder when the runtime provides it. Otherwise the size is
 * computed from the UTF-16 code units, so non-ASCII text is still measured in
 * bytes rather than in string length.
 *
 * @param {string} value - The text to measure.
 * @returns {number} The UTF-8 byte length.
 */
function utf8ByteLength(value) {
  if (typeof TextEncoder !== "undefined") {
    return new TextEncoder().encode(value).length;
  }
  // Mirror TextEncoder.encode(): a missing argument encodes as the empty string.
  const text = value === void 0 ? "" : String(value);
  let byteCount = 0;
  for (let i = 0; i < text.length; i++) {
    const unit = text.charCodeAt(i);
    if (unit < 0x80) {
      byteCount += 1;
    } else if (unit < 0x800) {
      byteCount += 2;
    } else if (unit >= 0xd800 && unit <= 0xdbff && i + 1 < text.length) {
      const next = text.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // A valid surrogate pair is one code point encoded in four bytes.
        byteCount += 4;
        i++;
      } else {
        // Lone high surrogate: counted as the 3-byte replacement character.
        byteCount += 3;
      }
    } else {
      // Remaining BMP characters and lone surrogates take three bytes.
      byteCount += 3;
    }
  }
  return byteCount;
}
|
|
5443
5684
|
function now() {
|
|
5444
5685
|
return (/* @__PURE__ */ new Date()).getTime();
|
|
@@ -5464,10 +5705,9 @@ var TestBackgroundLogger = class {
|
|
|
5464
5705
|
events.push(await event.get());
|
|
5465
5706
|
}
|
|
5466
5707
|
}
|
|
5467
|
-
|
|
5468
|
-
let flatBatch = batch.flat();
|
|
5708
|
+
let batch = mergeRowBatch(events);
|
|
5469
5709
|
if (this.maskingFunction) {
|
|
5470
|
-
|
|
5710
|
+
batch = batch.map((item) => {
|
|
5471
5711
|
const maskedItem = { ...item };
|
|
5472
5712
|
for (const field of REDACTION_FIELDS) {
|
|
5473
5713
|
if (item[field] !== void 0) {
|
|
@@ -5492,7 +5732,7 @@ var TestBackgroundLogger = class {
|
|
|
5492
5732
|
return maskedItem;
|
|
5493
5733
|
});
|
|
5494
5734
|
}
|
|
5495
|
-
return
|
|
5735
|
+
return batch;
|
|
5496
5736
|
}
|
|
5497
5737
|
};
|
|
5498
5738
|
var BACKGROUND_LOGGER_BASE_SLEEP_TIME_S = 1;
|
|
@@ -5505,8 +5745,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5505
5745
|
onFlushError;
|
|
5506
5746
|
maskingFunction = null;
|
|
5507
5747
|
syncFlush = false;
|
|
5508
|
-
|
|
5509
|
-
|
|
5748
|
+
maxRequestSizeOverride = null;
|
|
5749
|
+
_maxRequestSizePromise = null;
|
|
5510
5750
|
defaultBatchSize = 100;
|
|
5511
5751
|
numTries = 3;
|
|
5512
5752
|
queueDropExceedingMaxsize = DEFAULT_QUEUE_SIZE;
|
|
@@ -5534,7 +5774,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5534
5774
|
}
|
|
5535
5775
|
const maxRequestSizeEnv = Number(isomorph_default.getEnv("BRAINTRUST_MAX_REQUEST_SIZE"));
|
|
5536
5776
|
if (!isNaN(maxRequestSizeEnv)) {
|
|
5537
|
-
this.
|
|
5777
|
+
this.maxRequestSizeOverride = maxRequestSizeEnv;
|
|
5538
5778
|
}
|
|
5539
5779
|
const numTriesEnv = Number(isomorph_default.getEnv("BRAINTRUST_NUM_RETRIES"));
|
|
5540
5780
|
if (!isNaN(numTriesEnv)) {
|
|
@@ -5596,6 +5836,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5596
5836
|
}
|
|
5597
5837
|
}
|
|
5598
5838
|
}
|
|
5839
|
+
getMaxRequestSize() {
|
|
5840
|
+
if (!this._maxRequestSizePromise) {
|
|
5841
|
+
this._maxRequestSizePromise = (async () => {
|
|
5842
|
+
let serverLimit = null;
|
|
5843
|
+
try {
|
|
5844
|
+
const conn = await this.apiConn.get();
|
|
5845
|
+
const versionInfo = await conn.get_json("version");
|
|
5846
|
+
serverLimit = z8.object({ logs3_payload_max_bytes: z8.number().nullish() }).parse(versionInfo).logs3_payload_max_bytes ?? null;
|
|
5847
|
+
} catch (e) {
|
|
5848
|
+
console.warn("Failed to fetch version info for payload limit:", e);
|
|
5849
|
+
}
|
|
5850
|
+
const validServerLimit = serverLimit !== null && serverLimit > 0 ? serverLimit : null;
|
|
5851
|
+
const canUseOverflow = validServerLimit !== null;
|
|
5852
|
+
let maxRequestSize = DEFAULT_MAX_REQUEST_SIZE;
|
|
5853
|
+
if (this.maxRequestSizeOverride !== null) {
|
|
5854
|
+
maxRequestSize = validServerLimit !== null ? Math.min(this.maxRequestSizeOverride, validServerLimit) : this.maxRequestSizeOverride;
|
|
5855
|
+
} else if (validServerLimit !== null) {
|
|
5856
|
+
maxRequestSize = validServerLimit;
|
|
5857
|
+
}
|
|
5858
|
+
return { maxRequestSize, canUseOverflow };
|
|
5859
|
+
})();
|
|
5860
|
+
}
|
|
5861
|
+
return this._maxRequestSizePromise;
|
|
5862
|
+
}
|
|
5599
5863
|
async flush() {
|
|
5600
5864
|
if (this.syncFlush) {
|
|
5601
5865
|
this.triggerActiveFlush();
|
|
@@ -5639,33 +5903,33 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5639
5903
|
if (allItems.length === 0) {
|
|
5640
5904
|
return;
|
|
5641
5905
|
}
|
|
5642
|
-
const
|
|
5643
|
-
(
|
|
5906
|
+
const allItemsWithMeta = allItems.map(
|
|
5907
|
+
(item) => stringifyWithOverflowMeta(item)
|
|
5644
5908
|
);
|
|
5645
|
-
const
|
|
5646
|
-
|
|
5909
|
+
const maxRequestSizeResult = await this.getMaxRequestSize();
|
|
5910
|
+
const batches = batchItems({
|
|
5911
|
+
items: allItemsWithMeta,
|
|
5647
5912
|
batchMaxNumItems: batchSize,
|
|
5648
|
-
batchMaxNumBytes:
|
|
5913
|
+
batchMaxNumBytes: maxRequestSizeResult.maxRequestSize / 2,
|
|
5914
|
+
getByteSize: (item) => item.str.length
|
|
5649
5915
|
});
|
|
5650
|
-
|
|
5651
|
-
|
|
5652
|
-
|
|
5653
|
-
|
|
5654
|
-
|
|
5655
|
-
|
|
5656
|
-
|
|
5657
|
-
|
|
5658
|
-
|
|
5659
|
-
|
|
5916
|
+
const postPromises = batches.map(
|
|
5917
|
+
(batch) => (async () => {
|
|
5918
|
+
try {
|
|
5919
|
+
await this.submitLogsRequest(batch, maxRequestSizeResult);
|
|
5920
|
+
return { type: "success" };
|
|
5921
|
+
} catch (e) {
|
|
5922
|
+
return { type: "error", value: e };
|
|
5923
|
+
}
|
|
5924
|
+
})()
|
|
5925
|
+
);
|
|
5926
|
+
const results = await Promise.all(postPromises);
|
|
5927
|
+
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
5928
|
+
if (failingResultErrors.length) {
|
|
5929
|
+
throw new AggregateError(
|
|
5930
|
+
failingResultErrors,
|
|
5931
|
+
`Encountered the following errors while logging:`
|
|
5660
5932
|
);
|
|
5661
|
-
const results = await Promise.all(postPromises);
|
|
5662
|
-
const failingResultErrors = results.map((r) => r.type === "success" ? void 0 : r.value).filter((r) => r !== void 0);
|
|
5663
|
-
if (failingResultErrors.length) {
|
|
5664
|
-
throw new AggregateError(
|
|
5665
|
-
failingResultErrors,
|
|
5666
|
-
`Encountered the following errors while logging:`
|
|
5667
|
-
);
|
|
5668
|
-
}
|
|
5669
5933
|
}
|
|
5670
5934
|
const attachmentErrors = [];
|
|
5671
5935
|
for (const attachment of attachments) {
|
|
@@ -5695,32 +5959,30 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5695
5959
|
items.forEach((item) => extractAttachments(item, attachments));
|
|
5696
5960
|
let mergedItems = mergeRowBatch(items);
|
|
5697
5961
|
if (this.maskingFunction) {
|
|
5698
|
-
mergedItems = mergedItems.map(
|
|
5699
|
-
|
|
5700
|
-
|
|
5701
|
-
|
|
5702
|
-
|
|
5703
|
-
|
|
5704
|
-
|
|
5705
|
-
|
|
5706
|
-
|
|
5707
|
-
|
|
5708
|
-
|
|
5709
|
-
|
|
5710
|
-
|
|
5711
|
-
|
|
5712
|
-
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
5713
|
-
} else {
|
|
5714
|
-
maskedItem.error = maskedValue.errorMsg;
|
|
5715
|
-
}
|
|
5962
|
+
mergedItems = mergedItems.map((item) => {
|
|
5963
|
+
const maskedItem = { ...item };
|
|
5964
|
+
for (const field of REDACTION_FIELDS) {
|
|
5965
|
+
if (item[field] !== void 0) {
|
|
5966
|
+
const maskedValue = applyMaskingToField(
|
|
5967
|
+
this.maskingFunction,
|
|
5968
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
5969
|
+
item[field],
|
|
5970
|
+
field
|
|
5971
|
+
);
|
|
5972
|
+
if (maskedValue instanceof MaskingError) {
|
|
5973
|
+
delete maskedItem[field];
|
|
5974
|
+
if (maskedItem.error) {
|
|
5975
|
+
maskedItem.error = `${maskedItem.error}; ${maskedValue.errorMsg}`;
|
|
5716
5976
|
} else {
|
|
5717
|
-
maskedItem
|
|
5977
|
+
maskedItem.error = maskedValue.errorMsg;
|
|
5718
5978
|
}
|
|
5979
|
+
} else {
|
|
5980
|
+
maskedItem[field] = maskedValue;
|
|
5719
5981
|
}
|
|
5720
5982
|
}
|
|
5721
|
-
|
|
5722
|
-
|
|
5723
|
-
);
|
|
5983
|
+
}
|
|
5984
|
+
return maskedItem;
|
|
5985
|
+
});
|
|
5724
5986
|
}
|
|
5725
5987
|
return [mergedItems, attachments];
|
|
5726
5988
|
} catch (e) {
|
|
@@ -5747,20 +6009,73 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5747
6009
|
}
|
|
5748
6010
|
throw new Error("Impossible");
|
|
5749
6011
|
}
|
|
5750
|
-
async
|
|
6012
|
+
async requestLogs3OverflowUpload(conn, args) {
|
|
6013
|
+
let response;
|
|
6014
|
+
try {
|
|
6015
|
+
response = await conn.post_json("logs3/overflow", {
|
|
6016
|
+
content_type: "application/json",
|
|
6017
|
+
size_bytes: args.sizeBytes,
|
|
6018
|
+
rows: args.rows
|
|
6019
|
+
});
|
|
6020
|
+
} catch (error) {
|
|
6021
|
+
const errorStr = JSON.stringify(error);
|
|
6022
|
+
throw new Error(
|
|
6023
|
+
`Failed to request logs3 overflow upload URL: ${errorStr}`
|
|
6024
|
+
);
|
|
6025
|
+
}
|
|
6026
|
+
try {
|
|
6027
|
+
return logs3OverflowUploadSchema.parse(response);
|
|
6028
|
+
} catch (error) {
|
|
6029
|
+
if (error instanceof ZodError) {
|
|
6030
|
+
const errorStr = JSON.stringify(error.flatten());
|
|
6031
|
+
throw new Error(`Invalid response from API server: ${errorStr}`);
|
|
6032
|
+
}
|
|
6033
|
+
throw error;
|
|
6034
|
+
}
|
|
6035
|
+
}
|
|
6036
|
+
async _uploadLogs3OverflowPayload(conn, upload, payload) {
|
|
6037
|
+
await uploadLogs3OverflowPayload(upload, payload, conn.fetch.bind(conn));
|
|
6038
|
+
}
|
|
6039
|
+
async submitLogsRequest(items, {
|
|
6040
|
+
maxRequestSize,
|
|
6041
|
+
canUseOverflow
|
|
6042
|
+
}) {
|
|
5751
6043
|
const conn = await this.apiConn.get();
|
|
5752
6044
|
const dataStr = constructLogs3Data(items);
|
|
6045
|
+
const payloadBytes = utf8ByteLength(dataStr);
|
|
6046
|
+
const useOverflow = canUseOverflow && payloadBytes > maxRequestSize;
|
|
5753
6047
|
if (this.allPublishPayloadsDir) {
|
|
5754
6048
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
5755
6049
|
payloadDir: this.allPublishPayloadsDir,
|
|
5756
6050
|
payload: dataStr
|
|
5757
6051
|
});
|
|
5758
6052
|
}
|
|
6053
|
+
let overflowUpload = null;
|
|
6054
|
+
const overflowRows = useOverflow ? items.map((item) => item.overflowMeta) : null;
|
|
5759
6055
|
for (let i = 0; i < this.numTries; i++) {
|
|
5760
6056
|
const startTime = now();
|
|
5761
6057
|
let error = void 0;
|
|
5762
6058
|
try {
|
|
5763
|
-
|
|
6059
|
+
if (overflowRows) {
|
|
6060
|
+
if (!overflowUpload) {
|
|
6061
|
+
const currentUpload = await this.requestLogs3OverflowUpload(conn, {
|
|
6062
|
+
rows: overflowRows,
|
|
6063
|
+
sizeBytes: payloadBytes
|
|
6064
|
+
});
|
|
6065
|
+
await this._uploadLogs3OverflowPayload(
|
|
6066
|
+
conn,
|
|
6067
|
+
currentUpload,
|
|
6068
|
+
dataStr
|
|
6069
|
+
);
|
|
6070
|
+
overflowUpload = currentUpload;
|
|
6071
|
+
}
|
|
6072
|
+
await conn.post_json(
|
|
6073
|
+
"logs3",
|
|
6074
|
+
constructLogs3OverflowRequest(overflowUpload.key)
|
|
6075
|
+
);
|
|
6076
|
+
} else {
|
|
6077
|
+
await conn.post_json("logs3", dataStr);
|
|
6078
|
+
}
|
|
5764
6079
|
} catch (e) {
|
|
5765
6080
|
error = e;
|
|
5766
6081
|
}
|
|
@@ -5776,7 +6091,7 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5776
6091
|
return `${error}`;
|
|
5777
6092
|
}
|
|
5778
6093
|
})();
|
|
5779
|
-
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${
|
|
6094
|
+
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${payloadBytes}.${retryingText}
|
|
5780
6095
|
Error: ${errorText}`;
|
|
5781
6096
|
if (!isRetrying && this.failedPublishPayloadsDir) {
|
|
5782
6097
|
await _HTTPBackgroundLogger.writePayloadToDir({
|
|
@@ -5830,7 +6145,7 @@ Error: ${errorText}`;
|
|
|
5830
6145
|
try {
|
|
5831
6146
|
const [allItems, allAttachments] = await this.unwrapLazyValues(wrappedItems);
|
|
5832
6147
|
const dataStr = constructLogs3Data(
|
|
5833
|
-
allItems.map((x) =>
|
|
6148
|
+
allItems.map((x) => stringifyWithOverflowMeta(x))
|
|
5834
6149
|
);
|
|
5835
6150
|
const attachmentStr = JSON.stringify(
|
|
5836
6151
|
allAttachments.map((a) => a.debugInfo())
|
|
@@ -6397,23 +6712,144 @@ async function loadPrompt({
|
|
|
6397
6712
|
}
|
|
6398
6713
|
return prompt;
|
|
6399
6714
|
}
|
|
6400
|
-
function
|
|
6401
|
-
|
|
6402
|
-
|
|
6403
|
-
|
|
6404
|
-
|
|
6405
|
-
|
|
6406
|
-
|
|
6407
|
-
|
|
6408
|
-
|
|
6409
|
-
|
|
6410
|
-
|
|
6411
|
-
|
|
6412
|
-
|
|
6413
|
-
|
|
6414
|
-
|
|
6415
|
-
|
|
6416
|
-
"
|
|
6715
|
+
async function loadParameters({
|
|
6716
|
+
projectName,
|
|
6717
|
+
projectId,
|
|
6718
|
+
slug,
|
|
6719
|
+
version,
|
|
6720
|
+
environment,
|
|
6721
|
+
id,
|
|
6722
|
+
appUrl,
|
|
6723
|
+
apiKey,
|
|
6724
|
+
orgName,
|
|
6725
|
+
fetch: fetch2,
|
|
6726
|
+
forceLogin,
|
|
6727
|
+
state: stateArg
|
|
6728
|
+
}) {
|
|
6729
|
+
if (version && environment) {
|
|
6730
|
+
throw new Error(
|
|
6731
|
+
"Cannot specify both 'version' and 'environment' parameters. Please use only one (remove the other)."
|
|
6732
|
+
);
|
|
6733
|
+
}
|
|
6734
|
+
if (id) {
|
|
6735
|
+
} else if (isEmpty2(projectName) && isEmpty2(projectId)) {
|
|
6736
|
+
throw new Error("Must specify either projectName or projectId");
|
|
6737
|
+
} else if (isEmpty2(slug)) {
|
|
6738
|
+
throw new Error("Must specify slug");
|
|
6739
|
+
}
|
|
6740
|
+
const state = stateArg ?? _globalState;
|
|
6741
|
+
let response;
|
|
6742
|
+
try {
|
|
6743
|
+
await state.login({
|
|
6744
|
+
orgName,
|
|
6745
|
+
apiKey,
|
|
6746
|
+
appUrl,
|
|
6747
|
+
fetch: fetch2,
|
|
6748
|
+
forceLogin
|
|
6749
|
+
});
|
|
6750
|
+
if (id) {
|
|
6751
|
+
response = await state.apiConn().get_json(`v1/function/${id}`, {
|
|
6752
|
+
...version && { version },
|
|
6753
|
+
...environment && { environment }
|
|
6754
|
+
});
|
|
6755
|
+
if (response) {
|
|
6756
|
+
response = { objects: [response] };
|
|
6757
|
+
}
|
|
6758
|
+
} else {
|
|
6759
|
+
response = await state.apiConn().get_json("v1/function", {
|
|
6760
|
+
project_name: projectName,
|
|
6761
|
+
project_id: projectId,
|
|
6762
|
+
slug,
|
|
6763
|
+
version,
|
|
6764
|
+
function_type: "parameters",
|
|
6765
|
+
...environment && { environment }
|
|
6766
|
+
});
|
|
6767
|
+
}
|
|
6768
|
+
} catch (e) {
|
|
6769
|
+
if (environment || version) {
|
|
6770
|
+
throw new Error(`Parameters not found with specified parameters: ${e}`);
|
|
6771
|
+
}
|
|
6772
|
+
console.warn(
|
|
6773
|
+
"Failed to load parameters, attempting to fall back to cache:",
|
|
6774
|
+
e
|
|
6775
|
+
);
|
|
6776
|
+
let parameters2;
|
|
6777
|
+
if (id) {
|
|
6778
|
+
parameters2 = await state.parametersCache.get({ id });
|
|
6779
|
+
if (!parameters2) {
|
|
6780
|
+
throw new Error(
|
|
6781
|
+
`Parameters with id ${id} not found (not found on server or in local cache): ${e}`
|
|
6782
|
+
);
|
|
6783
|
+
}
|
|
6784
|
+
} else {
|
|
6785
|
+
parameters2 = await state.parametersCache.get({
|
|
6786
|
+
slug,
|
|
6787
|
+
projectId,
|
|
6788
|
+
projectName,
|
|
6789
|
+
version: version ?? "latest"
|
|
6790
|
+
});
|
|
6791
|
+
if (!parameters2) {
|
|
6792
|
+
throw new Error(
|
|
6793
|
+
`Parameters ${slug} (version ${version ?? "latest"}) not found in ${[
|
|
6794
|
+
projectName ?? projectId
|
|
6795
|
+
]} (not found on server or in local cache): ${e}`
|
|
6796
|
+
);
|
|
6797
|
+
}
|
|
6798
|
+
}
|
|
6799
|
+
return parameters2;
|
|
6800
|
+
}
|
|
6801
|
+
if (!("objects" in response) || response.objects.length === 0) {
|
|
6802
|
+
if (id) {
|
|
6803
|
+
throw new Error(`Parameters with id ${id} not found.`);
|
|
6804
|
+
} else {
|
|
6805
|
+
throw new Error(
|
|
6806
|
+
`Parameters ${slug} not found in ${[projectName ?? projectId]}`
|
|
6807
|
+
);
|
|
6808
|
+
}
|
|
6809
|
+
} else if (response.objects.length > 1) {
|
|
6810
|
+
if (id) {
|
|
6811
|
+
throw new Error(
|
|
6812
|
+
`Multiple parameters found with id ${id}. This should never happen.`
|
|
6813
|
+
);
|
|
6814
|
+
} else {
|
|
6815
|
+
throw new Error(
|
|
6816
|
+
`Multiple parameters found with slug ${slug} in project ${projectName ?? projectId}. This should never happen.`
|
|
6817
|
+
);
|
|
6818
|
+
}
|
|
6819
|
+
}
|
|
6820
|
+
const metadata = parametersRowSchema.parse(response["objects"][0]);
|
|
6821
|
+
const parameters = new RemoteEvalParameters(metadata);
|
|
6822
|
+
try {
|
|
6823
|
+
if (id) {
|
|
6824
|
+
await state.parametersCache.set({ id }, parameters);
|
|
6825
|
+
} else if (slug) {
|
|
6826
|
+
await state.parametersCache.set(
|
|
6827
|
+
{ slug, projectId, projectName, version: version ?? "latest" },
|
|
6828
|
+
parameters
|
|
6829
|
+
);
|
|
6830
|
+
}
|
|
6831
|
+
} catch (e) {
|
|
6832
|
+
console.warn("Failed to set parameters in cache:", e);
|
|
6833
|
+
}
|
|
6834
|
+
return parameters;
|
|
6835
|
+
}
|
|
6836
|
+
/**
 * Forwards `maskingFunction` to the global state's `setMaskingFunction`.
 *
 * @param {Function|null} maskingFunction - The value to register on the global state.
 * @returns {void}
 */
function setMaskingFunction(maskingFunction) {
  const globalState = _globalState;
  globalState.setMaskingFunction(maskingFunction);
}
|
|
6839
|
+
async function login(options = {}) {
|
|
6840
|
+
const { forceLogin = false } = options || {};
|
|
6841
|
+
if (_globalState.loggedIn && !forceLogin) {
|
|
6842
|
+
let checkUpdatedParam2 = function(varname, arg, orig) {
|
|
6843
|
+
if (!isEmpty2(arg) && !isEmpty2(orig) && arg !== orig) {
|
|
6844
|
+
throw new Error(
|
|
6845
|
+
`Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
|
|
6846
|
+
);
|
|
6847
|
+
}
|
|
6848
|
+
};
|
|
6849
|
+
var checkUpdatedParam = checkUpdatedParam2;
|
|
6850
|
+
checkUpdatedParam2("appUrl", options.appUrl, _globalState.appUrl);
|
|
6851
|
+
checkUpdatedParam2(
|
|
6852
|
+
"apiKey",
|
|
6417
6853
|
options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
|
|
6418
6854
|
_globalState.loginToken
|
|
6419
6855
|
);
|
|
@@ -8467,6 +8903,55 @@ var Prompt2 = class _Prompt {
|
|
|
8467
8903
|
);
|
|
8468
8904
|
}
|
|
8469
8905
|
};
|
|
8906
|
+
/**
 * Read-only view over a parameters row (`metadata`), exposing its identity,
 * schema, and data, plus a required-key check for candidate values.
 */
var RemoteEvalParameters = class {
  constructor(metadata) {
    this.metadata = metadata;
  }
  // Marker checked by `isParameters` to recognize instances of this class.
  __braintrust_parameters_marker = true;
  get id() {
    return this.metadata.id;
  }
  get projectId() {
    return this.metadata.project_id;
  }
  get name() {
    return this.metadata.name;
  }
  get slug() {
    return this.metadata.slug;
  }
  /** The row's transaction id. */
  get version() {
    return this.metadata[TRANSACTION_ID_FIELD];
  }
  get schema() {
    return this.metadata.function_data.__schema;
  }
  /** The stored data, or an empty object when none is present. */
  get data() {
    return this.metadata.function_data.data ?? {};
  }
  /**
   * Checks that `data` is an object containing every schema property that the
   * schema lists as required. Property values themselves are not inspected.
   *
   * @param {unknown} data - Candidate parameter values.
   * @returns {boolean} `false` for non-objects or a missing required key.
   */
  validate(data) {
    if (data === null || typeof data !== "object") {
      return false;
    }
    const { properties, required } = this.schema;
    if (properties === null || typeof properties !== "object") {
      return true;
    }
    const requiredKeys = Array.isArray(required) ? required : [];
    return Object.keys(properties).every(
      (key) => key in data || !requiredKeys.includes(key)
    );
  }
  /**
   * @param {unknown} x - Any value.
   * @returns {boolean} Whether `x` carries the parameters marker set to `true`.
   */
  static isParameters(x) {
    if (x === null || typeof x !== "object") {
      return false;
    }
    return "__braintrust_parameters_marker" in x && x.__braintrust_parameters_marker === true;
  }
};
|
|
8470
8955
|
var TEST_API_KEY = "___TEST_API_KEY__THIS_IS_NOT_REAL___";
|
|
8471
8956
|
function setInitialTestState() {
|
|
8472
8957
|
if (!_internalGetGlobalState()) {
|
|
@@ -8602,6 +9087,7 @@ __export(exports_exports, {
|
|
|
8602
9087
|
CodePrompt: () => CodePrompt,
|
|
8603
9088
|
ContextManager: () => ContextManager,
|
|
8604
9089
|
DEFAULT_FETCH_BATCH_SIZE: () => DEFAULT_FETCH_BATCH_SIZE,
|
|
9090
|
+
DEFAULT_MAX_REQUEST_SIZE: () => DEFAULT_MAX_REQUEST_SIZE,
|
|
8605
9091
|
Dataset: () => Dataset2,
|
|
8606
9092
|
ERR_PERMALINK: () => ERR_PERMALINK,
|
|
8607
9093
|
Eval: () => Eval,
|
|
@@ -8612,6 +9098,7 @@ __export(exports_exports, {
|
|
|
8612
9098
|
IDGenerator: () => IDGenerator,
|
|
8613
9099
|
JSONAttachment: () => JSONAttachment,
|
|
8614
9100
|
LEGACY_CACHED_HEADER: () => LEGACY_CACHED_HEADER,
|
|
9101
|
+
LOGS3_OVERFLOW_REFERENCE_TYPE: () => LOGS3_OVERFLOW_REFERENCE_TYPE,
|
|
8615
9102
|
LazyValue: () => LazyValue,
|
|
8616
9103
|
Logger: () => Logger,
|
|
8617
9104
|
LoginInvalidOrgError: () => LoginInvalidOrgError,
|
|
@@ -8636,8 +9123,10 @@ __export(exports_exports, {
|
|
|
8636
9123
|
_exportsForTestingOnly: () => _exportsForTestingOnly,
|
|
8637
9124
|
_internalGetGlobalState: () => _internalGetGlobalState,
|
|
8638
9125
|
_internalSetInitialState: () => _internalSetInitialState,
|
|
9126
|
+
addAzureBlobHeaders: () => addAzureBlobHeaders,
|
|
8639
9127
|
braintrustStreamChunkSchema: () => braintrustStreamChunkSchema,
|
|
8640
9128
|
buildLocalSummary: () => buildLocalSummary,
|
|
9129
|
+
constructLogs3OverflowRequest: () => constructLogs3OverflowRequest,
|
|
8641
9130
|
createFinalValuePassThroughStream: () => createFinalValuePassThroughStream,
|
|
8642
9131
|
currentExperiment: () => currentExperiment,
|
|
8643
9132
|
currentLogger: () => currentLogger,
|
|
@@ -8661,15 +9150,18 @@ __export(exports_exports, {
|
|
|
8661
9150
|
initLogger: () => initLogger,
|
|
8662
9151
|
invoke: () => invoke,
|
|
8663
9152
|
isTemplateFormat: () => isTemplateFormat,
|
|
9153
|
+
loadParameters: () => loadParameters,
|
|
8664
9154
|
loadPrompt: () => loadPrompt,
|
|
8665
9155
|
log: () => log,
|
|
8666
9156
|
logError: () => logError,
|
|
8667
9157
|
login: () => login,
|
|
8668
9158
|
loginToState: () => loginToState,
|
|
9159
|
+
logs3OverflowUploadSchema: () => logs3OverflowUploadSchema,
|
|
8669
9160
|
newId: () => newId,
|
|
8670
9161
|
parseCachedHeader: () => parseCachedHeader,
|
|
8671
9162
|
parseTemplateFormat: () => parseTemplateFormat,
|
|
8672
9163
|
permalink: () => permalink,
|
|
9164
|
+
pickLogs3OverflowObjectIds: () => pickLogs3OverflowObjectIds,
|
|
8673
9165
|
projects: () => projects,
|
|
8674
9166
|
promptContentsSchema: () => promptContentsSchema,
|
|
8675
9167
|
promptDefinitionSchema: () => promptDefinitionSchema,
|
|
@@ -8690,6 +9182,8 @@ __export(exports_exports, {
|
|
|
8690
9182
|
traceable: () => traceable,
|
|
8691
9183
|
traced: () => traced,
|
|
8692
9184
|
updateSpan: () => updateSpan,
|
|
9185
|
+
uploadLogs3OverflowPayload: () => uploadLogs3OverflowPayload,
|
|
9186
|
+
utf8ByteLength: () => utf8ByteLength,
|
|
8693
9187
|
withCurrent: () => withCurrent,
|
|
8694
9188
|
withDataset: () => withDataset,
|
|
8695
9189
|
withExperiment: () => withExperiment,
|
|
@@ -12160,6 +12654,189 @@ function wrapMastraAgent(agent, _options) {
|
|
|
12160
12654
|
}
|
|
12161
12655
|
|
|
12162
12656
|
// src/wrappers/claude-agent-sdk/claude-agent-sdk.ts
|
|
12657
|
+
/**
 * Builds "mcp.*" span metadata from the configuration of the named server.
 *
 * @param {string|undefined} serverName - Key of the server in `mcpServers`.
 * @param {object|undefined} mcpServers - Server configurations keyed by name.
 * @returns {object} Metadata entries, or an empty object when the name,
 *   the map, or the server's configuration is missing.
 */
function getMcpServerMetadata(serverName, mcpServers) {
  const serverConfig = serverName && mcpServers ? mcpServers[serverName] : void 0;
  if (!serverConfig) {
    return {};
  }
  const { type, url, command, args } = serverConfig;
  const metadata = {};
  if (type) {
    metadata["mcp.type"] = type;
  } else if (typeof serverConfig === "object" && "transport" in serverConfig) {
    // No explicit type, but a `transport` entry: reported as "sdk".
    metadata["mcp.type"] = "sdk";
  }
  if (url) {
    metadata["mcp.url"] = url;
  }
  if (command) {
    metadata["mcp.command"] = command;
    // Arguments are only reported together with a command.
    if (args) {
      metadata["mcp.args"] = args.join(" ");
    }
  }
  return metadata;
}
|
|
12682
|
+
/**
 * Splits a raw tool name into display and MCP components.
 *
 * Names of the form `mcp__<server>__<tool>` are treated as MCP tools. The
 * server part ends at the first double underscore after the prefix, so server
 * names containing single underscores are recognized; everything after that
 * separator is the tool name.
 *
 * @param {string} rawToolName - Tool name as reported by the hook input.
 * @returns {{displayName: string, toolName: string, mcpServer?: string, rawToolName: string}}
 */
function parseToolName(rawToolName) {
  // Lazy server match: shortest prefix followed by "__" and a non-empty tool.
  const mcpMatch = rawToolName.match(/^mcp__(.+?)__(.+)$/);
  if (mcpMatch) {
    const [, mcpServer, toolName] = mcpMatch;
    return {
      displayName: `tool: ${mcpServer}/${toolName}`,
      toolName,
      mcpServer,
      rawToolName
    };
  }
  return {
    displayName: `tool: ${rawToolName}`,
    toolName: rawToolName,
    rawToolName
  };
}
|
|
12699
|
+
/**
 * Creates the three hook callbacks that trace tool calls as spans.
 *
 * - `preToolUse` starts a "tool" span for every tool except "Task" and
 *   stores it in `activeToolSpans` under the tool-use id.
 * - `postToolUse` logs the tool response, ends the span, and removes it.
 * - `postToolUseFailure` does the same with the error instead of a response.
 *
 * When the tool-use id has an entry in `subAgentSpans`, that span is
 * logged and ended instead, and the id is added to `endedSubAgentSpans`.
 *
 * @param {Function} resolveParentSpan - Async function mapping a tool-use id to the parent span export.
 * @param {Map} activeToolSpans - Tool spans in flight, keyed by tool-use id.
 * @param {object|undefined} mcpServers - MCP server configurations used for span metadata.
 * @param {Map} subAgentSpans - Sub-agent spans keyed by tool-use id.
 * @param {Set} endedSubAgentSpans - Receives ids of sub-agent spans ended here.
 * @returns {{preToolUse: Function, postToolUse: Function, postToolUseFailure: Function}}
 */
function createToolTracingHooks(resolveParentSpan, activeToolSpans, mcpServers, subAgentSpans, endedSubAgentSpans) {
  // Log onto a sub-agent span, then always end it and record it as ended.
  const finishSubAgentSpan = (toolUseID, span, event) => {
    try {
      span.log(event);
    } finally {
      span.end();
      endedSubAgentSpans.add(toolUseID);
    }
  };
  // Log onto a tool span, then always end it and stop tracking it.
  const finishToolSpan = (toolUseID, span, event) => {
    try {
      span.log(event);
    } finally {
      span.end();
      activeToolSpans.delete(toolUseID);
    }
  };

  const preToolUse = async (input, toolUseID) => {
    const isRelevant = input.hook_event_name === "PreToolUse" && Boolean(toolUseID);
    // "Task" calls get no tool span here.
    if (!isRelevant || input.tool_name === "Task") {
      return {};
    }
    const parsed = parseToolName(input.tool_name);
    const mcpMetadata = getMcpServerMetadata(parsed.mcpServer, mcpServers);
    const parentExport = await resolveParentSpan(toolUseID);
    const metadata = {
      // GenAI semantic conventions
      "gen_ai.tool.name": parsed.toolName,
      "gen_ai.tool.call.id": toolUseID,
      // MCP-specific metadata
      ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
      ...mcpMetadata,
      // Claude SDK metadata
      "claude_agent_sdk.raw_tool_name": parsed.rawToolName,
      "claude_agent_sdk.session_id": input.session_id,
      "claude_agent_sdk.cwd": input.cwd
    };
    const toolSpan = startSpan({
      name: parsed.displayName,
      spanAttributes: { type: "tool" /* TOOL */ },
      event: { input: input.tool_input, metadata },
      parent: parentExport
    });
    activeToolSpans.set(toolUseID, toolSpan);
    return {};
  };

  const postToolUse = async (input, toolUseID) => {
    if (input.hook_event_name !== "PostToolUse" || !toolUseID) {
      return {};
    }
    const subAgentSpan = subAgentSpans.get(toolUseID);
    if (subAgentSpan) {
      const response = input.tool_response;
      const metadata = {};
      if (response?.status) {
        metadata["claude_agent_sdk.status"] = response.status;
      }
      if (response?.totalDurationMs) {
        metadata["claude_agent_sdk.duration_ms"] = response.totalDurationMs;
      }
      if (response?.totalToolUseCount !== void 0) {
        metadata["claude_agent_sdk.tool_use_count"] = response.totalToolUseCount;
      }
      finishSubAgentSpan(toolUseID, subAgentSpan, {
        output: response?.content,
        metadata
      });
      return {};
    }
    const toolSpan = activeToolSpans.get(toolUseID);
    if (toolSpan) {
      finishToolSpan(toolUseID, toolSpan, { output: input.tool_response });
    }
    return {};
  };

  const postToolUseFailure = async (input, toolUseID) => {
    if (input.hook_event_name !== "PostToolUseFailure" || !toolUseID) {
      return {};
    }
    const subAgentSpan = subAgentSpans.get(toolUseID);
    if (subAgentSpan) {
      finishSubAgentSpan(toolUseID, subAgentSpan, { error: input.error });
      return {};
    }
    const toolSpan = activeToolSpans.get(toolUseID);
    if (!toolSpan) {
      return {};
    }
    const parsed = parseToolName(input.tool_name);
    finishToolSpan(toolUseID, toolSpan, {
      error: input.error,
      metadata: {
        "gen_ai.tool.name": parsed.toolName,
        "gen_ai.tool.call.id": toolUseID,
        ...parsed.mcpServer && { "mcp.server": parsed.mcpServer },
        "claude_agent_sdk.is_interrupt": input.is_interrupt,
        "claude_agent_sdk.session_id": input.session_id
      }
    });
    return {};
  };

  return { preToolUse, postToolUse, postToolUseFailure };
}
|
|
12811
|
+
/**
 * Returns a copy of `options` whose `hooks` additionally contain the tracing
 * hooks for PreToolUse, PostToolUse, and PostToolUseFailure. Hooks already
 * present in `options.hooks` are kept and listed before the tracing entries.
 *
 * @param {object} options - Query options; `mcpServers` and `hooks` are read.
 * @param {Function} resolveParentSpan - Forwarded to createToolTracingHooks.
 * @param {Map} activeToolSpans - Forwarded to createToolTracingHooks.
 * @param {Map} subAgentSpans - Forwarded to createToolTracingHooks.
 * @param {Set} endedSubAgentSpans - Forwarded to createToolTracingHooks.
 * @returns {object} New options object with the extended hooks.
 */
function injectTracingHooks(options, resolveParentSpan, activeToolSpans, subAgentSpans, endedSubAgentSpans) {
  const tracingHooks = createToolTracingHooks(
    resolveParentSpan,
    activeToolSpans,
    options.mcpServers,
    subAgentSpans,
    endedSubAgentSpans
  );
  const userHooks = options.hooks ?? {};
  // Append one tracing matcher after whatever the caller registered.
  const withTracing = (registered, hook) => [...registered ?? [], { hooks: [hook] }];
  return {
    ...options,
    hooks: {
      ...userHooks,
      PreToolUse: withTracing(userHooks.PreToolUse, tracingHooks.preToolUse),
      PostToolUse: withTracing(userHooks.PostToolUse, tracingHooks.postToolUse),
      PostToolUseFailure: withTracing(
        userHooks.PostToolUseFailure,
        tracingHooks.postToolUseFailure
      )
    }
  };
}
|
|
12163
12840
|
function filterSerializableOptions(options) {
|
|
12164
12841
|
const allowedKeys = [
|
|
12165
12842
|
"model",
|
|
@@ -12184,18 +12861,45 @@ function filterSerializableOptions(options) {
|
|
|
12184
12861
|
}
|
|
12185
12862
|
return filtered;
|
|
12186
12863
|
}
|
|
12864
|
+
/**
 * Tells whether `value` exposes a callable `Symbol.asyncIterator`.
 *
 * @param {unknown} value - Any value.
 * @returns {boolean} `true` when `value` can be consumed with `for await`.
 */
function isAsyncIterable(value) {
  if (value === null || value === void 0) {
    return false;
  }
  return typeof value[Symbol.asyncIterator] === "function";
}
|
|
12187
12867
|
function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
12188
12868
|
const proxy = new Proxy(queryFn, {
|
|
12189
12869
|
apply(target, thisArg, argArray) {
|
|
12190
12870
|
const params = argArray[0] ?? {};
|
|
12191
12871
|
const { prompt, options = {} } = params;
|
|
12872
|
+
const promptIsAsyncIterable = isAsyncIterable(prompt);
|
|
12873
|
+
let capturedPromptMessages;
|
|
12874
|
+
let promptForQuery = prompt;
|
|
12875
|
+
let promptStarted = false;
|
|
12876
|
+
let resolvePromptDone;
|
|
12877
|
+
const promptDone = new Promise((resolve) => {
|
|
12878
|
+
resolvePromptDone = resolve;
|
|
12879
|
+
});
|
|
12880
|
+
if (promptIsAsyncIterable) {
|
|
12881
|
+
capturedPromptMessages = [];
|
|
12882
|
+
const originalPrompt = prompt;
|
|
12883
|
+
const capturingPrompt = (async function* () {
|
|
12884
|
+
promptStarted = true;
|
|
12885
|
+
try {
|
|
12886
|
+
for await (const msg of originalPrompt) {
|
|
12887
|
+
capturedPromptMessages.push(msg);
|
|
12888
|
+
yield msg;
|
|
12889
|
+
}
|
|
12890
|
+
} finally {
|
|
12891
|
+
resolvePromptDone?.();
|
|
12892
|
+
}
|
|
12893
|
+
})();
|
|
12894
|
+
promptForQuery = capturingPrompt;
|
|
12895
|
+
}
|
|
12192
12896
|
const span = startSpan({
|
|
12193
12897
|
name: "Claude Agent",
|
|
12194
12898
|
spanAttributes: {
|
|
12195
12899
|
type: "task" /* TASK */
|
|
12196
12900
|
},
|
|
12197
12901
|
event: {
|
|
12198
|
-
input: typeof prompt === "string" ? prompt :
|
|
12902
|
+
input: typeof prompt === "string" ? prompt : promptIsAsyncIterable ? void 0 : prompt !== void 0 ? String(prompt) : void 0,
|
|
12199
12903
|
metadata: filterSerializableOptions(options)
|
|
12200
12904
|
}
|
|
12201
12905
|
});
|
|
@@ -12206,13 +12910,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12206
12910
|
let currentMessageStartTime = getCurrentUnixTimestamp();
|
|
12207
12911
|
const currentMessages = [];
|
|
12208
12912
|
const createLLMSpan = async () => {
|
|
12913
|
+
const parentToolUseId = currentMessages[0]?.parent_tool_use_id ?? null;
|
|
12914
|
+
let parentSpanExport;
|
|
12915
|
+
if (parentToolUseId) {
|
|
12916
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
12917
|
+
parentSpanExport = subAgentSpan ? await subAgentSpan.export() : await span.export();
|
|
12918
|
+
} else {
|
|
12919
|
+
parentSpanExport = await span.export();
|
|
12920
|
+
}
|
|
12209
12921
|
const finalMessageContent = await _createLLMSpanForMessages(
|
|
12210
12922
|
currentMessages,
|
|
12211
12923
|
prompt,
|
|
12212
12924
|
finalResults,
|
|
12213
12925
|
options,
|
|
12214
12926
|
currentMessageStartTime,
|
|
12215
|
-
|
|
12927
|
+
capturedPromptMessages,
|
|
12928
|
+
parentSpanExport
|
|
12216
12929
|
);
|
|
12217
12930
|
if (finalMessageContent) {
|
|
12218
12931
|
finalResults.push(finalMessageContent);
|
|
@@ -12225,14 +12938,78 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12225
12938
|
currentMessages.length = 0;
|
|
12226
12939
|
};
|
|
12227
12940
|
const invocationTarget = thisArg === proxy || thisArg === void 0 ? defaultThis ?? thisArg : thisArg;
|
|
12941
|
+
const activeToolSpans = /* @__PURE__ */ new Map();
|
|
12942
|
+
const subAgentSpans = /* @__PURE__ */ new Map();
|
|
12943
|
+
const endedSubAgentSpans = /* @__PURE__ */ new Set();
|
|
12944
|
+
const toolUseToParent = /* @__PURE__ */ new Map();
|
|
12945
|
+
const pendingSubAgentNames = /* @__PURE__ */ new Map();
|
|
12946
|
+
const resolveParentSpan = async (toolUseID) => {
|
|
12947
|
+
const parentToolUseId = toolUseToParent.get(toolUseID);
|
|
12948
|
+
if (parentToolUseId) {
|
|
12949
|
+
const subAgentSpan = subAgentSpans.get(parentToolUseId);
|
|
12950
|
+
if (subAgentSpan) {
|
|
12951
|
+
return subAgentSpan.export();
|
|
12952
|
+
}
|
|
12953
|
+
}
|
|
12954
|
+
return span.export();
|
|
12955
|
+
};
|
|
12956
|
+
const optionsWithHooks = injectTracingHooks(
|
|
12957
|
+
options,
|
|
12958
|
+
resolveParentSpan,
|
|
12959
|
+
activeToolSpans,
|
|
12960
|
+
subAgentSpans,
|
|
12961
|
+
endedSubAgentSpans
|
|
12962
|
+
);
|
|
12963
|
+
const modifiedArgArray = [
|
|
12964
|
+
{
|
|
12965
|
+
...params,
|
|
12966
|
+
...promptForQuery !== void 0 ? { prompt: promptForQuery } : {},
|
|
12967
|
+
options: optionsWithHooks
|
|
12968
|
+
}
|
|
12969
|
+
];
|
|
12228
12970
|
const originalGenerator = withCurrent(
|
|
12229
12971
|
span,
|
|
12230
|
-
() => Reflect.apply(target, invocationTarget,
|
|
12972
|
+
() => Reflect.apply(target, invocationTarget, modifiedArgArray)
|
|
12231
12973
|
);
|
|
12232
12974
|
const wrappedGenerator = (async function* () {
|
|
12233
12975
|
try {
|
|
12234
12976
|
for await (const message of originalGenerator) {
|
|
12235
12977
|
const currentTime = getCurrentUnixTimestamp();
|
|
12978
|
+
if (message.type === "assistant" && Array.isArray(message.message?.content)) {
|
|
12979
|
+
const parentToolUseId = message.parent_tool_use_id ?? null;
|
|
12980
|
+
for (const block of message.message.content) {
|
|
12981
|
+
if (block.type === "tool_use" && block.id) {
|
|
12982
|
+
toolUseToParent.set(block.id, parentToolUseId);
|
|
12983
|
+
if (block.name === "Task" && block.input?.subagent_type) {
|
|
12984
|
+
pendingSubAgentNames.set(
|
|
12985
|
+
block.id,
|
|
12986
|
+
block.input.subagent_type
|
|
12987
|
+
);
|
|
12988
|
+
}
|
|
12989
|
+
}
|
|
12990
|
+
}
|
|
12991
|
+
}
|
|
12992
|
+
if ("parent_tool_use_id" in message) {
|
|
12993
|
+
const parentToolUseId = message.parent_tool_use_id;
|
|
12994
|
+
if (parentToolUseId && !subAgentSpans.has(parentToolUseId)) {
|
|
12995
|
+
const agentName = pendingSubAgentNames.get(parentToolUseId);
|
|
12996
|
+
const spanName = agentName ? `Agent: ${agentName}` : "Agent: sub-agent";
|
|
12997
|
+
const parentExport = await span.export();
|
|
12998
|
+
const subAgentSpan = startSpan({
|
|
12999
|
+
name: spanName,
|
|
13000
|
+
spanAttributes: { type: "task" /* TASK */ },
|
|
13001
|
+
event: {
|
|
13002
|
+
metadata: {
|
|
13003
|
+
...agentName && {
|
|
13004
|
+
"claude_agent_sdk.agent_type": agentName
|
|
13005
|
+
}
|
|
13006
|
+
}
|
|
13007
|
+
},
|
|
13008
|
+
parent: parentExport
|
|
13009
|
+
});
|
|
13010
|
+
subAgentSpans.set(parentToolUseId, subAgentSpan);
|
|
13011
|
+
}
|
|
13012
|
+
}
|
|
12236
13013
|
const messageId = message.message?.id;
|
|
12237
13014
|
if (messageId && messageId !== currentMessageId) {
|
|
12238
13015
|
await createLLMSpan();
|
|
@@ -12278,6 +13055,22 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12278
13055
|
});
|
|
12279
13056
|
throw error;
|
|
12280
13057
|
} finally {
|
|
13058
|
+
for (const [id, subSpan] of subAgentSpans) {
|
|
13059
|
+
if (!endedSubAgentSpans.has(id)) {
|
|
13060
|
+
subSpan.end();
|
|
13061
|
+
}
|
|
13062
|
+
}
|
|
13063
|
+
subAgentSpans.clear();
|
|
13064
|
+
if (capturedPromptMessages) {
|
|
13065
|
+
if (promptStarted) {
|
|
13066
|
+
await promptDone;
|
|
13067
|
+
}
|
|
13068
|
+
if (capturedPromptMessages.length > 0) {
|
|
13069
|
+
span.log({
|
|
13070
|
+
input: _formatCapturedMessages(capturedPromptMessages)
|
|
13071
|
+
});
|
|
13072
|
+
}
|
|
13073
|
+
}
|
|
12281
13074
|
span.end();
|
|
12282
13075
|
}
|
|
12283
13076
|
})();
|
|
@@ -12305,43 +13098,25 @@ function wrapClaudeAgentQuery(queryFn, defaultThis) {
|
|
|
12305
13098
|
});
|
|
12306
13099
|
return proxy;
|
|
12307
13100
|
}
|
|
12308
|
-
function
|
|
12309
|
-
const
|
|
12310
|
-
|
|
12311
|
-
|
|
12312
|
-
|
|
12313
|
-
|
|
12314
|
-
|
|
12315
|
-
|
|
12316
|
-
|
|
12317
|
-
}
|
|
12318
|
-
});
|
|
12319
|
-
const result = await originalHandler(args, extra);
|
|
12320
|
-
span.log({
|
|
12321
|
-
output: result
|
|
12322
|
-
});
|
|
12323
|
-
return result;
|
|
12324
|
-
},
|
|
12325
|
-
{
|
|
12326
|
-
name: `${toolDef.name}`,
|
|
12327
|
-
spanAttributes: {
|
|
12328
|
-
type: "tool" /* TOOL */
|
|
13101
|
+
function _buildLLMInput(prompt, conversationHistory, capturedPromptMessages) {
|
|
13102
|
+
const promptMessages = [];
|
|
13103
|
+
if (typeof prompt === "string") {
|
|
13104
|
+
promptMessages.push({ content: prompt, role: "user" });
|
|
13105
|
+
} else if (capturedPromptMessages && capturedPromptMessages.length > 0) {
|
|
13106
|
+
for (const msg of capturedPromptMessages) {
|
|
13107
|
+
const role = msg.message?.role;
|
|
13108
|
+
const content = msg.message?.content;
|
|
13109
|
+
if (role && content !== void 0) {
|
|
13110
|
+
promptMessages.push({ content, role });
|
|
12329
13111
|
}
|
|
12330
13112
|
}
|
|
12331
|
-
|
|
12332
|
-
|
|
12333
|
-
...toolDef,
|
|
12334
|
-
handler: wrappedHandler
|
|
12335
|
-
};
|
|
12336
|
-
}
|
|
12337
|
-
function _buildLLMInput(prompt, conversationHistory) {
|
|
12338
|
-
const promptMessage = typeof prompt === "string" ? { content: prompt, role: "user" } : void 0;
|
|
12339
|
-
const inputParts = [
|
|
12340
|
-
...promptMessage ? [promptMessage] : [],
|
|
12341
|
-
...conversationHistory
|
|
12342
|
-
];
|
|
13113
|
+
}
|
|
13114
|
+
const inputParts = [...promptMessages, ...conversationHistory];
|
|
12343
13115
|
return inputParts.length > 0 ? inputParts : void 0;
|
|
12344
13116
|
}
|
|
13117
|
+
function _formatCapturedMessages(messages) {
|
|
13118
|
+
return messages.length > 0 ? messages : [];
|
|
13119
|
+
}
|
|
12345
13120
|
function _extractUsageFromMessage(message) {
|
|
12346
13121
|
const metrics = {};
|
|
12347
13122
|
let usage;
|
|
@@ -12375,7 +13150,7 @@ function _extractUsageFromMessage(message) {
|
|
|
12375
13150
|
}
|
|
12376
13151
|
return metrics;
|
|
12377
13152
|
}
|
|
12378
|
-
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, parentSpan) {
|
|
13153
|
+
async function _createLLMSpanForMessages(messages, prompt, conversationHistory, options, startTime, capturedPromptMessages, parentSpan) {
|
|
12379
13154
|
if (messages.length === 0) return void 0;
|
|
12380
13155
|
const lastMessage = messages[messages.length - 1];
|
|
12381
13156
|
if (lastMessage.type !== "assistant" || !lastMessage.message?.usage) {
|
|
@@ -12383,7 +13158,11 @@ async function _createLLMSpanForMessages(messages, prompt, conversationHistory,
|
|
|
12383
13158
|
}
|
|
12384
13159
|
const model = lastMessage.message.model || options.model;
|
|
12385
13160
|
const usage = _extractUsageFromMessage(lastMessage);
|
|
12386
|
-
const input = _buildLLMInput(
|
|
13161
|
+
const input = _buildLLMInput(
|
|
13162
|
+
prompt,
|
|
13163
|
+
conversationHistory,
|
|
13164
|
+
capturedPromptMessages
|
|
13165
|
+
);
|
|
12387
13166
|
const outputs = messages.map(
|
|
12388
13167
|
(m) => m.message?.content && m.message?.role ? { content: m.message.content, role: m.message.role } : void 0
|
|
12389
13168
|
).filter((c) => c !== void 0);
|
|
@@ -12424,25 +13203,9 @@ function wrapClaudeAgentSDK(sdk) {
|
|
|
12424
13203
|
return wrappedQuery;
|
|
12425
13204
|
}
|
|
12426
13205
|
if (prop === "tool" && typeof value === "function") {
|
|
12427
|
-
const
|
|
12428
|
-
|
|
12429
|
-
|
|
12430
|
-
const invocationTarget = thisArg === receiver || thisArg === void 0 ? target : thisArg;
|
|
12431
|
-
const toolDef = Reflect.apply(
|
|
12432
|
-
toolTarget,
|
|
12433
|
-
invocationTarget,
|
|
12434
|
-
argArray
|
|
12435
|
-
);
|
|
12436
|
-
if (toolDef && typeof toolDef === "object" && "handler" in toolDef) {
|
|
12437
|
-
return wrapClaudeAgentTool(
|
|
12438
|
-
toolDef
|
|
12439
|
-
);
|
|
12440
|
-
}
|
|
12441
|
-
return toolDef;
|
|
12442
|
-
}
|
|
12443
|
-
});
|
|
12444
|
-
cache.set(prop, wrappedToolFactory);
|
|
12445
|
-
return wrappedToolFactory;
|
|
13206
|
+
const bound = value.bind(target);
|
|
13207
|
+
cache.set(prop, bound);
|
|
13208
|
+
return bound;
|
|
12446
13209
|
}
|
|
12447
13210
|
if (typeof value === "function") {
|
|
12448
13211
|
const bound = value.bind(target);
|
|
@@ -13227,7 +13990,7 @@ function isAsync(fn) {
|
|
|
13227
13990
|
function isAsyncGenerator2(fn) {
|
|
13228
13991
|
return fn[Symbol.toStringTag] === "AsyncGenerator";
|
|
13229
13992
|
}
|
|
13230
|
-
function
|
|
13993
|
+
function isAsyncIterable2(obj) {
|
|
13231
13994
|
return typeof obj[Symbol.asyncIterator] === "function";
|
|
13232
13995
|
}
|
|
13233
13996
|
function wrapAsync(asyncFn) {
|
|
@@ -13398,7 +14161,7 @@ var eachOfLimit$2 = (limit) => {
|
|
|
13398
14161
|
if (isAsyncGenerator2(obj)) {
|
|
13399
14162
|
return asyncEachOfLimit(obj, limit, iteratee, callback);
|
|
13400
14163
|
}
|
|
13401
|
-
if (
|
|
14164
|
+
if (isAsyncIterable2(obj)) {
|
|
13402
14165
|
return asyncEachOfLimit(obj[Symbol.asyncIterator](), limit, iteratee, callback);
|
|
13403
14166
|
}
|
|
13404
14167
|
var nextElem = createIterator(obj);
|
|
@@ -14327,6 +15090,7 @@ var LocalTrace = class {
|
|
|
14327
15090
|
spansFlushed = false;
|
|
14328
15091
|
spansFlushPromise = null;
|
|
14329
15092
|
cachedFetcher;
|
|
15093
|
+
threadCache = /* @__PURE__ */ new Map();
|
|
14330
15094
|
constructor({
|
|
14331
15095
|
objectType,
|
|
14332
15096
|
objectId,
|
|
@@ -14397,6 +15161,36 @@ var LocalTrace = class {
|
|
|
14397
15161
|
}
|
|
14398
15162
|
return this.cachedFetcher.getSpans({ spanType });
|
|
14399
15163
|
}
|
|
15164
|
+
/**
|
|
15165
|
+
* Get the thread (preprocessed messages) for this trace.
|
|
15166
|
+
* Calls the API with the project_default preprocessor (which falls back to "thread").
|
|
15167
|
+
*/
|
|
15168
|
+
async getThread(options) {
|
|
15169
|
+
const cacheKey = options?.preprocessor ?? "project_default";
|
|
15170
|
+
if (!this.threadCache.has(cacheKey)) {
|
|
15171
|
+
const promise = this.fetchThread(options);
|
|
15172
|
+
this.threadCache.set(cacheKey, promise);
|
|
15173
|
+
}
|
|
15174
|
+
return this.threadCache.get(cacheKey);
|
|
15175
|
+
}
|
|
15176
|
+
async fetchThread(options) {
|
|
15177
|
+
await this.ensureSpansReady();
|
|
15178
|
+
await this.state.login({});
|
|
15179
|
+
const result = await invoke({
|
|
15180
|
+
globalFunction: options?.preprocessor ?? "project_default",
|
|
15181
|
+
functionType: "preprocessor",
|
|
15182
|
+
input: {
|
|
15183
|
+
trace_ref: {
|
|
15184
|
+
object_type: this.objectType,
|
|
15185
|
+
object_id: this.objectId,
|
|
15186
|
+
root_span_id: this.rootSpanId
|
|
15187
|
+
}
|
|
15188
|
+
},
|
|
15189
|
+
mode: "json",
|
|
15190
|
+
state: this.state
|
|
15191
|
+
});
|
|
15192
|
+
return Array.isArray(result) ? result : [];
|
|
15193
|
+
}
|
|
14400
15194
|
async ensureSpansReady() {
|
|
14401
15195
|
if (this.spansFlushed || !this.ensureSpansFlushed) {
|
|
14402
15196
|
return;
|
|
@@ -14431,648 +15225,371 @@ var SimpleProgressReporter = class {
|
|
|
14431
15225
|
|
|
14432
15226
|
// src/eval-parameters.ts
|
|
14433
15227
|
import { z as z10 } from "zod/v3";
|
|
15228
|
+
import Ajv from "ajv";
|
|
14434
15229
|
|
|
14435
|
-
// src/
|
|
15230
|
+
// src/prompt-schemas.ts
|
|
14436
15231
|
import { z as z9 } from "zod/v3";
|
|
14437
|
-
var
|
|
14438
|
-
|
|
14439
|
-
|
|
14440
|
-
|
|
15232
|
+
var promptContentsSchema = z9.union([
|
|
15233
|
+
z9.object({
|
|
15234
|
+
prompt: z9.string()
|
|
15235
|
+
}),
|
|
15236
|
+
z9.object({
|
|
15237
|
+
messages: z9.array(ChatCompletionMessageParam)
|
|
15238
|
+
})
|
|
15239
|
+
]);
|
|
15240
|
+
var promptDefinitionSchema = promptContentsSchema.and(
|
|
15241
|
+
z9.object({
|
|
15242
|
+
model: z9.string(),
|
|
15243
|
+
params: ModelParams.optional(),
|
|
15244
|
+
templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
|
|
15245
|
+
})
|
|
15246
|
+
);
|
|
15247
|
+
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
|
|
15248
|
+
z9.object({
|
|
15249
|
+
tools: z9.array(ToolFunctionDefinition).optional()
|
|
15250
|
+
})
|
|
15251
|
+
);
|
|
15252
|
+
function promptDefinitionToPromptData(promptDefinition, rawTools) {
|
|
15253
|
+
const promptBlock = "messages" in promptDefinition ? {
|
|
15254
|
+
type: "chat",
|
|
15255
|
+
messages: promptDefinition.messages,
|
|
15256
|
+
tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
|
|
15257
|
+
} : {
|
|
15258
|
+
type: "completion",
|
|
15259
|
+
content: promptDefinition.prompt
|
|
15260
|
+
};
|
|
15261
|
+
return {
|
|
15262
|
+
prompt: promptBlock,
|
|
15263
|
+
options: {
|
|
15264
|
+
model: promptDefinition.model,
|
|
15265
|
+
params: promptDefinition.params
|
|
15266
|
+
},
|
|
15267
|
+
...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
|
|
15268
|
+
};
|
|
15269
|
+
}
|
|
15270
|
+
|
|
15271
|
+
// src/eval-parameters.ts
|
|
15272
|
+
var evalParametersSchema = z10.record(
|
|
15273
|
+
z10.string(),
|
|
15274
|
+
z10.union([
|
|
15275
|
+
z10.object({
|
|
15276
|
+
type: z10.literal("prompt"),
|
|
15277
|
+
default: promptDefinitionWithToolsSchema.optional(),
|
|
15278
|
+
description: z10.string().optional()
|
|
15279
|
+
}),
|
|
15280
|
+
z10.instanceof(z10.ZodType)
|
|
15281
|
+
// For Zod schemas
|
|
15282
|
+
])
|
|
15283
|
+
);
|
|
15284
|
+
async function validateParameters(parameters, parameterSchema) {
|
|
15285
|
+
let resolvedSchema = parameterSchema;
|
|
15286
|
+
if (resolvedSchema instanceof Promise) {
|
|
15287
|
+
resolvedSchema = await resolvedSchema;
|
|
15288
|
+
}
|
|
15289
|
+
if (resolvedSchema === void 0 || resolvedSchema === null) {
|
|
15290
|
+
return parameters;
|
|
15291
|
+
}
|
|
15292
|
+
if (RemoteEvalParameters.isParameters(resolvedSchema)) {
|
|
15293
|
+
const mergedParameters = parameters && Object.keys(parameters).length > 0 ? {
|
|
15294
|
+
...resolvedSchema.data,
|
|
15295
|
+
...parameters
|
|
15296
|
+
} : resolvedSchema.data;
|
|
15297
|
+
return validateParametersWithJsonSchema(
|
|
15298
|
+
mergedParameters,
|
|
15299
|
+
resolvedSchema.schema
|
|
15300
|
+
);
|
|
14441
15301
|
}
|
|
14442
|
-
|
|
14443
|
-
|
|
14444
|
-
|
|
14445
|
-
|
|
14446
|
-
|
|
14447
|
-
|
|
14448
|
-
|
|
14449
|
-
|
|
14450
|
-
|
|
14451
|
-
|
|
14452
|
-
|
|
14453
|
-
|
|
14454
|
-
|
|
14455
|
-
|
|
14456
|
-
|
|
14457
|
-
|
|
14458
|
-
|
|
15302
|
+
return validateParametersWithZod(
|
|
15303
|
+
parameters,
|
|
15304
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15305
|
+
resolvedSchema
|
|
15306
|
+
);
|
|
15307
|
+
}
|
|
15308
|
+
function validateParametersWithZod(parameters, parameterSchema) {
|
|
15309
|
+
return Object.fromEntries(
|
|
15310
|
+
Object.entries(parameterSchema).map(([name, schema]) => {
|
|
15311
|
+
const value = parameters[name];
|
|
15312
|
+
try {
|
|
15313
|
+
if ("type" in schema && schema.type === "prompt") {
|
|
15314
|
+
const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
|
|
15315
|
+
schema.default,
|
|
15316
|
+
schema.default.tools
|
|
15317
|
+
) : void 0;
|
|
15318
|
+
if (!promptData) {
|
|
15319
|
+
throw new Error(`Parameter '${name}' is required`);
|
|
15320
|
+
}
|
|
15321
|
+
return [name, Prompt2.fromPromptData(name, promptData)];
|
|
15322
|
+
} else {
|
|
15323
|
+
const schemaCasted = schema;
|
|
15324
|
+
return [name, schemaCasted.parse(value)];
|
|
15325
|
+
}
|
|
15326
|
+
} catch (e) {
|
|
15327
|
+
console.error("Error validating parameter", name, e);
|
|
15328
|
+
throw Error(
|
|
15329
|
+
`Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
|
|
15330
|
+
);
|
|
15331
|
+
}
|
|
15332
|
+
})
|
|
15333
|
+
);
|
|
15334
|
+
}
|
|
15335
|
+
function validateParametersWithJsonSchema(parameters, schema) {
|
|
15336
|
+
const ajv = new Ajv({ coerceTypes: true, useDefaults: true, strict: false });
|
|
15337
|
+
const validate = ajv.compile(schema);
|
|
15338
|
+
if (!validate(parameters)) {
|
|
15339
|
+
const errorMessages = validate.errors?.map((err) => {
|
|
15340
|
+
const path = err.instancePath || "root";
|
|
15341
|
+
return `${path}: ${err.message}`;
|
|
15342
|
+
}).join(", ");
|
|
15343
|
+
throw Error(`Invalid parameters: ${errorMessages}`);
|
|
14459
15344
|
}
|
|
14460
|
-
|
|
14461
|
-
|
|
14462
|
-
|
|
14463
|
-
|
|
14464
|
-
|
|
15345
|
+
return parameters;
|
|
15346
|
+
}
|
|
15347
|
+
|
|
15348
|
+
// src/framework.ts
|
|
15349
|
+
function BaseExperiment(options = {}) {
|
|
15350
|
+
return { _type: "BaseExperiment", ...options };
|
|
15351
|
+
}
|
|
15352
|
+
var EvalResultWithSummary = class {
|
|
15353
|
+
constructor(summary, results) {
|
|
15354
|
+
this.summary = summary;
|
|
15355
|
+
this.results = results;
|
|
14465
15356
|
}
|
|
14466
|
-
|
|
14467
|
-
|
|
14468
|
-
|
|
14469
|
-
|
|
14470
|
-
|
|
15357
|
+
/**
|
|
15358
|
+
* @deprecated Use `summary` instead.
|
|
15359
|
+
*/
|
|
15360
|
+
toString() {
|
|
15361
|
+
return JSON.stringify(this.summary);
|
|
14471
15362
|
}
|
|
14472
|
-
|
|
14473
|
-
|
|
14474
|
-
|
|
14475
|
-
|
|
14476
|
-
|
|
14477
|
-
|
|
14478
|
-
|
|
14479
|
-
|
|
14480
|
-
if (this._publishableCodeFunctions.length > 0) {
|
|
14481
|
-
console.warn(
|
|
14482
|
-
"Code functions cannot be published directly. Use `braintrust push` instead."
|
|
14483
|
-
);
|
|
14484
|
-
}
|
|
14485
|
-
if (this._publishablePrompts.length > 0) {
|
|
14486
|
-
for (const prompt of this._publishablePrompts) {
|
|
14487
|
-
const functionDefinition = await prompt.toFunctionDefinition(projectMap);
|
|
14488
|
-
functionDefinitions.push(functionDefinition);
|
|
14489
|
-
}
|
|
14490
|
-
}
|
|
14491
|
-
await _internalGetGlobalState().apiConn().post_json("insert-functions", {
|
|
14492
|
-
functions: functionDefinitions
|
|
14493
|
-
});
|
|
15363
|
+
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
15364
|
+
return `EvalResultWithSummary(summary="...", results=[...])`;
|
|
15365
|
+
}
|
|
15366
|
+
toJSON() {
|
|
15367
|
+
return {
|
|
15368
|
+
summary: this.summary,
|
|
15369
|
+
results: this.results
|
|
15370
|
+
};
|
|
14494
15371
|
}
|
|
14495
15372
|
};
|
|
14496
|
-
|
|
14497
|
-
|
|
14498
|
-
|
|
15373
|
+
function makeEvalName(projectName, experimentName) {
|
|
15374
|
+
let out = projectName;
|
|
15375
|
+
if (experimentName) {
|
|
15376
|
+
out += ` [experimentName=${experimentName}]`;
|
|
14499
15377
|
}
|
|
14500
|
-
|
|
14501
|
-
|
|
14502
|
-
|
|
14503
|
-
|
|
14504
|
-
|
|
14505
|
-
|
|
14506
|
-
|
|
14507
|
-
|
|
14508
|
-
|
|
14509
|
-
|
|
14510
|
-
|
|
14511
|
-
|
|
14512
|
-
|
|
14513
|
-
|
|
14514
|
-
slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
|
|
14515
|
-
type: "tool",
|
|
14516
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
14517
|
-
parameters,
|
|
14518
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
14519
|
-
returns,
|
|
14520
|
-
...rest
|
|
14521
|
-
});
|
|
14522
|
-
this.project.addCodeFunction(tool);
|
|
14523
|
-
return tool;
|
|
15378
|
+
return out;
|
|
15379
|
+
}
|
|
15380
|
+
function initExperiment2(state, options = {}) {
|
|
15381
|
+
return init({
|
|
15382
|
+
state,
|
|
15383
|
+
...options,
|
|
15384
|
+
setCurrent: false
|
|
15385
|
+
});
|
|
15386
|
+
}
|
|
15387
|
+
function callEvaluatorData(data) {
|
|
15388
|
+
const dataResult = typeof data === "function" ? data() : data;
|
|
15389
|
+
let baseExperiment = void 0;
|
|
15390
|
+
if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
|
|
15391
|
+
baseExperiment = dataResult.name;
|
|
14524
15392
|
}
|
|
15393
|
+
return {
|
|
15394
|
+
data: dataResult,
|
|
15395
|
+
baseExperiment
|
|
15396
|
+
};
|
|
15397
|
+
}
|
|
15398
|
+
function isAsyncIterable3(value) {
|
|
15399
|
+
return typeof value === "object" && value !== null && typeof value[Symbol.asyncIterator] === "function";
|
|
15400
|
+
}
|
|
15401
|
+
function isIterable(value) {
|
|
15402
|
+
return typeof value === "object" && value !== null && typeof value[Symbol.iterator] === "function";
|
|
15403
|
+
}
|
|
15404
|
+
globalThis._evals = {
|
|
15405
|
+
functions: [],
|
|
15406
|
+
prompts: [],
|
|
15407
|
+
parameters: [],
|
|
15408
|
+
evaluators: {},
|
|
15409
|
+
reporters: {}
|
|
14525
15410
|
};
|
|
14526
|
-
|
|
14527
|
-
|
|
14528
|
-
|
|
15411
|
+
function _initializeSpanContext() {
|
|
15412
|
+
globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
|
|
15413
|
+
}
|
|
15414
|
+
async function Eval(name, evaluator, reporterOrOpts) {
|
|
15415
|
+
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
15416
|
+
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
15417
|
+
if (globalThis._evals.evaluators[evalName]) {
|
|
15418
|
+
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
14529
15419
|
}
|
|
14530
|
-
|
|
14531
|
-
|
|
14532
|
-
|
|
14533
|
-
|
|
14534
|
-
|
|
14535
|
-
|
|
15420
|
+
if (globalThis._lazy_load) {
|
|
15421
|
+
globalThis._evals.evaluators[evalName] = {
|
|
15422
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15423
|
+
evaluator: {
|
|
15424
|
+
evalName,
|
|
15425
|
+
projectName: name,
|
|
15426
|
+
...evaluator
|
|
15427
|
+
},
|
|
15428
|
+
reporter: options.reporter
|
|
15429
|
+
};
|
|
15430
|
+
_initializeSpanContext();
|
|
15431
|
+
return new EvalResultWithSummary(
|
|
15432
|
+
{
|
|
15433
|
+
scores: {},
|
|
15434
|
+
metrics: {},
|
|
15435
|
+
projectName: "",
|
|
15436
|
+
experimentName: ""
|
|
15437
|
+
},
|
|
15438
|
+
[]
|
|
15439
|
+
);
|
|
15440
|
+
}
|
|
15441
|
+
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
15442
|
+
const shouldCollectResults = options.returnResults ?? true;
|
|
15443
|
+
if (typeof options.reporter === "string") {
|
|
15444
|
+
throw new Error(
|
|
15445
|
+
"Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
|
|
15446
|
+
);
|
|
15447
|
+
}
|
|
15448
|
+
const resolvedReporter = options.reporter || defaultReporter;
|
|
15449
|
+
try {
|
|
15450
|
+
const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
|
|
15451
|
+
evaluator.data
|
|
15452
|
+
);
|
|
15453
|
+
const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
|
|
15454
|
+
...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
|
|
15455
|
+
experiment: evaluator.experimentName,
|
|
15456
|
+
description: evaluator.description,
|
|
15457
|
+
metadata: evaluator.metadata,
|
|
15458
|
+
isPublic: evaluator.isPublic,
|
|
15459
|
+
update: evaluator.update,
|
|
15460
|
+
baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
|
|
15461
|
+
baseExperimentId: evaluator.baseExperimentId,
|
|
15462
|
+
gitMetadataSettings: evaluator.gitMetadataSettings,
|
|
15463
|
+
repoInfo: evaluator.repoInfo,
|
|
15464
|
+
dataset: Dataset2.isDataset(data) ? data : void 0
|
|
15465
|
+
});
|
|
15466
|
+
if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
|
|
15467
|
+
await experiment._waitForId();
|
|
14536
15468
|
}
|
|
14537
|
-
if (
|
|
14538
|
-
|
|
15469
|
+
if (experiment && options.onStart) {
|
|
15470
|
+
const summary = await experiment.summarize({ summarizeScores: false });
|
|
15471
|
+
options.onStart(summary);
|
|
14539
15472
|
}
|
|
14540
|
-
|
|
14541
|
-
|
|
14542
|
-
|
|
14543
|
-
|
|
14544
|
-
|
|
14545
|
-
|
|
14546
|
-
type: "scorer"
|
|
14547
|
-
});
|
|
14548
|
-
this.project.addCodeFunction(scorer);
|
|
14549
|
-
} else {
|
|
14550
|
-
const promptBlock = "messages" in opts ? {
|
|
14551
|
-
type: "chat",
|
|
14552
|
-
messages: opts.messages
|
|
14553
|
-
} : {
|
|
14554
|
-
type: "completion",
|
|
14555
|
-
content: opts.prompt
|
|
14556
|
-
};
|
|
14557
|
-
const promptData = {
|
|
14558
|
-
prompt: promptBlock,
|
|
14559
|
-
options: {
|
|
14560
|
-
model: opts.model,
|
|
14561
|
-
params: opts.params
|
|
14562
|
-
},
|
|
14563
|
-
parser: {
|
|
14564
|
-
type: "llm_classifier",
|
|
14565
|
-
use_cot: opts.useCot,
|
|
14566
|
-
choice_scores: opts.choiceScores
|
|
14567
|
-
}
|
|
15473
|
+
try {
|
|
15474
|
+
const evalDef = {
|
|
15475
|
+
evalName,
|
|
15476
|
+
projectName: name,
|
|
15477
|
+
...evaluator,
|
|
15478
|
+
data
|
|
14568
15479
|
};
|
|
14569
|
-
const
|
|
14570
|
-
|
|
14571
|
-
|
|
14572
|
-
|
|
14573
|
-
|
|
14574
|
-
|
|
14575
|
-
|
|
14576
|
-
|
|
14577
|
-
|
|
14578
|
-
|
|
14579
|
-
|
|
14580
|
-
|
|
14581
|
-
|
|
14582
|
-
|
|
14583
|
-
|
|
14584
|
-
|
|
14585
|
-
|
|
14586
|
-
|
|
14587
|
-
|
|
14588
|
-
|
|
14589
|
-
|
|
14590
|
-
|
|
14591
|
-
|
|
14592
|
-
|
|
14593
|
-
|
|
14594
|
-
|
|
14595
|
-
|
|
14596
|
-
|
|
14597
|
-
|
|
15480
|
+
const enableCache = options.enableCache ?? true;
|
|
15481
|
+
let ret;
|
|
15482
|
+
if (options.parent) {
|
|
15483
|
+
ret = await withParent(
|
|
15484
|
+
options.parent,
|
|
15485
|
+
() => runEvaluator(
|
|
15486
|
+
null,
|
|
15487
|
+
evalDef,
|
|
15488
|
+
progressReporter,
|
|
15489
|
+
[],
|
|
15490
|
+
options.stream,
|
|
15491
|
+
options.parameters,
|
|
15492
|
+
shouldCollectResults,
|
|
15493
|
+
enableCache
|
|
15494
|
+
),
|
|
15495
|
+
evaluator.state
|
|
15496
|
+
);
|
|
15497
|
+
} else {
|
|
15498
|
+
ret = await runEvaluator(
|
|
15499
|
+
experiment,
|
|
15500
|
+
evalDef,
|
|
15501
|
+
progressReporter,
|
|
15502
|
+
[],
|
|
15503
|
+
options.stream,
|
|
15504
|
+
options.parameters,
|
|
15505
|
+
shouldCollectResults,
|
|
15506
|
+
enableCache
|
|
15507
|
+
);
|
|
15508
|
+
}
|
|
15509
|
+
progressReporter.stop();
|
|
15510
|
+
resolvedReporter.reportEval(evalDef, ret, {
|
|
15511
|
+
verbose: true,
|
|
15512
|
+
jsonl: false
|
|
15513
|
+
});
|
|
15514
|
+
return ret;
|
|
15515
|
+
} finally {
|
|
15516
|
+
if (experiment) {
|
|
15517
|
+
await experiment.flush().catch(console.error);
|
|
15518
|
+
} else if (options.parent) {
|
|
15519
|
+
await flush().catch(console.error);
|
|
15520
|
+
}
|
|
14598
15521
|
}
|
|
15522
|
+
} finally {
|
|
15523
|
+
progressReporter.stop();
|
|
14599
15524
|
}
|
|
14600
|
-
|
|
14601
|
-
|
|
14602
|
-
|
|
14603
|
-
|
|
14604
|
-
|
|
14605
|
-
parameters;
|
|
14606
|
-
returns;
|
|
14607
|
-
ifExists;
|
|
14608
|
-
metadata;
|
|
14609
|
-
key() {
|
|
14610
|
-
return JSON.stringify([
|
|
14611
|
-
this.project.id ?? "",
|
|
14612
|
-
this.project.name ?? "",
|
|
14613
|
-
this.slug
|
|
14614
|
-
]);
|
|
15525
|
+
}
|
|
15526
|
+
function Reporter(name, reporter) {
|
|
15527
|
+
const ret = { name, ...reporter };
|
|
15528
|
+
if (_evals.reporters[name]) {
|
|
15529
|
+
throw new Error(`Reporter ${name} already exists`);
|
|
14615
15530
|
}
|
|
14616
|
-
|
|
14617
|
-
|
|
14618
|
-
project;
|
|
14619
|
-
name;
|
|
14620
|
-
slug;
|
|
14621
|
-
prompt;
|
|
14622
|
-
ifExists;
|
|
14623
|
-
description;
|
|
14624
|
-
id;
|
|
14625
|
-
functionType;
|
|
14626
|
-
toolFunctions;
|
|
14627
|
-
metadata;
|
|
14628
|
-
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
14629
|
-
this.project = project;
|
|
14630
|
-
this.name = opts.name;
|
|
14631
|
-
this.slug = opts.slug;
|
|
14632
|
-
this.prompt = prompt;
|
|
14633
|
-
this.toolFunctions = toolFunctions;
|
|
14634
|
-
this.ifExists = opts.ifExists;
|
|
14635
|
-
this.description = opts.description;
|
|
14636
|
-
this.id = opts.id;
|
|
14637
|
-
this.functionType = functionType;
|
|
14638
|
-
this.metadata = opts.metadata;
|
|
15531
|
+
if (globalThis._lazy_load) {
|
|
15532
|
+
_evals.reporters[name] = ret;
|
|
14639
15533
|
}
|
|
14640
|
-
|
|
14641
|
-
|
|
14642
|
-
|
|
14643
|
-
|
|
14644
|
-
|
|
14645
|
-
|
|
14646
|
-
|
|
14647
|
-
if ("slug" in fn) {
|
|
14648
|
-
return {
|
|
14649
|
-
type: "slug",
|
|
14650
|
-
project_id: await projectNameToId.resolve(fn.project),
|
|
14651
|
-
slug: fn.slug
|
|
14652
|
-
};
|
|
14653
|
-
} else {
|
|
14654
|
-
return fn;
|
|
14655
|
-
}
|
|
14656
|
-
})
|
|
14657
|
-
);
|
|
14658
|
-
prompt_data.tool_functions = // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
14659
|
-
resolvableToolFunctions;
|
|
14660
|
-
}
|
|
14661
|
-
return {
|
|
14662
|
-
project_id: await projectNameToId.resolve(this.project),
|
|
14663
|
-
name: this.name,
|
|
14664
|
-
slug: this.slug,
|
|
14665
|
-
description: this.description ?? "",
|
|
14666
|
-
function_data: {
|
|
14667
|
-
type: "prompt"
|
|
14668
|
-
},
|
|
14669
|
-
function_type: this.functionType,
|
|
14670
|
-
prompt_data,
|
|
14671
|
-
if_exists: this.ifExists,
|
|
14672
|
-
metadata: this.metadata
|
|
14673
|
-
};
|
|
15534
|
+
return ret;
|
|
15535
|
+
}
|
|
15536
|
+
function serializeJSONWithPlainString(v) {
|
|
15537
|
+
if (typeof v === "string") {
|
|
15538
|
+
return v;
|
|
15539
|
+
} else {
|
|
15540
|
+
return JSON.stringify(v);
|
|
14674
15541
|
}
|
|
14675
|
-
};
|
|
14676
|
-
var promptContentsSchema = z9.union([
|
|
14677
|
-
z9.object({
|
|
14678
|
-
prompt: z9.string()
|
|
14679
|
-
}),
|
|
14680
|
-
z9.object({
|
|
14681
|
-
messages: z9.array(ChatCompletionMessageParam)
|
|
14682
|
-
})
|
|
14683
|
-
]);
|
|
14684
|
-
var promptDefinitionSchema = promptContentsSchema.and(
|
|
14685
|
-
z9.object({
|
|
14686
|
-
model: z9.string(),
|
|
14687
|
-
params: ModelParams.optional(),
|
|
14688
|
-
templateFormat: z9.enum(["mustache", "nunjucks", "none"]).optional()
|
|
14689
|
-
})
|
|
14690
|
-
);
|
|
14691
|
-
var promptDefinitionWithToolsSchema = promptDefinitionSchema.and(
|
|
14692
|
-
z9.object({
|
|
14693
|
-
tools: z9.array(ToolFunctionDefinition).optional()
|
|
14694
|
-
})
|
|
14695
|
-
);
|
|
14696
|
-
var PromptBuilder = class {
|
|
14697
|
-
constructor(project) {
|
|
14698
|
-
this.project = project;
|
|
14699
|
-
}
|
|
14700
|
-
create(opts) {
|
|
14701
|
-
const toolFunctions = [];
|
|
14702
|
-
const rawTools = [];
|
|
14703
|
-
for (const tool of opts.tools ?? []) {
|
|
14704
|
-
if (tool instanceof CodeFunction) {
|
|
14705
|
-
toolFunctions.push(tool);
|
|
14706
|
-
} else if ("type" in tool && !("function" in tool)) {
|
|
14707
|
-
toolFunctions.push(tool);
|
|
14708
|
-
} else {
|
|
14709
|
-
rawTools.push(tool);
|
|
14710
|
-
}
|
|
14711
|
-
}
|
|
14712
|
-
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
14713
|
-
const promptData = promptDefinitionToPromptData(opts, rawTools);
|
|
14714
|
-
const promptRow = {
|
|
14715
|
-
id: opts.id,
|
|
14716
|
-
_xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
|
|
14717
|
-
name: opts.name,
|
|
14718
|
-
slug,
|
|
14719
|
-
prompt_data: promptData,
|
|
14720
|
-
...this.project.id !== void 0 ? { project_id: this.project.id } : {}
|
|
14721
|
-
};
|
|
14722
|
-
const prompt = new Prompt2(
|
|
14723
|
-
promptRow,
|
|
14724
|
-
{},
|
|
14725
|
-
// It doesn't make sense to specify defaults here.
|
|
14726
|
-
opts.noTrace ?? false
|
|
14727
|
-
);
|
|
14728
|
-
const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
|
|
14729
|
-
...opts,
|
|
14730
|
-
slug
|
|
14731
|
-
});
|
|
14732
|
-
this.project.addPrompt(codePrompt);
|
|
14733
|
-
return prompt;
|
|
14734
|
-
}
|
|
14735
|
-
};
|
|
14736
|
-
function promptDefinitionToPromptData(promptDefinition, rawTools) {
|
|
14737
|
-
const promptBlock = "messages" in promptDefinition ? {
|
|
14738
|
-
type: "chat",
|
|
14739
|
-
messages: promptDefinition.messages,
|
|
14740
|
-
tools: rawTools && rawTools.length > 0 ? JSON.stringify(rawTools) : void 0
|
|
14741
|
-
} : {
|
|
14742
|
-
type: "completion",
|
|
14743
|
-
content: promptDefinition.prompt
|
|
14744
|
-
};
|
|
14745
|
-
return {
|
|
14746
|
-
prompt: promptBlock,
|
|
14747
|
-
options: {
|
|
14748
|
-
model: promptDefinition.model,
|
|
14749
|
-
params: promptDefinition.params
|
|
14750
|
-
},
|
|
14751
|
-
...promptDefinition.templateFormat ? { template_format: promptDefinition.templateFormat } : {}
|
|
14752
|
-
};
|
|
14753
15542
|
}
|
|
14754
|
-
|
|
14755
|
-
|
|
14756
|
-
|
|
14757
|
-
|
|
14758
|
-
|
|
14759
|
-
|
|
14760
|
-
|
|
14761
|
-
|
|
14762
|
-
const result = z9.object({
|
|
14763
|
-
project: Project
|
|
14764
|
-
}).parse(response);
|
|
14765
|
-
const projectId = result.project.id;
|
|
14766
|
-
this.nameToId[projectName] = projectId;
|
|
14767
|
-
this.idToName[projectId] = projectName;
|
|
14768
|
-
}
|
|
14769
|
-
return this.nameToId[projectName];
|
|
14770
|
-
}
|
|
14771
|
-
async getName(projectId) {
|
|
14772
|
-
if (!(projectId in this.idToName)) {
|
|
14773
|
-
const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
|
|
14774
|
-
id: projectId
|
|
14775
|
-
});
|
|
14776
|
-
const result = z9.array(Project).nonempty().parse(response);
|
|
14777
|
-
const projectName = result[0].name;
|
|
14778
|
-
this.idToName[projectId] = projectName;
|
|
14779
|
-
this.nameToId[projectName] = projectId;
|
|
14780
|
-
}
|
|
14781
|
-
return this.idToName[projectId];
|
|
14782
|
-
}
|
|
14783
|
-
async resolve(project) {
|
|
14784
|
-
if (project.id) {
|
|
14785
|
-
return project.id;
|
|
14786
|
-
}
|
|
14787
|
-
return this.getId(project.name);
|
|
14788
|
-
}
|
|
14789
|
-
};
|
|
14790
|
-
|
|
14791
|
-
// src/eval-parameters.ts
|
|
14792
|
-
var evalParametersSchema = z10.record(
|
|
14793
|
-
z10.string(),
|
|
14794
|
-
z10.union([
|
|
14795
|
-
z10.object({
|
|
14796
|
-
type: z10.literal("prompt"),
|
|
14797
|
-
default: promptDefinitionWithToolsSchema.optional(),
|
|
14798
|
-
description: z10.string().optional()
|
|
14799
|
-
}),
|
|
14800
|
-
z10.instanceof(z10.ZodType)
|
|
14801
|
-
// For Zod schemas
|
|
14802
|
-
])
|
|
14803
|
-
);
|
|
14804
|
-
function validateParameters(parameters, parameterSchema) {
|
|
14805
|
-
return Object.fromEntries(
|
|
14806
|
-
Object.entries(parameterSchema).map(([name, schema]) => {
|
|
14807
|
-
const value = parameters[name];
|
|
14808
|
-
try {
|
|
14809
|
-
if ("type" in schema && schema.type === "prompt") {
|
|
14810
|
-
const promptData = value ? PromptData.parse(value) : schema.default ? promptDefinitionToPromptData(
|
|
14811
|
-
schema.default,
|
|
14812
|
-
schema.default.tools
|
|
14813
|
-
) : void 0;
|
|
14814
|
-
if (!promptData) {
|
|
14815
|
-
throw new Error(`Parameter '${name}' is required`);
|
|
14816
|
-
}
|
|
14817
|
-
return [name, Prompt2.fromPromptData(name, promptData)];
|
|
14818
|
-
} else {
|
|
14819
|
-
const schemaCasted = schema;
|
|
14820
|
-
return [name, schemaCasted.parse(value)];
|
|
14821
|
-
}
|
|
14822
|
-
} catch (e) {
|
|
14823
|
-
console.error("Error validating parameter", name, e);
|
|
14824
|
-
throw Error(
|
|
14825
|
-
`Invalid parameter '${name}': ${e instanceof Error ? e.message : String(e)}`
|
|
14826
|
-
);
|
|
14827
|
-
}
|
|
14828
|
-
})
|
|
15543
|
+
function evaluateFilter(object, filter2) {
|
|
15544
|
+
const { path, pattern } = filter2;
|
|
15545
|
+
const key = path.reduce(
|
|
15546
|
+
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
15547
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15548
|
+
acc[p]
|
|
15549
|
+
) : void 0,
|
|
15550
|
+
object
|
|
14829
15551
|
);
|
|
14830
|
-
|
|
14831
|
-
|
|
14832
|
-
// src/framework.ts
|
|
14833
|
-
function BaseExperiment(options = {}) {
|
|
14834
|
-
return { _type: "BaseExperiment", ...options };
|
|
14835
|
-
}
|
|
14836
|
-
var EvalResultWithSummary = class {
|
|
14837
|
-
constructor(summary, results) {
|
|
14838
|
-
this.summary = summary;
|
|
14839
|
-
this.results = results;
|
|
14840
|
-
}
|
|
14841
|
-
/**
|
|
14842
|
-
* @deprecated Use `summary` instead.
|
|
14843
|
-
*/
|
|
14844
|
-
toString() {
|
|
14845
|
-
return JSON.stringify(this.summary);
|
|
14846
|
-
}
|
|
14847
|
-
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
14848
|
-
return `EvalResultWithSummary(summary="...", results=[...])`;
|
|
14849
|
-
}
|
|
14850
|
-
toJSON() {
|
|
14851
|
-
return {
|
|
14852
|
-
summary: this.summary,
|
|
14853
|
-
results: this.results
|
|
14854
|
-
};
|
|
14855
|
-
}
|
|
14856
|
-
};
|
|
14857
|
-
function makeEvalName(projectName, experimentName) {
|
|
14858
|
-
let out = projectName;
|
|
14859
|
-
if (experimentName) {
|
|
14860
|
-
out += ` [experimentName=${experimentName}]`;
|
|
14861
|
-
}
|
|
14862
|
-
return out;
|
|
14863
|
-
}
|
|
14864
|
-
function initExperiment2(state, options = {}) {
|
|
14865
|
-
return init({
|
|
14866
|
-
state,
|
|
14867
|
-
...options,
|
|
14868
|
-
setCurrent: false
|
|
14869
|
-
});
|
|
14870
|
-
}
|
|
14871
|
-
function callEvaluatorData(data) {
|
|
14872
|
-
const dataResult = typeof data === "function" ? data() : data;
|
|
14873
|
-
let baseExperiment = void 0;
|
|
14874
|
-
if ("_type" in dataResult && dataResult._type === "BaseExperiment") {
|
|
14875
|
-
baseExperiment = dataResult.name;
|
|
15552
|
+
if (key === void 0) {
|
|
15553
|
+
return false;
|
|
14876
15554
|
}
|
|
14877
|
-
return
|
|
14878
|
-
data: dataResult,
|
|
14879
|
-
baseExperiment
|
|
14880
|
-
};
|
|
15555
|
+
return pattern.test(serializeJSONWithPlainString(key));
|
|
14881
15556
|
}
|
|
14882
|
-
function
|
|
14883
|
-
return
|
|
15557
|
+
function scorerName(scorer, scorer_idx) {
|
|
15558
|
+
return scorer.name || `scorer_${scorer_idx}`;
|
|
14884
15559
|
}
|
|
14885
|
-
function
|
|
14886
|
-
return
|
|
15560
|
+
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
|
|
15561
|
+
return await runEvaluatorInternal(
|
|
15562
|
+
experiment,
|
|
15563
|
+
evaluator,
|
|
15564
|
+
progressReporter,
|
|
15565
|
+
filters,
|
|
15566
|
+
stream,
|
|
15567
|
+
parameters,
|
|
15568
|
+
collectResults,
|
|
15569
|
+
enableCache
|
|
15570
|
+
);
|
|
14887
15571
|
}
|
|
14888
|
-
|
|
14889
|
-
|
|
14890
|
-
|
|
14891
|
-
|
|
14892
|
-
|
|
15572
|
+
var defaultErrorScoreHandler = ({
|
|
15573
|
+
rootSpan,
|
|
15574
|
+
data: _,
|
|
15575
|
+
unhandledScores
|
|
15576
|
+
}) => {
|
|
15577
|
+
const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
|
|
15578
|
+
rootSpan.log({ scores });
|
|
15579
|
+
return scores;
|
|
14893
15580
|
};
|
|
14894
|
-
function
|
|
14895
|
-
|
|
14896
|
-
|
|
14897
|
-
async function Eval(name, evaluator, reporterOrOpts) {
|
|
14898
|
-
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
14899
|
-
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
14900
|
-
if (globalThis._evals.evaluators[evalName]) {
|
|
14901
|
-
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
14902
|
-
}
|
|
14903
|
-
if (globalThis._lazy_load) {
|
|
14904
|
-
globalThis._evals.evaluators[evalName] = {
|
|
14905
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
14906
|
-
evaluator: {
|
|
14907
|
-
evalName,
|
|
14908
|
-
projectName: name,
|
|
14909
|
-
...evaluator
|
|
14910
|
-
},
|
|
14911
|
-
reporter: options.reporter
|
|
14912
|
-
};
|
|
14913
|
-
_initializeSpanContext();
|
|
14914
|
-
return new EvalResultWithSummary(
|
|
14915
|
-
{
|
|
14916
|
-
scores: {},
|
|
14917
|
-
metrics: {},
|
|
14918
|
-
projectName: "",
|
|
14919
|
-
experimentName: ""
|
|
14920
|
-
},
|
|
14921
|
-
[]
|
|
14922
|
-
);
|
|
14923
|
-
}
|
|
14924
|
-
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
14925
|
-
const shouldCollectResults = options.returnResults ?? true;
|
|
14926
|
-
if (typeof options.reporter === "string") {
|
|
14927
|
-
throw new Error(
|
|
14928
|
-
"Must specify a reporter object, not a name. Can only specify reporter names when running 'braintrust eval'"
|
|
14929
|
-
);
|
|
14930
|
-
}
|
|
14931
|
-
const resolvedReporter = options.reporter || defaultReporter;
|
|
14932
|
-
try {
|
|
14933
|
-
const { data, baseExperiment: defaultBaseExperiment } = callEvaluatorData(
|
|
14934
|
-
evaluator.data
|
|
14935
|
-
);
|
|
14936
|
-
const experiment = options.parent || options.noSendLogs ? null : initExperiment2(evaluator.state, {
|
|
14937
|
-
...evaluator.projectId ? { projectId: evaluator.projectId } : { project: name },
|
|
14938
|
-
experiment: evaluator.experimentName,
|
|
14939
|
-
description: evaluator.description,
|
|
14940
|
-
metadata: evaluator.metadata,
|
|
14941
|
-
isPublic: evaluator.isPublic,
|
|
14942
|
-
update: evaluator.update,
|
|
14943
|
-
baseExperiment: evaluator.baseExperimentName ?? defaultBaseExperiment,
|
|
14944
|
-
baseExperimentId: evaluator.baseExperimentId,
|
|
14945
|
-
gitMetadataSettings: evaluator.gitMetadataSettings,
|
|
14946
|
-
repoInfo: evaluator.repoInfo,
|
|
14947
|
-
dataset: Dataset2.isDataset(data) ? data : void 0
|
|
14948
|
-
});
|
|
14949
|
-
if (experiment && typeof process !== "undefined" && globalThis.BRAINTRUST_CONTEXT_MANAGER !== void 0) {
|
|
14950
|
-
await experiment._waitForId();
|
|
14951
|
-
}
|
|
14952
|
-
if (experiment && options.onStart) {
|
|
14953
|
-
const summary = await experiment.summarize({ summarizeScores: false });
|
|
14954
|
-
options.onStart(summary);
|
|
14955
|
-
}
|
|
14956
|
-
try {
|
|
14957
|
-
const evalDef = {
|
|
14958
|
-
evalName,
|
|
14959
|
-
projectName: name,
|
|
14960
|
-
...evaluator,
|
|
14961
|
-
data
|
|
14962
|
-
};
|
|
14963
|
-
const enableCache = options.enableCache ?? true;
|
|
14964
|
-
let ret;
|
|
14965
|
-
if (options.parent) {
|
|
14966
|
-
ret = await withParent(
|
|
14967
|
-
options.parent,
|
|
14968
|
-
() => runEvaluator(
|
|
14969
|
-
null,
|
|
14970
|
-
evalDef,
|
|
14971
|
-
progressReporter,
|
|
14972
|
-
[],
|
|
14973
|
-
options.stream,
|
|
14974
|
-
options.parameters,
|
|
14975
|
-
shouldCollectResults,
|
|
14976
|
-
enableCache
|
|
14977
|
-
),
|
|
14978
|
-
evaluator.state
|
|
14979
|
-
);
|
|
14980
|
-
} else {
|
|
14981
|
-
ret = await runEvaluator(
|
|
14982
|
-
experiment,
|
|
14983
|
-
evalDef,
|
|
14984
|
-
progressReporter,
|
|
14985
|
-
[],
|
|
14986
|
-
options.stream,
|
|
14987
|
-
options.parameters,
|
|
14988
|
-
shouldCollectResults,
|
|
14989
|
-
enableCache
|
|
14990
|
-
);
|
|
14991
|
-
}
|
|
14992
|
-
progressReporter.stop();
|
|
14993
|
-
resolvedReporter.reportEval(evalDef, ret, {
|
|
14994
|
-
verbose: true,
|
|
14995
|
-
jsonl: false
|
|
14996
|
-
});
|
|
14997
|
-
return ret;
|
|
14998
|
-
} finally {
|
|
14999
|
-
if (experiment) {
|
|
15000
|
-
await experiment.flush().catch(console.error);
|
|
15001
|
-
} else if (options.parent) {
|
|
15002
|
-
await flush().catch(console.error);
|
|
15003
|
-
}
|
|
15004
|
-
}
|
|
15005
|
-
} finally {
|
|
15006
|
-
progressReporter.stop();
|
|
15007
|
-
}
|
|
15008
|
-
}
|
|
15009
|
-
function Reporter(name, reporter) {
|
|
15010
|
-
const ret = { name, ...reporter };
|
|
15011
|
-
if (_evals.reporters[name]) {
|
|
15012
|
-
throw new Error(`Reporter ${name} already exists`);
|
|
15013
|
-
}
|
|
15014
|
-
if (globalThis._lazy_load) {
|
|
15015
|
-
_evals.reporters[name] = ret;
|
|
15016
|
-
}
|
|
15017
|
-
return ret;
|
|
15018
|
-
}
|
|
15019
|
-
function serializeJSONWithPlainString(v) {
|
|
15020
|
-
if (typeof v === "string") {
|
|
15021
|
-
return v;
|
|
15022
|
-
} else {
|
|
15023
|
-
return JSON.stringify(v);
|
|
15024
|
-
}
|
|
15025
|
-
}
|
|
15026
|
-
function evaluateFilter(object, filter2) {
|
|
15027
|
-
const { path, pattern } = filter2;
|
|
15028
|
-
const key = path.reduce(
|
|
15029
|
-
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
15030
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
15031
|
-
acc[p]
|
|
15032
|
-
) : void 0,
|
|
15033
|
-
object
|
|
15034
|
-
);
|
|
15035
|
-
if (key === void 0) {
|
|
15036
|
-
return false;
|
|
15037
|
-
}
|
|
15038
|
-
return pattern.test(serializeJSONWithPlainString(key));
|
|
15039
|
-
}
|
|
15040
|
-
function scorerName(scorer, scorer_idx) {
|
|
15041
|
-
return scorer.name || `scorer_${scorer_idx}`;
|
|
15042
|
-
}
|
|
15043
|
-
async function runEvaluator(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults = true, enableCache = true) {
|
|
15044
|
-
return await runEvaluatorInternal(
|
|
15045
|
-
experiment,
|
|
15046
|
-
evaluator,
|
|
15047
|
-
progressReporter,
|
|
15048
|
-
filters,
|
|
15049
|
-
stream,
|
|
15050
|
-
parameters,
|
|
15051
|
-
collectResults,
|
|
15052
|
-
enableCache
|
|
15053
|
-
);
|
|
15054
|
-
}
|
|
15055
|
-
var defaultErrorScoreHandler = ({
|
|
15056
|
-
rootSpan,
|
|
15057
|
-
data: _,
|
|
15058
|
-
unhandledScores
|
|
15059
|
-
}) => {
|
|
15060
|
-
const scores = Object.fromEntries(unhandledScores.map((s) => [s, 0]));
|
|
15061
|
-
rootSpan.log({ scores });
|
|
15062
|
-
return scores;
|
|
15063
|
-
};
|
|
15064
|
-
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
15065
|
-
if (enableCache) {
|
|
15066
|
-
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
15581
|
+
async function runEvaluatorInternal(experiment, evaluator, progressReporter, filters, stream, parameters, collectResults, enableCache) {
|
|
15582
|
+
if (enableCache) {
|
|
15583
|
+
(evaluator.state ?? _internalGetGlobalState())?.spanCache?.start();
|
|
15067
15584
|
}
|
|
15068
15585
|
try {
|
|
15069
15586
|
if (typeof evaluator.data === "string") {
|
|
15070
15587
|
throw new Error("Unimplemented: string data paths");
|
|
15071
15588
|
}
|
|
15072
15589
|
let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
|
|
15073
|
-
parameters = validateParameters(
|
|
15590
|
+
parameters = await validateParameters(
|
|
15074
15591
|
parameters ?? {},
|
|
15075
|
-
evaluator.parameters
|
|
15592
|
+
evaluator.parameters
|
|
15076
15593
|
);
|
|
15077
15594
|
if ("_type" in dataResult) {
|
|
15078
15595
|
if (dataResult._type !== "BaseExperiment") {
|
|
@@ -15099,7 +15616,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15099
15616
|
}
|
|
15100
15617
|
const resolvedDataResult = dataResult instanceof Promise ? await dataResult : dataResult;
|
|
15101
15618
|
const dataIterable = (() => {
|
|
15102
|
-
if (
|
|
15619
|
+
if (isAsyncIterable3(resolvedDataResult)) {
|
|
15103
15620
|
return resolvedDataResult;
|
|
15104
15621
|
}
|
|
15105
15622
|
if (Array.isArray(resolvedDataResult) || isIterable(resolvedDataResult)) {
|
|
@@ -15231,6 +15748,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15231
15748
|
} else {
|
|
15232
15749
|
rootSpan.log({ output, metadata, expected });
|
|
15233
15750
|
}
|
|
15751
|
+
if (evaluator.flushBeforeScoring) {
|
|
15752
|
+
await rootSpan.flush();
|
|
15753
|
+
}
|
|
15234
15754
|
const scoringArgs = {
|
|
15235
15755
|
input: datum.input,
|
|
15236
15756
|
expected: "expected" in datum ? datum.expected : void 0,
|
|
@@ -15477,206 +15997,646 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
15477
15997
|
spanCache?.stop();
|
|
15478
15998
|
}
|
|
15479
15999
|
}
|
|
15480
|
-
}
|
|
15481
|
-
var warning = (text) => `Warning: ${text}`;
|
|
15482
|
-
function logError2(e, verbose) {
|
|
15483
|
-
if (!verbose) {
|
|
15484
|
-
console.error(`${e}`);
|
|
15485
|
-
} else {
|
|
15486
|
-
console.error(e);
|
|
16000
|
+
}
|
|
16001
|
+
var warning = (text) => `Warning: ${text}`;
|
|
16002
|
+
function logError2(e, verbose) {
|
|
16003
|
+
if (!verbose) {
|
|
16004
|
+
console.error(`${e}`);
|
|
16005
|
+
} else {
|
|
16006
|
+
console.error(e);
|
|
16007
|
+
}
|
|
16008
|
+
}
|
|
16009
|
+
function accumulateScores(accumulator, scores) {
|
|
16010
|
+
for (const [name, score] of Object.entries(scores)) {
|
|
16011
|
+
if (score === null || score === void 0) {
|
|
16012
|
+
continue;
|
|
16013
|
+
}
|
|
16014
|
+
const existing = accumulator[name] ?? { total: 0, count: 0 };
|
|
16015
|
+
accumulator[name] = {
|
|
16016
|
+
total: existing.total + score,
|
|
16017
|
+
count: existing.count + 1
|
|
16018
|
+
};
|
|
16019
|
+
}
|
|
16020
|
+
}
|
|
16021
|
+
function ensureScoreAccumulator(results) {
|
|
16022
|
+
const accumulator = {};
|
|
16023
|
+
for (const result of results) {
|
|
16024
|
+
accumulateScores(accumulator, result.scores);
|
|
16025
|
+
}
|
|
16026
|
+
return accumulator;
|
|
16027
|
+
}
|
|
16028
|
+
function buildLocalSummary(evaluator, results, precomputedScores) {
|
|
16029
|
+
const scoresByName = precomputedScores ?? ensureScoreAccumulator(results);
|
|
16030
|
+
return {
|
|
16031
|
+
projectName: evaluator.projectName,
|
|
16032
|
+
experimentName: evaluator.evalName,
|
|
16033
|
+
scores: Object.fromEntries(
|
|
16034
|
+
Object.entries(scoresByName).map(([name, { total, count }]) => [
|
|
16035
|
+
name,
|
|
16036
|
+
{
|
|
16037
|
+
name,
|
|
16038
|
+
score: count === 0 ? 0 : total / count,
|
|
16039
|
+
improvements: 0,
|
|
16040
|
+
regressions: 0
|
|
16041
|
+
}
|
|
16042
|
+
])
|
|
16043
|
+
)
|
|
16044
|
+
};
|
|
16045
|
+
}
|
|
16046
|
+
function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
|
|
16047
|
+
if (failingResults.length > 0) {
|
|
16048
|
+
console.error(
|
|
16049
|
+
warning(
|
|
16050
|
+
`Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
|
|
16051
|
+
)
|
|
16052
|
+
);
|
|
16053
|
+
if (jsonl) {
|
|
16054
|
+
console.log(
|
|
16055
|
+
JSON.stringify({
|
|
16056
|
+
evaluatorName: evaluator.evalName,
|
|
16057
|
+
errors: failingResults.map(
|
|
16058
|
+
(r) => `${r.error instanceof Error ? r.error.stack : r.error}`
|
|
16059
|
+
)
|
|
16060
|
+
})
|
|
16061
|
+
);
|
|
16062
|
+
} else {
|
|
16063
|
+
for (const result of failingResults) {
|
|
16064
|
+
logError2(result.error, verbose);
|
|
16065
|
+
}
|
|
16066
|
+
}
|
|
16067
|
+
if (!verbose && !jsonl) {
|
|
16068
|
+
console.error(warning("Add --verbose to see full stack traces."));
|
|
16069
|
+
}
|
|
16070
|
+
}
|
|
16071
|
+
}
|
|
16072
|
+
var defaultReporter = {
|
|
16073
|
+
name: "Braintrust default reporter",
|
|
16074
|
+
async reportEval(evaluator, result, { verbose, jsonl }) {
|
|
16075
|
+
const { results, summary } = result;
|
|
16076
|
+
const failingResults = results.filter(
|
|
16077
|
+
(r) => r.error !== void 0
|
|
16078
|
+
);
|
|
16079
|
+
if (failingResults.length > 0) {
|
|
16080
|
+
reportFailures(evaluator, failingResults, { verbose, jsonl });
|
|
16081
|
+
}
|
|
16082
|
+
if (jsonl) {
|
|
16083
|
+
isomorph_default.writeln(JSON.stringify(summary));
|
|
16084
|
+
} else {
|
|
16085
|
+
isomorph_default.writeln("Experiment summary");
|
|
16086
|
+
isomorph_default.writeln("==================");
|
|
16087
|
+
if (summary.comparisonExperimentName) {
|
|
16088
|
+
isomorph_default.writeln(
|
|
16089
|
+
`${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
|
|
16090
|
+
);
|
|
16091
|
+
isomorph_default.writeln("");
|
|
16092
|
+
}
|
|
16093
|
+
const hasScores = Object.keys(summary.scores).length > 0;
|
|
16094
|
+
const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
|
|
16095
|
+
const hasComparison = !!summary.comparisonExperimentName;
|
|
16096
|
+
if (hasScores || hasMetrics) {
|
|
16097
|
+
if (hasComparison) {
|
|
16098
|
+
isomorph_default.writeln(
|
|
16099
|
+
"Name Value Change Improvements Regressions"
|
|
16100
|
+
);
|
|
16101
|
+
isomorph_default.writeln(
|
|
16102
|
+
"----------------------------------------------------------------"
|
|
16103
|
+
);
|
|
16104
|
+
}
|
|
16105
|
+
for (const score of Object.values(summary.scores)) {
|
|
16106
|
+
const scorePercent = (score.score * 100).toFixed(2);
|
|
16107
|
+
const scoreValue = `${scorePercent}%`;
|
|
16108
|
+
if (hasComparison) {
|
|
16109
|
+
let diffString = "-";
|
|
16110
|
+
if (!isEmpty2(score.diff)) {
|
|
16111
|
+
const diffPercent = (score.diff * 100).toFixed(2);
|
|
16112
|
+
const diffSign = score.diff > 0 ? "+" : "";
|
|
16113
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
16114
|
+
}
|
|
16115
|
+
const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
|
|
16116
|
+
const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
|
|
16117
|
+
isomorph_default.writeln(
|
|
16118
|
+
`${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
16119
|
+
);
|
|
16120
|
+
} else {
|
|
16121
|
+
isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
|
|
16122
|
+
}
|
|
16123
|
+
}
|
|
16124
|
+
for (const metric of Object.values(summary.metrics ?? {})) {
|
|
16125
|
+
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
16126
|
+
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
16127
|
+
const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
|
|
16128
|
+
if (hasComparison) {
|
|
16129
|
+
let diffString = "-";
|
|
16130
|
+
if (!isEmpty2(metric.diff)) {
|
|
16131
|
+
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
16132
|
+
const diffSign = metric.diff > 0 ? "+" : "";
|
|
16133
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
16134
|
+
}
|
|
16135
|
+
const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
|
|
16136
|
+
const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
|
|
16137
|
+
isomorph_default.writeln(
|
|
16138
|
+
`${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
16139
|
+
);
|
|
16140
|
+
} else {
|
|
16141
|
+
isomorph_default.writeln(
|
|
16142
|
+
`${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
|
|
16143
|
+
);
|
|
16144
|
+
}
|
|
16145
|
+
}
|
|
16146
|
+
}
|
|
16147
|
+
if (summary.experimentUrl) {
|
|
16148
|
+
isomorph_default.writeln("");
|
|
16149
|
+
isomorph_default.writeln(`View results for ${summary.experimentName}`);
|
|
16150
|
+
isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
|
|
16151
|
+
}
|
|
16152
|
+
}
|
|
16153
|
+
isomorph_default.writeln("");
|
|
16154
|
+
return failingResults.length === 0;
|
|
16155
|
+
},
|
|
16156
|
+
async reportRun(evalReports) {
|
|
16157
|
+
return evalReports.every((r) => r);
|
|
16158
|
+
}
|
|
16159
|
+
};
|
|
16160
|
+
|
|
16161
|
+
// src/framework2.ts
|
|
16162
|
+
import { z as z11 } from "zod/v3";
|
|
16163
|
+
var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
|
|
16164
|
+
var ProjectBuilder = class {
|
|
16165
|
+
create(opts) {
|
|
16166
|
+
return new Project2(opts);
|
|
16167
|
+
}
|
|
16168
|
+
};
|
|
16169
|
+
var projects = new ProjectBuilder();
|
|
16170
|
+
var Project2 = class {
|
|
16171
|
+
name;
|
|
16172
|
+
id;
|
|
16173
|
+
tools;
|
|
16174
|
+
prompts;
|
|
16175
|
+
parameters;
|
|
16176
|
+
scorers;
|
|
16177
|
+
_publishableCodeFunctions = [];
|
|
16178
|
+
_publishablePrompts = [];
|
|
16179
|
+
_publishableParameters = [];
|
|
16180
|
+
constructor(args) {
|
|
16181
|
+
_initializeSpanContext();
|
|
16182
|
+
this.name = "name" in args ? args.name : void 0;
|
|
16183
|
+
this.id = "id" in args ? args.id : void 0;
|
|
16184
|
+
this.tools = new ToolBuilder(this);
|
|
16185
|
+
this.prompts = new PromptBuilder(this);
|
|
16186
|
+
this.parameters = new ParametersBuilder(this);
|
|
16187
|
+
this.scorers = new ScorerBuilder(this);
|
|
16188
|
+
}
|
|
16189
|
+
addPrompt(prompt) {
|
|
16190
|
+
this._publishablePrompts.push(prompt);
|
|
16191
|
+
if (globalThis._lazy_load) {
|
|
16192
|
+
globalThis._evals.prompts.push(prompt);
|
|
16193
|
+
}
|
|
16194
|
+
}
|
|
16195
|
+
addParameters(parameters) {
|
|
16196
|
+
this._publishableParameters.push(parameters);
|
|
16197
|
+
if (globalThis._lazy_load) {
|
|
16198
|
+
if (globalThis._evals.parameters == null)
|
|
16199
|
+
globalThis._evals.parameters = [];
|
|
16200
|
+
globalThis._evals.parameters.push(parameters);
|
|
16201
|
+
}
|
|
16202
|
+
}
|
|
16203
|
+
addCodeFunction(fn) {
|
|
16204
|
+
this._publishableCodeFunctions.push(fn);
|
|
16205
|
+
if (globalThis._lazy_load) {
|
|
16206
|
+
globalThis._evals.functions.push(fn);
|
|
16207
|
+
}
|
|
16208
|
+
}
|
|
16209
|
+
async publish() {
|
|
16210
|
+
if (globalThis._lazy_load) {
|
|
16211
|
+
console.warn("publish() is a no-op when running `braintrust push`.");
|
|
16212
|
+
return;
|
|
16213
|
+
}
|
|
16214
|
+
await login();
|
|
16215
|
+
const projectMap = new ProjectNameIdMap();
|
|
16216
|
+
const functionDefinitions = [];
|
|
16217
|
+
if (this._publishableCodeFunctions.length > 0) {
|
|
16218
|
+
console.warn(
|
|
16219
|
+
"Code functions cannot be published directly. Use `braintrust push` instead."
|
|
16220
|
+
);
|
|
16221
|
+
}
|
|
16222
|
+
if (this._publishablePrompts.length > 0) {
|
|
16223
|
+
for (const prompt of this._publishablePrompts) {
|
|
16224
|
+
const functionDefinition = await prompt.toFunctionDefinition(projectMap);
|
|
16225
|
+
functionDefinitions.push(functionDefinition);
|
|
16226
|
+
}
|
|
16227
|
+
}
|
|
16228
|
+
await _internalGetGlobalState().apiConn().post_json("insert-functions", {
|
|
16229
|
+
functions: functionDefinitions
|
|
16230
|
+
});
|
|
16231
|
+
}
|
|
16232
|
+
};
|
|
16233
|
+
var ToolBuilder = class {
|
|
16234
|
+
constructor(project) {
|
|
16235
|
+
this.project = project;
|
|
16236
|
+
}
|
|
16237
|
+
taskCounter = 0;
|
|
16238
|
+
// This type definition is just a catch all so that the implementation can be
|
|
16239
|
+
// less specific than the two more specific declarations above.
|
|
16240
|
+
create(opts) {
|
|
16241
|
+
this.taskCounter++;
|
|
16242
|
+
opts = opts ?? {};
|
|
16243
|
+
const { handler, name, slug, parameters, returns, ...rest } = opts;
|
|
16244
|
+
let resolvedName = name ?? handler.name;
|
|
16245
|
+
if (resolvedName.trim().length === 0) {
|
|
16246
|
+
resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
16247
|
+
}
|
|
16248
|
+
const tool = new CodeFunction(this.project, {
|
|
16249
|
+
handler,
|
|
16250
|
+
name: resolvedName,
|
|
16251
|
+
slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
|
|
16252
|
+
type: "tool",
|
|
16253
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
16254
|
+
parameters,
|
|
16255
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
16256
|
+
returns,
|
|
16257
|
+
...rest
|
|
16258
|
+
});
|
|
16259
|
+
this.project.addCodeFunction(tool);
|
|
16260
|
+
return tool;
|
|
16261
|
+
}
|
|
16262
|
+
};
|
|
16263
|
+
var ScorerBuilder = class {
|
|
16264
|
+
constructor(project) {
|
|
16265
|
+
this.project = project;
|
|
16266
|
+
}
|
|
16267
|
+
taskCounter = 0;
|
|
16268
|
+
create(opts) {
|
|
16269
|
+
this.taskCounter++;
|
|
16270
|
+
let resolvedName = opts.name;
|
|
16271
|
+
if (!resolvedName && "handler" in opts) {
|
|
16272
|
+
resolvedName = opts.handler.name;
|
|
16273
|
+
}
|
|
16274
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
16275
|
+
resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
16276
|
+
}
|
|
16277
|
+
const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
|
|
16278
|
+
if ("handler" in opts) {
|
|
16279
|
+
const scorer = new CodeFunction(this.project, {
|
|
16280
|
+
...opts,
|
|
16281
|
+
name: resolvedName,
|
|
16282
|
+
slug,
|
|
16283
|
+
type: "scorer"
|
|
16284
|
+
});
|
|
16285
|
+
this.project.addCodeFunction(scorer);
|
|
16286
|
+
} else {
|
|
16287
|
+
const promptBlock = "messages" in opts ? {
|
|
16288
|
+
type: "chat",
|
|
16289
|
+
messages: opts.messages
|
|
16290
|
+
} : {
|
|
16291
|
+
type: "completion",
|
|
16292
|
+
content: opts.prompt
|
|
16293
|
+
};
|
|
16294
|
+
const promptData = {
|
|
16295
|
+
prompt: promptBlock,
|
|
16296
|
+
options: {
|
|
16297
|
+
model: opts.model,
|
|
16298
|
+
params: opts.params
|
|
16299
|
+
},
|
|
16300
|
+
parser: {
|
|
16301
|
+
type: "llm_classifier",
|
|
16302
|
+
use_cot: opts.useCot,
|
|
16303
|
+
choice_scores: opts.choiceScores
|
|
16304
|
+
}
|
|
16305
|
+
};
|
|
16306
|
+
const codePrompt = new CodePrompt(
|
|
16307
|
+
this.project,
|
|
16308
|
+
promptData,
|
|
16309
|
+
[],
|
|
16310
|
+
{
|
|
16311
|
+
...opts,
|
|
16312
|
+
name: resolvedName,
|
|
16313
|
+
slug
|
|
16314
|
+
},
|
|
16315
|
+
"scorer"
|
|
16316
|
+
);
|
|
16317
|
+
this.project.addPrompt(codePrompt);
|
|
16318
|
+
}
|
|
16319
|
+
}
|
|
16320
|
+
};
|
|
16321
|
+
var CodeFunction = class {
|
|
16322
|
+
constructor(project, opts) {
|
|
16323
|
+
this.project = project;
|
|
16324
|
+
this.handler = opts.handler;
|
|
16325
|
+
this.name = opts.name;
|
|
16326
|
+
this.slug = opts.slug;
|
|
16327
|
+
this.description = opts.description;
|
|
16328
|
+
this.type = opts.type;
|
|
16329
|
+
this.ifExists = opts.ifExists;
|
|
16330
|
+
this.metadata = opts.metadata;
|
|
16331
|
+
this.parameters = opts.parameters;
|
|
16332
|
+
this.returns = opts.returns;
|
|
16333
|
+
if (this.returns && !this.parameters) {
|
|
16334
|
+
throw new Error("parameters are required if return type is defined");
|
|
16335
|
+
}
|
|
16336
|
+
}
|
|
16337
|
+
handler;
|
|
16338
|
+
name;
|
|
16339
|
+
slug;
|
|
16340
|
+
type;
|
|
16341
|
+
description;
|
|
16342
|
+
parameters;
|
|
16343
|
+
returns;
|
|
16344
|
+
ifExists;
|
|
16345
|
+
metadata;
|
|
16346
|
+
key() {
|
|
16347
|
+
return JSON.stringify([
|
|
16348
|
+
this.project.id ?? "",
|
|
16349
|
+
this.project.name ?? "",
|
|
16350
|
+
this.slug
|
|
16351
|
+
]);
|
|
16352
|
+
}
|
|
16353
|
+
};
|
|
16354
|
+
/**
 * Describes a prompt defined in code, the project it is attached to, and the
 * tool functions it references.
 */
var CodePrompt = class {
  project;
  name;
  slug;
  prompt;
  ifExists;
  description;
  id;
  functionType;
  toolFunctions;
  metadata;
  /**
   * @param {object} project - Project handed to `projectNameToId.resolve`.
   * @param {object} prompt - Prompt data; shallow-copied by `toFunctionDefinition`.
   * @param {Array<object>} toolFunctions - Tool entries; those with a `slug`
   *   key are converted to slug references when the definition is built.
   * @param {object} opts - Supplies `name`, `slug`, `ifExists`, `description`,
   *   `id` and `metadata`.
   * @param {string} [functionType] - Emitted as `function_type`.
   */
  constructor(project, prompt, toolFunctions, opts, functionType) {
    this.project = project;
    this.name = opts.name;
    this.slug = opts.slug;
    this.prompt = prompt;
    this.toolFunctions = toolFunctions;
    this.ifExists = opts.ifExists;
    this.description = opts.description;
    this.id = opts.id;
    this.functionType = functionType;
    this.metadata = opts.metadata;
  }
  /**
   * Builds the function-definition object for this prompt.
   *
   * @param {{resolve: function(object): Promise<string>}} projectNameToId -
   *   Resolver used to turn a project object into a project id.
   * @returns {Promise<object>} Object with `project_id`, `name`, `slug`,
   *   `description` (defaults to `""`), `function_data`, `function_type`,
   *   `prompt_data`, `if_exists` and `metadata`.
   */
  async toFunctionDefinition(projectNameToId) {
    // Shallow copy so adding `tool_functions` does not mutate `this.prompt`.
    const prompt_data = {
      ...this.prompt
    };
    if (this.toolFunctions.length > 0) {
      const resolvableToolFunctions = await Promise.all(
        this.toolFunctions.map(async (fn) => {
          if ("slug" in fn) {
            // Entries carrying a slug are emitted as references by project id + slug.
            return {
              type: "slug",
              project_id: await projectNameToId.resolve(fn.project),
              slug: fn.slug
            };
          } else {
            return fn;
          }
        })
      );
      prompt_data.tool_functions = resolvableToolFunctions;
    }
    return {
      project_id: await projectNameToId.resolve(this.project),
      name: this.name,
      slug: this.slug,
      description: this.description ?? "",
      function_data: {
        type: "prompt"
      },
      function_type: this.functionType,
      prompt_data,
      if_exists: this.ifExists,
      metadata: this.metadata
    };
  }
};
|
|
16413
|
+
/**
 * Creates prompts for a project and registers a matching `CodePrompt` on it.
 */
var PromptBuilder = class {
  /**
   * @param {object} project - Project that receives prompts via `addPrompt`;
   *   its `id` (when defined) is copied into the prompt row.
   */
  constructor(project) {
    this.project = project;
  }
  /**
   * Builds a prompt from `opts`, registers it on the project, and returns it.
   *
   * @param {object} opts - Prompt options. Reads `name`, `slug`, `id`,
   *   `version`, `tools` and `noTrace`; the whole object is also forwarded to
   *   `promptDefinitionToPromptData` and to the `CodePrompt` options.
   * @returns {Prompt2} Prompt constructed from the assembled row.
   */
  create(opts) {
    const toolFunctions = [];
    const rawTools = [];
    for (const tool of opts.tools ?? []) {
      // `CodeFunction` instances and entries that have `type` but no
      // `function` key are kept as tool functions; everything else is passed
      // to the prompt data as a raw tool.
      if (tool instanceof CodeFunction || "type" in tool && !("function" in tool)) {
        toolFunctions.push(tool);
      } else {
        rawTools.push(tool);
      }
    }
    const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
    const promptData = promptDefinitionToPromptData(opts, rawTools);
    const promptRow = {
      id: opts.id,
      _xact_id: opts.version ? loadPrettyXact(opts.version) : void 0,
      name: opts.name,
      slug,
      prompt_data: promptData,
      // `project_id` is only present when the project already has an id.
      ...this.project.id !== void 0 ? { project_id: this.project.id } : {}
    };
    const prompt = new Prompt2(
      promptRow,
      {},
      // It doesn't make sense to specify defaults here.
      opts.noTrace ?? false
    );
    const codePrompt = new CodePrompt(this.project, promptData, toolFunctions, {
      ...opts,
      slug
    });
    this.project.addPrompt(codePrompt);
    return prompt;
  }
};
|
|
16453
|
+
/**
 * Describes a parameters definition declared in code, together with the
 * project it is attached to.
 */
var CodeParameters = class {
  project;
  name;
  slug;
  description;
  schema;
  ifExists;
  metadata;
  /**
   * @param {object} project - Project handed to `projectNameToId.resolve`.
   * @param {object} opts - Supplies `name`, `slug`, `description`, `schema`,
   *   `ifExists` and `metadata`.
   */
  constructor(project, opts) {
    this.project = project;
    this.name = opts.name;
    this.slug = opts.slug;
    this.description = opts.description;
    this.schema = opts.schema;
    this.ifExists = opts.ifExists;
    this.metadata = opts.metadata;
  }
  /**
   * Builds the function-definition object for this parameters definition.
   *
   * @param {{resolve: function(object): Promise<string>}} projectNameToId -
   *   Resolver used to turn the project object into a project id.
   * @returns {Promise<object>} Definition whose `function_type` and
   *   `function_data.type` are both `"parameters"`; `function_data.data` is an
   *   empty object and `function_data.__schema` is the schema produced by
   *   `serializeEvalParameterstoParametersSchema(this.schema)`.
   */
  async toFunctionDefinition(projectNameToId) {
    return {
      project_id: await projectNameToId.resolve(this.project),
      name: this.name,
      slug: this.slug,
      description: this.description ?? "",
      function_type: "parameters",
      function_data: {
        type: "parameters",
        data: {},
        __schema: serializeEvalParameterstoParametersSchema(this.schema)
      },
      if_exists: this.ifExists,
      metadata: this.metadata
    };
  }
};
|
|
16487
|
+
/**
 * Creates parameters definitions for a project and registers them on it.
 */
var ParametersBuilder = class {
  /**
   * @param {object} project - Project that receives definitions via
   *   `addParameters`.
   */
  constructor(project) {
    this.project = project;
  }
  /**
   * Registers a `CodeParameters` built from `opts` on the project.
   *
   * @param {object} opts - Reads `name`, `slug`, `description`, `schema`,
   *   `ifExists` and `metadata`. When `slug` is null/undefined it is derived
   *   from `name` with `slugify`.
   * @returns {*} `opts.schema`, unchanged.
   */
  create(opts) {
    const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
    const codeParameters = new CodeParameters(this.project, {
      name: opts.name,
      slug,
      description: opts.description,
      schema: opts.schema,
      ifExists: opts.ifExists,
      metadata: opts.metadata
    });
    this.project.addParameters(codeParameters);
    return opts.schema;
  }
};
|
|
16505
|
+
/**
 * Converts a map of eval parameter definitions into a JSON-schema-style
 * object description.
 *
 * Entries whose `type` is `"prompt"` become `{ type: "object", "x-bt-type":
 * "prompt" }` with optional `description` and `default`; every other entry is
 * converted with `zodToJsonSchema`. A name is listed in `required` when its
 * resulting property has no default.
 *
 * @param {Object<string, object>} parameters - Parameter definitions by name.
 * @returns {{type: "object", properties: object, required?: string[], additionalProperties: true}}
 *   `required` is omitted when no parameter is required.
 */
function serializeEvalParameterstoParametersSchema(parameters) {
  const properties = {};
  const required = [];
  for (const [name, value] of Object.entries(parameters)) {
    if ("type" in value && value.type === "prompt") {
      const defaultPromptData = value.default ? promptDefinitionToPromptData(value.default) : void 0;
      properties[name] = {
        type: "object",
        "x-bt-type": "prompt",
        ...value.description ? { description: value.description } : {},
        ...defaultPromptData ? { default: defaultPromptData } : {}
      };
      if (!defaultPromptData) {
        required.push(name);
      }
    } else {
      const schemaObj = zodToJsonSchema(value);
      properties[name] = schemaObj;
      // Only the presence of a `default` key matters here, not its value.
      if (!("default" in schemaObj)) {
        required.push(name);
      }
    }
  }
  return {
    type: "object",
    properties,
    ...required.length > 0 ? { required } : {},
    additionalProperties: true
  };
}
|
|
15552
|
-
var
|
|
15553
|
-
|
|
15554
|
-
|
|
15555
|
-
|
|
15556
|
-
|
|
15557
|
-
|
|
15558
|
-
|
|
15559
|
-
|
|
15560
|
-
|
|
16535
|
+
/**
 * Two-way cache between project names and project ids.
 *
 * Lookups that miss the cache call the app connection obtained from
 * `_internalGetGlobalState().appConn()` and store the result in both
 * directions.
 */
var ProjectNameIdMap = class {
  nameToId = {};
  idToName = {};
  /**
   * Returns the id for a project name, calling `api/project/register` on a
   * cache miss.
   *
   * @param {string} projectName
   * @returns {Promise<string>} The project id.
   */
  async getId(projectName) {
    // Own-property check: the caches are plain objects, so `in` would also
    // match inherited keys such as "constructor" or "toString" and hand back
    // an Object.prototype member instead of a project id.
    if (Object.prototype.hasOwnProperty.call(this.nameToId, projectName)) {
      return this.nameToId[projectName];
    }
    const response = await _internalGetGlobalState().appConn().post_json("api/project/register", {
      project_name: projectName
    });
    const result = z11.object({
      project: Project
    }).parse(response);
    const projectId = result.project.id;
    this.nameToId[projectName] = projectId;
    this.idToName[projectId] = projectName;
    // Return the fetched value directly rather than reading it back, so the
    // result is correct even for a key the plain-object cache cannot store
    // (e.g. "__proto__").
    return projectId;
  }
  /**
   * Returns the name for a project id, calling `api/project/get` on a cache
   * miss. The response is parsed as a non-empty array and its first element
   * is used.
   *
   * @param {string} projectId
   * @returns {Promise<string>} The project name.
   */
  async getName(projectId) {
    if (Object.prototype.hasOwnProperty.call(this.idToName, projectId)) {
      return this.idToName[projectId];
    }
    const response = await _internalGetGlobalState().appConn().post_json("api/project/get", {
      id: projectId
    });
    const result = z11.array(Project).nonempty().parse(response);
    const projectName = result[0].name;
    this.idToName[projectId] = projectName;
    this.nameToId[projectName] = projectId;
    return projectName;
  }
  /**
   * Resolves a project object to an id: its own `id` when truthy, otherwise
   * the id looked up from its `name`.
   *
   * @param {{id?: string, name?: string}} project
   * @returns {Promise<string>} The project id.
   */
  async resolve(project) {
    if (project.id) {
      return project.id;
    }
    return this.getId(project.name);
  }
};
|
|
15640
16571
|
|
|
15641
16572
|
// dev/types.ts
|
|
15642
|
-
import { z as
|
|
15643
|
-
var evalBodySchema =
|
|
15644
|
-
name:
|
|
15645
|
-
parameters:
|
|
16573
|
+
import { z as z12 } from "zod/v3";
|
|
16574
|
+
// Zod object schema named for an eval request body. `name` is required;
// `parameters`, `scores`, `experiment_name` and `project_id` accept
// null/undefined; `parent` and `stream` are optional; `data` reuses the `data`
// field of the `RunEval` schema.
var evalBodySchema = z12.object({
  name: z12.string(),
  parameters: z12.record(z12.string(), z12.unknown()).nullish(),
  data: RunEval.shape.data,
  scores: z12.array(
    z12.object({
      function_id: FunctionId,
      name: z12.string()
    })
  ).nullish(),
  experiment_name: z12.string().nullish(),
  project_id: z12.string().nullish(),
  parent: InvokeParent.optional(),
  stream: z12.boolean().optional()
});
|
|
15658
|
-
var
|
|
15659
|
-
|
|
15660
|
-
|
|
15661
|
-
|
|
15662
|
-
type:
|
|
16589
|
+
// Record keyed by string whose values are one of two object shapes:
//   - { type: "prompt", default?: PromptData, description?: string }
//   - { type: "data", schema: record, default?: unknown, description?: string }
var staticParametersSchema = z12.record(
  z12.string(),
  z12.union([
    z12.object({
      type: z12.literal("prompt"),
      default: PromptData.optional(),
      description: z12.string().optional()
    }),
    z12.object({
      type: z12.literal("data"),
      schema: z12.record(z12.unknown()),
      default: z12.unknown().optional(),
      description: z12.string().optional()
    })
  ])
);
|
|
15675
|
-
var
|
|
15676
|
-
|
|
16605
|
+
// Object schema with `type: "object"`, a string-keyed `properties` record of
// records, and optional `required` (string array) and `additionalProperties`
// (boolean) fields.
var parametersSchema = z12.object({
  type: z12.literal("object"),
  properties: z12.record(z12.string(), z12.record(z12.unknown())),
  required: z12.array(z12.string()).optional(),
  additionalProperties: z12.boolean().optional()
});
|
|
16611
|
+
// Object schema with required string `slug` and `name`, and optional string
// `parametersId`, `projectId` and `version`.
var parametersSourceSchema = z12.object({
  parametersId: z12.string().optional(),
  slug: z12.string(),
  name: z12.string(),
  projectId: z12.string().optional(),
  version: z12.string().optional()
});
|
|
16618
|
+
// Container tagged `type: "braintrust.parameters"` that pairs a
// `parametersSchema` value with a `parametersSourceSchema` value.
var parametersContainerSchema = z12.object({
  type: z12.literal("braintrust.parameters"),
  schema: parametersSchema,
  source: parametersSourceSchema
});
|
|
16623
|
+
// Container tagged `type: "braintrust.staticParameters"` that carries a
// `staticParametersSchema` value; its `source` must be null.
var staticParametersContainerSchema = z12.object({
  type: z12.literal("braintrust.staticParameters"),
  schema: staticParametersSchema,
  source: z12.null()
});
|
|
16628
|
+
// Union of the accepted serialized parameter forms: either tagged container,
// or the bare static-parameters record.
var serializedParametersContainerSchema = z12.union([
  parametersContainerSchema,
  staticParametersContainerSchema,
  // keeping this type here since old versions of the SDK will still pass the unwrapped schema and we need to handle this in the app
  staticParametersSchema
]);
|
|
16634
|
+
// Object schema with optional `parameters` (any serialized parameters form)
// and optional `scores` (array of `{ name: string }`).
var evaluatorDefinitionSchema = z12.object({
  parameters: serializedParametersContainerSchema.optional(),
  scores: z12.array(z12.object({ name: z12.string() })).optional()
});
|
|
15678
|
-
var evaluatorDefinitionsSchema =
|
|
15679
|
-
|
|
16638
|
+
// Record keyed by string whose values are `evaluatorDefinitionSchema` objects.
var evaluatorDefinitionsSchema = z12.record(
  z12.string(),
  evaluatorDefinitionSchema
);
|
|
15682
16642
|
|
|
@@ -15695,6 +16655,7 @@ export {
|
|
|
15695
16655
|
CodePrompt,
|
|
15696
16656
|
ContextManager,
|
|
15697
16657
|
DEFAULT_FETCH_BATCH_SIZE,
|
|
16658
|
+
DEFAULT_MAX_REQUEST_SIZE,
|
|
15698
16659
|
Dataset2 as Dataset,
|
|
15699
16660
|
ERR_PERMALINK,
|
|
15700
16661
|
Eval,
|
|
@@ -15705,6 +16666,7 @@ export {
|
|
|
15705
16666
|
IDGenerator,
|
|
15706
16667
|
JSONAttachment,
|
|
15707
16668
|
LEGACY_CACHED_HEADER,
|
|
16669
|
+
LOGS3_OVERFLOW_REFERENCE_TYPE,
|
|
15708
16670
|
LazyValue,
|
|
15709
16671
|
Logger,
|
|
15710
16672
|
LoginInvalidOrgError,
|
|
@@ -15729,8 +16691,10 @@ export {
|
|
|
15729
16691
|
_exportsForTestingOnly,
|
|
15730
16692
|
_internalGetGlobalState,
|
|
15731
16693
|
_internalSetInitialState,
|
|
16694
|
+
addAzureBlobHeaders,
|
|
15732
16695
|
braintrustStreamChunkSchema,
|
|
15733
16696
|
buildLocalSummary,
|
|
16697
|
+
constructLogs3OverflowRequest,
|
|
15734
16698
|
createFinalValuePassThroughStream,
|
|
15735
16699
|
currentExperiment,
|
|
15736
16700
|
currentLogger,
|
|
@@ -15755,15 +16719,18 @@ export {
|
|
|
15755
16719
|
initLogger,
|
|
15756
16720
|
invoke,
|
|
15757
16721
|
isTemplateFormat,
|
|
16722
|
+
loadParameters,
|
|
15758
16723
|
loadPrompt,
|
|
15759
16724
|
log,
|
|
15760
16725
|
logError,
|
|
15761
16726
|
login,
|
|
15762
16727
|
loginToState,
|
|
16728
|
+
logs3OverflowUploadSchema,
|
|
15763
16729
|
newId,
|
|
15764
16730
|
parseCachedHeader,
|
|
15765
16731
|
parseTemplateFormat,
|
|
15766
16732
|
permalink,
|
|
16733
|
+
pickLogs3OverflowObjectIds,
|
|
15767
16734
|
projects,
|
|
15768
16735
|
promptContentsSchema,
|
|
15769
16736
|
promptDefinitionSchema,
|
|
@@ -15784,6 +16751,8 @@ export {
|
|
|
15784
16751
|
traceable,
|
|
15785
16752
|
traced,
|
|
15786
16753
|
updateSpan,
|
|
16754
|
+
uploadLogs3OverflowPayload,
|
|
16755
|
+
utf8ByteLength,
|
|
15787
16756
|
withCurrent,
|
|
15788
16757
|
withDataset,
|
|
15789
16758
|
withExperiment,
|