braintrust 0.0.199 → 0.0.201

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,172 @@
1
+ import { Request, Response, NextFunction } from "express";
2
+ import { IncomingHttpHeaders } from "http";
3
+ import createError from "http-errors";
4
+
5
/**
 * Per-request state that `authorizeRequest` stores on the Express request.
 */
export interface RequestContext {
  /** Origin chosen by `extractAllowedOrigin` for this request. */
  appOrigin: string;
  /** Token parsed from the auth headers, or `undefined` when no usable token was sent. */
  token: string | undefined;
}

// Module augmentation: gives Express's `Request` an optional `ctx` slot.
declare module "express" {
  interface Request {
    ctx?: RequestContext;
  }
}
14
+
15
/**
 * Express middleware that builds the `RequestContext` for a request and stores
 * it on `req.ctx`.
 *
 * The origin is resolved from the `origin` header. A token is only parsed when
 * an `authorization` or `x-bt-auth-token` header is present; if one is present
 * but cannot be parsed, the request is rejected with a 400 error. A token whose
 * text is "null" (any casing) is treated as no token.
 *
 * @param req Incoming request; receives `ctx` on success.
 * @param res Unused.
 * @param next Called with no argument on success, or with the error on failure.
 */
export function authorizeRequest(
  req: Request,
  res: Response,
  next: NextFunction,
) {
  try {
    const appOrigin = extractAllowedOrigin(req.headers[ORIGIN_HEADER]);
    let token: string | undefined = undefined;

    const suppliedCredentials =
      req.headers.authorization || req.headers[BRAINTRUST_AUTH_TOKEN_HEADER];
    if (suppliedCredentials) {
      const rawToken = parseBraintrustAuthHeader(req.headers);
      if (!rawToken) {
        return next(createError(400, "Invalid authorization token format"));
      }
      // The literal text "null" stands for "no token".
      if (rawToken.toLowerCase() !== "null") {
        token = rawToken;
      }
    }

    req.ctx = { appOrigin, token };

    // Hand over to the next middleware/controller.
    next();
  } catch (e) {
    next(e);
  }
}
45
+
46
/**
 * Express middleware that only lets a request through when `req.ctx` carries a
 * non-empty token; otherwise it forwards a 401 "Unauthorized" error.
 *
 * @param req Request whose `ctx.token` is inspected.
 * @param res Unused.
 * @param next Called with no argument when authorized, or with the 401 error.
 */
export function checkAuthorized(
  req: Request,
  res: Response,
  next: NextFunction,
) {
  const token = req.ctx?.token;
  if (token) {
    next();
    return;
  }
  return next(createError(401, "Unauthorized"));
}
56
+
57
/**
 * Reads the auth token from the request headers.
 *
 * The `x-bt-auth-token` header wins when present; otherwise the value parsed
 * from the `authorization` header is used.
 *
 * @param headers Request headers.
 * @returns The token text, or `undefined` when neither header yields one.
 */
function parseBraintrustAuthHeader(
  headers: IncomingHttpHeaders | Record<string, string>,
): string | undefined {
  const dedicatedToken = parseHeader(headers, BRAINTRUST_AUTH_TOKEN_HEADER);
  if (dedicatedToken != null) {
    return dedicatedToken;
  }
  const fromAuthorization = parseAuthHeader(headers);
  return fromAuthorization ?? undefined;
}
63
+
64
/**
 * Looks up a single header value.
 *
 * @param headers Request headers.
 * @param headerName Key to read from `headers`.
 * @returns The value when it is a string, the first element when it is a
 *   non-empty array, and `undefined` otherwise.
 */
function parseHeader(
  headers: IncomingHttpHeaders | Record<string, string>,
  headerName: string,
): string | undefined {
  const raw = headers[headerName];
  if (typeof raw === "string") {
    return raw;
  }
  if (Array.isArray(raw) && raw.length > 0) {
    return raw[0];
  }
  return undefined;
}
78
+
79
/** Value handed to the origin callback: a flag, an origin, a pattern, or a list of those. */
export type StaticOrigin =
  | boolean
  | string
  | RegExp
  | Array<boolean | string | RegExp>;

/**
 * Origin check in the callback style accepted by the `cors` `origin` option.
 *
 * A request without an origin is allowed (`true`). Otherwise the origin is
 * compared against `WHITELISTED_ORIGINS`, whose entries are either exact
 * strings or `RegExp` patterns; on a match the request origin itself is passed
 * to the callback, and `false` is passed when nothing matches.
 *
 * @param requestOrigin Value of the request's origin header, if any.
 * @param callback Receives `(null, result)`; this function never reports an error.
 */
export function checkOrigin(
  requestOrigin: string | undefined,
  callback: (err: Error | null, origin?: StaticOrigin) => void,
) {
  if (!requestOrigin) {
    return callback(null, true);
  }

  const isWhitelisted = (WHITELISTED_ORIGINS || []).some((allowed) =>
    allowed instanceof RegExp
      ? allowed.test(requestOrigin)
      : allowed === requestOrigin,
  );

  return isWhitelisted ? callback(null, requestOrigin) : callback(null, false);
}
105
+
106
// Header names read by the authorization helpers in this file.
const BRAINTRUST_AUTH_TOKEN_HEADER = "x-bt-auth-token";
const ORIGIN_HEADER = "origin";

/**
 * Resolves the origin to associate with a request.
 *
 * @param originHeader Value of the request's origin header, if any.
 * @returns `originHeader` when it is non-empty and `checkOrigin` accepts it;
 *   `MAIN_ORIGIN` in every other case.
 */
export function extractAllowedOrigin(originHeader: string | undefined): string {
  let resolved: string = MAIN_ORIGIN;
  checkOrigin(originHeader, (err, origin) => {
    if (err || !originHeader || !origin) {
      return;
    }
    resolved = originHeader;
  });
  return resolved;
}
118
+
119
// Default origin, used when a request's origin is missing or not allowed.
const MAIN_ORIGIN = "https://www.braintrust.dev";

// Origins accepted by `checkOrigin`. Entries are exact strings or RegExp
// patterns; two optional entries come from the WHITELISTED_ORIGIN and
// BRAINTRUST_APP_URL environment variables.
const WHITELISTED_ORIGINS = [
  MAIN_ORIGIN,
  "https://www.braintrustdata.com",
  // Preview deployments. The pattern is anchored at both ends and its dots are
  // escaped so that look-alike hosts such as
  // "https://x.preview.braintrust.dev.evil.com" are not treated as allowed.
  // An explicit port is still accepted.
  new RegExp("^https://[^/]+\\.preview\\.braintrust\\.dev(:\\d+)?$"),
]
  .concat(
    process.env.WHITELISTED_ORIGIN ? [process.env.WHITELISTED_ORIGIN] : [],
  )
  .concat(
    process.env.BRAINTRUST_APP_URL ? [process.env.BRAINTRUST_APP_URL] : [],
  );
131
+
132
/**
 * Extracts the credentials part of an `authorization` header of the form
 * `<scheme> <credentials>`.
 *
 * When the header is an array, its last element is used. The scheme and the
 * credentials may be separated by one or more whitespace characters, and
 * leading/trailing whitespace is ignored. The scheme itself is not checked.
 *
 * @param headers Request headers.
 * @returns The credentials, or `null` when the header is missing, empty, or
 *   does not consist of exactly two whitespace-separated parts.
 */
function parseAuthHeader(
  headers: Record<string, string | string[] | undefined>,
) {
  const authHeader = headers["authorization"];
  const authValue = Array.isArray(authHeader)
    ? authHeader[authHeader.length - 1]
    : authHeader;

  if (!authValue) {
    return null;
  }

  // Split on runs of whitespace so extra spaces between the scheme and the
  // credentials do not cause an otherwise valid header to be rejected.
  const parts = authValue.trim().split(/\s+/);
  if (parts.length !== 2) {
    return null;
  }
  return parts[1];
}
153
+
154
// General-purpose request headers.
const commonAllowedHeaders = [
  "Content-Type",
  "X-Amz-Date",
  "Authorization",
  "X-Api-Key",
  "X-Amz-Security-Token",
  "x-bt-auth-token",
  "x-bt-parent",
];

// These are eval-specific
const evalAllowedHeaders = [
  "x-bt-org-name",
  "x-bt-stream-fmt",
  "x-bt-use-cache",
  "x-stainless-os",
  "x-stainless-lang",
  "x-stainless-package-version",
  "x-stainless-runtime",
  "x-stainless-runtime-version",
  "x-stainless-arch",
];

/** Request header names listed as allowed in the CORS configuration. */
export const baseAllowedHeaders = [
  ...commonAllowedHeaders,
  ...evalAllowedHeaders,
];
@@ -0,0 +1,37 @@
1
+ import { z } from "zod";
2
+ import { Request, Response, ErrorRequestHandler, NextFunction } from "express";
3
+ import { HttpError } from "http-errors";
4
+
5
/**
 * Express error-handling middleware that turns errors into JSON responses.
 *
 * - If the response has already started, the error is passed to `next` so that
 *   Express's default handler can deal with the connection; setting a status
 *   or writing a JSON body at that point is not possible.
 * - Errors carrying a `status` property are reported with that status and
 *   their message.
 * - `ZodError`s are reported as 400 "Invalid request" together with the
 *   validation issues.
 * - Anything else is logged and reported as a generic 500.
 *
 * @param err Error forwarded by an earlier middleware or route handler.
 * @param req Unused.
 * @param res Response to write the JSON error to.
 * @param next Used only to delegate when headers have already been sent.
 */
export const errorHandler: ErrorRequestHandler = (
  err: Error | HttpError,
  req: Request,
  res: Response,
  next: NextFunction,
) => {
  if (res.headersSent) {
    next(err);
    return;
  }
  if ("status" in err) {
    res.status(err.status).json({
      error: {
        message: err.message,
        status: err.status,
      },
    });
    return;
  }
  if (err instanceof z.ZodError) {
    res.status(400).json({
      error: {
        message: "Invalid request",
        errors: err.errors,
      },
    });
    return;
  }

  console.error("Internal server error", err);
  res.status(500).json({
    error: {
      message: "Internal server error",
      status: 500,
    },
  });
};
package/dev/index.ts ADDED
@@ -0,0 +1 @@
1
+ export { runDevServer } from "./server";
package/dev/server.ts ADDED
@@ -0,0 +1,413 @@
1
+ import express, { NextFunction, Request, Response } from "express";
2
+ import cors from "cors";
3
+ import {
4
+ callEvaluatorData,
5
+ Eval,
6
+ EvalData,
7
+ EvalHooks,
8
+ EvalScorer,
9
+ EvaluatorDef,
10
+ OneOrMoreScores,
11
+ scorerName,
12
+ } from "../src/framework";
13
+ import { errorHandler } from "./errorHandler";
14
+ import {
15
+ authorizeRequest,
16
+ baseAllowedHeaders,
17
+ checkAuthorized,
18
+ checkOrigin,
19
+ } from "./authorize";
20
+ import {
21
+ FunctionId,
22
+ InvokeFunctionRequest,
23
+ RunEvalRequest,
24
+ SSEProgressEventData,
25
+ } from "@braintrust/core/typespecs";
26
+ import {
27
+ BaseMetadata,
28
+ BraintrustState,
29
+ EvalCase,
30
+ getSpanParentObject,
31
+ initDataset,
32
+ LoginOptions,
33
+ loginToState,
34
+ } from "../src/logger";
35
+ import { LRUCache } from "../src/prompt-cache/lru-cache";
36
+ import {
37
+ BT_CURSOR_HEADER,
38
+ BT_FOUND_EXISTING_HEADER,
39
+ parseParent,
40
+ } from "@braintrust/core";
41
+ import { serializeSSEEvent } from "./stream";
42
+ import {
43
+ evalBodySchema,
44
+ EvaluatorDefinitions,
45
+ EvaluatorManifest,
46
+ evalParametersSerializedSchema,
47
+ } from "./types";
48
+ import { EvalParameters, validateParameters } from "../src/eval-parameters";
49
+ import { z } from "zod";
50
+ import { promptDefinitionToPromptData } from "../src/framework2";
51
+ import zodToJsonSchema from "zod-to-json-schema";
52
/** Listen options for `runDevServer`. */
export interface DevServerOpts {
  /** Host passed to `app.listen`. */
  host: string;
  /** Port passed to `app.listen`. */
  port: number;
}

/**
 * Makes a caught value safe to send as JSON.
 *
 * `JSON.stringify` on a plain `Error` yields `{}` because `name` and `message`
 * are not own enumerable properties. For such errors this returns a copy of the
 * own enumerable properties with `name` and `message` added, so the payload is
 * a superset of what would otherwise be sent. Values that are not errors, or
 * that define their own `toJSON`, are returned unchanged.
 */
function serializeEvalError(e: unknown): unknown {
  if (!(e instanceof Error) || "toJSON" in e) {
    return e;
  }
  return { ...e, name: e.name, message: e.message };
}

/**
 * Starts an Express server that exposes the given evaluators over HTTP.
 *
 * Routes:
 * - `GET /` responds with a fixed greeting.
 * - `GET /list` responds with every evaluator's serialized parameters and
 *   score names.
 * - `POST /eval` (requires a token, see `checkAuthorized`) runs one evaluator
 *   against the requested data and responds either with a JSON summary or, when
 *   `stream` is set in the body, with server-sent events (`start`, `progress`,
 *   `summary`, `done`, or `error`).
 *
 * @param evaluators Evaluator definitions, indexed internally by `evalName`.
 * @param opts Host and port to listen on.
 */
export function runDevServer(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  evaluators: EvaluatorDef<any, any, any, any, any>[],
  opts: DevServerOpts,
) {
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
  const allEvaluators: EvaluatorManifest = Object.fromEntries(
    evaluators.map((evaluator) => [evaluator.evalName, evaluator]),
  ) as EvaluatorManifest;

  // NOTE(review): `_lazy_load` is defined outside this file; its meaning
  // should be confirmed against the framework code.
  globalThis._lazy_load = false;

  const app = express();

  app.use(express.json({ limit: "1gb" }));
  console.log("Starting server");
  // Echo the private-network opt-in header when the request asks for it.
  app.use((req, res, next) => {
    if (req.headers["access-control-request-private-network"]) {
      res.setHeader("Access-Control-Allow-Private-Network", "true");
    }
    next();
  });

  app.use(
    cors({
      origin: checkOrigin,
      methods: ["GET", "PATCH", "POST", "PUT", "DELETE", "OPTIONS"],
      allowedHeaders: baseAllowedHeaders,
      credentials: true,
      exposedHeaders: [
        BT_CURSOR_HEADER,
        BT_FOUND_EXISTING_HEADER,
        "x-bt-span-id",
        "x-bt-span-export",
      ],
      maxAge: 86400,
    }),
  );

  app.use(authorizeRequest);

  app.get("/", (req, res) => {
    res.send("Hello, world!");
  });

  // List endpoint - returns all available evaluators and their metadata
  app.get("/list", (req, res) => {
    const evalDefs: EvaluatorDefinitions = Object.fromEntries(
      Object.entries(allEvaluators).map(([name, evaluator]) => [
        name,
        {
          parameters: evaluator.parameters
            ? makeEvalParametersSchema(evaluator.parameters)
            : undefined,
          scores: evaluator.scores.map((score, idx) => ({
            name: scorerName(score, idx),
          })),
        },
      ]),
    );
    res.json(evalDefs);
  });

  app.post(
    "/eval",
    checkAuthorized,
    asyncHandler(async (req, res) => {
      const { name, parameters, parent, data, scores, stream } =
        evalBodySchema.parse(req.body);

      const state = await cachedLogin({ apiKey: req.ctx?.token });

      const evaluator = allEvaluators[name];
      if (!evaluator) {
        res.status(404).json({ error: `Evaluator '${name}' not found` });
        return;
      }

      if (
        evaluator.parameters &&
        Object.keys(evaluator.parameters).length > 0
      ) {
        try {
          // This gets done again in the framework, but we do it here too to give a
          // better error message.
          validateParameters(parameters ?? {}, evaluator.parameters);
        } catch (e) {
          console.error("Error validating parameters", e);
          if (e instanceof z.ZodError || e instanceof Error) {
            res.status(400).json({
              error: e.message,
            });
            return;
          }
          throw e;
        }
      }

      const resolvedData = await getDataset(state, data);
      const evalData = callEvaluatorData(resolvedData);
      console.log("Starting eval", evaluator.evalName);

      // Set up SSE headers
      if (stream) {
        res.setHeader("Content-Type", "text/event-stream");
        res.setHeader("Cache-Control", "no-cache");
        res.setHeader("Connection", "keep-alive");
      } else {
        res.setHeader("Content-Type", "application/json");
      }

      // Wrap the evaluator's task so each result is also reported as a
      // progress event.
      const task = async (
        input: unknown,
        hooks: EvalHooks<unknown, BaseMetadata, EvalParameters>,
      ) => {
        const result = await evaluator.task(input, hooks);

        hooks.reportProgress({
          format: "code",
          output_type: "completion",
          event: "json_delta",
          data: JSON.stringify(result),
        });
        return result;
      };

      try {
        const summary = await Eval(
          "worker-thread",
          {
            ...evaluator,
            data: evalData.data,
            // Local scorers first, then any remote scorers named in the request.
            scores: evaluator.scores.concat(
              scores?.map((score) =>
                makeScorer(state, score.name, score.function_id),
              ) ?? [],
            ),
            task,
            state,
          },
          {
            // Avoid printing the bar to the console.
            progress: {
              start: (name, total) => {},
              stop: () => {
                console.log("Finished running experiment");
              },
              increment: (name) => {},
            },
            stream: (data: SSEProgressEventData) => {
              if (stream) {
                res.write(
                  serializeSSEEvent({
                    event: "progress",
                    data: JSON.stringify(data),
                  }),
                );
              }
            },
            onStart: (metadata) => {
              if (stream) {
                res.write(
                  serializeSSEEvent({
                    event: "start",
                    data: JSON.stringify(metadata),
                  }),
                );
              }
            },
            parent: parseParent(parent),
            parameters: parameters ?? {},
          },
        );

        if (stream) {
          res.write(
            serializeSSEEvent({
              event: "summary",
              data: JSON.stringify(summary.summary),
            }),
          );
          res.write(
            serializeSSEEvent({
              event: "done",
              data: "",
            }),
          );
        } else {
          res.json(summary.summary);
        }
      } catch (e) {
        console.error("Error running eval", e);
        // Include the error's name and message; a bare Error would otherwise
        // serialize to an empty object.
        const errorPayload = serializeEvalError(e);
        if (stream) {
          res.write(
            serializeSSEEvent({
              event: "error",
              data: JSON.stringify(errorPayload),
            }),
          );
        } else {
          res.status(500).json({ error: errorPayload });
        }
      } finally {
        res.end();
      }
    }),
  );

  app.use(errorHandler);

  // Start the server
  app.listen(opts.port, opts.host, () => {
    console.log(`Dev server running at http://${opts.host}:${opts.port}`);
  });
}
279
/**
 * Adapts an async route handler to Express: a rejection of the returned
 * promise is forwarded to `next`.
 *
 * @param fn Async handler to wrap.
 * @returns A handler that invokes `fn` and routes its rejection to `next`.
 */
const asyncHandler = (
  fn: (req: Request, res: Response, next: NextFunction) => Promise<void>,
) => {
  return (req: Request, res: Response, next: NextFunction) => {
    const pending = Promise.resolve(fn(req, res, next));
    pending.catch(next);
  };
};
284
+
285
// Login states keyed by the JSON form of the login options.
const loginCache = new LRUCache<string, BraintrustState>({
  max: 32, // TODO: Make this configurable
});

/**
 * Returns the login state for `options`, calling `loginToState` only when no
 * state is cached under the same serialized options.
 *
 * @param options Login options; their JSON serialization is the cache key.
 * @returns The cached or freshly created state.
 */
async function cachedLogin(options: LoginOptions): Promise<BraintrustState> {
  const cacheKey = JSON.stringify(options);
  const existing = loginCache.get(cacheKey);
  if (!existing) {
    const fresh = await loginToState(options);
    loginCache.set(cacheKey, fresh);
    return fresh;
  }
  return existing;
}
300
+
301
/**
 * Turns the `data` field of an eval request into eval data.
 *
 * - With `project_name`: opens the dataset by project and dataset name.
 * - With `dataset_id`: looks up the dataset's project id and name first, then
 *   opens it.
 * - Otherwise: returns the inline `data.data` rows as they are.
 *
 * @param state Login state used for the dataset calls.
 * @param data The request's data specification.
 */
async function getDataset(
  state: BraintrustState,
  data: RunEvalRequest["data"],
): Promise<EvalData<unknown, unknown, BaseMetadata>> {
  if ("project_name" in data) {
    return initDataset({
      state,
      project: data.project_name,
      dataset: data.dataset_name,
      _internal_btql: data._internal_btql ?? undefined,
    });
  }

  if (!("dataset_id" in data)) {
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    return data.data as EvalCase<unknown, unknown, BaseMetadata>[];
  }

  const located = await getDatasetById({
    state,
    datasetId: data.dataset_id,
  });
  return initDataset({
    state,
    projectId: located.projectId,
    dataset: located.dataset,
    _internal_btql: data._internal_btql ?? undefined,
  });
}
328
+
329
// Fields read from each row returned by "api/dataset/get".
const datasetFetchSchema = z.object({
  project_id: z.string(),
  name: z.string(),
});

/**
 * Fetches a dataset's project id and name by dataset id.
 *
 * @returns The project id and dataset name of the first returned row.
 * @throws Error when the response contains no rows; a `ZodError` when the
 *   response does not match the expected shape.
 */
async function getDatasetById({
  state,
  datasetId,
}: {
  state: BraintrustState;
  datasetId: string;
}): Promise<{ projectId: string; dataset: string }> {
  const response = await state.appConn().post_json("api/dataset/get", {
    id: datasetId,
  });
  const rows = z.array(datasetFetchSchema).parse(response);
  const [first] = rows;
  if (first === undefined) {
    throw new Error(`Dataset '${datasetId}' not found`);
  }
  return { projectId: first.project_id, dataset: first.name };
}
349
+
350
/**
 * Builds a scorer that delegates to a remote function via `function/invoke`.
 *
 * The returned function's `name` is set to `Remote eval scorer (<name>)`.
 *
 * @param state Login state whose proxy connection is used for the call.
 * @param name Display name included in the scorer's `name`.
 * @param score Identifier of the remote function, spread into the request.
 */
function makeScorer(
  state: BraintrustState,
  name: string,
  score: FunctionId,
): EvalScorer<unknown, unknown, unknown, BaseMetadata> {
  const remoteScorer = async (
    input: EvalCase<unknown, unknown, BaseMetadata>,
  ) => {
    const request: InvokeFunctionRequest = {
      ...score,
      input,
      parent: await getSpanParentObject().export(),
      stream: false,
      mode: "auto",
      strict: true,
    };
    const response = await state.proxyConn().post(`function/invoke`, request, {
      headers: {
        Accept: "application/json",
      },
    });
    // NOTE: Ideally we can parse this value with a zod schema.
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    return (await response.json()) as OneOrMoreScores;
  };

  Object.defineProperty(remoteScorer, "name", {
    value: `Remote eval scorer (${name})`,
  });

  return remoteScorer;
}
381
+
382
/**
 * Converts an evaluator's parameter definitions into their serialized form.
 *
 * Entries whose `type` is "prompt" become `{ type: "prompt", default,
 * description }`, with the default converted by
 * `promptDefinitionToPromptData`. Every other entry is treated as a zod schema
 * and becomes `{ type: "data", schema, default, description }`, where `schema`
 * is its JSON Schema.
 *
 * @param parameters Parameter definitions keyed by parameter name.
 */
function makeEvalParametersSchema(
  parameters: EvalParameters,
): z.infer<typeof evalParametersSerializedSchema> {
  const serializedEntries = Object.entries(parameters).map(([name, value]) => {
    if ("type" in value && value.type === "prompt") {
      const promptDefault = value.default
        ? promptDefinitionToPromptData(value.default)
        : undefined;
      return [
        name,
        {
          type: "prompt",
          default: promptDefault,
          description: value.description,
        },
      ];
    }

    return [
      name,
      {
        type: "data",
        // Since this schema is bundled, it won't pass an instanceof check.
        // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
        schema: zodToJsonSchema(value as z.ZodSchema),
        default: value.default,
        description: value.description,
      },
    ];
  });

  return Object.fromEntries(serializedEntries);
}
package/dev/stream.ts ADDED
@@ -0,0 +1,14 @@
1
/** One server-sent event before it is written to the wire. */
export interface RawSSEEvent {
  id?: string;
  event?: string;
  data: string;
}

/**
 * Serializes an event into the server-sent events text format.
 *
 * Each defined field is written as `<field>: <value>` in the object's own key
 * order, and the event ends with a blank line. A value that contains line
 * breaks (`\n`, `\r`, or `\r\n`) is written as one `<field>:` line per line of
 * the value, so embedded line breaks cannot terminate the event early or start
 * a different field.
 *
 * @param event Event to serialize; fields that are `undefined` are omitted.
 * @returns The serialized event, ending in `"\n\n"`.
 */
export function serializeSSEEvent(event: RawSSEEvent): string {
  const lines: string[] = [];
  for (const [key, value] of Object.entries(event)) {
    if (value === undefined) {
      continue;
    }
    for (const line of String(value).split(/\r\n|\r|\n/)) {
      lines.push(`${key}: ${line}`);
    }
  }
  return lines.join("\n") + "\n\n";
}
package/dev/types.ts ADDED
@@ -0,0 +1,63 @@
1
+ import {
2
+ functionIdSchema,
3
+ invokeParent,
4
+ runEvalSchema,
5
+ promptDataSchema,
6
+ } from "@braintrust/core/typespecs";
7
+ import { z } from "zod";
8
+ import { EvaluatorDef } from "../src/framework";
9
+ import { BaseMetadata } from "../src/logger";
10
+
11
// One extra scorer named in an eval request: a function reference plus a name.
const requestedScoreSchema = z.object({
  function_id: functionIdSchema,
  name: z.string(),
});

/**
 * Schema for the body of an eval request: the evaluator `name`, optional
 * `parameters`, the `data` specification, optional extra `scores`, an optional
 * `parent`, and an optional `stream` flag.
 */
export const evalBodySchema = z.object({
  name: z.string(),
  parameters: z.record(z.string(), z.unknown()).nullish(),
  data: runEvalSchema.shape.data,
  scores: z.array(requestedScoreSchema).nullish(),
  parent: invokeParent.optional(),
  stream: z.boolean().optional(),
});
26
+
27
/** Evaluator definitions keyed by a string name. */
export type EvaluatorManifest = Record<
  string,
  EvaluatorDef<unknown, unknown, unknown, BaseMetadata>
>;
31
+
32
// Serialized form of a prompt parameter.
const serializedPromptParameterSchema = z.object({
  type: z.literal("prompt"),
  default: promptDataSchema.optional(),
  description: z.string().optional(),
});

// Serialized form of a data parameter.
const serializedDataParameterSchema = z.object({
  type: z.literal("data"),
  schema: z.record(z.unknown()), // JSON Schema
  default: z.unknown().optional(),
  description: z.string().optional(),
});

/**
 * Serialized evaluator parameters keyed by parameter name; each entry is
 * either a "prompt" or a "data" parameter.
 */
export const evalParametersSerializedSchema = z.record(
  z.string(),
  z.union([serializedPromptParameterSchema, serializedDataParameterSchema]),
);

/** Type inferred from `evalParametersSerializedSchema`. */
export type EvalParameterSerializedSchema = z.infer<
  typeof evalParametersSerializedSchema
>;
52
+
53
/**
 * Serialized description of one evaluator.
 *
 * `scores` is optional so that values without it still parse. It is declared
 * here because a zod object schema drops keys it does not know about, which
 * would otherwise discard the score names when a listing is parsed.
 */
export const evaluatorDefinitionSchema = z.object({
  parameters: evalParametersSerializedSchema.optional(),
  scores: z.array(z.object({ name: z.string() })).optional(),
});
/** Type inferred from `evaluatorDefinitionSchema`. */
export type EvaluatorDefinition = z.infer<typeof evaluatorDefinitionSchema>;

/** Evaluator descriptions keyed by evaluator name. */
export const evaluatorDefinitionsSchema = z.record(
  z.string(),
  evaluatorDefinitionSchema,
);

/** Type inferred from `evaluatorDefinitionsSchema`. */
export type EvaluatorDefinitions = z.infer<typeof evaluatorDefinitionsSchema>;