braintrust 0.0.199 → 0.0.201
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/authorize.ts +172 -0
- package/dev/errorHandler.ts +37 -0
- package/dev/index.ts +1 -0
- package/dev/server.ts +413 -0
- package/dev/stream.ts +14 -0
- package/dev/types.ts +63 -0
- package/dist/browser.d.mts +8242 -15
- package/dist/browser.d.ts +8242 -15
- package/dist/browser.js +112 -35
- package/dist/browser.mjs +115 -35
- package/dist/cli.js +1348 -549
- package/dist/index.d.mts +8253 -36
- package/dist/index.d.ts +8253 -36
- package/dist/index.js +478 -328
- package/dist/index.mjs +482 -330
- package/package.json +14 -8
- package/tsup.config.ts +7 -0
package/dev/authorize.ts
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { Request, Response, NextFunction } from "express";
|
|
2
|
+
import { IncomingHttpHeaders } from "http";
|
|
3
|
+
import createError from "http-errors";
|
|
4
|
+
|
|
5
|
+
/**
 * Per-request state that `authorizeRequest` stores on `req.ctx`.
 */
export interface RequestContext {
  /** Origin chosen by `extractAllowedOrigin` for the request's `Origin` header. */
  appOrigin: string;
  /**
   * Token taken from the request's auth headers. `undefined` when no auth
   * header was sent or when the token text was the literal "null".
   */
  token: string | undefined;
}
|
|
9
|
+
// Module augmentation: adds the optional `ctx` slot to Express's `Request`
// so middleware and handlers in this package can read it with type safety.
declare module "express" {
  interface Request {
    /** Set by `authorizeRequest`; absent until that middleware has run. */
    ctx?: RequestContext;
  }
}
|
|
14
|
+
|
|
15
|
+
/**
 * Express middleware that builds the `RequestContext` for a request and
 * stores it on `req.ctx`.
 *
 * - `appOrigin` is derived from the `Origin` header via `extractAllowedOrigin`.
 * - `token` is read from the auth headers when either one is present. A token
 *   whose text is "null" (any casing) is treated as "no token".
 *
 * Responds with a 400 error (through `next`) when an auth header is present
 * but no token can be extracted from it. Any exception raised while building
 * the context is forwarded to `next`.
 */
export function authorizeRequest(
  req: Request,
  res: Response,
  next: NextFunction,
) {
  try {
    const appOrigin = extractAllowedOrigin(req.headers[ORIGIN_HEADER]);
    let token: string | undefined = undefined;

    const carriesAuthHeader =
      req.headers.authorization || req.headers[BRAINTRUST_AUTH_TOKEN_HEADER];
    if (carriesAuthHeader) {
      const rawToken = parseBraintrustAuthHeader(req.headers);
      if (!rawToken) {
        return next(createError(400, "Invalid authorization token format"));
      }
      // Clients may send the string "null" to mean "not logged in".
      if (rawToken.toLowerCase() !== "null") {
        token = rawToken;
      }
    }

    req.ctx = { appOrigin, token };

    // Hand over to the next middleware/controller.
    next();
  } catch (e) {
    next(e);
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Express middleware that only lets the request through when `req.ctx`
 * carries a token; otherwise it forwards a 401 "Unauthorized" error.
 */
export function checkAuthorized(
  req: Request,
  res: Response,
  next: NextFunction,
) {
  const token = req.ctx?.token;
  if (token) {
    next();
    return;
  }
  return next(createError(401, "Unauthorized"));
}
|
|
56
|
+
|
|
57
|
+
/**
 * Extracts the auth token from request headers.
 *
 * The dedicated `x-bt-auth-token` header wins when it is present (even if it
 * is an empty string); otherwise the token is taken from `Authorization`.
 *
 * @returns the token text, or `undefined` when neither header yields one.
 */
function parseBraintrustAuthHeader(
  headers: IncomingHttpHeaders | Record<string, string>,
): string | undefined {
  const dedicated = parseHeader(headers, BRAINTRUST_AUTH_TOKEN_HEADER);
  if (dedicated != null) {
    return dedicated;
  }
  const fromAuthorization = parseAuthHeader(headers);
  return fromAuthorization == null ? undefined : fromAuthorization;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Reads a single header value by name.
 *
 * @param headers    header map; values may be a string or a string array
 * @param headerName key to look up (used as-is, no case folding)
 * @returns the string value, the first element when the value is a non-empty
 *          array, or `undefined` otherwise
 */
function parseHeader(
  headers: IncomingHttpHeaders | Record<string, string>,
  headerName: string,
): string | undefined {
  const raw = headers[headerName];
  if (typeof raw === "string") {
    return raw;
  }
  return Array.isArray(raw) && raw.length > 0 ? raw[0] : undefined;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Value passed to the origin callback in `checkOrigin`: a boolean, a string,
 * a RegExp, or an array mixing those.
 */
export type StaticOrigin =
  | boolean
  | string
  | RegExp
  | (boolean | string | RegExp)[];
|
|
84
|
+
|
|
85
|
+
/**
 * Origin callback in the shape expected by the `cors` middleware's `origin`
 * option.
 *
 * - No `Origin` header: reports `true`.
 * - Origin equal to a string entry of `WHITELISTED_ORIGINS`, or matching one
 *   of its RegExp entries: reports the request origin itself.
 * - Anything else: reports `false`.
 *
 * The callback is always invoked with a `null` error.
 */
export function checkOrigin(
  requestOrigin: string | undefined,
  callback: (err: Error | null, origin?: StaticOrigin) => void,
) {
  if (!requestOrigin) {
    return callback(null, true);
  }

  // Entries are either exact origin strings or regular expressions.
  const isWhitelisted = (WHITELISTED_ORIGINS || []).some((allowed) =>
    allowed instanceof RegExp
      ? allowed.test(requestOrigin)
      : allowed === requestOrigin,
  );

  return callback(null, isWhitelisted ? requestOrigin : false);
}
|
|
105
|
+
|
|
106
|
+
/** Header that carries the auth token directly (checked before `Authorization`). */
const BRAINTRUST_AUTH_TOKEN_HEADER = "x-bt-auth-token";
/** Lower-case name of the request's `Origin` header. */
const ORIGIN_HEADER = "origin";
|
|
108
|
+
|
|
109
|
+
/**
 * Picks the origin to record for a request.
 *
 * @param originHeader value of the request's `Origin` header, if any
 * @returns `originHeader` when it is non-empty and `checkOrigin` accepts it;
 *          otherwise `MAIN_ORIGIN`
 */
export function extractAllowedOrigin(originHeader: string | undefined): string {
  let accepted = false;
  checkOrigin(originHeader, (err, origin) => {
    if (!err && origin) {
      accepted = true;
    }
  });
  // `checkOrigin` also accepts a missing/empty header, so re-check it here.
  return accepted && originHeader ? originHeader : MAIN_ORIGIN;
}
|
|
118
|
+
|
|
119
|
+
/** Default application origin; used when a request's origin is not accepted. */
const MAIN_ORIGIN = "https://www.braintrust.dev";

/**
 * Origins accepted by `checkOrigin`. String entries must equal the request
 * origin exactly; RegExp entries are tested against it.
 *
 * The preview pattern is anchored at both ends and its dots are escaped so
 * that only real `*.preview.braintrust.dev` hosts match. An unanchored
 * pattern would also accept e.g.
 * `https://x.preview.braintrust.dev.attacker.example`.
 *
 * One extra origin each can be supplied through the `WHITELISTED_ORIGIN` and
 * `BRAINTRUST_APP_URL` environment variables.
 */
const WHITELISTED_ORIGINS = [
  MAIN_ORIGIN,
  "https://www.braintrustdata.com",
  new RegExp("^https://[A-Za-z0-9.-]+\\.preview\\.braintrust\\.dev$"),
]
  .concat(
    process.env.WHITELISTED_ORIGIN ? [process.env.WHITELISTED_ORIGIN] : [],
  )
  .concat(
    process.env.BRAINTRUST_APP_URL ? [process.env.BRAINTRUST_APP_URL] : [],
  );
|
|
131
|
+
|
|
132
|
+
/**
 * Extracts the credential part of an `Authorization` header of the form
 * `<scheme> <token>`.
 *
 * When the header is an array, its last element is used. The scheme and the
 * token may be separated by one or more spaces, and surrounding whitespace
 * is ignored. The scheme itself is not inspected.
 *
 * @param headers header map keyed by lower-case header name
 * @returns the token, or `null` when the header is missing, empty, or does
 *          not consist of exactly two space-separated parts
 */
function parseAuthHeader(
  headers: Record<string, string | string[] | undefined>,
): string | null {
  const authHeader = headers["authorization"];
  const authValue = Array.isArray(authHeader)
    ? authHeader[authHeader.length - 1]
    : authHeader;

  if (!authValue) {
    return null;
  }

  // Split on runs of spaces: the credentials grammar permits 1*SP between
  // the scheme and the token, so "Bearer  abc" is a valid header.
  const parts = authValue.trim().split(/ +/);
  if (parts.length !== 2) {
    return null;
  }
  return parts[1];
}
|
|
153
|
+
|
|
154
|
+
/**
 * Request header names listed here are passed as `allowedHeaders` to the
 * CORS middleware by the dev server.
 */
export const baseAllowedHeaders = [
  // General / AWS-style headers
  "Content-Type",
  "X-Amz-Date",
  "Authorization",
  "X-Api-Key",
  "X-Amz-Security-Token",
  // Braintrust headers
  "x-bt-auth-token",
  "x-bt-parent",
  // These are eval-specific
  "x-bt-org-name",
  "x-bt-stream-fmt",
  "x-bt-use-cache",
  // x-stainless-* headers
  "x-stainless-os",
  "x-stainless-lang",
  "x-stainless-package-version",
  "x-stainless-runtime",
  "x-stainless-runtime-version",
  "x-stainless-arch",
];
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { Request, Response, ErrorRequestHandler, NextFunction } from "express";
|
|
3
|
+
import { HttpError } from "http-errors";
|
|
4
|
+
|
|
5
|
+
/**
 * Express error-handling middleware that turns errors into JSON responses.
 *
 * - Errors carrying a `status` property (e.g. `http-errors`) are answered with
 *   that status and `{ error: { message, status } }`.
 * - `ZodError`s are answered with 400 and the list of validation issues.
 * - Anything else is logged and answered with a generic 500.
 *
 * If the response has already started, the error is delegated to Express's
 * default handler instead: status and headers can no longer be changed, and
 * attempting to send a second response would throw.
 */
export const errorHandler: ErrorRequestHandler = (
  err: Error | HttpError,
  req: Request,
  res: Response,
  next: NextFunction,
) => {
  if (res.headersSent) {
    next(err);
    return;
  }

  if ("status" in err) {
    res.status(err.status).json({
      error: {
        message: err.message,
        status: err.status,
      },
    });
    return;
  }

  if (err instanceof z.ZodError) {
    res.status(400).json({
      error: {
        message: "Invalid request",
        errors: err.errors,
      },
    });
    return;
  }

  console.error("Internal server error", err);
  res.status(500).json({
    error: {
      message: "Internal server error",
      status: 500,
    },
  });
};
|
package/dev/index.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { runDevServer } from "./server";
|
package/dev/server.ts
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
import express, { NextFunction, Request, Response } from "express";
|
|
2
|
+
import cors from "cors";
|
|
3
|
+
import {
|
|
4
|
+
callEvaluatorData,
|
|
5
|
+
Eval,
|
|
6
|
+
EvalData,
|
|
7
|
+
EvalHooks,
|
|
8
|
+
EvalScorer,
|
|
9
|
+
EvaluatorDef,
|
|
10
|
+
OneOrMoreScores,
|
|
11
|
+
scorerName,
|
|
12
|
+
} from "../src/framework";
|
|
13
|
+
import { errorHandler } from "./errorHandler";
|
|
14
|
+
import {
|
|
15
|
+
authorizeRequest,
|
|
16
|
+
baseAllowedHeaders,
|
|
17
|
+
checkAuthorized,
|
|
18
|
+
checkOrigin,
|
|
19
|
+
} from "./authorize";
|
|
20
|
+
import {
|
|
21
|
+
FunctionId,
|
|
22
|
+
InvokeFunctionRequest,
|
|
23
|
+
RunEvalRequest,
|
|
24
|
+
SSEProgressEventData,
|
|
25
|
+
} from "@braintrust/core/typespecs";
|
|
26
|
+
import {
|
|
27
|
+
BaseMetadata,
|
|
28
|
+
BraintrustState,
|
|
29
|
+
EvalCase,
|
|
30
|
+
getSpanParentObject,
|
|
31
|
+
initDataset,
|
|
32
|
+
LoginOptions,
|
|
33
|
+
loginToState,
|
|
34
|
+
} from "../src/logger";
|
|
35
|
+
import { LRUCache } from "../src/prompt-cache/lru-cache";
|
|
36
|
+
import {
|
|
37
|
+
BT_CURSOR_HEADER,
|
|
38
|
+
BT_FOUND_EXISTING_HEADER,
|
|
39
|
+
parseParent,
|
|
40
|
+
} from "@braintrust/core";
|
|
41
|
+
import { serializeSSEEvent } from "./stream";
|
|
42
|
+
import {
|
|
43
|
+
evalBodySchema,
|
|
44
|
+
EvaluatorDefinitions,
|
|
45
|
+
EvaluatorManifest,
|
|
46
|
+
evalParametersSerializedSchema,
|
|
47
|
+
} from "./types";
|
|
48
|
+
import { EvalParameters, validateParameters } from "../src/eval-parameters";
|
|
49
|
+
import { z } from "zod";
|
|
50
|
+
import { promptDefinitionToPromptData } from "../src/framework2";
|
|
51
|
+
import zodToJsonSchema from "zod-to-json-schema";
|
|
52
|
+
/** Listen address for `runDevServer`. */
export interface DevServerOpts {
  /** Host name or address passed to `app.listen`. */
  host: string;
  /** TCP port passed to `app.listen`. */
  port: number;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Converts a thrown value into something `JSON.stringify` can represent.
 *
 * `Error` instances keep `name` and `message` as non-enumerable properties,
 * so stringifying them directly yields `{}`. This copies the enumerable own
 * properties and adds `name` and `message` explicitly. Non-Error values are
 * returned unchanged.
 */
function serializeEvalError(e: unknown): unknown {
  if (e instanceof Error) {
    return { ...e, name: e.name, message: e.message };
  }
  return e;
}

/**
 * Starts an Express server that exposes the given evaluators over HTTP.
 *
 * Routes:
 * - `GET /`      plain-text liveness response.
 * - `GET /list`  JSON map of evaluator name to its serialized parameters and
 *                score names.
 * - `POST /eval` (requires a token) runs one evaluator. With `stream: true`
 *                the response is a server-sent event stream of `start`,
 *                `progress`, `summary`, `done` and `error` events; otherwise
 *                the summary is returned as JSON.
 *
 * @param evaluators evaluator definitions, keyed internally by `evalName`
 *                   (a later definition with the same name replaces an
 *                   earlier one)
 * @param opts       host and port to listen on
 */
export function runDevServer(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  evaluators: EvaluatorDef<any, any, any, any, any>[],
  opts: DevServerOpts,
) {
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
  const allEvaluators: EvaluatorManifest = Object.fromEntries(
    evaluators.map((evaluator) => [evaluator.evalName, evaluator]),
  ) as EvaluatorManifest;

  // NOTE(review): presumably switches the framework out of lazy-load mode so
  // evaluators run when requested; confirm against the framework module.
  globalThis._lazy_load = false;

  const app = express();

  app.use(express.json({ limit: "1gb" }));
  console.log("Starting server");

  // Answer private-network-access preflight requests.
  app.use((req, res, next) => {
    if (req.headers["access-control-request-private-network"]) {
      res.setHeader("Access-Control-Allow-Private-Network", "true");
    }
    next();
  });

  app.use(
    cors({
      origin: checkOrigin,
      methods: ["GET", "PATCH", "POST", "PUT", "DELETE", "OPTIONS"],
      allowedHeaders: baseAllowedHeaders,
      credentials: true,
      exposedHeaders: [
        BT_CURSOR_HEADER,
        BT_FOUND_EXISTING_HEADER,
        "x-bt-span-id",
        "x-bt-span-export",
      ],
      maxAge: 86400,
    }),
  );

  // Populates req.ctx (origin + token) for every route below.
  app.use(authorizeRequest);

  app.get("/", (req, res) => {
    res.send("Hello, world!");
  });

  // List endpoint - returns all available evaluators and their metadata
  app.get("/list", (req, res) => {
    const evalDefs: EvaluatorDefinitions = Object.fromEntries(
      Object.entries(allEvaluators).map(([name, evaluator]) => [
        name,
        {
          parameters: evaluator.parameters
            ? makeEvalParametersSchema(evaluator.parameters)
            : undefined,
          scores: evaluator.scores.map((score, idx) => ({
            name: scorerName(score, idx),
          })),
        },
      ]),
    );
    res.json(evalDefs);
  });

  app.post(
    "/eval",
    checkAuthorized,
    asyncHandler(async (req, res) => {
      const { name, parameters, parent, data, scores, stream } =
        evalBodySchema.parse(req.body);

      const state = await cachedLogin({ apiKey: req.ctx?.token });

      const evaluator = allEvaluators[name];
      if (!evaluator) {
        res.status(404).json({ error: `Evaluator '${name}' not found` });
        return;
      }

      if (
        evaluator.parameters &&
        Object.keys(evaluator.parameters).length > 0
      ) {
        try {
          // This gets done again in the framework, but we do it here too to give a
          // better error message.
          validateParameters(parameters ?? {}, evaluator.parameters);
        } catch (e) {
          console.error("Error validating parameters", e);
          if (e instanceof z.ZodError || e instanceof Error) {
            res.status(400).json({
              error: e.message,
            });
            return;
          }
          throw e;
        }
      }

      const resolvedData = await getDataset(state, data);
      const evalData = callEvaluatorData(resolvedData);
      console.log("Starting eval", evaluator.evalName);

      // Set up SSE headers
      if (stream) {
        res.setHeader("Content-Type", "text/event-stream");
        res.setHeader("Cache-Control", "no-cache");
        res.setHeader("Connection", "keep-alive");
      } else {
        res.setHeader("Content-Type", "application/json");
      }

      // Wrap the evaluator's task so each result is also reported as progress.
      const task = async (
        input: unknown,
        hooks: EvalHooks<unknown, BaseMetadata, EvalParameters>,
      ) => {
        const result = await evaluator.task(input, hooks);

        hooks.reportProgress({
          format: "code",
          output_type: "completion",
          event: "json_delta",
          data: JSON.stringify(result),
        });
        return result;
      };

      try {
        const summary = await Eval(
          "worker-thread",
          {
            ...evaluator,
            data: evalData.data,
            // Local scorers first, then any remote scorers named in the request.
            scores: evaluator.scores.concat(
              scores?.map((score) =>
                makeScorer(state, score.name, score.function_id),
              ) ?? [],
            ),
            task,
            state,
          },
          {
            // Avoid printing the bar to the console.
            progress: {
              start: (name, total) => {},
              stop: () => {
                console.log("Finished running experiment");
              },
              increment: (name) => {},
            },
            stream: (data: SSEProgressEventData) => {
              if (stream) {
                res.write(
                  serializeSSEEvent({
                    event: "progress",
                    data: JSON.stringify(data),
                  }),
                );
              }
            },
            onStart: (metadata) => {
              if (stream) {
                res.write(
                  serializeSSEEvent({
                    event: "start",
                    data: JSON.stringify(metadata),
                  }),
                );
              }
            },
            parent: parseParent(parent),
            parameters: parameters ?? {},
          },
        );

        if (stream) {
          res.write(
            serializeSSEEvent({
              event: "summary",
              data: JSON.stringify(summary.summary),
            }),
          );
          res.write(
            serializeSSEEvent({
              event: "done",
              data: "",
            }),
          );
        } else {
          res.json(summary.summary);
        }
      } catch (e) {
        console.error("Error running eval", e);
        // Serialize explicitly: a bare Error stringifies to "{}", which would
        // hide the failure reason from the client.
        const errorPayload = serializeEvalError(e);
        if (stream) {
          res.write(
            serializeSSEEvent({
              event: "error",
              data: JSON.stringify(errorPayload),
            }),
          );
        } else {
          res.status(500).json({ error: errorPayload });
        }
      } finally {
        res.end();
      }
    }),
  );

  app.use(errorHandler);

  // Start the server
  app.listen(opts.port, opts.host, () => {
    console.log(`Dev server running at http://${opts.host}:${opts.port}`);
  });
}
|
|
279
|
+
/**
 * Adapts an async route handler to Express: a rejection of the handler's
 * promise is forwarded to `next` so the error middleware can handle it.
 */
const asyncHandler = (
  fn: (req: Request, res: Response, next: NextFunction) => Promise<void>,
) => {
  return (req: Request, res: Response, next: NextFunction) => {
    const pending = Promise.resolve(fn(req, res, next));
    pending.catch(next);
  };
};
|
|
284
|
+
|
|
285
|
+
/** Login states keyed by the JSON form of the login options. */
const loginCache = new LRUCache<string, BraintrustState>({
  max: 32, // TODO: Make this configurable
});

/**
 * Returns the `BraintrustState` for the given login options, logging in only
 * when no state for identical options is cached.
 */
async function cachedLogin(options: LoginOptions): Promise<BraintrustState> {
  const cacheKey = JSON.stringify(options);

  const hit = loginCache.get(cacheKey);
  if (!hit) {
    const fresh = await loginToState(options);
    loginCache.set(cacheKey, fresh);
    return fresh;
  }
  return hit;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Resolves the `data` field of an eval request into eval data.
 *
 * Three request shapes are handled:
 * - `project_name` + `dataset_name`: opens that dataset by name.
 * - `dataset_id`: looks up the dataset's project and name, then opens it.
 * - otherwise: uses the inline rows in `data.data` as-is.
 */
async function getDataset(
  state: BraintrustState,
  data: RunEvalRequest["data"],
): Promise<EvalData<unknown, unknown, BaseMetadata>> {
  if ("project_name" in data) {
    return initDataset({
      state,
      project: data.project_name,
      dataset: data.dataset_name,
      _internal_btql: data._internal_btql ?? undefined,
    });
  }

  if ("dataset_id" in data) {
    const { projectId, dataset } = await getDatasetById({
      state,
      datasetId: data.dataset_id,
    });
    return initDataset({
      state,
      projectId,
      dataset,
      _internal_btql: data._internal_btql ?? undefined,
    });
  }

  // Inline rows supplied directly in the request.
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
  return data.data as EvalCase<unknown, unknown, BaseMetadata>[];
}
|
|
328
|
+
|
|
329
|
+
/** Fields read from each row of the `api/dataset/get` response. */
const datasetFetchSchema = z.object({
  project_id: z.string(),
  name: z.string(),
});

/**
 * Looks up a dataset by id through the app connection.
 *
 * @returns the project id and name of the first returned dataset
 * @throws Error when the response contains no datasets; a `ZodError` when the
 *         response does not match the expected shape
 */
async function getDatasetById({
  state,
  datasetId,
}: {
  state: BraintrustState;
  datasetId: string;
}): Promise<{ projectId: string; dataset: string }> {
  const response = await state.appConn().post_json("api/dataset/get", {
    id: datasetId,
  });
  const rows = z.array(datasetFetchSchema).parse(response);
  if (rows.length === 0) {
    throw new Error(`Dataset '${datasetId}' not found`);
  }
  const { project_id, name } = rows[0];
  return { projectId: project_id, dataset: name };
}
|
|
349
|
+
|
|
350
|
+
/**
 * Builds a scorer that delegates to a remote function.
 *
 * Each call posts the eval case to `function/invoke` on the proxy connection
 * (non-streaming, `mode: "auto"`, `strict: true`), with the current span
 * exported as the parent, and returns the parsed JSON response.
 *
 * @param state connection state used to reach the proxy
 * @param name  label embedded in the scorer function's `name`
 * @param score identifies the remote function to invoke
 */
function makeScorer(
  state: BraintrustState,
  name: string,
  score: FunctionId,
): EvalScorer<unknown, unknown, unknown, BaseMetadata> {
  const ret = async (input: EvalCase<unknown, unknown, BaseMetadata>) => {
    const parent = await getSpanParentObject().export();
    const request: InvokeFunctionRequest = {
      ...score,
      input,
      parent,
      stream: false,
      mode: "auto",
      strict: true,
    };
    const response = await state.proxyConn().post(`function/invoke`, request, {
      headers: {
        Accept: "application/json",
      },
    });
    const payload = await response.json();
    // NOTE: Ideally we can parse this value with a zod schema.
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    return payload as OneOrMoreScores;
  };

  // Give the function a descriptive name.
  Object.defineProperty(ret, "name", {
    value: `Remote eval scorer (${name})`,
  });

  return ret;
}
|
|
381
|
+
|
|
382
|
+
/**
 * Converts an evaluator's parameter definitions into their serializable form.
 *
 * - Entries tagged `type: "prompt"` become `{ type: "prompt", default,
 *   description }`, with the default converted through
 *   `promptDefinitionToPromptData` when present.
 * - Every other entry is treated as a zod schema and becomes
 *   `{ type: "data", schema, default, description }`, where `schema` is its
 *   JSON Schema.
 */
function makeEvalParametersSchema(
  parameters: EvalParameters,
): z.infer<typeof evalParametersSerializedSchema> {
  const serialized = Object.entries(parameters).map(([name, value]) => {
    const isPrompt = "type" in value && value.type === "prompt";
    if (!isPrompt) {
      return [
        name,
        {
          type: "data",
          // Since this schema is bundled, it won't pass an instanceof check.
          // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
          schema: zodToJsonSchema(value as z.ZodSchema),
          // NOTE(review): for a zod schema `.default` looks like the builder
          // method rather than a default value; confirm this is intended.
          default: value.default,
          description: value.description,
        },
      ];
    }

    const promptDefault = value.default
      ? promptDefinitionToPromptData(value.default)
      : undefined;
    return [
      name,
      {
        type: "prompt",
        default: promptDefault,
        description: value.description,
      },
    ];
  });

  return Object.fromEntries(serialized);
}
|
package/dev/stream.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** One server-sent event before it is encoded for the wire. */
export interface RawSSEEvent {
  /** Optional event id. */
  id?: string;
  /** Optional event type. */
  event?: string;
  /** Event payload. */
  data: string;
}

/**
 * Encodes an event in the `text/event-stream` wire format.
 *
 * Each defined field is written as `<field>: <value>`, in the order the
 * fields appear on the object, and the event ends with a blank line.
 *
 * A line break inside a value would otherwise end the field early (or the
 * whole event), so:
 * - `data` is split on line breaks and written as one `data:` line per line,
 *   which the receiving end joins back together with "\n";
 * - other fields cannot span lines, so line breaks are removed from them.
 *
 * @param event the event to encode; fields set to `undefined` are skipped
 * @returns the encoded event, ending in "\n\n"
 */
export function serializeSSEEvent(event: RawSSEEvent): string {
  const lines: string[] = [];
  for (const [field, value] of Object.entries(event)) {
    if (value === undefined) {
      continue;
    }
    const text = String(value);
    if (field === "data") {
      for (const part of text.split(/\r\n|\r|\n/)) {
        lines.push(`${field}: ${part}`);
      }
    } else {
      lines.push(`${field}: ${text.replace(/[\r\n]+/g, "")}`);
    }
  }
  return lines.join("\n") + "\n\n";
}
|
package/dev/types.ts
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import {
|
|
2
|
+
functionIdSchema,
|
|
3
|
+
invokeParent,
|
|
4
|
+
runEvalSchema,
|
|
5
|
+
promptDataSchema,
|
|
6
|
+
} from "@braintrust/core/typespecs";
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import { EvaluatorDef } from "../src/framework";
|
|
9
|
+
import { BaseMetadata } from "../src/logger";
|
|
10
|
+
|
|
11
|
+
/** One remote scorer requested alongside an eval run. */
const remoteScoreSchema = z.object({
  function_id: functionIdSchema,
  name: z.string(),
});

/**
 * Request body accepted by `POST /eval`.
 *
 * - `name`: evaluator to run.
 * - `parameters`: optional parameter values keyed by parameter name.
 * - `data`: data source, in the shape defined by `runEvalSchema`.
 * - `scores`: optional extra remote scorers.
 * - `parent`: optional parent to attach the run to.
 * - `stream`: optional flag selecting a streamed response.
 */
export const evalBodySchema = z.object({
  name: z.string(),
  parameters: z.record(z.string(), z.unknown()).nullish(),
  data: runEvalSchema.shape.data,
  scores: z.array(remoteScoreSchema).nullish(),
  parent: invokeParent.optional(),
  stream: z.boolean().optional(),
});
|
|
26
|
+
|
|
27
|
+
/** Evaluator definitions keyed by a string name. */
export type EvaluatorManifest = {
  [name: string]: EvaluatorDef<unknown, unknown, unknown, BaseMetadata>;
};
|
|
31
|
+
|
|
32
|
+
/** Serialized form of a prompt-typed evaluator parameter. */
const promptParameterSchema = z.object({
  type: z.literal("prompt"),
  default: promptDataSchema.optional(),
  description: z.string().optional(),
});

/** Serialized form of a schema-typed evaluator parameter. */
const dataParameterSchema = z.object({
  type: z.literal("data"),
  schema: z.record(z.unknown()), // JSON Schema
  default: z.unknown().optional(),
  description: z.string().optional(),
});

/**
 * Serialized evaluator parameters: a map from parameter name to either a
 * prompt parameter or a data parameter, discriminated by `type`.
 */
export const evalParametersSerializedSchema = z.record(
  z.string(),
  z.union([promptParameterSchema, dataParameterSchema]),
);

/** Static type of `evalParametersSerializedSchema`. */
export type EvalParameterSerializedSchema = z.infer<
  typeof evalParametersSerializedSchema
>;
|
|
52
|
+
|
|
53
|
+
/**
 * Description of one evaluator as returned by the dev server's `/list`
 * endpoint.
 *
 * - `parameters`: serialized parameter definitions, when the evaluator
 *   declares any.
 * - `scores`: names of the evaluator's scorers. Optional so that payloads
 *   without it still validate; declared here so that parsing a `/list`
 *   response with this schema does not silently drop it.
 */
export const evaluatorDefinitionSchema = z.object({
  parameters: evalParametersSerializedSchema.optional(),
  scores: z.array(z.object({ name: z.string() })).optional(),
});
/** Static type of `evaluatorDefinitionSchema`. */
export type EvaluatorDefinition = z.infer<typeof evaluatorDefinitionSchema>;

/** Map from evaluator name to its definition. */
export const evaluatorDefinitionsSchema = z.record(
  z.string(),
  evaluatorDefinitionSchema,
);

/** Static type of `evaluatorDefinitionsSchema`. */
export type EvaluatorDefinitions = z.infer<typeof evaluatorDefinitionsSchema>;
|