braintrust 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/logger.js ADDED
@@ -0,0 +1,652 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
25
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
26
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
27
+ return new (P || (P = Promise))(function (resolve, reject) {
28
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
29
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
30
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
31
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
32
+ });
33
+ };
34
+ var __importDefault = (this && this.__importDefault) || function (mod) {
35
+ return (mod && mod.__esModule) ? mod : { "default": mod };
36
+ };
37
+ Object.defineProperty(exports, "__esModule", { value: true });
38
+ exports.Experiment = exports.summarize = exports.log = exports.login = exports.init = exports.Project = void 0;
39
+ const http = __importStar(require("http"));
40
+ const https = __importStar(require("https"));
41
+ const axios_1 = __importDefault(require("axios"));
42
+ const uuid_1 = require("uuid");
43
+ const git = __importStar(require("./gitutil"));
44
+ const oai_1 = require("./oai");
45
// Project / experiment most recently set up by init(); both start unset.
let _state = { current_project: null, current_experiment: null };

// Connection settings. All start unset and are filled in while logging in.
let API_URL = null; // base URL used for the login request and for app links
let ORG_ID = null; // id of the selected organization
let ORG_NAME = null; // name of the selected organization
let LOG_URL = null; // base URL handed to the shared API connection

// Set to true once login() has completed.
let LOGGED_IN = false;
54
/**
 * Small wrapper around an axios instance that is bound to one base URL and,
 * optionally, a bearer token. The axios instance is rebuilt (with fresh
 * keep-alive agents) every time the token changes.
 */
class HTTPConnection {
  constructor(base_url) {
    this.base_url = base_url;
    this.token = null;
    this.session = null;
    this._reset();
  }

  /**
   * GET "ping" and report whether it answered with HTTP 200. The first
   * response body seen is cached as the module-level user info. Any error
   * is swallowed and reported as `false`.
   */
  async ping() {
    try {
      const resp = await this.get("ping");
      if (_var_user_info === null) {
        _var_user_info = resp.data;
      }
      return resp.status === 200;
    } catch (e) {
      return false;
    }
  }

  /** Rebuilds the underlying session. */
  make_long_lived() {
    // Following a suggestion in https://stackoverflow.com/questions/23013220/max-retries-exceeded-with-url-in-requests
    this._reset();
  }

  /** Stores the whitespace-trimmed token and rebuilds the session so it is sent. */
  set_token(token) {
    this.token = token.trim();
    this._reset();
  }

  // As far as I can tell, you cannot set the retry/backoff factor here
  _reset() {
    const headers = this.token ? { Authorization: `Bearer ${this.token}` } : {};
    // Keep-alive agents, from https://github.com/axios/axios/issues/1846
    this.session = axios_1.default.create({
      httpAgent: new http.Agent({ keepAlive: true }),
      httpsAgent: new https.Agent({ keepAlive: true }),
      headers,
    });
  }

  /** GET `path` (relative to the base URL) with optional query parameters. */
  async get(path, params = undefined) {
    return await this.session.get(_urljoin(this.base_url, path), { params });
  }

  /** POST `params` as the request body to `path` (relative to the base URL). */
  async post(path, params = undefined) {
    return await this.session.post(_urljoin(this.base_url, path), params);
  }
}
110
// Shared connection, created on first use.
let _api_conn = null;

/**
 * Returns the shared HTTPConnection, constructing it against the current
 * LOG_URL the first time it is requested.
 */
function api_conn() {
  _api_conn = _api_conn || new HTTPConnection(LOG_URL);
  return _api_conn;
}
117
/**
 * GET `object_type` through the shared API connection and return the
 * response body.
 *
 * @param object_type Path of the resource, relative to the connection's base URL.
 * @param args Optional query parameters.
 * @param retries How many extra attempts to make after a failed request
 * (default 0). Values that are not positive numbers are treated as 0, so the
 * request is always attempted at least once.
 * @returns The `data` of the first successful response.
 * @throws The error of the final attempt when every attempt fails.
 */
async function api_get(object_type, args = undefined, retries = 0) {
  const tries = retries > 0 ? Math.floor(retries) + 1 : 1;
  for (let attempt = 1; ; attempt++) {
    try {
      const resp = await api_conn().get(`${object_type}`, args);
      return resp.data;
    } catch (e) {
      if (attempt >= tries) {
        throw e;
      }
      // axios puts the HTTP status on `error.response`; it is absent for
      // network-level failures, in which case only the message is useful.
      const status = e && e.response ? e.response.status : undefined;
      const detail = e && e.message ? e.message : String(e);
      console.log(`Retrying API request ${object_type} ${JSON.stringify(args)} ${status} ${detail}`);
    }
  }
}
135
/**
 * POST `args` to `object_type` through the shared API connection and
 * return the response body.
 */
async function api_insert(object_type, args = undefined) {
  const connection = api_conn();
  const response = await connection.post(`${object_type}`, args);
  return response.data;
}
141
// Cached body of the "ping" endpoint; null until first fetched.
let _var_user_info = null;

/**
 * Returns the cached user info, fetching it from the "ping" endpoint the
 * first time (or whenever the cache is still null).
 */
async function _user_info() {
  if (_var_user_info !== null) {
    return _var_user_info;
  }
  _var_user_info = await api_get("ping");
  return _var_user_info;
}
150
/** Plain record for a project: its name, its id and the id of its organization. */
class Project {
  constructor(name, id, org_id) {
    Object.assign(this, { name, id, org_id });
  }
}
157
+ exports.Project = Project;
158
const _PROJECTS_ENDPOINT = "projects";

/**
 * Find the project called `name` in the current organization (ORG_ID),
 * creating it when no such project exists yet.
 *
 * NOTE (kept from the original source): this is a standalone async function
 * because we do not have async constructors.
 *
 * @param name The project name.
 * @returns The first record returned by the lookup, or by the insert.
 * @throws Error when neither the lookup nor the insert yields a record.
 */
async function initProject(name) {
  const unique_key = { name, org_id: ORG_ID };
  const hasRecord = (rows) => Array.isArray(rows) && rows.length > 0;

  // Can we have an upsert (or insert if not exists) method instead?
  let existing = await api_get(_PROJECTS_ENDPOINT, unique_key);
  if (!hasRecord(existing)) {
    existing = await api_insert(_PROJECTS_ENDPOINT, unique_key);
  }
  // An empty array is truthy, so test for an actual record rather than
  // truthiness; otherwise callers would silently receive `undefined`.
  if (!hasRecord(existing)) {
    throw new Error(`Unable to find record in ${_PROJECTS_ENDPOINT}`);
  }
  return existing[0];
}
179
+ /*
180
+ def guess_git_experiment_name():
181
+ try:
182
+ repo = git.Repo(search_parent_directories=True)
183
+ except git.InvalidGitRepositoryError:
184
+ return None
185
+
186
+ branch = repo.active_branch.name
187
+ diff = repo.git.diff(repo.head.commit.tree)
188
+ if not diff and len(repo.head.commit.parents) > 0:
189
+ diff = repo.head.commit.message + "\n" + repo.git.diff(repo.head.commit.tree, repo.head.commit.parents[0].tree)
190
+
191
+ return [
192
+ {
193
+ "role": "system",
194
+ "content": """\
195
+ You can generate two word summaries for machine learning experiment names, based
196
+ on the branch name and an optional "diff" of the experiment's code on top of the branch.
197
+ The experiment name should be exactly two words, concatenated with a hyphen, all lowercase.
198
+ The input format is the output of "git diff". For example, "foo-bar" is valid but
199
+ "foo-bar-baz" is not.""",
200
+ },
201
+ {
202
+ "role": "user",
203
+ "content": f"Branch: {branch}" + (f"\n\nDiff:\n{diff[:4096]}" if diff else ""),
204
+ },
205
+ ]
206
+ */
207
/**
 * Build the chat messages used to ask a model for a two-word experiment
 * name, based on the current git branch and a diff. This is the JS port of
 * the Python client's `guess_git_experiment_name`.
 *
 * The diff is the working-tree diff when there is one. Otherwise, if HEAD has
 * a parent, it is HEAD's commit message followed by the diff between HEAD and
 * that parent. Only the first 4096 characters of the diff are sent.
 *
 * @returns `undefined` when not inside a git repository, otherwise a
 * two-element array of `{ role, content }` messages (system, then user).
 */
async function guessGitExperimentName() {
  const repo = await git.currentRepo();
  if (!repo) {
    return undefined;
  }
  // `git rev-parse` output ends with a newline; strip it so the prompt
  // carries just the branch name.
  const branch = (await repo.raw(["rev-parse", "--abbrev-ref", "HEAD"])).trim();
  let diff = await repo.diff();
  if (!diff) {
    const history = await repo.log({ maxCount: 2 });
    if (history.all.length > 1) {
      // The log is newest-first: entry 0 is HEAD, entry 1 its parent. The
      // message describing the HEAD-vs-parent diff is HEAD's own message.
      const [head, parent] = history.all;
      diff = head.message + "\n" + (await repo.diff(["HEAD", parent.hash]));
    }
  }
  return [
    {
      role: "system",
      content: `You can generate two word summaries for machine learning experiment names, based
on the branch name and an optional "diff" of the experiment's code on top of the branch.
The experiment name should be exactly two words, concatenated with a hyphen, all lowercase.
The input format is the output of "git diff". For example, "foo-bar" is valid but
"foo-bar-baz" is not.`,
    },
    {
      role: "user",
      content: `Branch: ${branch}` + (diff ? `\n\nDiff:\n${diff.slice(0, 4096)}` : ""),
    },
  ];
}
238
/**
 * Ask the chat model for an experiment name derived from the git state.
 *
 * @returns `undefined` when no OpenAI client is available, when there are
 * no git-derived messages, or when the completion has no choices. Otherwise
 * the first two hyphen-separated words of the reply, with leading and
 * trailing punctuation/whitespace removed.
 */
async function guessExperimentName() {
  const openAI = oai_1.openAI;
  if (openAI() === null) {
    return undefined;
  }

  const messages = await guessGitExperimentName();
  if (!messages) {
    return undefined;
  }

  const cachedChatCompletion = oai_1.cachedChatCompletion;
  const resp = await cachedChatCompletion({
    model: "gpt-3.5-turbo",
    messages,
    max_tokens: 128,
    temperature: 0.7,
  });
  if (!(resp.choices.length > 0)) {
    return undefined;
  }

  const reply = resp.choices[0].message.content;
  const firstTwoWords = reply.split("-").slice(0, 2).join("-");
  // Strip punctuation and whitespace from the prefix and suffix
  return firstTwoWords.replace(/^[ .,;:!?-]+|[ .,;:!?-]+$/g, "");
}
262
/**
 * Batches logged items and uploads them in the background, one request at a
 * time. Items logged while an upload is in flight are sent by a follow-up
 * upload as soon as the current one finishes.
 *
 * Fields:
 * - `items`: items queued but not yet handed to an upload.
 * - `active_flush`: promise of the most recently started upload.
 * - `active_flush_resolved`: true when no upload is in flight.
 */
class LogThread {
  constructor() {
    this.items = [];
    this.active_flush = Promise.resolve([]);
    this.active_flush_resolved = true;
  }

  /**
   * Queue `items` and start an upload unless one is already running.
   * Upload failures do not throw here; they surface from `flush()`.
   */
  log(items) {
    this.items.push(...items);
    if (this.active_flush_resolved) {
      this._start_flush();
    }
  }

  /** Starts one upload and records it as the active flush. */
  _start_flush() {
    this.active_flush_resolved = false;
    const pending = this.flush_once();
    // Nobody awaits a background upload until flush() is called. Mark the
    // promise as handled so a failed upload is not an unhandled rejection;
    // the rejection itself stays on `active_flush` for flush() to observe.
    pending.catch(() => {});
    this.active_flush = pending;
  }

  /**
   * Upload everything queued right now in a single "logs" insert.
   *
   * On failure the batch is put back at the front of the queue and the
   * thread is marked idle again, so a later `log()` or `flush()` retries it
   * instead of the queue stalling forever.
   * NOTE(review): a retried batch may reach the server twice if the first
   * request was processed before it failed — confirm inserts are idempotent.
   *
   * @returns The `data` field of the insert result, or `[]` when there was
   * nothing to send.
   */
  async flush_once() {
    this.active_flush_resolved = false;
    const batch = this.items;
    this.items = [];
    let ret = [];
    try {
      if (batch.length > 0) {
        const resp = await api_insert("logs", batch);
        ret = resp && resp.data;
      }
    } catch (e) {
      this.items = batch.concat(this.items);
      this.active_flush_resolved = true;
      throw e;
    }
    // If more items were added while we were flushing, flush again
    if (this.items.length > 0) {
      this._start_flush();
    } else {
      this.active_flush_resolved = true;
    }
    return ret;
  }

  /**
   * Wait until every queued item has been uploaded. If an earlier upload
   * failed, its re-queued items are retried first.
   *
   * @throws The upload error when an upload awaited here fails.
   */
  async flush() {
    if (this.active_flush_resolved && this.items.length > 0) {
      this._start_flush();
    }
    while (true) {
      await this.active_flush;
      if (this.active_flush_resolved) {
        break;
      }
    }
  }
}
306
/**
 * Log in, then initialize a new experiment in the given project. The project is created if it does not exist yet.
 *
 * @param project The name of the project to create the experiment in.
 * @param options Additional options for configuring init().
 * @param options.experiment The name of the experiment to create. If not specified, a name will be generated automatically.
 * @param options.description An optional description of the experiment.
 * @param options.update If the experiment already exists, continue logging to it.
 * @param options.baseExperiment An optional experiment name to use as a base. If specified, the new experiment will be summarized
 * and compared to this experiment. Otherwise, it will pick an experiment by finding the closest ancestor on the default (e.g. main) branch.
 * @param options.apiUrl The URL of the BrainTrust API. Defaults to https://www.braintrustdata.com.
 * @param options.apiKey The API key to use; forwarded to `login()`.
 * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
 * @param options.disableCache Do not use cached login information; forwarded to `login()`.
 * @returns The newly created Experiment, which also becomes the current experiment.
 */
async function init(project, options = {}) {
  const {
    experiment,
    description,
    baseExperiment,
    update,
    apiUrl,
    apiKey,
    orgName,
    disableCache,
  } = options || {};

  const credentials = { orgName, disableCache, apiKey, apiUrl };
  await login(credentials);

  const currentProject = await initProject(project);
  _state.current_project = currentProject;

  const experimentOptions = { name: experiment, description, update, baseExperiment };
  const currentExperiment = await initExperiment(currentProject, experimentOptions);
  _state.current_experiment = currentExperiment;
  return currentExperiment;
}
342
+ exports.init = init;
343
/**
 * Log into BrainTrust with an API key (you can find your token at
 * https://www.braintrustdata.com/app/token). This method is called automatically by `init()`.
 *
 * @param options Options for configuring login().
 * @param options.apiUrl The URL of the BrainTrust API. Defaults to the `BRAINTRUST_API_URL` environment variable, then to
 * https://www.braintrustdata.com.
 * @param options.apiKey The API key to use. If the parameter is not specified, the `BRAINTRUST_API_KEY` environment variable
 * is used. Token based (interactive) login is not implemented in the JS client, so an error is thrown when neither is set.
 * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
 * @param options.disableCache Accepted for compatibility; this function does not currently read it.
 * @param options.forceLogin Login again, even if you have already logged in (by default, this function will exit quickly
 * if you have already logged in)
 * @throws Error when no API key is available, when the organization cannot be resolved, or when a request fails.
 */
async function login(options = {}) {
  const {
    apiUrl = process.env.BRAINTRUST_API_URL || "https://www.braintrustdata.com",
    apiKey = process.env.BRAINTRUST_API_KEY,
    orgName = undefined,
    forceLogin = false,
  } = options || {};
  if (LOGGED_IN && !forceLogin) {
    return;
  }
  API_URL = apiUrl;
  if (apiKey === undefined) {
    // TODO: Implement token based login in the JS client
    throw new Error("Please specify an api key. Token based login is not yet implemented in the JS client.");
  }

  const resp = await axios_1.default.post(_urljoin(API_URL, `/api/apikey/login`), {
    token: apiKey,
  });
  _check_org_info(resp.data.org_info, orgName);

  // The shared connection captures LOG_URL when it is first created. Drop any
  // cached instance so a (forced) re-login talks to the URL just selected
  // instead of the one from a previous login.
  _api_conn = null;
  const conn = api_conn();
  conn.set_token(apiKey);

  const ping_ok = await conn.ping();
  if (!ping_ok) {
    // ping() swallows errors and only reports false; repeat the request
    // directly so that a failing request rejects with its own error.
    await conn.get("ping");
  }
  conn.make_long_lived();
  LOGGED_IN = true;
}
389
+ exports.login = login;
390
/**
 * Log a single event to the current experiment. The event will be batched and uploaded behind the scenes.
 *
 * @param event The event to log.
 * @param event.inputs The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
 * BrainTrust will use the `inputs` to know whether two test cases are the same between experiments, so they should
 * not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
 * `inputs` should be identical.
 * @param event.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
 * that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
 * the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
 * be multiple valid queries that answer a single question.
 * @param event.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
 * determine if your `output` value is correct or not. BrainTrust currently does not compare `output` to `expected` for
 * you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
 * navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
 * fine-tune your models.
 * @param event.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
 * that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
 * summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
 * between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was
 * covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
 * @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
 * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
 * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
 * JSON-serializable type, but its keys must be strings.
 * @param event.id (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
 * @returns The `id` of the logged event.
 * @throws Error if `init()` has not set up a current experiment.
 */
function log(options) {
  const experiment = _state.current_experiment;
  if (experiment) {
    return experiment.log(options);
  }
  throw new Error("Not initialized. Please call init() first");
}
425
+ exports.log = log;
426
/**
 * Summarize the current experiment, including the scores (compared to the closest reference experiment) and metadata.
 *
 * @param options Options for summarizing the experiment.
 * @param options.summarizeScores Whether to summarize the scores. If false, only the metadata will be returned.
 * @param options.comparisonExperimentId The experiment to compare against. If not given, the most recent experiment on the
 * origin's main branch will be used.
 * @returns A summary of the experiment, including the scores (compared to the closest reference experiment) and metadata.
 * Rejects with an Error if `init()` has not set up a current experiment.
 */
async function summarize(options = {}) {
  const experiment = _state.current_experiment;
  if (!experiment) {
    throw new Error("Not initialized. Please call init() first");
  }
  const summary = await experiment.summarize(options);
  return summary;
}
442
+ exports.summarize = summarize;
443
/**
 * Select the organization to use and record it in ORG_ID, ORG_NAME and
 * LOG_URL.
 *
 * @param org_info Array of organizations; each entry's `id`, `name` and `api_url` are read.
 * @param org_name Name of the organization to select, or `undefined` to take the first entry.
 * @throws Error when `org_info` is empty or contains no organization named `org_name`.
 */
function _check_org_info(org_info, org_name) {
  if (org_info.length === 0) {
    throw new Error("This user is not part of any organizations.");
  }
  const selected = org_info.find((org) => org_name === undefined || org.name === org_name);
  // Decide on the search result itself rather than on ORG_ID: ORG_ID starts
  // out as null (and may hold a value from an earlier login), so it cannot
  // tell whether this call found a match.
  if (selected === undefined) {
    throw new Error(`Organization ${org_name} not found. Must be one of ${org_info
      .map((x) => x.name)
      .join(", ")}`);
  }
  ORG_ID = selected.id;
  ORG_NAME = selected.name;
  LOG_URL = selected.api_url;
}
461
/**
 * Join URL fragments with single "/" separators.
 *
 * One leading "/" is removed from every part, and trailing "/" characters
 * are removed from every part except the last, so a base URL written with a
 * trailing slash does not produce "//" in the result. A trailing slash on
 * the final part is kept.
 *
 * @param parts String fragments, e.g. a base URL followed by path segments.
 * @returns The joined string ("" when called without arguments).
 */
function _urljoin(...parts) {
  const lastIndex = parts.length - 1;
  return parts
    .map((part, index) => {
      const withoutLeading = part.replace(/^\//, "");
      return index < lastIndex ? withoutLeading.replace(/\/+$/, "") : withoutLeading;
    })
    .join("/");
}
464
/**
 * Register a new experiment in `project` and wrap it in an Experiment.
 *
 * The registration request carries the project id plus, when available, the
 * experiment name (given or guessed), description, update flag, git
 * repository status and the id of a base experiment. The base experiment is
 * looked up by name when `baseExperiment` is given, otherwise by the recent
 * commit ancestors.
 *
 * @param project Object whose `id` identifies the project.
 * @param options `{ name, description, update, baseExperiment }`, all optional.
 * @returns The new Experiment.
 * @throws Error when `baseExperiment` is given but no experiment has that name.
 */
async function initExperiment(project, { name, description, update, baseExperiment } = {}) {
  const args = { project_id: project.id };

  const experimentName = name || (await guessExperimentName());
  if (experimentName) {
    args.name = experimentName;
  }
  if (description) {
    args.description = description;
  }
  if (update) {
    args.update = update;
  }

  const repoStatus = await git.getRepoStatus();
  if (repoStatus) {
    args.repo_info = repoStatus;
  }

  const conn = api_conn();
  let base_exp_id;
  if (baseExperiment !== undefined) {
    const lookup = await conn.get("experiments", {
      project_id: project.id,
      name: baseExperiment,
    });
    const matches = lookup.data;
    if (!(matches.length > 0)) {
      throw new Error(`Base experiment ${baseExperiment} not found`);
    }
    base_exp_id = matches[0].id;
  }
  if (base_exp_id === undefined) {
    const commits = await git.getPastNAncestors();
    const byCommits = await conn.post("experiments-by-commits", {
      project_id: project.id,
      commits,
    });
    base_exp_id = byCommits.data.experiment_id;
  }
  if (base_exp_id !== undefined) {
    args.base_exp_id = base_exp_id;
  }

  const registered = (await api_insert("register-experiment", args))[0];
  // NOTE: This is a deviation from the Python lib and allows the log() method
  // to not be async.
  const userInfo = await _user_info();
  return new Experiment(project, registered.id, registered.name, userInfo.id);
}
520
/**
 * An experiment is a collection of logged events, such as model inputs and outputs, which represent
 * a snapshot of your application at a particular point in time. An experiment is meant to capture more
 * than just the model you use, and includes the data you use to test, pre- and post- processing code,
 * comparison metrics (scores), and any other metadata you want to include.
 *
 * Experiments are associated with a project, and two experiments are meant to be easily comparable via
 * their `inputs`. You can change the attributes of the experiments in a project (e.g. scoring functions)
 * over time, simply by changing what you log.
 *
 * You should not create `Experiment` objects directly. Instead, use the `braintrust.init()` method.
 */
class Experiment {
  constructor(project, id, name, user_id) {
    this.project = project;
    this.id = id;
    this.name = name;
    this.user_id = user_id;
    this.logger = new LogThread();
  }

  /**
   * Log a single event to the experiment. The event will be batched and uploaded behind the scenes.
   *
   * @param event The event to log.
   * @param event.inputs The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
   * BrainTrust will use the `inputs` to know whether two test cases are the same between experiments, so they should
   * not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
   * `inputs` should be identical.
   * @param event.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
   * that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
   * the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
   * be multiple valid queries that answer a single question.
   * @param event.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
   * determine if your `output` value is correct or not. BrainTrust currently does not compare `output` to `expected` for
   * you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
   * navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
   * fine-tune your models.
   * @param event.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
   * that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
   * summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
   * between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was
   * covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
   * @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
   * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
   * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
   * JSON-serializable type, but its keys must be strings.
   * @param event.id (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
   * @returns The `id` of the logged event.
   * @throws Error when a score is not a number in [0, 1] (NaN included) or when the serialized event exceeds 64KB.
   */
  log({ inputs, output, expected, scores, metadata, id, }) {
    // Object keys are always strings in JavaScript, so only the score
    // values need validating.
    for (const score of Object.values(scores)) {
      if (typeof score !== "number") {
        throw new Error("score values must be numbers");
      }
      // Written as a negated range test so that NaN, for which every
      // comparison is false, is rejected as well.
      if (!(score >= 0 && score <= 1)) {
        throw new Error("score values must be between 0 and 1");
      }
    }
    const args = {
      id: id || (0, uuid_1.v4)(),
      inputs,
      output,
      expected,
      scores,
      project_id: this.project.id,
      experiment_id: this.id,
      user_id: this.user_id,
      created: new Date().toISOString(),
      metadata,
    };
    // The limit is stated in bytes; a string's `.length` counts UTF-16 code
    // units and under-reports non-ASCII content, so measure the UTF-8 size.
    if (Buffer.byteLength(JSON.stringify(args), "utf8") > 65535) {
      throw new Error("Events must be less than 64KB in size. Please reduce the size of your inputs, output, expected, scores, or metadata.");
    }
    this.logger.log([args]);
    return args.id;
  }

  /**
   * Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
   * Pending events are flushed before the summary is built.
   *
   * @param options Options for summarizing the experiment.
   * @param options.summarizeScores Whether to summarize the scores (default true). If false, only the metadata will be returned.
   * @param options.comparisonExperimentId The experiment to compare against. If not given, the base experiment recorded
   * for this experiment is looked up and used, when there is one.
   * @returns `{ projectName, experimentName, projectUrl, experimentUrl, comparisonExperimentName, scores }`;
   * `scores` and `comparisonExperimentName` are `undefined` when not available.
   */
  async summarize(options = {}) {
    let { summarizeScores = true, comparisonExperimentId = undefined } = options || {};
    await this.logger.flush();

    const projectUrl = `${API_URL}/app/${encodeURIComponent(ORG_NAME)}/p/${encodeURIComponent(this.project.name)}`;
    const experimentUrl = `${projectUrl}/${encodeURIComponent(this.name)}`;

    let scores = undefined;
    let comparisonExperimentName = undefined;
    if (summarizeScores) {
      if (comparisonExperimentId === undefined) {
        const resp = await api_conn().get("/crud/base_experiments", {
          id: this.id,
        });
        const base_experiments = resp.data;
        if (base_experiments.length > 0) {
          comparisonExperimentId = base_experiments[0]["base_exp_id"];
          comparisonExperimentName = base_experiments[0]["base_exp_name"];
        }
      }
      if (comparisonExperimentId !== undefined) {
        // The comparison request is retried up to 3 extra times.
        scores = await api_get("/experiment-comparison", {
          experiment_id: this.id,
          base_experiment_id: comparisonExperimentId,
        }, 3);
      }
    }
    return {
      projectName: this.project.name,
      experimentName: this.name,
      projectUrl: projectUrl,
      experimentUrl: experimentUrl,
      comparisonExperimentName: comparisonExperimentName,
      scores,
    };
  }
}
652
+ exports.Experiment = Experiment;
package/dist/oai.js CHANGED
@@ -1,4 +1,6 @@
1
1
  "use strict";
2
+ // TODO REPLACE WITH autoevals version
3
+ // https://github.com/braintrustdata/braintrust/issues/218
2
4
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
5
  if (k2 === undefined) k2 = k;
4
6
  var desc = Object.getOwnPropertyDescriptor(m, k);