braintrust 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ *
8
+ * Adapted from: https://github.com/substack/node-resolve
9
+ */
10
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
11
+ if (k2 === undefined) k2 = k;
12
+ var desc = Object.getOwnPropertyDescriptor(m, k);
13
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
14
+ desc = { enumerable: true, get: function() { return m[k]; } };
15
+ }
16
+ Object.defineProperty(o, k2, desc);
17
+ }) : (function(o, m, k, k2) {
18
+ if (k2 === undefined) k2 = k;
19
+ o[k2] = m[k];
20
+ }));
21
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
22
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
23
+ }) : function(o, v) {
24
+ o["default"] = v;
25
+ });
26
+ var __importStar = (this && this.__importStar) || function (mod) {
27
+ if (mod && mod.__esModule) return mod;
28
+ var result = {};
29
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
30
+ __setModuleDefault(result, mod);
31
+ return result;
32
+ };
33
+ var __importDefault = (this && this.__importDefault) || function (mod) {
34
+ return (mod && mod.__esModule) ? mod : { "default": mod };
35
+ };
36
+ Object.defineProperty(exports, "__esModule", { value: true });
37
+ exports.GlobalPaths = void 0;
38
+ const path = __importStar(require("path"));
39
+ // BRAINTRUST: This was changed to be a relative import
40
+ const tryRealpath_1 = __importDefault(require("./tryRealpath"));
41
+ function nodeModulesPaths(basedir, options) {
42
+ const modules = options && options.moduleDirectory
43
+ ? Array.from(options.moduleDirectory)
44
+ : ["node_modules"];
45
+ // ensure that `basedir` is an absolute path at this point,
46
+ // resolving against the process' current working directory
47
+ const basedirAbs = path.resolve(basedir);
48
+ let prefix = "/";
49
+ if (/^([A-Za-z]:)/.test(basedirAbs)) {
50
+ prefix = "";
51
+ }
52
+ else if (/^\\\\/.test(basedirAbs)) {
53
+ prefix = "\\\\";
54
+ }
55
+ // The node resolution algorithm (as implemented by NodeJS and TypeScript)
56
+ // traverses parents of the physical path, not the symlinked path
57
+ let physicalBasedir;
58
+ try {
59
+ physicalBasedir = (0, tryRealpath_1.default)(basedirAbs);
60
+ }
61
+ catch (_a) {
62
+ // realpath can throw, e.g. on mapped drives
63
+ physicalBasedir = basedirAbs;
64
+ }
65
+ const paths = [physicalBasedir];
66
+ let parsed = path.parse(physicalBasedir);
67
+ while (parsed.dir !== paths[paths.length - 1]) {
68
+ paths.push(parsed.dir);
69
+ parsed = path.parse(parsed.dir);
70
+ }
71
+ const dirs = paths.reduce((dirs, aPath) => {
72
+ for (const moduleDir of modules) {
73
+ if (path.isAbsolute(moduleDir)) {
74
+ if (aPath === basedirAbs && moduleDir) {
75
+ dirs.push(moduleDir);
76
+ }
77
+ }
78
+ else {
79
+ dirs.push(path.join(prefix, aPath, moduleDir));
80
+ }
81
+ }
82
+ return dirs;
83
+ }, []);
84
+ if (options.paths) {
85
+ dirs.push(...options.paths);
86
+ }
87
+ return dirs;
88
+ }
89
+ exports.default = nodeModulesPaths;
90
/**
 * List the module lookup directories that come after the filesystem root's
 * `node_modules` in `require.resolve.paths("/")`.
 *
 * The root is taken from the process' current working directory.
 *
 * @returns The entries following `<root>/node_modules`, or an empty array when
 *   no lookup paths are reported or the root entry is not among them.
 */
function findGlobalPaths() {
    const rootModulesDir = path.join(path.parse(process.cwd()).root, "node_modules");
    const lookupPaths = require.resolve.paths("/");
    if (!lookupPaths) {
        return [];
    }
    // the global paths start one after the root node_modules
    const rootPosition = lookupPaths.indexOf(rootModulesDir);
    if (rootPosition === -1) {
        return [];
    }
    return lookupPaths.slice(rootPosition + 1);
}
101
+ exports.GlobalPaths = findGlobalPaths();
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+ export default function tryRealpath(path: string): string; // Returns the native realpath of `path`; when resolution fails with ENOENT or EISDIR the input is returned unchanged, and any other error is rethrown.
@@ -0,0 +1,21 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ const graceful_fs_1 = require("graceful-fs");
10
/**
 * Resolve `path` with `realpathSync.native`, tolerating two failure modes.
 *
 * @param path Path to resolve.
 * @returns The resolved path, or `path` itself when resolution fails with
 *   error code "ENOENT" or "EISDIR".
 * @throws Any other error raised by the realpath call is rethrown unchanged.
 */
function tryRealpath(path) {
    try {
        return graceful_fs_1.realpathSync.native(path);
    }
    catch (error) {
        const isTolerated = error.code === "ENOENT" || error.code === "EISDIR";
        if (!isTolerated) {
            throw error;
        }
        // Tolerated failure: fall back to the caller's original path.
        return path;
    }
}
21
+ exports.default = tryRealpath;
@@ -0,0 +1,206 @@
1
+ export declare class Project { // Record of a project's identifying fields: `name`, `id` and `org_id`.
2
+ name: string;
3
+ id: string;
4
+ org_id: string; // presumably the id of the organization that owns the project — TODO confirm
5
+ constructor(name: string, id: string, org_id: string); // takes one value per declared field, in declaration order
6
+ }
7
+ /**
8
+ * Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
9
+ *
10
+ * @param project The name of the project to create the experiment in.
11
+ * @param options Additional options for configuring init(). Every field is optional.
12
+ * @param options.experiment The name of the experiment to create. If not specified, a name will be generated automatically.
13
+ * @param options.description An optional description of the experiment.
14
+ * @param options.update If the experiment already exists, continue logging to it.
15
+ * @param options.baseExperiment An optional experiment name to use as a base. If specified, the new experiment will be summarized and compared to this
16
+ * experiment. Otherwise, it will pick an experiment by finding the closest ancestor on the default (e.g. main) branch.
17
+ * @param options.apiUrl The URL of the BrainTrust API. Defaults to https://www.braintrustdata.com.
18
+ * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
19
+ * key is specified, will prompt the user to login.
20
+ * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple organizations.
21
+ * @param options.disableCache Do not use cached login information.
22
+ * @returns A promise that resolves to the newly created Experiment.
23
+ */
24
+ export declare function init(project: string, options?: {
25
+ readonly experiment?: string;
26
+ readonly description?: string;
27
+ readonly update?: boolean;
28
+ readonly baseExperiment?: string;
29
+ readonly apiUrl?: string;
30
+ readonly apiKey?: string;
31
+ readonly orgName?: string;
32
+ readonly disableCache?: boolean;
33
+ }): Promise<Experiment>;
34
+ /**
35
+ * Log into BrainTrust. This will prompt you for your API token, which you can find at
36
+ * https://www.braintrustdata.com/app/token. This method is called automatically by `init()`.
37
+ *
38
+ * @param options Options for configuring login(). Every field is optional.
39
+ * @param options.apiUrl The URL of the BrainTrust API. Defaults to https://www.braintrustdata.com.
40
+ * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
41
+ * key is specified, will prompt the user to login.
42
+ * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple organizations.
43
+ * @param options.disableCache Do not use cached login information.
44
+ * @param options.forceLogin Log in again, even if you have already logged in. By default, this function exits quickly if you have already logged in.
45
+ */
46
+ export declare function login(options?: {
47
+ apiUrl?: string;
48
+ apiKey?: string;
49
+ orgName?: string;
50
+ disableCache?: boolean;
51
+ forceLogin?: boolean;
52
+ }): Promise<void>;
53
+ /**
54
+ * Log a single event to the current experiment. The event will be batched and uploaded behind the scenes.
55
+ *
56
+ * @param options The event to log.
57
+ * @param options.input The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
58
+ * BrainTrust will use the `input` to know whether two test cases are the same between experiments, so they should
59
+ * not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
60
+ * `input` should be identical.
61
+ * @param options.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
62
+ * that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
63
+ * the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
64
+ * be multiple valid queries that answer a single question.
65
+ * @param options.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
66
+ * determine if your `output` value is correct or not. BrainTrust currently does not compare `output` to `expected` for
67
+ * you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
68
+ * navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
69
+ * fine-tune your models.
70
+ * @param options.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
71
+ * that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
72
+ * summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
73
+ * between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was
74
+ * covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
75
+ * @param options.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
76
+ * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
77
+ * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
78
+ * JSON-serializable type, but its keys must be strings.
79
+ * @param options.id (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
80
+ * @param options.inputs (Deprecated) the same as `input` (will be removed in a future version)
81
+ * @returns The `id` of the logged event.
82
+ */
83
+ export declare function log(options: {
84
+ readonly input?: unknown;
85
+ readonly output: unknown;
86
+ readonly expected?: unknown;
87
+ readonly scores: Record<string, number>;
88
+ readonly metadata?: Record<string, unknown>;
89
+ readonly id?: string;
90
+ readonly inputs?: unknown;
91
+ }): string;
92
+ /**
93
+ * Summarize the current experiment, including the scores (compared to the closest reference experiment) and metadata.
94
+ *
95
+ * @param options Options for summarizing the experiment. Every field is optional.
96
+ * @param options.summarizeScores Whether to summarize the scores. If `false`, only the metadata will be returned.
97
+ * @param options.comparisonExperimentId The experiment to compare against. If not specified, the most recent experiment on the origin's main branch will be used.
98
+ * @returns A promise that resolves to a summary of the experiment, including the scores (compared to the closest reference experiment) and metadata.
99
+ */
100
+ export declare function summarize(options?: {
101
+ readonly summarizeScores?: boolean;
102
+ readonly comparisonExperimentId?: string;
103
+ }): Promise<ExperimentSummary>;
104
+ /**
105
+ * An experiment is a collection of logged events, such as model inputs and outputs, which represent
106
+ * a snapshot of your application at a particular point in time. An experiment is meant to capture more
107
+ * than just the model you use, and includes the data you use to test, pre- and post- processing code,
108
+ * comparison metrics (scores), and any other metadata you want to include.
109
+ *
110
+ * Experiments are associated with a project, and two experiments are meant to be easily comparable via
111
+ * their `input` values. You can change the attributes of the experiments in a project (e.g. scoring functions)
112
+ * over time, simply by changing what you log.
113
+ *
114
+ * You should not create `Experiment` objects directly. Instead, use the `braintrust.init()` method.
115
+ */
116
+ export declare class Experiment {
117
+ readonly project: Project;
118
+ readonly id: string;
119
+ readonly name: string;
120
+ readonly user_id: string;
121
+ private logger; // private member; its type is not exposed by this declaration
122
+ constructor(project: Project, id: string, name: string, user_id: string);
123
+ /**
124
+ * Log a single event to the experiment. The event will be batched and uploaded behind the scenes.
125
+ *
126
+ * @param event The event to log.
127
+ * @param event.input The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
128
+ * BrainTrust will use the `input` to know whether two test cases are the same between experiments, so they should
129
+ * not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
130
+ * `input` should be identical.
131
+ * @param event.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
132
+ * that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
133
+ * the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
134
+ * be multiple valid queries that answer a single question.
135
+ * @param event.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
136
+ * determine if your `output` value is correct or not. BrainTrust currently does not compare `output` to `expected` for
137
+ * you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
138
+ * navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
139
+ * fine-tune your models.
140
+ * @param event.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
141
+ * that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
142
+ * summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
143
+ * between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was
144
+ * covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
145
+ * @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
146
+ * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
147
+ * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
148
+ * JSON-serializable type, but its keys must be strings.
149
+ * @param event.id (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
150
+ * @param event.inputs (Deprecated) the same as `input` (will be removed in a future version)
151
+ * @returns The `id` of the logged event.
152
+ */
153
+ log({ input, output, expected, scores, metadata, id, inputs, }: {
154
+ readonly input?: unknown;
155
+ readonly output: unknown;
156
+ readonly expected?: unknown;
157
+ readonly scores: Record<string, number>;
158
+ readonly metadata?: Record<string, unknown>;
159
+ readonly id?: string;
160
+ readonly inputs?: unknown;
161
+ }): string;
162
+ /**
163
+ * Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
164
+ *
165
+ * @param options Options for summarizing the experiment. Every field is optional.
166
+ * @param options.summarizeScores Whether to summarize the scores. If `false`, only the metadata will be returned.
167
+ * @param options.comparisonExperimentId The experiment to compare against. If not specified, the most recent experiment on the origin's main branch will be used.
168
+ * @returns A promise that resolves to a summary of the experiment, including the scores (compared to the closest reference experiment) and metadata.
169
+ */
170
+ summarize(options?: {
171
+ readonly summarizeScores?: boolean;
172
+ readonly comparisonExperimentId?: string;
173
+ }): Promise<ExperimentSummary>;
174
+ }
175
+ /**
176
+ * Summary of one score's performance. This is the value type of `ExperimentSummary.scores`.
177
+ * @property name Name of the score.
178
+ * @property score Average score across all examples.
179
+ * @property diff Difference in score between the current and reference experiment.
180
+ * @property improvements Number of improvements in the score.
181
+ * @property regressions Number of regressions in the score.
182
+ */
183
+ export interface ScoreSummary {
184
+ name: string;
185
+ score: number;
186
+ diff: number;
187
+ improvements: number;
188
+ regressions: number;
189
+ }
190
+ /**
191
+ * Summary of an experiment's scores and metadata.
192
+ * @property projectName Name of the project that the experiment belongs to.
193
+ * @property experimentName Name of the experiment.
194
+ * @property projectUrl URL to the project's page in the BrainTrust app.
195
+ * @property experimentUrl URL to the experiment's page in the BrainTrust app.
196
+ * @property comparisonExperimentName Name of the experiment that the scores are baselined against; may be `undefined`.
197
+ * @property scores Summary of the experiment's scores, as a record of `ScoreSummary` values with string keys; may be `undefined`.
198
+ */
199
+ export interface ExperimentSummary {
200
+ projectName: string;
201
+ experimentName: string;
202
+ projectUrl: string;
203
+ experimentUrl: string;
204
+ comparisonExperimentName: string | undefined;
205
+ scores: Record<string, ScoreSummary> | undefined;
206
+ }