braintrust 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +77 -0
- package/dist/index.js +77 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +15 -0
- package/src/index.ts +112 -0
- package/tsconfig.json +15 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
 * Logs in and then initializes a new experiment inside the given project. The project is created if it does not exist yet.
 *
 * @param project Name of the project in which the experiment is created.
 * @param options Settings for the experiment and for the login.
 * @param options.experiment Name of the experiment to create. When omitted, a name is generated automatically.
 * @param options.description Optional description of the experiment.
 * @param options.base_experiment Optional name of an experiment to use as the base. When given, the new experiment is
 * summarized and compared against it. Otherwise an experiment is picked by finding the closest ancestor on the default
 * (e.g. main) branch.
 * @param options.api_url URL of the BrainTrust API. Defaults to https://www.braintrustdata.com.
 * @param options.api_key API key to use. When omitted, the `BRAINTRUST_API_KEY` environment variable is tried. If no API
 * key is available, the user is prompted to log in.
 * @param options.org_name (Optional) Name of a specific organization to connect to. Useful if you belong to several.
 * @param options.disable_cache Do not use cached login information.
 * @returns The experiment object.
 */
export declare function init(
  project: string,
  { experiment }: {
    readonly experiment?: string;
    readonly description?: string;
    readonly base_experiment?: string;
    readonly api_url?: string;
    readonly api_key?: string;
    readonly org_name?: string;
    readonly disable_cache?: boolean;
  },
): Experiment;
|
|
26
|
+
/**
 * Handle for a single experiment inside a project. Provides methods to log events and to summarize the experiment.
 */
export declare class Experiment {
    private _project;
    /**
     * @param project The project name; it is exposed again through the `project` getter.
     */
    constructor(project: string);
    /** The project name this experiment was constructed with. */
    get project(): string;
    /**
     * Log a single event to the experiment. The event will be batched and uploaded behind the scenes.
     *
     * @param values
     * @param values.inputs The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
     * BrainTrust will use the `inputs` to know whether two test cases are the same between experiments, so they should
     * not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
     * `inputs` should be identical.
     * @param values.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
     * that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
     * the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
     * be multiple valid queries that answer a single question.
     * @param values.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
     * determine if your `output` value is correct or not. BrainTrust currently does not compare `output` to `expected` for
     * you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
     * navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
     * fine-tune your models.
     * @param values.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
     * that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
     * summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
     * between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was
     * covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
     * @param values.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
     * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
     * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
     * JSON-serializable type, but its keys must be strings.
     * @returns The `id` of the logged event.
     */
    log({ inputs, output, expected, scores, metadata, }: {
        readonly inputs: unknown;
        readonly output: unknown;
        readonly expected: unknown;
        readonly scores: Record<string, number>;
        readonly metadata?: Record<string, unknown>;
    }): string;
    /**
     * Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
     *
     * @param options Optional settings; may be omitted entirely.
     * @param options.summarizeScores Whether to summarize the scores. If `false`, only the metadata will be returned.
     * @param options.comparisonExperimentId The experiment to compare against. If omitted, the most recent experiment on
     * the origin's main branch will be used.
     * @returns The experiment summary (declared as a `string`).
     */
    summarize(options?: {
        readonly summarizeScores?: boolean;
        readonly comparisonExperimentId?: string;
    } | undefined): string;
}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
|
|
4
|
+
*
|
|
5
|
+
* @param project The name of the project to create the experiment in.
|
|
6
|
+
* @param options
|
|
7
|
+
* @param options.experiment The name of the experiment to create. If not specified, a name will be generated automatically.
|
|
8
|
+
* @param options.description An optional description of the experiment.
|
|
9
|
+
* @param options.base_experiment An optional experiment name to use as a base. If specified, the new experiment will be summarized and compared to this
|
|
10
|
+
* experiment. Otherwise, it will pick an experiment by finding the closest ancestor on the default (e.g. main) branch.
|
|
11
|
+
* @param options.api_url The URL of the BrainTrust API. Defaults to https://www.braintrustdata.com.
|
|
12
|
+
* @param options.api_key The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
|
|
13
|
+
* key is specified, will prompt the user to login.
|
|
14
|
+
* @param options.org_name (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
|
|
15
|
+
* @param options.disable_cache Do not use cached login information.
|
|
16
|
+
* @returns The experiment object.
|
|
17
|
+
*/
|
|
18
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
19
|
+
exports.Experiment = exports.init = void 0;
|
|
20
|
+
/**
 * Creates the `Experiment` handle for `project`.
 *
 * The options object defaults to `{}` so that `init("my-project")` works; without the default,
 * destructuring an omitted (undefined) second argument throws a TypeError.
 *
 * NOTE: this is still a stub. Only `project` is forwarded to `Experiment`; `experiment` is
 * destructured but not used yet.
 *
 * @param project The name of the project to create the experiment in.
 * @param options Optional settings; see the module-level documentation for the individual fields.
 * @returns The experiment object.
 */
function init(project, { experiment, } = {}) {
    // TODO
    return new Experiment(project);
}
|
|
24
|
+
exports.init = init;
|
|
25
|
+
/**
 * Handle for a single experiment inside a project. Provides methods to log events and to summarize the experiment.
 */
class Experiment {
    /**
     * @param project The project name; it is stored and exposed again through the `project` getter.
     */
    constructor(project) {
        this._project = project;
    }
    /** The project name this experiment was constructed with. */
    get project() {
        return this._project;
    }
    /**
     * Log a single event to the experiment. The event will be batched and uploaded behind the scenes.
     *
     * NOTE: the current implementation is a placeholder that uploads nothing and always returns `"foo"`.
     *
     * @param values
     * @param values.inputs The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
     * BrainTrust will use the `inputs` to know whether two test cases are the same between experiments, so they should
     * not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
     * `inputs` should be identical.
     * @param values.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
     * that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
     * the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
     * be multiple valid queries that answer a single question.
     * @param values.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
     * determine if your `output` value is correct or not. BrainTrust currently does not compare `output` to `expected` for
     * you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
     * navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
     * fine-tune your models.
     * @param values.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
     * that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
     * summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
     * between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was
     * covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
     * @param values.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
     * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
     * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
     * JSON-serializable type, but its keys must be strings.
     * @returns The `id` of the logged event.
     */
    log({ inputs, output, expected, scores, metadata, }) {
        // TODO
        // The no-op closure only references the fields so they count as used until the real upload exists.
        (() => ({ inputs, output, expected, scores, metadata }))();
        return "foo";
    }
    /**
     * Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
     *
     * NOTE: the current implementation is a placeholder that ignores `options` and always returns `"Summary!"`.
     *
     * @param options Optional settings; may be omitted entirely.
     * @param options.summarizeScores Whether to summarize the scores. If `false`, only the metadata will be returned.
     * @param options.comparisonExperimentId The experiment to compare against. If omitted, the most recent experiment on
     * the origin's main branch will be used.
     * @returns The experiment summary as a string.
     */
    summarize(options = undefined) {
        return "Summary!";
    }
}
|
|
77
|
+
exports.Experiment = Experiment;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"program":{"fileNames":["../node_modules/typescript/lib/lib.es5.d.ts","../node_modules/typescript/lib/lib.es2015.d.ts","../node_modules/typescript/lib/lib.es2015.core.d.ts","../node_modules/typescript/lib/lib.es2015.collection.d.ts","../node_modules/typescript/lib/lib.es2015.generator.d.ts","../node_modules/typescript/lib/lib.es2015.iterable.d.ts","../node_modules/typescript/lib/lib.es2015.promise.d.ts","../node_modules/typescript/lib/lib.es2015.proxy.d.ts","../node_modules/typescript/lib/lib.es2015.reflect.d.ts","../node_modules/typescript/lib/lib.es2015.symbol.d.ts","../node_modules/typescript/lib/lib.es2015.symbol.wellknown.d.ts","../node_modules/typescript/lib/lib.decorators.d.ts","../node_modules/typescript/lib/lib.decorators.legacy.d.ts","../src/index.ts"],"fileInfos":[{"version":"f59215c5f1d886b05395ee7aca73e0ac69ddfad2843aa88530e797879d511bad","affectsGlobalScope":true},"45b7ab580deca34ae9729e97c13cfd999df04416a79116c3bfb483804f85ded4",{"version":"9d9885c728913c1d16e0d2831b40341d6ad9a0ceecaabc55209b306ad9c736a5","affectsGlobalScope":true},{"version":"17bea081b9c0541f39dd1ae9bc8c78bdd561879a682e60e2f25f688c0ecab248","affectsGlobalScope":true},{"version":"4443e68b35f3332f753eacc66a04ac1d2053b8b035a0e0ac1d455392b5e243b3","affectsGlobalScope":true},{"version":"ab22100fdd0d24cfc2cc59d0a00fc8cf449830d9c4030dc54390a46bd562e929","affectsGlobalScope":true},{"version":"f7bd636ae3a4623c503359ada74510c4005df5b36de7f23e1db8a5c543fd176b","affectsGlobalScope":true},{"version":"ce691fb9e5c64efb9547083e4a34091bcbe5bdb41027e310ebba8f7d96a98671","affectsGlobalScope":true},{"version":"8d697a2a929a5fcb38b7a65594020fcef05ec1630804a33748829c5ff53640d0","affectsGlobalScope":true},{"version":"0c20f4d2358eb679e4ae8a4432bdd96c857a2960fd6800b21ec4008ec59d60ea","affectsGlobalScope":true},{"version":"36ae84ccc0633f7c0787bc6108386c8b773e95d3b052d9464a99cd9b8795fbec","affectsGlobalScope":true},{"version":"189c0703923150aa30673fa3de411346d727cc44a11c75d05d7cf9ef095daa22","affectsGlobalScop
e":true},{"version":"782dec38049b92d4e85c1585fbea5474a219c6984a35b004963b00beb1aab538","affectsGlobalScope":true},{"version":"0b0efe6cf1c03e6fcdb6e0d4650267739d78670d17e77310d8e4930d91671622","signature":"b4370986352e9c14688509ded0252bf8daa86bdcff5f2c2f7c8b2f7514fec525"}],"root":[14],"options":{"declaration":true,"esModuleInterop":true,"module":1,"outDir":"./","strict":true,"target":2},"referencedMap":[],"exportedModulesMap":[],"semanticDiagnosticsPerFile":[12,13,4,3,2,5,6,7,8,9,10,11,1,14]},"version":"5.1.6"}
|
package/package.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "braintrust",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "SDK for integrating BrainTrust",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"build": "tsc",
|
|
8
|
+
"test": "jest"
|
|
9
|
+
},
|
|
10
|
+
"author": "",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"devDependencies": {
|
|
13
|
+
"typescript": "^5.1.6"
|
|
14
|
+
}
|
|
15
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
 * Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
 *
 * NOTE: the current implementation is a stub. It only forwards `project` to `Experiment`; no login is performed and
 * the options are not read yet.
 *
 * @param project The name of the project to create the experiment in.
 * @param options Optional settings. The whole object may be omitted, e.g. `init("my-project")`.
 * @param options.experiment The name of the experiment to create. If not specified, a name will be generated automatically.
 * @param options.description An optional description of the experiment.
 * @param options.base_experiment An optional experiment name to use as a base. If specified, the new experiment will be summarized and compared to this
 * experiment. Otherwise, it will pick an experiment by finding the closest ancestor on the default (e.g. main) branch.
 * @param options.api_url The URL of the BrainTrust API. Defaults to https://www.braintrustdata.com.
 * @param options.api_key The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
 * key is specified, will prompt the user to log in.
 * @param options.org_name (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
 * @param options.disable_cache Do not use cached login information.
 * @returns The experiment object.
 */
export function init(
  project: string,
  {
    experiment,
  }: {
    readonly experiment?: string;
    readonly description?: string;
    readonly base_experiment?: string;
    readonly api_url?: string;
    readonly api_key?: string;
    readonly org_name?: string;
    readonly disable_cache?: boolean;
  } = {} // Default keeps `init("project")` from throwing when the options are omitted.
): Experiment {
  // TODO
  return new Experiment(project);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Handle for a single experiment inside a project. Provides methods to log events and to summarize the experiment.
 */
export class Experiment {
  // Assigned once in the constructor and never reassigned.
  private readonly _project: string;

  /**
   * @param project The project name; it is stored and exposed again through the `project` getter.
   */
  constructor(project: string) {
    this._project = project;
  }

  /** The project name this experiment was constructed with. */
  get project(): string {
    return this._project;
  }

  /**
   * Log a single event to the experiment. The event will be batched and uploaded behind the scenes.
   *
   * NOTE: the current implementation is a placeholder that uploads nothing and always returns `"foo"`.
   *
   * @param values
   * @param values.inputs The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
   * BrainTrust will use the `inputs` to know whether two test cases are the same between experiments, so they should
   * not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
   * `inputs` should be identical.
   * @param values.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
   * that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
   * the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
   * be multiple valid queries that answer a single question.
   * @param values.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
   * determine if your `output` value is correct or not. BrainTrust currently does not compare `output` to `expected` for
   * you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
   * navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
   * fine-tune your models.
   * @param values.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
   * that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
   * summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
   * between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was
   * covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
   * @param values.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
   * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
   * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
   * JSON-serializable type, but its keys must be strings.
   * @returns The `id` of the logged event.
   */
  log({
    inputs,
    output,
    expected,
    scores,
    metadata,
  }: {
    readonly inputs: unknown;
    readonly output: unknown;
    readonly expected: unknown;
    readonly scores: Record<string, number>;
    readonly metadata?: Record<string, unknown>;
  }): string {
    // TODO
    // Reference the fields so they count as used until the real upload logic exists.
    void [inputs, output, expected, scores, metadata];

    return "foo";
  }

  /**
   * Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
   *
   * NOTE: the current implementation is a placeholder that ignores `options` and always returns `"Summary!"`.
   *
   * @param options Optional settings; may be omitted entirely.
   * @param options.summarizeScores Whether to summarize the scores. If `false`, only the metadata will be returned.
   * @param options.comparisonExperimentId The experiment to compare against. If omitted, the most recent experiment on
   * the origin's main branch will be used.
   * @returns The experiment summary as a string.
   */
  summarize(options?: {
    readonly summarizeScores?: boolean;
    readonly comparisonExperimentId?: string;
  }): string {
    // TODO
    void options;
    return "Summary!";
  }
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"incremental": true,
|
|
4
|
+
"declaration": true,
|
|
5
|
+
"outDir": "./dist",
|
|
6
|
+
"lib": ["es6"],
|
|
7
|
+
"module": "commonjs",
|
|
8
|
+
"target": "es6",
|
|
9
|
+
"moduleResolution": "node",
|
|
10
|
+
"strict": true,
|
|
11
|
+
"esModuleInterop": true
|
|
12
|
+
},
|
|
13
|
+
"include": ["src"],
|
|
14
|
+
"exclude": ["node_modules/**"]
|
|
15
|
+
}
|