braintrust 0.0.56 → 0.0.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +5 -4
- package/dist/cli.js +859 -382
- package/dist/framework.d.ts +2 -1
- package/dist/index.js +733 -243
- package/dist/isomorph.d.ts +12 -0
- package/dist/logger.d.ts +331 -90
- package/dist/merge_row_batch.d.ts +1 -0
- package/dist/stackutil.d.ts +8 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/util.d.ts +4 -0
- package/package.json +2 -2
package/dist/isomorph.d.ts
CHANGED
|
@@ -9,11 +9,23 @@ export interface RepoStatus {
|
|
|
9
9
|
commit_message?: string;
|
|
10
10
|
commit_time?: string;
|
|
11
11
|
}
|
|
12
|
+
export interface CallerLocation {
|
|
13
|
+
caller_functionname: string;
|
|
14
|
+
caller_filename: string;
|
|
15
|
+
caller_lineno: number;
|
|
16
|
+
}
|
|
17
|
+
export interface IsoAsyncLocalStorage<T> {
|
|
18
|
+
enterWith(store: T): void;
|
|
19
|
+
run<R>(store: T | undefined, callback: () => R): R;
|
|
20
|
+
getStore(): T | undefined;
|
|
21
|
+
}
|
|
12
22
|
export interface Common {
|
|
13
23
|
makeAxios: (conf: CreateAxiosDefaults) => AxiosInstance;
|
|
14
24
|
getRepoStatus: () => Promise<RepoStatus | undefined>;
|
|
15
25
|
getPastNAncestors: () => Promise<string[]>;
|
|
16
26
|
getEnv: (name: string) => string | undefined;
|
|
27
|
+
getCallerLocation: () => CallerLocation | undefined;
|
|
28
|
+
newAsyncLocalStorage: <T>() => IsoAsyncLocalStorage<T>;
|
|
17
29
|
}
|
|
18
30
|
declare const iso: Common;
|
|
19
31
|
export default iso;
|
package/dist/logger.d.ts
CHANGED
|
@@ -1,18 +1,214 @@
|
|
|
1
|
+
import { AxiosInstance } from "axios";
|
|
2
|
+
import { IsoAsyncLocalStorage } from "./isomorph";
|
|
3
|
+
import { IS_MERGE_FIELD } from "./util";
|
|
4
|
+
export type SetCurrentArg = {
|
|
5
|
+
setCurrent?: boolean;
|
|
6
|
+
};
|
|
7
|
+
export type StartSpanArgs = {
|
|
8
|
+
spanAttributes?: Record<any, any>;
|
|
9
|
+
startTime?: number;
|
|
10
|
+
event?: ExperimentLogPartialArgs & Partial<IdField>;
|
|
11
|
+
};
|
|
12
|
+
export type StartSpanOptionalNameArgs = StartSpanArgs & {
|
|
13
|
+
name?: string;
|
|
14
|
+
};
|
|
15
|
+
export type EndSpanArgs = {
|
|
16
|
+
endTime?: number;
|
|
17
|
+
};
|
|
18
|
+
/**
|
|
19
|
+
* A Span encapsulates logged data and metrics for a unit of work. This interface is shared by all span implementations.
|
|
20
|
+
*
|
|
21
|
+
* We suggest using one of the various `startSpan` methods, instead of creating Spans directly. See `Span.startSpan` for full details.
|
|
22
|
+
*/
|
|
23
|
+
export interface Span {
|
|
24
|
+
/**
|
|
25
|
+
* Row ID of the span.
|
|
26
|
+
*/
|
|
27
|
+
id: string;
|
|
28
|
+
/**
|
|
29
|
+
* Span ID of the span. This is used to link spans together.
|
|
30
|
+
*/
|
|
31
|
+
span_id: string;
|
|
32
|
+
/**
|
|
33
|
+
* Span ID of the root span in the full trace.
|
|
34
|
+
*/
|
|
35
|
+
root_span_id: string;
|
|
36
|
+
/**
|
|
37
|
+
* Incrementally update the current span with new data. The event will be batched and uploaded behind the scenes.
|
|
38
|
+
*
|
|
39
|
+
* @param event: Data to be logged. See `Experiment.log` for full details.
|
|
40
|
+
*/
|
|
41
|
+
log(event: ExperimentLogPartialArgs): void;
|
|
42
|
+
/**
|
|
43
|
+
* Create a new span. This is useful if you want to log more detailed trace information beyond the scope of a single log event. Data logged over several calls to `Span.log` will be merged into one logical row.
|
|
44
|
+
*
|
|
45
|
+
* We recommend running spans within a callback (using `traced`) to automatically mark them as current and ensure they are terminated. If you wish to start a span outside a callback, be sure to terminate it with `span.end()`.
|
|
46
|
+
*
|
|
47
|
+
* @param name The name of the span.
|
|
48
|
+
* @param args.spanAttributes Optional additional attributes to attach to the span, such as a type name.
|
|
49
|
+
* @param args.startTime Optional start time of the span, as a timestamp in seconds.
|
|
50
|
+
* @param args.event Data to be logged. See `Experiment.log` for full details.
|
|
51
|
+
* @returns The newly-created `Span`
|
|
52
|
+
*/
|
|
53
|
+
startSpan(name: string, args?: StartSpanArgs): Span;
|
|
54
|
+
/**
|
|
55
|
+
* Wrapper over `Span.startSpan`, which passes the initialized `Span` to the given callback and ends it afterwards. See `Span.startSpan` for full details.
|
|
56
|
+
*
|
|
57
|
+
* @param args.setCurrent If true (the default), the span will be marked as the currently-active span for the duration of the callback. Equivalent to calling `braintrust.withCurrent(span, callback)`.
|
|
58
|
+
*/
|
|
59
|
+
traced<R>(name: string, callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
|
|
60
|
+
/**
|
|
61
|
+
* Terminate the span. Returns the end time logged to the row's metrics. After calling end, you may not invoke any further methods on the span object, except for the property accessors.
|
|
62
|
+
*
|
|
63
|
+
* Will be invoked automatically if the span is constructed with traced.
|
|
64
|
+
*
|
|
65
|
+
* @param args.endTime Optional end time of the span, as a timestamp in seconds.
|
|
66
|
+
* @returns The end time logged to the span metrics.
|
|
67
|
+
*/
|
|
68
|
+
end(args?: EndSpanArgs): number;
|
|
69
|
+
/**
|
|
70
|
+
* Alias for `end`.
|
|
71
|
+
*/
|
|
72
|
+
close(args?: EndSpanArgs): number;
|
|
73
|
+
kind: "span";
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* A fake implementation of the Span API which does nothing. This can be used as the default span.
|
|
77
|
+
*/
|
|
78
|
+
export declare class NoopSpan implements Span {
|
|
79
|
+
id: string;
|
|
80
|
+
span_id: string;
|
|
81
|
+
root_span_id: string;
|
|
82
|
+
kind: "span";
|
|
83
|
+
constructor();
|
|
84
|
+
log(_: ExperimentLogPartialArgs): void;
|
|
85
|
+
startSpan(_0: string, _1?: StartSpanArgs): this;
|
|
86
|
+
traced<R>(_0: string, callback: (span: Span) => R, _1: StartSpanArgs & SetCurrentArg): R;
|
|
87
|
+
end(args?: EndSpanArgs): number;
|
|
88
|
+
close(args?: EndSpanArgs): number;
|
|
89
|
+
}
|
|
90
|
+
export declare const noopSpan: NoopSpan;
|
|
91
|
+
declare global {
|
|
92
|
+
var __inherited_braintrust_state: BraintrustState;
|
|
93
|
+
}
|
|
94
|
+
declare class BraintrustState {
|
|
95
|
+
id: string;
|
|
96
|
+
currentExperiment: IsoAsyncLocalStorage<Experiment | undefined>;
|
|
97
|
+
currentSpan: IsoAsyncLocalStorage<Span>;
|
|
98
|
+
apiUrl: string | null;
|
|
99
|
+
loginToken: string | null;
|
|
100
|
+
orgId: string | null;
|
|
101
|
+
orgName: string | null;
|
|
102
|
+
logUrl: string | null;
|
|
103
|
+
loggedIn: boolean;
|
|
104
|
+
private _apiConn;
|
|
105
|
+
private _logConn;
|
|
106
|
+
private _userInfo;
|
|
107
|
+
constructor();
|
|
108
|
+
apiConn(): HTTPConnection;
|
|
109
|
+
logConn(): HTTPConnection;
|
|
110
|
+
userInfo(): Promise<UserInfo>;
|
|
111
|
+
setUserInfoIfNull(info: UserInfo): void;
|
|
112
|
+
}
|
|
113
|
+
export declare const _internalGetGlobalState: () => BraintrustState;
|
|
114
|
+
declare class HTTPConnection {
|
|
115
|
+
base_url: string;
|
|
116
|
+
token: string | null;
|
|
117
|
+
session: AxiosInstance | null;
|
|
118
|
+
constructor(base_url: string);
|
|
119
|
+
ping(): Promise<boolean>;
|
|
120
|
+
make_long_lived(): void;
|
|
121
|
+
static sanitize_token(token: string): string;
|
|
122
|
+
set_token(token: string): void;
|
|
123
|
+
_reset(): void;
|
|
124
|
+
get(path: string, params?: unknown | undefined): Promise<import("axios").AxiosResponse<any, any>>;
|
|
125
|
+
post(path: string, params?: unknown | undefined, config?: any): Promise<import("axios").AxiosResponse<any, any>>;
|
|
126
|
+
get_json(object_type: string, args?: unknown | undefined, retries?: number): Promise<any>;
|
|
127
|
+
post_json(object_type: string, args?: unknown | undefined): Promise<any>;
|
|
128
|
+
}
|
|
129
|
+
interface UserInfo {
|
|
130
|
+
id: string;
|
|
131
|
+
}
|
|
1
132
|
export declare class Project {
|
|
2
133
|
name: string;
|
|
3
134
|
id: string;
|
|
4
135
|
org_id: string;
|
|
5
136
|
constructor(name: string, id: string, org_id: string);
|
|
6
137
|
}
|
|
138
|
+
export type IdField = {
|
|
139
|
+
id: string;
|
|
140
|
+
};
|
|
141
|
+
export type InputField = {
|
|
142
|
+
input: unknown;
|
|
143
|
+
};
|
|
144
|
+
export type InputsField = {
|
|
145
|
+
inputs: unknown;
|
|
146
|
+
};
|
|
147
|
+
export type OtherExperimentLogFields = {
|
|
148
|
+
output: unknown;
|
|
149
|
+
expected: unknown;
|
|
150
|
+
scores: Record<string, number>;
|
|
151
|
+
metadata: Record<string, unknown>;
|
|
152
|
+
metrics: Record<string, unknown>;
|
|
153
|
+
datasetRecordId: string;
|
|
154
|
+
};
|
|
155
|
+
export type ExperimentLogPartialArgs = Partial<OtherExperimentLogFields> & Partial<InputField | InputsField>;
|
|
156
|
+
export type ExperimentLogFullArgs = Partial<Omit<OtherExperimentLogFields, "scores">> & Required<Pick<OtherExperimentLogFields, "scores">> & Partial<InputField | InputsField> & Partial<IdField>;
|
|
157
|
+
type ExperimentEvent = Partial<InputField> & Partial<OtherExperimentLogFields> & {
|
|
158
|
+
id: string;
|
|
159
|
+
span_id: string;
|
|
160
|
+
root_span_id: string;
|
|
161
|
+
project_id: string;
|
|
162
|
+
experiment_id: string;
|
|
163
|
+
[IS_MERGE_FIELD]: boolean;
|
|
164
|
+
} & Partial<{
|
|
165
|
+
user_id: string;
|
|
166
|
+
created: string;
|
|
167
|
+
span_parents: string[];
|
|
168
|
+
span_attributes: Record<string, unknown>;
|
|
169
|
+
}>;
|
|
170
|
+
interface DatasetEvent {
|
|
171
|
+
inputs?: unknown;
|
|
172
|
+
output?: unknown;
|
|
173
|
+
metadata?: unknown;
|
|
174
|
+
id: string;
|
|
175
|
+
project_id: string;
|
|
176
|
+
dataset_id: string;
|
|
177
|
+
user_id: string;
|
|
178
|
+
created: string;
|
|
179
|
+
}
|
|
180
|
+
type LogEvent = ExperimentEvent | DatasetEvent;
|
|
7
181
|
export interface DatasetRecord {
|
|
8
182
|
id: string;
|
|
9
183
|
input: any;
|
|
10
184
|
output: any;
|
|
11
185
|
metadata: any;
|
|
12
186
|
}
|
|
187
|
+
declare class LogThread {
|
|
188
|
+
private items;
|
|
189
|
+
private active_flush;
|
|
190
|
+
private active_flush_resolved;
|
|
191
|
+
log(items: LogEvent[]): void;
|
|
192
|
+
flush_once(batchSize?: number): Promise<string[]>;
|
|
193
|
+
flush(): Promise<void>;
|
|
194
|
+
}
|
|
195
|
+
export type InitOptions = {
|
|
196
|
+
experiment?: string;
|
|
197
|
+
description?: string;
|
|
198
|
+
dataset?: Dataset;
|
|
199
|
+
update?: boolean;
|
|
200
|
+
baseExperiment?: string;
|
|
201
|
+
isPublic?: boolean;
|
|
202
|
+
apiUrl?: string;
|
|
203
|
+
apiKey?: string;
|
|
204
|
+
orgName?: string;
|
|
205
|
+
disableCache?: boolean;
|
|
206
|
+
};
|
|
13
207
|
/**
|
|
14
208
|
* Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
|
|
15
209
|
*
|
|
210
|
+
* Remember to close your experiment when it is finished by calling `Experiment.close`. We recommend initializing the experiment within a callback (using `braintrust.withExperiment`) to automatically mark it as current and ensure it is terminated.
|
|
211
|
+
*
|
|
16
212
|
* @param project The name of the project to create the experiment in.
|
|
17
213
|
* @param options Additional options for configuring init().
|
|
18
214
|
* @param options.experiment The name of the experiment to create. If not specified, a name will be generated automatically.
|
|
@@ -30,21 +226,27 @@ export interface DatasetRecord {
|
|
|
30
226
|
* @param options.disableCache Do not use cached login information.
|
|
31
227
|
* @returns The newly created Experiment.
|
|
32
228
|
*/
|
|
33
|
-
export declare function init(project: string, options?:
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
229
|
+
export declare function init(project: string, options?: Readonly<InitOptions>): Promise<Experiment>;
|
|
230
|
+
/**
|
|
231
|
+
* Wrapper over `braintrust.init`, which passes the initialized `Experiment` to the given callback and closes it afterwards. See `braintrust.init` for full details.
|
|
232
|
+
*
|
|
233
|
+
* @param options.setCurrent If true (default), set the currently-active experiment to the newly-created one. Equivalent to calling `braintrust.withCurrent(experiment, callback)`.
|
|
234
|
+
*/
|
|
235
|
+
export declare function withExperiment<R>(project: string, callback: (experiment: Experiment) => R, options?: Readonly<InitOptions & SetCurrentArg>): Promise<R>;
|
|
236
|
+
type InitDatasetOptions = {
|
|
237
|
+
dataset?: string;
|
|
238
|
+
description?: string;
|
|
239
|
+
version?: string;
|
|
240
|
+
apiUrl?: string;
|
|
241
|
+
apiKey?: string;
|
|
242
|
+
orgName?: string;
|
|
243
|
+
disableCache?: boolean;
|
|
244
|
+
};
|
|
45
245
|
/**
|
|
46
246
|
* Create a new dataset in a specified project. If the project does not exist, it will be created.
|
|
47
247
|
*
|
|
248
|
+
* Remember to close your dataset when it is finished by calling `Dataset.close`. We recommend initializing the dataset within a callback (using `braintrust.withDataset`) to ensure it is terminated.
|
|
249
|
+
*
|
|
48
250
|
* @param project The name of the project to create the dataset in.
|
|
49
251
|
* @param options Additional options for configuring init().
|
|
50
252
|
* @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically.
|
|
@@ -56,15 +258,11 @@ export declare function init(project: string, options?: {
|
|
|
56
258
|
* @param options.disableCache Do not use cached login information.
|
|
57
259
|
* @returns The newly created Dataset.
|
|
58
260
|
*/
|
|
59
|
-
export declare function initDataset(project: string, options?:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
readonly apiKey?: string;
|
|
65
|
-
readonly orgName?: string;
|
|
66
|
-
readonly disableCache?: boolean;
|
|
67
|
-
}): Promise<Dataset>;
|
|
261
|
+
export declare function initDataset(project: string, options?: Readonly<InitDatasetOptions>): Promise<Dataset>;
|
|
262
|
+
/**
|
|
263
|
+
* Wrapper over `braintrust.initDataset`, which passes the initialized `Dataset` to the given callback and closes it afterwards. See `braintrust.initDataset` for full details.
|
|
264
|
+
*/
|
|
265
|
+
export declare function withDataset<R>(project: string, callback: (dataset: Dataset) => R, options?: Readonly<InitDatasetOptions>): Promise<R>;
|
|
68
266
|
/**
|
|
69
267
|
* Log into Braintrust. This will prompt you for your API token, which you can find at
|
|
70
268
|
* https://www.braintrustdata.com/app/token. This method is called automatically by `init()`.
|
|
@@ -87,42 +285,10 @@ export declare function login(options?: {
|
|
|
87
285
|
/**
|
|
88
286
|
* Log a single event to the current experiment. The event will be batched and uploaded behind the scenes.
|
|
89
287
|
*
|
|
90
|
-
* @param event The event to log.
|
|
91
|
-
* @param event.input The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
|
|
92
|
-
* Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should
|
|
93
|
-
* not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the
|
|
94
|
-
* `input` should be identical.
|
|
95
|
-
* @param event.output The output of your application, including post-processing (an arbitrary, JSON serializable object),
|
|
96
|
-
* that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries,
|
|
97
|
-
* the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may
|
|
98
|
-
* be multiple valid queries that answer a single question.
|
|
99
|
-
* @param event.expected The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to
|
|
100
|
-
* determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for
|
|
101
|
-
* you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
|
|
102
|
-
* navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
|
|
103
|
-
* fine-tune your models.
|
|
104
|
-
* @param event.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
|
|
105
|
-
* that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
|
|
106
|
-
* summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
|
|
107
|
-
* between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was
|
|
108
|
-
* covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
109
|
-
* @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
|
|
110
|
-
* about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
|
|
111
|
-
* `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
|
|
112
|
-
* JSON-serializable type, but its keys must be strings.
|
|
113
|
-
* @param event.id (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
|
|
114
|
-
* @param event.inputs (Deprecated) the same as `input` (will be removed in a future version)
|
|
288
|
+
* @param event The event to log. See `Experiment.log` for full details.
|
|
115
289
|
* @returns The `id` of the logged event.
|
|
116
290
|
*/
|
|
117
|
-
export declare function log(
|
|
118
|
-
readonly input?: unknown;
|
|
119
|
-
readonly output: unknown;
|
|
120
|
-
readonly expected?: unknown;
|
|
121
|
-
readonly scores: Record<string, number>;
|
|
122
|
-
readonly metadata?: Record<string, unknown>;
|
|
123
|
-
readonly id?: string;
|
|
124
|
-
readonly inputs?: unknown;
|
|
125
|
-
}): string;
|
|
291
|
+
export declare function log(event: ExperimentLogFullArgs): string;
|
|
126
292
|
/**
|
|
127
293
|
* Summarize the current experiment, including the scores (compared to the closest reference experiment) and metadata.
|
|
128
294
|
*
|
|
@@ -135,6 +301,37 @@ export declare function summarize(options?: {
|
|
|
135
301
|
readonly summarizeScores?: boolean;
|
|
136
302
|
readonly comparisonExperimentId?: string;
|
|
137
303
|
}): Promise<ExperimentSummary>;
|
|
304
|
+
/**
|
|
305
|
+
* Returns the currently-active experiment (set by `braintrust.withExperiment` or `braintrust.withCurrent`). Returns undefined if no current experiment has been set.
|
|
306
|
+
*/
|
|
307
|
+
export declare function currentExperiment(): Experiment | undefined;
|
|
308
|
+
/**
|
|
309
|
+
* Return the currently-active span for logging (set by `traced` or `braintrust.withCurrent`). If there is no active span, returns a no-op span object, which supports the same interface as spans but does no logging.
|
|
310
|
+
*
|
|
311
|
+
* See `Span` for full details.
|
|
312
|
+
*/
|
|
313
|
+
export declare function currentSpan(): Span;
|
|
314
|
+
/**
|
|
315
|
+
* Toplevel function for starting a span. If there is a currently-active span, the new span is created as a subspan. Otherwise, if there is a currently-active experiment, the new span is created as a toplevel span. Otherwise, it returns a no-op span object.
|
|
316
|
+
*
|
|
317
|
+
* Unless a name is explicitly provided, the name of the span will be the name of the calling function, or "root" if no meaningful name can be determined.
|
|
318
|
+
*
|
|
319
|
+
* We recommend running spans within a callback (using `traced`) to automatically mark them as current and ensure they are terminated. If you wish to start a span outside a callback, be sure to terminate it with `span.end()`.
|
|
320
|
+
*
|
|
321
|
+
* See `Span.startSpan` for full details.
|
|
322
|
+
*/
|
|
323
|
+
export declare function startSpan(args?: StartSpanOptionalNameArgs): Span;
|
|
324
|
+
/**
|
|
325
|
+
* Wrapper over `braintrust.startSpan`, which passes the initialized `Span` to the given callback and ends it afterwards. See `Span.traced` for full details.
|
|
326
|
+
*/
|
|
327
|
+
export declare function traced<R>(callback: (span: Span) => R, args?: StartSpanOptionalNameArgs & SetCurrentArg): R;
|
|
328
|
+
/**
|
|
329
|
+
* Set the given experiment or span as current within the given callback and any asynchronous operations created within the callback. The current experiment can be accessed with `braintrust.currentExperiment`, and the current span with `braintrust.currentSpan`.
|
|
330
|
+
*
|
|
331
|
+
* @param object: The experiment or span to be marked as current.
|
|
332
|
+
* @param callback: The callback to be run under the scope of the current object.
|
|
333
|
+
*/
|
|
334
|
+
export declare function withCurrent<R>(object: Experiment | Span, callback: () => R): R;
|
|
138
335
|
/**
|
|
139
336
|
* An experiment is a collection of logged events, such as model inputs and outputs, which represent
|
|
140
337
|
* a snapshot of your application at a particular point in time. An experiment is meant to capture more
|
|
@@ -154,47 +351,36 @@ export declare class Experiment {
|
|
|
154
351
|
readonly user_id: string;
|
|
155
352
|
readonly dataset?: Dataset;
|
|
156
353
|
private logger;
|
|
354
|
+
private lastStartTime;
|
|
355
|
+
private finished;
|
|
356
|
+
kind: "experiment";
|
|
157
357
|
constructor(project: Project, id: string, name: string, user_id: string, dataset?: Dataset);
|
|
158
358
|
/**
|
|
159
359
|
* Log a single event to the experiment. The event will be batched and uploaded behind the scenes.
|
|
160
360
|
*
|
|
161
361
|
* @param event The event to log.
|
|
162
|
-
* @param event.input The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on,
|
|
163
|
-
*
|
|
164
|
-
*
|
|
165
|
-
*
|
|
166
|
-
* @param event.
|
|
167
|
-
*
|
|
168
|
-
*
|
|
169
|
-
*
|
|
170
|
-
* @param event.
|
|
171
|
-
*
|
|
172
|
-
* you, since there are so many different ways to do that correctly. Instead, these values are just used to help you
|
|
173
|
-
* navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or
|
|
174
|
-
* fine-tune your models.
|
|
175
|
-
* @param event.scores A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals
|
|
176
|
-
* that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a
|
|
177
|
-
* summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity
|
|
178
|
-
* between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was
|
|
179
|
-
* covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
180
|
-
* @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
|
|
181
|
-
* about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
|
|
182
|
-
* `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
|
|
183
|
-
* JSON-serializable type, but its keys must be strings.
|
|
184
|
-
* @param event.id (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
|
|
185
|
-
* @param event.inputs (Deprecated) the same as `input` (will be removed in a future version)
|
|
186
|
-
* @returns The `id` of the logged event.
|
|
362
|
+
* @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
|
|
363
|
+
* @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
364
|
+
* @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
365
|
+
* @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
366
|
+
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
367
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically and should not be specified: "start", "end", "caller_functionname", "caller_filename", "caller_lineno".
|
|
368
|
+
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
|
|
369
|
+
* @param event.datasetRecordId: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
370
|
+
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
371
|
+
* @returns The `id` of the logged event.
|
|
187
372
|
*/
|
|
188
|
-
log(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
373
|
+
log(event: Readonly<ExperimentLogFullArgs>): string;
|
|
374
|
+
/**
|
|
375
|
+
* Create a new toplevel span. The name parameter is optional and defaults to "root".
|
|
376
|
+
*
|
|
377
|
+
* See `Span.startSpan` for full details.
|
|
378
|
+
*/
|
|
379
|
+
startSpan(args?: StartSpanOptionalNameArgs): Span;
|
|
380
|
+
/**
|
|
381
|
+
* Wrapper over `Experiment.startSpan`, which passes the initialized `Span` to the given callback and ends it afterwards. See `Span.traced` for full details.
|
|
382
|
+
*/
|
|
383
|
+
traced<R>(callback: (span: Span) => R, args?: StartSpanOptionalNameArgs & SetCurrentArg): R;
|
|
198
384
|
/**
|
|
199
385
|
* Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
|
|
200
386
|
*
|
|
@@ -207,6 +393,50 @@ export declare class Experiment {
|
|
|
207
393
|
readonly summarizeScores?: boolean;
|
|
208
394
|
readonly comparisonExperimentId?: string;
|
|
209
395
|
}): Promise<ExperimentSummary>;
|
|
396
|
+
/**
|
|
397
|
+
* Finish the experiment and return its id. After calling close, you may not invoke any further methods on the experiment object.
|
|
398
|
+
*
|
|
399
|
+
* Will be invoked automatically if the experiment is wrapped in a callback passed to `braintrust.withExperiment`.
|
|
400
|
+
*
|
|
401
|
+
* @returns The experiment id.
|
|
402
|
+
*/
|
|
403
|
+
close(): Promise<string>;
|
|
404
|
+
private checkNotFinished;
|
|
405
|
+
}
|
|
406
|
+
/**
|
|
407
|
+
* Primary implementation of the `Span` interface. See the `Span` interface for full details on each method.
|
|
408
|
+
*
|
|
409
|
+
* We suggest using one of the various `startSpan` methods, instead of creating Spans directly. See `Span.startSpan` for full details.
|
|
410
|
+
*/
|
|
411
|
+
export declare class SpanImpl implements Span {
|
|
412
|
+
private finished;
|
|
413
|
+
private experimentLogger;
|
|
414
|
+
private internalData;
|
|
415
|
+
private isMerge;
|
|
416
|
+
id: string;
|
|
417
|
+
span_id: string;
|
|
418
|
+
root_span_id: string;
|
|
419
|
+
private _project_id;
|
|
420
|
+
private _experiment_id;
|
|
421
|
+
kind: "span";
|
|
422
|
+
constructor(args: {
|
|
423
|
+
experimentLogger: LogThread;
|
|
424
|
+
name: string;
|
|
425
|
+
spanAttributes?: Record<any, any>;
|
|
426
|
+
startTime?: number;
|
|
427
|
+
setCurrent?: boolean;
|
|
428
|
+
event?: ExperimentLogPartialArgs & Partial<IdField>;
|
|
429
|
+
} & ({
|
|
430
|
+
rootExperiment: Experiment;
|
|
431
|
+
} | {
|
|
432
|
+
parentSpan: SpanImpl;
|
|
433
|
+
}));
|
|
434
|
+
log(event: ExperimentLogPartialArgs): void;
|
|
435
|
+
startSpan(name: string, args?: StartSpanArgs): Span;
|
|
436
|
+
traced<R>(name: string, callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
|
|
437
|
+
end(args?: EndSpanArgs): number;
|
|
438
|
+
close(args?: EndSpanArgs): number;
|
|
439
|
+
private checkNotFinished;
|
|
210
440
|
}
|
|
211
441
|
/**
|
|
212
442
|
* A dataset is a collection of records, such as model inputs and outputs, which represent
|
|
@@ -223,6 +453,7 @@ export declare class Dataset {
|
|
|
223
453
|
private pinnedVersion?;
|
|
224
454
|
private _fetchedData?;
|
|
225
455
|
private logger;
|
|
456
|
+
private finished;
|
|
226
457
|
constructor(project: Project, id: string, name: string, user_id: string, pinnedVersion?: string);
|
|
227
458
|
/**
|
|
228
459
|
* Insert a single record to the dataset. The record will be batched and uploaded behind the scenes. If you pass in an `id`,
|
|
@@ -288,6 +519,15 @@ export declare class Dataset {
|
|
|
288
519
|
fetchedData(): Promise<any[]>;
|
|
289
520
|
clearCache(): void;
|
|
290
521
|
version(): Promise<any>;
|
|
522
|
+
/**
|
|
523
|
+
* Terminate connection to the dataset and return its id. After calling close, you may not invoke any further methods on the dataset object.
|
|
524
|
+
*
|
|
525
|
+
* Will be invoked automatically if the dataset is wrapped in a callback passed to `braintrust.withDataset`.
|
|
526
|
+
*
|
|
527
|
+
* @returns The dataset id.
|
|
528
|
+
*/
|
|
529
|
+
close(): Promise<string>;
|
|
530
|
+
private checkNotFinished;
|
|
291
531
|
}
|
|
292
532
|
/**
|
|
293
533
|
* Summary of a score's performance.
|
|
@@ -347,3 +587,4 @@ export interface DatasetSummary {
|
|
|
347
587
|
datasetUrl: string;
|
|
348
588
|
dataSummary: DataSummary;
|
|
349
589
|
}
|
|
590
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function mergeRowBatch(rows: Record<string, any>[]): Record<string, any>[];
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { CallerLocation } from "./isomorph";
|
|
2
|
+
export interface StackTraceEntry {
|
|
3
|
+
functionName: string;
|
|
4
|
+
fileName: string;
|
|
5
|
+
lineNo: number;
|
|
6
|
+
}
|
|
7
|
+
export declare function getStackTrace(): StackTraceEntry[];
|
|
8
|
+
export declare function getCallerLocation(): CallerLocation | undefined;
|