braintrust 0.0.140 → 0.0.141-dev

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.d.ts CHANGED
@@ -1,2 +1,1089 @@
1
- export * from "./logger";
2
- export * from "./wrappers/oai";
1
+ import { GitMetadataSettings, LogFeedbackFullArgs, BackgroundLogEvent, ExperimentEvent, ExperimentLogFullArgs, ExperimentLogPartialArgs, IdField, SpanType, RepoInfo, DEFAULT_IS_LEGACY_DATASET, TRANSACTION_ID_FIELD, TransactionId, SpanObjectTypeV2, DatasetRecord } from '@braintrust/core';
2
+ import { PromptData, OpenAIMessage, Tools, AnyModelParam, Prompt as Prompt$1, PromptSessionEvent, FunctionId } from '@braintrust/core/typespecs';
3
+ import { z } from 'zod';
4
+
5
+ interface IsoAsyncLocalStorage<T> {
6
+ enterWith(store: T): void;
7
+ run<R>(store: T | undefined, callback: () => R): R;
8
+ getStore(): T | undefined;
9
+ }
10
+
11
+ declare class LazyValue<T> {
12
+ private callable;
13
+ private value;
14
+ constructor(callable: () => Promise<T>);
15
+ get(): Promise<T>;
16
+ get hasComputed(): boolean;
17
+ }
18
+
19
+ /// <reference lib="dom" />
20
+
21
+ type SetCurrentArg = {
22
+ setCurrent?: boolean;
23
+ };
24
+ type StartSpanEventArgs = ExperimentLogPartialArgs & Partial<IdField>;
25
+ type StartSpanArgs = {
26
+ name?: string;
27
+ type?: SpanType;
28
+ spanAttributes?: Record<any, any>;
29
+ startTime?: number;
30
+ parent?: string;
31
+ event?: StartSpanEventArgs;
32
+ };
33
+ type EndSpanArgs = {
34
+ endTime?: number;
35
+ };
36
+ interface Exportable {
37
+ /**
38
+ * Return a serialized representation of the object that can be used to start subspans in other places. See `Span.traced` for more details.
39
+ */
40
+ export(): Promise<string>;
41
+ }
42
+ /**
43
+ * A Span encapsulates logged data and metrics for a unit of work. This interface is shared by all span implementations.
44
+ *
45
+ * We suggest using one of the various `traced` methods, instead of creating Spans directly.
46
+ *
47
+ * See `Span.traced` for full details.
48
+ */
49
+ interface Span extends Exportable {
50
+ /**
51
+ * Row ID of the span.
52
+ */
53
+ id: string;
54
+ /**
55
+ * Incrementally update the current span with new data. The event will be batched and uploaded behind the scenes.
56
+ *
57
+ * @param event: Data to be logged. See `Experiment.log` for full details.
58
+ */
59
+ log(event: ExperimentLogPartialArgs): void;
60
+ /**
61
+ * Add feedback to the current span. Unlike `Experiment.logFeedback` and `Logger.logFeedback`, this method does not accept an id parameter, because it logs feedback to the current span.
62
+ *
63
+ * @param event: Data to be logged. See `Experiment.logFeedback` for full details.
64
+ */
65
+ logFeedback(event: Omit<LogFeedbackFullArgs, "id">): void;
66
+ /**
67
+ * Create a new span and run the provided callback. This is useful if you want to log more detailed trace information beyond the scope of a single log event. Data logged over several calls to `Span.log` will be merged into one logical row.
68
+ *
69
+ * Spans created within `traced` are ended automatically. By default, the span is marked as current, so they can be accessed using `braintrust.currentSpan`.
70
+ *
71
+ * @param callback The function to be run under the span context.
72
+ * @param args.name Optional name of the span. If not provided, a name will be inferred from the call stack.
73
+ * @param args.type Optional type of the span. If not provided, the type will be unset.
74
+ * @param args.spanAttributes Optional additional attributes to attach to the span, such as a type name.
75
+ * @param args.startTime Optional start time of the span, as a timestamp in seconds.
76
+ * @param args.setCurrent If true (the default), the span will be marked as the currently-active span for the duration of the callback.
77
+ * @param args.parent Optional parent info string for the span. The string can be generated from `[Span,Experiment,Logger].export`. If not provided, the current span will be used (depending on context). This is useful for adding spans to an existing trace.
78
+ * @param args.event Data to be logged. See `Experiment.log` for full details.
79
+ * @returns The result of running `callback`.
80
+ */
81
+ traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
82
+ /**
83
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
84
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
85
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
86
+ *
87
+ * See `traced` for full details.
88
+ *
89
+ * @returns The newly-created `Span`
90
+ */
91
+ startSpan(args?: StartSpanArgs): Span;
92
+ /**
93
+ * Log an end time to the span (defaults to the current time). Returns the logged time.
94
+ *
95
+ * Will be invoked automatically if the span is constructed with `traced`.
96
+ *
97
+ * @param args.endTime Optional end time of the span, as a timestamp in seconds.
98
+ * @returns The end time logged to the span metrics.
99
+ */
100
+ end(args?: EndSpanArgs): number;
101
+ /**
102
+ * Flush any pending rows to the server.
103
+ */
104
+ flush(): Promise<void>;
105
+ /**
106
+ * Alias for `end`.
107
+ */
108
+ close(args?: EndSpanArgs): number;
109
+ /**
110
+ * Set the span's name, type, or other attributes after it's created.
111
+ */
112
+ setAttributes(args: Omit<StartSpanArgs, "event">): void;
113
+ kind: "span";
114
+ }
115
+ /**
116
+ * A fake implementation of the Span API which does nothing. This can be used as the default span.
117
+ */
118
+ declare class NoopSpan implements Span {
119
+ id: string;
120
+ kind: "span";
121
+ constructor();
122
+ log(_: ExperimentLogPartialArgs): void;
123
+ logFeedback(_event: Omit<LogFeedbackFullArgs, "id">): void;
124
+ traced<R>(callback: (span: Span) => R, _1?: StartSpanArgs & SetCurrentArg): R;
125
+ startSpan(_1?: StartSpanArgs): this;
126
+ end(args?: EndSpanArgs): number;
127
+ export(): Promise<string>;
128
+ flush(): Promise<void>;
129
+ close(args?: EndSpanArgs): number;
130
+ setAttributes(_args: Omit<StartSpanArgs, "event">): void;
131
+ }
132
+ declare const NOOP_SPAN: NoopSpan;
133
+ declare global {
134
+ var __inherited_braintrust_state: BraintrustState;
135
+ }
136
+ declare const loginSchema: z.ZodObject<{
137
+ appUrl: z.ZodString;
138
+ appPublicUrl: z.ZodString;
139
+ orgName: z.ZodString;
140
+ logUrl: z.ZodString;
141
+ proxyUrl: z.ZodString;
142
+ loginToken: z.ZodString;
143
+ orgId: z.ZodOptional<z.ZodNullable<z.ZodString>>;
144
+ gitMetadataSettings: z.ZodOptional<z.ZodNullable<z.ZodObject<{
145
+ collect: z.ZodEnum<["all", "none", "some"]>;
146
+ fields: z.ZodOptional<z.ZodArray<z.ZodEnum<["dirty", "tag", "commit", "branch", "author_name", "author_email", "commit_message", "commit_time", "git_diff"]>, "many">>;
147
+ }, "strict", z.ZodTypeAny, {
148
+ collect: "some" | "none" | "all";
149
+ fields?: ("dirty" | "tag" | "commit" | "branch" | "author_name" | "author_email" | "commit_message" | "commit_time" | "git_diff")[] | undefined;
150
+ }, {
151
+ collect: "some" | "none" | "all";
152
+ fields?: ("dirty" | "tag" | "commit" | "branch" | "author_name" | "author_email" | "commit_message" | "commit_time" | "git_diff")[] | undefined;
153
+ }>>>;
154
+ }, "strict", z.ZodTypeAny, {
155
+ appUrl: string;
156
+ appPublicUrl: string;
157
+ orgName: string;
158
+ logUrl: string;
159
+ proxyUrl: string;
160
+ loginToken: string;
161
+ orgId?: string | null | undefined;
162
+ gitMetadataSettings?: {
163
+ collect: "some" | "none" | "all";
164
+ fields?: ("dirty" | "tag" | "commit" | "branch" | "author_name" | "author_email" | "commit_message" | "commit_time" | "git_diff")[] | undefined;
165
+ } | null | undefined;
166
+ }, {
167
+ appUrl: string;
168
+ appPublicUrl: string;
169
+ orgName: string;
170
+ logUrl: string;
171
+ proxyUrl: string;
172
+ loginToken: string;
173
+ orgId?: string | null | undefined;
174
+ gitMetadataSettings?: {
175
+ collect: "some" | "none" | "all";
176
+ fields?: ("dirty" | "tag" | "commit" | "branch" | "author_name" | "author_email" | "commit_message" | "commit_time" | "git_diff")[] | undefined;
177
+ } | null | undefined;
178
+ }>;
179
+ type SerializedBraintrustState = z.infer<typeof loginSchema>;
180
+ declare class BraintrustState {
181
+ private loginParams;
182
+ id: string;
183
+ currentExperiment: Experiment | undefined;
184
+ currentLogger: Logger<false> | undefined;
185
+ currentSpan: IsoAsyncLocalStorage<Span>;
186
+ private _bgLogger;
187
+ appUrl: string | null;
188
+ appPublicUrl: string | null;
189
+ loginToken: string | null;
190
+ orgId: string | null;
191
+ orgName: string | null;
192
+ logUrl: string | null;
193
+ proxyUrl: string | null;
194
+ loggedIn: boolean;
195
+ gitMetadataSettings?: GitMetadataSettings;
196
+ fetch: typeof globalThis.fetch;
197
+ private _apiConn;
198
+ private _logConn;
199
+ private _proxyConn;
200
+ constructor(loginParams: LoginOptions);
201
+ resetLoginInfo(): void;
202
+ copyLoginInfo(other: BraintrustState): void;
203
+ serialize(): SerializedBraintrustState;
204
+ static deserialize(serialized: unknown): BraintrustState;
205
+ setFetch(fetch: typeof globalThis.fetch): void;
206
+ login(loginParams: LoginOptions & {
207
+ forceLogin?: boolean;
208
+ }): Promise<void>;
209
+ apiConn(): HTTPConnection;
210
+ logConn(): HTTPConnection;
211
+ proxyConn(): HTTPConnection;
212
+ bgLogger(): BackgroundLogger;
213
+ loginReplaceLogConn(logConn: HTTPConnection): void;
214
+ }
215
+ declare function _internalSetInitialState(): void;
216
+ declare const _internalGetGlobalState: () => BraintrustState;
217
+ declare class HTTPConnection {
218
+ base_url: string;
219
+ token: string | null;
220
+ headers: Record<string, string>;
221
+ fetch: typeof globalThis.fetch;
222
+ constructor(base_url: string, fetch: typeof globalThis.fetch);
223
+ setFetch(fetch: typeof globalThis.fetch): void;
224
+ ping(): Promise<boolean>;
225
+ make_long_lived(): void;
226
+ static sanitize_token(token: string): string;
227
+ set_token(token: string): void;
228
+ _reset(): void;
229
+ get(path: string, params?: Record<string, string | undefined> | undefined, config?: RequestInit): Promise<Response>;
230
+ post(path: string, params?: Record<string, unknown> | string, config?: RequestInit): Promise<Response>;
231
+ get_json(object_type: string, args?: Record<string, string | undefined> | undefined, retries?: number): Promise<any>;
232
+ post_json(object_type: string, args?: Record<string, unknown> | string | undefined): Promise<any>;
233
+ }
234
+ interface ObjectMetadata {
235
+ id: string;
236
+ name: string;
237
+ fullInfo: Record<string, unknown>;
238
+ }
239
+ interface ProjectExperimentMetadata {
240
+ project: ObjectMetadata;
241
+ experiment: ObjectMetadata;
242
+ }
243
+ interface ProjectDatasetMetadata {
244
+ project: ObjectMetadata;
245
+ dataset: ObjectMetadata;
246
+ }
247
+ interface OrgProjectMetadata {
248
+ org_id: string;
249
+ project: ObjectMetadata;
250
+ }
251
+ interface LogOptions<IsAsyncFlush> {
252
+ asyncFlush?: IsAsyncFlush;
253
+ computeMetadataArgs?: Record<string, any>;
254
+ }
255
+ type PromiseUnless<B, R> = B extends true ? R : Promise<Awaited<R>>;
256
+ interface ParentSpanIds {
257
+ spanId: string;
258
+ rootSpanId: string;
259
+ }
260
+ declare class Logger<IsAsyncFlush extends boolean> implements Exportable {
261
+ private state;
262
+ private lazyMetadata;
263
+ private _asyncFlush;
264
+ private computeMetadataArgs;
265
+ private lastStartTime;
266
+ private lazyId;
267
+ private calledStartSpan;
268
+ kind: "logger";
269
+ constructor(state: BraintrustState, lazyMetadata: LazyValue<OrgProjectMetadata>, logOptions?: LogOptions<IsAsyncFlush>);
270
+ get org_id(): Promise<string>;
271
+ get project(): Promise<ObjectMetadata>;
272
+ get id(): Promise<string>;
273
+ private parentObjectType;
274
+ private triggerWaitUntilFlush;
275
+ /**
276
+ * Log a single event. The event will be batched and uploaded behind the scenes if `logOptions.asyncFlush` is true.
277
+ *
278
+ * @param event The event to log.
279
+ * @param event.input: (Optional) the arguments that uniquely define a user input (an arbitrary, JSON serializable object).
280
+ * @param event.output: (Optional) the output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
281
+ * @param event.expected: (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
282
+ * @param event.scores: (Optional) a dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
283
+ * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
284
+ * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
285
+ * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
286
+ * @param options Additional logging options
287
+ * @param options.allowConcurrentWithSpans in rare cases where you need to log at the top level separately from spans on the logger elsewhere, set this to true.
288
+ * @returns The `id` of the logged event.
289
+ */
290
+ log(event: Readonly<StartSpanEventArgs>, options?: {
291
+ allowConcurrentWithSpans?: boolean;
292
+ }): PromiseUnless<IsAsyncFlush, string>;
293
+ /**
294
+ * Create a new toplevel span underneath the logger. The name defaults to "root".
295
+ *
296
+ * See `Span.traced` for full details.
297
+ */
298
+ traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): PromiseUnless<IsAsyncFlush, R>;
299
+ /**
300
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
301
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
302
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
303
+ *
304
+ * See `traced` for full details.
305
+ */
306
+ startSpan(args?: StartSpanArgs): Span;
307
+ private startSpanImpl;
308
+ /**
309
+ * Log feedback to an event. Feedback is used to save feedback scores, set an expected value, or add a comment.
310
+ *
311
+ * @param event
312
+ * @param event.id The id of the event to log feedback for. This is the `id` returned by `log` or accessible as the `id` field of a span.
313
+ * @param event.scores (Optional) a dictionary of numeric values (between 0 and 1) to log. These scores will be merged into the existing scores for the event.
314
+ * @param event.expected (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not.
315
+ * @param event.comment (Optional) an optional comment string to log about the event.
316
+ * @param event.metadata (Optional) a dictionary with additional data about the feedback. If you have a `user_id`, you can log it here and access it in the Braintrust UI.
317
+ * @param event.source (Optional) the source of the feedback. Must be one of "external" (default), "app", or "api".
318
+ */
319
+ logFeedback(event: LogFeedbackFullArgs): void;
320
+ /**
321
+ * Return a serialized representation of the logger that can be used to start subspans in other places. See `Span.startSpan` for more details.
322
+ */
323
+ export(): Promise<string>;
324
+ flush(): Promise<void>;
325
+ get asyncFlush(): IsAsyncFlush | undefined;
326
+ }
327
+ declare class BackgroundLogger {
328
+ private logConn;
329
+ private items;
330
+ private activeFlush;
331
+ private activeFlushResolved;
332
+ private activeFlushError;
333
+ syncFlush: boolean;
334
+ maxRequestSize: number;
335
+ defaultBatchSize: number;
336
+ numTries: number;
337
+ queueDropExceedingMaxsize: number | undefined;
338
+ queueDropLoggingPeriod: number;
339
+ failedPublishPayloadsDir: string | undefined;
340
+ allPublishPayloadsDir: string | undefined;
341
+ private queueDropLoggingState;
342
+ constructor(logConn: LazyValue<HTTPConnection>);
343
+ log(items: LazyValue<BackgroundLogEvent>[]): void;
344
+ flush(): Promise<void>;
345
+ private flushOnce;
346
+ private unwrapLazyValues;
347
+ private submitLogsRequest;
348
+ private registerDroppedItemCount;
349
+ private dumpDroppedEvents;
350
+ private static writePayloadToDir;
351
+ private triggerActiveFlush;
352
+ private logFailedPayloadsDir;
353
+ internalReplaceLogConn(logConn: HTTPConnection): void;
354
+ }
355
+ type InitOpenOption<IsOpen extends boolean> = {
356
+ open?: IsOpen;
357
+ };
358
+ type InitOptions<IsOpen extends boolean> = FullLoginOptions & {
359
+ experiment?: string;
360
+ description?: string;
361
+ dataset?: AnyDataset;
362
+ update?: boolean;
363
+ baseExperiment?: string;
364
+ isPublic?: boolean;
365
+ metadata?: Record<string, unknown>;
366
+ gitMetadataSettings?: GitMetadataSettings;
367
+ projectId?: string;
368
+ baseExperimentId?: string;
369
+ repoInfo?: RepoInfo;
370
+ setCurrent?: boolean;
371
+ state?: BraintrustState;
372
+ } & InitOpenOption<IsOpen>;
373
+ type FullInitOptions<IsOpen extends boolean> = {
374
+ project?: string;
375
+ } & InitOptions<IsOpen>;
376
+ type InitializedExperiment<IsOpen extends boolean | undefined> = IsOpen extends true ? ReadonlyExperiment : Experiment;
377
+ /**
378
+ * Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
379
+ *
380
+ * @param options Options for configuring init().
381
+ * @param options.project The name of the project to create the experiment in. Must specify at least one of `project` or `projectId`.
382
+ * @param options.experiment The name of the experiment to create. If not specified, a name will be generated automatically.
383
+ * @param options.description An optional description of the experiment.
384
+ * @param options.dataset (Optional) A dataset to associate with the experiment. You can pass in the name of the dataset (in the same project) or a dataset object (from any project).
385
+ * @param options.update If the experiment already exists, continue logging to it. If it does not exist, creates the experiment with the specified arguments.
386
+ * @param options.baseExperiment An optional experiment name to use as a base. If specified, the new experiment will be summarized and compared to this experiment. Otherwise, it will pick an experiment by finding the closest ancestor on the default (e.g. main) branch.
387
+ * @param options.isPublic An optional parameter to control whether the experiment is publicly visible to anybody with the link or privately visible to only members of the organization. Defaults to private.
388
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev.
389
+ * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login.
390
+ * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
391
+ * @param options.metadata (Optional) A dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
392
+ * @param options.gitMetadataSettings (Optional) Settings for collecting git metadata. By default, will collect all git metadata fields allowed in org-level settings.
393
+ * @param options.setCurrent If true (the default), set the global current-experiment to the newly-created one.
394
+ * @param options.open If the experiment already exists, open it in read-only mode. Throws an error if the experiment does not already exist.
395
+ * @param options.projectId The id of the project to create the experiment in. This takes precedence over `project` if specified.
396
+ * @param options.baseExperimentId An optional experiment id to use as a base. If specified, the new experiment will be summarized and compared to this. This takes precedence over `baseExperiment` if specified.
397
+ * @param options.repoInfo (Optional) Explicitly specify the git metadata for this experiment. This takes precedence over `gitMetadataSettings` if specified.
398
+ * @returns The newly created Experiment.
399
+ */
400
+ declare function init<IsOpen extends boolean = false>(options: Readonly<FullInitOptions<IsOpen>>): InitializedExperiment<IsOpen>;
401
+ /**
402
+ * Legacy form of `init` which accepts the project name as the first parameter,
403
+ * separately from the remaining options. See `init(options)` for full details.
404
+ */
405
+ declare function init<IsOpen extends boolean = false>(project: string, options?: Readonly<InitOptions<IsOpen>>): InitializedExperiment<IsOpen>;
406
+ /**
407
+ * Alias for init(options).
408
+ */
409
+ declare function initExperiment<IsOpen extends boolean = false>(options: Readonly<InitOptions<IsOpen>>): InitializedExperiment<IsOpen>;
410
+ /**
411
+ * Alias for init(project, options).
412
+ */
413
+ declare function initExperiment<IsOpen extends boolean = false>(project: string, options?: Readonly<InitOptions<IsOpen>>): InitializedExperiment<IsOpen>;
414
+ /**
415
+ * This function is deprecated. Use `init` instead.
416
+ */
417
+ declare function withExperiment<R>(project: string, callback: (experiment: Experiment) => R, options?: Readonly<InitOptions<false> & SetCurrentArg>): R;
418
+ /**
419
+ * This function is deprecated. Use `initLogger` instead.
420
+ */
421
+ declare function withLogger<IsAsyncFlush extends boolean = false, R = void>(callback: (logger: Logger<IsAsyncFlush>) => R, options?: Readonly<InitLoggerOptions<IsAsyncFlush> & SetCurrentArg>): R;
422
+ type UseOutputOption<IsLegacyDataset extends boolean> = {
423
+ useOutput?: IsLegacyDataset;
424
+ };
425
+ type InitDatasetOptions<IsLegacyDataset extends boolean> = FullLoginOptions & {
426
+ dataset?: string;
427
+ description?: string;
428
+ version?: string;
429
+ projectId?: string;
430
+ state?: BraintrustState;
431
+ } & UseOutputOption<IsLegacyDataset>;
432
+ type FullInitDatasetOptions<IsLegacyDataset extends boolean> = {
433
+ project?: string;
434
+ } & InitDatasetOptions<IsLegacyDataset>;
435
+ /**
436
+ * Create a new dataset in a specified project. If the project does not exist, it will be created.
437
+ *
438
+ * @param options Options for configuring initDataset().
439
+ * @param options.project The name of the project to create the dataset in. Must specify at least one of `project` or `projectId`.
440
+ * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically.
441
+ * @param options.description An optional description of the dataset.
442
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev.
443
+ * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login.
444
+ * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
445
+ * @param options.projectId The id of the project to create the dataset in. This takes precedence over `project` if specified.
446
+ * @param options.useOutput (Deprecated) If true, records will be fetched from this dataset in the legacy format, with the "expected" field renamed to "output". This option will be removed in a future version of Braintrust.
447
+ * @returns The newly created Dataset.
448
+ */
449
+ declare function initDataset<IsLegacyDataset extends boolean = typeof DEFAULT_IS_LEGACY_DATASET>(options: Readonly<FullInitDatasetOptions<IsLegacyDataset>>): Dataset<IsLegacyDataset>;
450
+ /**
451
+ * Legacy form of `initDataset` which accepts the project name as the first
452
+ * parameter, separately from the remaining options. See
453
+ * `initDataset(options)` for full details.
454
+ */
455
+ declare function initDataset<IsLegacyDataset extends boolean = typeof DEFAULT_IS_LEGACY_DATASET>(project: string, options?: Readonly<InitDatasetOptions<IsLegacyDataset>>): Dataset<IsLegacyDataset>;
456
+ /**
457
+ * This function is deprecated. Use `initDataset` instead.
458
+ */
459
+ declare function withDataset<R, IsLegacyDataset extends boolean = typeof DEFAULT_IS_LEGACY_DATASET>(project: string, callback: (dataset: Dataset<IsLegacyDataset>) => R, options?: Readonly<InitDatasetOptions<IsLegacyDataset>>): R;
460
+ type AsyncFlushArg<IsAsyncFlush> = {
461
+ asyncFlush?: IsAsyncFlush;
462
+ };
463
+ type InitLoggerOptions<IsAsyncFlush> = FullLoginOptions & {
464
+ projectName?: string;
465
+ projectId?: string;
466
+ setCurrent?: boolean;
467
+ state?: BraintrustState;
468
+ } & AsyncFlushArg<IsAsyncFlush>;
469
+ /**
470
+ * Create a new logger in a specified project. If the project does not exist, it will be created.
471
+ *
472
+ * @param options Additional options for configuring initLogger().
473
+ * @param options.projectName The name of the project to log into. If unspecified, will default to the Global project.
474
+ * @param options.projectId The id of the project to log into. This takes precedence over projectName if specified.
475
+ * @param options.asyncFlush If true, will log asynchronously in the background. Otherwise, will log synchronously. (false by default, to support serverless environments)
476
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev.
477
+ * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
478
+ * key is specified, will prompt the user to login.
479
+ * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
480
+ * @param options.forceLogin Login again, even if you have already logged in (by default, the logger will not login if you are already logged in)
481
+ * @param options.setCurrent If true (the default), set the global current-logger to the newly-created one.
482
+ * @returns The newly created Logger.
483
+ */
484
+ declare function initLogger<IsAsyncFlush extends boolean = false>(options?: Readonly<InitLoggerOptions<IsAsyncFlush>>): Logger<IsAsyncFlush>;
485
+ type LoadPromptOptions = FullLoginOptions & {
486
+ projectName?: string;
487
+ projectId?: string;
488
+ slug?: string;
489
+ version?: string;
490
+ defaults?: DefaultPromptArgs;
491
+ noTrace?: boolean;
492
+ state?: BraintrustState;
493
+ };
494
+ /**
495
+ * Load a prompt from the specified project.
496
+ *
497
+ * @param options Options for configuring loadPrompt().
498
+ * @param options.projectName The name of the project to load the prompt from. Must specify at least one of `projectName` or `projectId`.
499
+ * @param options.projectId The id of the project to load the prompt from. This takes precedence over `projectName` if specified.
500
+ * @param options.slug The slug of the prompt to load.
501
+ * @param options.version An optional version of the prompt (to read). If not specified, the latest version will be used.
502
+ * @param options.defaults (Optional) A dictionary of default values to use when rendering the prompt. Prompt values will override these defaults.
503
+ * @param options.noTrace If true, do not include logging metadata for this prompt when build() is called.
504
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev.
505
+ * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
506
+ * key is specified, will prompt the user to login.
507
+ * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
508
+ * @returns The prompt object.
509
+ * @throws If the prompt is not found.
510
+ * @throws If multiple prompts are found with the same slug in the same project (this should never happen).
511
+ *
512
+ * @example
513
+ * ```javascript
514
+ * const prompt = await loadPrompt({
515
+ * projectName: "My Project",
516
+ * slug: "my-prompt",
517
+ * });
518
+ * ```
519
+ */
520
+ declare function loadPrompt({ projectName, projectId, slug, version, defaults, noTrace, appUrl, apiKey, orgName, fetch, forceLogin, state: stateArg, }: LoadPromptOptions): Promise<Prompt>;
521
+ /**
522
+ * Options for logging in to Braintrust.
523
+ */
524
+ interface LoginOptions {
525
+ /**
526
+ * The URL of the Braintrust App. Defaults to https://www.braintrust.dev. You should not need
527
+ * to change this unless you are doing the "Full" deployment.
528
+ */
529
+ appUrl?: string;
530
+ /**
531
+ * The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable.
532
+ */
533
+ apiKey?: string;
534
+ /**
535
+ * The name of a specific organization to connect to. Since API keys are scoped to organizations, this parameter is usually
536
+ * unnecessary unless you are logging in with a JWT.
537
+ */
538
+ orgName?: string;
539
+ /**
540
+ * A custom fetch implementation to use.
541
+ */
542
+ fetch?: typeof globalThis.fetch;
543
+ }
544
+ type FullLoginOptions = LoginOptions & {
545
+ forceLogin?: boolean;
546
+ };
547
+ /**
548
+ * Log into Braintrust. This will prompt you for your API token, which you can find at
549
+ * https://www.braintrust.dev/app/token. This method is called automatically by `init()`.
550
+ *
551
+ * @param options Options for configuring login().
552
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev.
553
+ * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
554
+ * key is specified, will prompt the user to login.
555
+ * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple organizations.
556
+ * @param options.forceLogin Login again, even if you have already logged in (by default, this function will exit quickly if you have already logged in)
557
+ */
558
+ declare function login(options?: LoginOptions & {
559
+ forceLogin?: boolean;
560
+ }): Promise<BraintrustState>;
561
+ declare function loginToState(options?: LoginOptions): Promise<BraintrustState>;
562
+ /**
563
+ * Log a single event to the current experiment. The event will be batched and uploaded behind the scenes.
564
+ *
565
+ * @param event The event to log. See `Experiment.log` for full details.
566
+ * @returns The `id` of the logged event.
567
+ */
568
+ declare function log(event: ExperimentLogFullArgs): string;
569
+ /**
570
+ * Summarize the current experiment, including the scores (compared to the closest reference experiment) and metadata.
571
+ *
572
+ * @param options Options for summarizing the experiment.
573
+ * @param options.summarizeScores Whether to summarize the scores. If false, only the metadata will be returned.
574
+ * @param options.comparisonExperimentId The experiment to compare against. If not specified, the most recent experiment on the origin's main branch will be used.
575
+ * @returns A summary of the experiment, including the scores (compared to the closest reference experiment) and metadata.
576
+ */
577
+ declare function summarize(options?: {
578
+ readonly summarizeScores?: boolean;
579
+ readonly comparisonExperimentId?: string;
580
+ }): Promise<ExperimentSummary>;
581
+ type OptionalStateArg = {
582
+ state?: BraintrustState;
583
+ };
584
+ /**
585
+ * Returns the currently-active experiment (set by `braintrust.init`). Returns undefined if no current experiment has been set.
586
+ */
587
+ declare function currentExperiment(options?: OptionalStateArg): Experiment | undefined;
588
+ /**
589
+ * Returns the currently-active logger (set by `braintrust.initLogger`). Returns undefined if no current logger has been set.
590
+ */
591
+ declare function currentLogger<IsAsyncFlush extends boolean>(options?: AsyncFlushArg<IsAsyncFlush> & OptionalStateArg): Logger<IsAsyncFlush> | undefined;
592
+ /**
593
+ * Return the currently-active span for logging (set by one of the `traced` methods). If there is no active span, returns a no-op span object, which supports the same interface as spans but does no logging.
594
+ *
595
+ * See `Span` for full details.
596
+ */
597
+ declare function currentSpan(options?: OptionalStateArg): Span;
598
+ /**
599
+ * Mainly for internal use. Return the parent object for starting a span in a global context.
600
+ */
601
+ declare function getSpanParentObject<IsAsyncFlush extends boolean>(options?: AsyncFlushArg<IsAsyncFlush> & OptionalStateArg): Span | Experiment | Logger<IsAsyncFlush>;
602
+ /**
603
+ * Toplevel function for starting a span. It checks the following (in precedence order):
604
+ * * Currently-active span
605
+ * * Currently-active experiment
606
+ * * Currently-active logger
607
+ *
608
+ * and creates a span under the first one that is active. Alternatively, if `parent` is specified, it creates a span under the specified parent row. If none of these are active, it returns a no-op span object.
609
+ *
610
+ * See `Span.traced` for full details.
611
+ */
612
+ declare function traced<IsAsyncFlush extends boolean = false, R = void>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg & AsyncFlushArg<IsAsyncFlush>): PromiseUnless<IsAsyncFlush, R>;
613
+ /**
614
+ * Wrap a function with `traced`, using the arguments as `input` and return value as `output`.
615
+ * Any functions wrapped this way will automatically be traced, similar to the `@traced` decorator
616
+ * in Python. If you want to correctly propagate the function's name and define it in one go, then
617
+ * you can do so like this:
618
+ *
619
+ * ```ts
620
+ * const myFunc = wrapTraced(async function myFunc(input) {
621
+ * const result = await client.chat.completions.create({
622
+ * model: "gpt-3.5-turbo",
623
+ * messages: [{ role: "user", content: input }],
624
+ * });
625
+ * return result.choices[0].message.content ?? "unknown";
626
+ * });
627
+ * ```
628
+ * Now, any calls to `myFunc` will be traced, and the input and output will be logged automatically.
629
+ * If tracing is inactive, i.e. there is no active logger or experiment, it's just a no-op.
630
+ *
631
+ * @param fn The function to wrap.
632
+ * @param args Span-level arguments (e.g. a custom name or type) to pass to `traced`.
633
+ * @returns The wrapped function.
634
+ */
635
+ declare function wrapTraced<F extends (...args: any[]) => any, IsAsyncFlush extends boolean = false>(fn: F, args?: StartSpanArgs & SetCurrentArg & AsyncFlushArg<IsAsyncFlush>): IsAsyncFlush extends false ? (...args: Parameters<F>) => Promise<Awaited<ReturnType<F>>> : F;
636
+ /**
637
+ * A synonym for `wrapTraced`. If you're porting from systems that use `traceable`, you can use this to
638
+ * make your codebase more consistent.
639
+ */
640
+ declare const traceable: typeof wrapTraced;
641
+ /**
642
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
643
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
644
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
645
+ *
646
+ * See `traced` for full details.
647
+ */
648
+ declare function startSpan<IsAsyncFlush extends boolean = false>(args?: StartSpanArgs & AsyncFlushArg<IsAsyncFlush> & OptionalStateArg): Span;
649
+ /**
650
+ * Flush any pending rows to the server.
651
+ */
652
+ declare function flush(options?: OptionalStateArg): Promise<void>;
653
+ /**
654
+ * Set the fetch implementation to use for requests. You can specify it here,
655
+ * or when you call `login`.
656
+ *
657
+ * @param fetch The fetch implementation to use.
658
+ */
659
+ declare function setFetch(fetch: typeof globalThis.fetch): void;
660
+ type WithTransactionId<R> = R & {
661
+ [TRANSACTION_ID_FIELD]: TransactionId;
662
+ };
663
+ declare class ObjectFetcher<RecordType> implements AsyncIterable<WithTransactionId<RecordType>> {
664
+ private objectType;
665
+ private pinnedVersion;
666
+ private mutateRecord?;
667
+ private _fetchedData;
668
+ constructor(objectType: "dataset" | "experiment", pinnedVersion: string | undefined, mutateRecord?: ((r: any) => RecordType) | undefined);
669
+ get id(): Promise<string>;
670
+ protected getState(): Promise<BraintrustState>;
671
+ fetch(): AsyncGenerator<WithTransactionId<RecordType>>;
672
+ [Symbol.asyncIterator](): AsyncIterator<WithTransactionId<RecordType>>;
673
+ fetchedData(): Promise<WithTransactionId<RecordType>[]>;
674
+ clearCache(): void;
675
+ version(): Promise<string | undefined>;
676
+ }
677
+ type BaseMetadata = Record<string, unknown> | void;
678
+ type DefaultMetadataType = void;
679
+ type EvalCase<Input, Expected, Metadata> = {
680
+ input: Input;
681
+ tags?: string[];
682
+ } & (Expected extends void ? {} : {
683
+ expected: Expected;
684
+ }) & (Metadata extends void ? {} : {
685
+ metadata: Metadata;
686
+ });
687
+ /**
688
+ * An experiment is a collection of logged events, such as model inputs and outputs, which represent
689
+ * a snapshot of your application at a particular point in time. An experiment is meant to capture more
690
+ * than just the model you use, and includes the data you use to test, pre- and post- processing code,
691
+ * comparison metrics (scores), and any other metadata you want to include.
692
+ *
693
+ * Experiments are associated with a project, and two experiments are meant to be easily comparable via
694
+ * their `inputs`. You can change the attributes of the experiments in a project (e.g. scoring functions)
695
+ * over time, simply by changing what you log.
696
+ *
697
+ * You should not create `Experiment` objects directly. Instead, use the `braintrust.init()` method.
698
+ */
699
+ declare class Experiment extends ObjectFetcher<ExperimentEvent> implements Exportable {
700
+ private readonly lazyMetadata;
701
+ readonly dataset?: AnyDataset;
702
+ private lastStartTime;
703
+ private lazyId;
704
+ private calledStartSpan;
705
+ private state;
706
+ kind: "experiment";
707
+ constructor(state: BraintrustState, lazyMetadata: LazyValue<ProjectExperimentMetadata>, dataset?: AnyDataset);
708
+ get id(): Promise<string>;
709
+ get name(): Promise<string>;
710
+ get project(): Promise<ObjectMetadata>;
711
+ private parentObjectType;
712
+ protected getState(): Promise<BraintrustState>;
713
+ /**
714
+ * Log a single event to the experiment. The event will be batched and uploaded behind the scenes.
715
+ *
716
+ * @param event The event to log.
717
+ * @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
718
+ * @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
719
+ * @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
720
+ * @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
721
+ * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
722
+ * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
723
+ * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
724
+ * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
725
+ * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
726
+ * @param options Additional logging options
727
+ * @param options.allowConcurrentWithSpans in rare cases where you need to log at the top level separately from spans on the experiment elsewhere, set this to true.
728
+ * @returns The `id` of the logged event.
729
+ */
730
+ log(event: Readonly<ExperimentLogFullArgs>, options?: {
731
+ allowConcurrentWithSpans?: boolean;
732
+ }): string;
733
+ /**
734
+ * Create a new toplevel span underneath the experiment. The name defaults to "root".
735
+ *
736
+ * See `Span.traced` for full details.
737
+ */
738
+ traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
739
+ /**
740
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
741
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
742
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
743
+ *
744
+ * See `traced` for full details.
745
+ */
746
+ startSpan(args?: StartSpanArgs): Span;
747
+ private startSpanImpl;
748
+ fetchBaseExperiment(): Promise<{
749
+ id: any;
750
+ name: any;
751
+ } | null>;
752
+ /**
753
+ * Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
754
+ *
755
+ * @param options Options for summarizing the experiment.
756
+ * @param options.summarizeScores Whether to summarize the scores. If false, only the metadata will be returned.
757
+ * @param options.comparisonExperimentId The experiment to compare against. If not specified, the most recent experiment on the origin's main branch will be used.
758
+ * @returns A summary of the experiment, including the scores (compared to the closest reference experiment) and metadata.
759
+ */
760
+ summarize(options?: {
761
+ readonly summarizeScores?: boolean;
762
+ readonly comparisonExperimentId?: string;
763
+ }): Promise<ExperimentSummary>;
764
+ /**
765
+ * Log feedback to an event in the experiment. Feedback is used to save feedback scores, set an expected value, or add a comment.
766
+ *
767
+ * @param event The feedback to log.
768
+ * @param event.id The id of the event to log feedback for. This is the `id` returned by `log` or accessible as the `id` field of a span.
769
+ * @param event.scores (Optional) a dictionary of numeric values (between 0 and 1) to log. These scores will be merged into the existing scores for the event.
770
+ * @param event.expected (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not.
771
+ * @param event.comment (Optional) an optional comment string to log about the event.
772
+ * @param event.metadata (Optional) a dictionary with additional data about the feedback. If you have a `user_id`, you can log it here and access it in the Braintrust UI.
773
+ * @param event.source (Optional) the source of the feedback. Must be one of "external" (default), "app", or "api".
774
+ */
775
+ logFeedback(event: LogFeedbackFullArgs): void;
776
+ /**
777
+ * Return a serialized representation of the experiment that can be used to start subspans in other places. See `Span.startSpan` for more details.
778
+ */
779
+ export(): Promise<string>;
780
+ /**
781
+ * Flush any pending rows to the server.
782
+ */
783
+ flush(): Promise<void>;
784
+ /**
785
+ * This function is deprecated. You can simply remove it from your code.
786
+ */
787
+ close(): Promise<string>;
788
+ }
789
+ /**
790
+ * A read-only view of an experiment, initialized by passing `open: true` to `init()`.
791
+ */
792
+ declare class ReadonlyExperiment extends ObjectFetcher<ExperimentEvent> {
793
+ private state;
794
+ private readonly lazyMetadata;
795
+ constructor(state: BraintrustState, lazyMetadata: LazyValue<ProjectExperimentMetadata>);
796
+ get id(): Promise<string>;
797
+ get name(): Promise<string>;
798
+ protected getState(): Promise<BraintrustState>;
799
+ asDataset<Input, Expected>(): AsyncGenerator<EvalCase<Input, Expected, void>>;
800
+ }
801
+ declare function newId(): string;
802
+ /**
803
+ * Primary implementation of the `Span` interface. See the `Span` interface for full details on each method.
804
+ *
805
+ * We suggest using one of the various `traced` methods, instead of creating Spans directly. See `Span.startSpan` for full details.
806
+ */
807
+ declare class SpanImpl implements Span {
808
+ private state;
809
+ private isMerge;
810
+ private loggedEndTime;
811
+ private parentObjectType;
812
+ private parentObjectId;
813
+ private parentComputeObjectMetadataArgs;
814
+ private _id;
815
+ private spanId;
816
+ private rootSpanId;
817
+ private spanParents;
818
+ kind: "span";
819
+ constructor(args: {
820
+ state: BraintrustState;
821
+ parentObjectType: SpanObjectTypeV2;
822
+ parentObjectId: LazyValue<string>;
823
+ parentComputeObjectMetadataArgs: Record<string, any> | undefined;
824
+ parentSpanIds: ParentSpanIds | undefined;
825
+ defaultRootType?: SpanType;
826
+ } & Omit<StartSpanArgs, "parent">);
827
+ get id(): string;
828
+ setAttributes(args: Omit<StartSpanArgs, "event">): void;
829
+ log(event: ExperimentLogPartialArgs): void;
830
+ private logInternal;
831
+ logFeedback(event: Omit<LogFeedbackFullArgs, "id">): void;
832
+ traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
833
+ startSpan(args?: StartSpanArgs): Span;
834
+ end(args?: EndSpanArgs): number;
835
+ export(): Promise<string>;
836
+ flush(): Promise<void>;
837
+ close(args?: EndSpanArgs): number;
838
+ }
839
+ /**
840
+ * A dataset is a collection of records, such as model inputs and expected outputs, which represent
841
+ * data you can use to evaluate and fine-tune models. You can log production data to datasets,
842
+ * curate them with interesting examples, edit/delete records, and run evaluations against them.
843
+ *
844
+ * You should not create `Dataset` objects directly. Instead, use the `braintrust.initDataset()` method.
845
+ */
846
+ declare class Dataset<IsLegacyDataset extends boolean = typeof DEFAULT_IS_LEGACY_DATASET> extends ObjectFetcher<DatasetRecord<IsLegacyDataset>> {
847
+ private state;
848
+ private readonly lazyMetadata;
849
+ constructor(state: BraintrustState, lazyMetadata: LazyValue<ProjectDatasetMetadata>, pinnedVersion?: string, legacy?: IsLegacyDataset);
850
+ get id(): Promise<string>;
851
+ get name(): Promise<string>;
852
+ get project(): Promise<ObjectMetadata>;
853
+ protected getState(): Promise<BraintrustState>;
854
+ /**
855
+ * Insert a single record to the dataset. The record will be batched and uploaded behind the scenes. If you pass in an `id`,
856
+ * and a record with that `id` already exists, it will be overwritten (upsert).
857
+ *
858
+ * @param event The event to log.
859
+ * @param event.input The arguments that uniquely define an input case (an arbitrary, JSON serializable object).
860
+ * @param event.expected The output of your application, including post-processing (an arbitrary, JSON serializable object).
861
+ * @param event.tags (Optional) a list of strings that you can use to filter and group records later.
862
+ * @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
863
+ * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
864
+ * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
865
+ * JSON-serializable type, but its keys must be strings.
866
+ * @param event.id (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
867
+ * @param event.output: (Deprecated) The output of your application. Use `expected` instead.
868
+ * @returns The `id` of the logged record.
869
+ */
870
+ insert({ input, expected, metadata, tags, id, output, }: {
871
+ readonly input?: unknown;
872
+ readonly expected?: unknown;
873
+ readonly tags?: string[];
874
+ readonly metadata?: Record<string, unknown>;
875
+ readonly id?: string;
876
+ readonly output?: unknown;
877
+ }): string;
878
+ delete(id: string): string;
879
+ /**
880
+ * Summarize the dataset, including high level metrics about its size and other metadata.
881
+ * @param options.summarizeData Whether to summarize the data. If false, only the metadata will be returned.
882
+ * @returns `DatasetSummary`
883
+ * @returns A summary of the dataset.
884
+ */
885
+ summarize(options?: {
886
+ readonly summarizeData?: boolean;
887
+ }): Promise<DatasetSummary>;
888
+ /**
889
+ * Flush any pending rows to the server.
890
+ */
891
+ flush(): Promise<void>;
892
+ /**
893
+ * This function is deprecated. You can simply remove it from your code.
894
+ */
895
+ close(): Promise<string>;
896
+ }
897
+ type CompiledPromptParams = Omit<NonNullable<PromptData["options"]>["params"], "use_cache"> & {
898
+ model: NonNullable<NonNullable<PromptData["options"]>["model"]>;
899
+ };
900
+ type ChatPrompt = {
901
+ messages: OpenAIMessage[];
902
+ tools?: Tools;
903
+ };
904
+ type CompletionPrompt = {
905
+ prompt: string;
906
+ };
907
+ type CompiledPrompt<Flavor extends "chat" | "completion"> = CompiledPromptParams & {
908
+ span_info?: {
909
+ name?: string;
910
+ spanAttributes?: Record<any, any>;
911
+ metadata: {
912
+ prompt: {
913
+ variables: Record<string, unknown>;
914
+ id: string;
915
+ project_id: string;
916
+ version: string;
917
+ };
918
+ };
919
+ };
920
+ } & (Flavor extends "chat" ? ChatPrompt : Flavor extends "completion" ? CompletionPrompt : {});
921
+ type DefaultPromptArgs = Partial<CompiledPromptParams & AnyModelParam & ChatPrompt & CompletionPrompt>;
922
+ declare class Prompt {
923
+ private metadata;
924
+ private defaults;
925
+ private noTrace;
926
+ constructor(metadata: Omit<Prompt$1, "log_id"> | PromptSessionEvent, defaults: DefaultPromptArgs, noTrace: boolean);
927
+ get id(): string;
928
+ get projectId(): string;
929
+ get name(): string;
930
+ get slug(): string;
931
+ get prompt(): PromptData["prompt"];
932
+ get version(): TransactionId;
933
+ get options(): NonNullable<PromptData["options"]>;
934
+ /**
935
+ * Build the prompt with the given formatting options. The args you pass in will
936
+ * be forwarded to the mustache template that defines the prompt and rendered with
937
+ * the `mustache-js` library.
938
+ *
939
+ * @param buildArgs Args to forward along to the prompt template.
940
+ */
941
+ build<Flavor extends "chat" | "completion" = "chat">(buildArgs: unknown, options?: {
942
+ flavor?: Flavor;
943
+ }): CompiledPrompt<Flavor>;
944
+ private runBuild;
945
+ }
946
+ type AnyDataset = Dataset<boolean>;
947
+ /**
948
+ * Summary of a score's performance.
949
+ * @property name Name of the score.
950
+ * @property score Average score across all examples.
951
+ * @property diff Difference in score between the current and reference experiment.
952
+ * @property improvements Number of improvements in the score.
953
+ * @property regressions Number of regressions in the score.
954
+ */
955
+ interface ScoreSummary {
956
+ name: string;
957
+ score: number;
958
+ diff?: number;
959
+ improvements: number;
960
+ regressions: number;
961
+ }
962
+ /**
963
+ * Summary of a metric's performance.
964
+ * @property name Name of the metric.
965
+ * @property metric Average metric across all examples.
966
+ * @property unit Unit label for the metric.
967
+ * @property diff Difference in metric between the current and reference experiment.
968
+ * @property improvements Number of improvements in the metric.
969
+ * @property regressions Number of regressions in the metric.
970
+ */
971
+ interface MetricSummary {
972
+ name: string;
973
+ metric: number;
974
+ unit: string;
975
+ diff?: number;
976
+ improvements: number;
977
+ regressions: number;
978
+ }
979
+ /**
980
+ * Summary of an experiment's scores and metadata.
981
+ * @property projectName Name of the project that the experiment belongs to.
982
+ * @property experimentName Name of the experiment.
983
+ * @property experimentId ID of the experiment. May be `undefined` if the eval was run locally.
984
+ * @property projectUrl URL to the project's page in the Braintrust app.
985
+ * @property experimentUrl URL to the experiment's page in the Braintrust app.
986
+ * @property comparisonExperimentName The experiment scores are baselined against.
987
+ * @property scores Summary of the experiment's scores.
988
+ */
989
+ interface ExperimentSummary {
990
+ projectName: string;
991
+ experimentName: string;
992
+ projectId?: string;
993
+ experimentId?: string;
994
+ projectUrl?: string;
995
+ experimentUrl?: string;
996
+ comparisonExperimentName?: string;
997
+ scores: Record<string, ScoreSummary>;
998
+ metrics?: Record<string, MetricSummary>;
999
+ }
1000
+ /**
1001
+ * Summary of a dataset's data.
1002
+ *
1003
+ * @property newRecords New or updated records added in this session.
1004
+ * @property totalRecords Total records in the dataset.
1005
+ */
1006
+ interface DataSummary {
1007
+ newRecords: number;
1008
+ totalRecords: number;
1009
+ }
1010
+ /**
1011
+ * Summary of a dataset's data and metadata.
1012
+ *
1013
+ * @property projectName Name of the project that the dataset belongs to.
1014
+ * @property datasetName Name of the dataset.
1015
+ * @property projectUrl URL to the project's page in the Braintrust app.
1016
+ * @property datasetUrl URL to the dataset's page in the Braintrust app.
1017
+ * @property dataSummary Summary of the dataset's data.
1018
+ */
1019
+ interface DatasetSummary {
1020
+ projectName: string;
1021
+ datasetName: string;
1022
+ projectUrl: string;
1023
+ datasetUrl: string;
1024
+ dataSummary: DataSummary;
1025
+ }
1026
+
1027
+ type BraintrustStreamChunk = {
1028
+ type: "text_delta";
1029
+ data: string;
1030
+ } | {
1031
+ type: "json_delta";
1032
+ data: string;
1033
+ };
1034
+ declare class BraintrustStream {
1035
+ private stream;
1036
+ constructor(baseStream: ReadableStream<Uint8Array>);
1037
+ constructor(stream: ReadableStream<string>);
1038
+ constructor(stream: ReadableStream<BraintrustStreamChunk>);
1039
+ copy(): BraintrustStream;
1040
+ toReadableStream(): ReadableStream<BraintrustStreamChunk>;
1041
+ }
1042
+ declare function createFinalValuePassThroughStream<T extends BraintrustStreamChunk | string | Uint8Array>(onFinal: (result: unknown) => void): TransformStream<T, BraintrustStreamChunk>;
1043
+ declare function devNullWritableStream(): WritableStream;
1044
+
1045
+ type InvokeReturn<Stream extends boolean, Return> = Stream extends true ? BraintrustStream : Return;
1046
+ type InvokeFunctionArgs<Arg, Return, Stream extends boolean = false> = FunctionId & FullLoginOptions & {
1047
+ arg: Arg;
1048
+ parent?: Exportable | string;
1049
+ state?: BraintrustState;
1050
+ stream?: Stream;
1051
+ schema?: z.ZodSchema<Return>;
1052
+ };
1053
+ declare function invoke<Arg, Return, Stream extends boolean = false>(args: InvokeFunctionArgs<Arg, Return, Stream>): Promise<InvokeReturn<Stream, Return>>;
1054
+
1055
+ interface BetaLike {
1056
+ chat: {
1057
+ completions: {
1058
+ stream: any;
1059
+ };
1060
+ };
1061
+ embeddings: any;
1062
+ }
1063
+ interface ChatLike {
1064
+ completions: any;
1065
+ }
1066
+ interface OpenAILike {
1067
+ chat: ChatLike;
1068
+ embeddings: any;
1069
+ beta?: BetaLike;
1070
+ }
1071
+ declare global {
1072
+ var __inherited_braintrust_wrap_openai: ((openai: any) => any) | undefined;
1073
+ }
1074
+ /**
1075
+ * Wrap an `OpenAI` object (created with `new OpenAI(...)`) to add tracing. If Braintrust is
1076
+ * not configured, this is a no-op.
1077
+ *
1078
+ * Currently, this only supports the `v4` API.
1079
+ *
1080
+ * @param openai The `OpenAI` object to wrap.
1081
+ * @returns The wrapped `OpenAI` object.
1082
+ */
1083
+ declare function wrapOpenAI<T extends object>(openai: T): T;
1084
+ declare function wrapOpenAIv4<T extends OpenAILike>(openai: T): T;
1085
+ declare const LEGACY_CACHED_HEADER = "x-cached";
1086
+ declare const X_CACHED_HEADER = "x-bt-cached";
1087
+ declare function parseCachedHeader(value: string | null | undefined): number | undefined;
1088
+
1089
+ export { type AnyDataset, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, type EvalCase, Experiment, type ExperimentSummary, type Exportable, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, type PromiseUnless, Prompt, ReadonlyExperiment, type ScoreSummary, type SerializedBraintrustState, type SetCurrentArg, type Span, SpanImpl, type StartSpanArgs, type WithTransactionId, X_CACHED_HEADER, _internalGetGlobalState, _internalSetInitialState, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, login, loginToState, newId, parseCachedHeader, setFetch, startSpan, summarize, traceable, traced, withDataset, withExperiment, withLogger, wrapOpenAI, wrapOpenAIv4, wrapTraced };