braintrust 0.0.91 → 0.0.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3693,7 +3693,7 @@ var DefaultAsyncLocalStorage = class {
3693
3693
  }
3694
3694
  };
3695
3695
  var iso = {
3696
- getRepoStatus: async () => void 0,
3696
+ getRepoStatus: async (_settings) => void 0,
3697
3697
  getPastNAncestors: async () => [],
3698
3698
  getEnv: (_name) => void 0,
3699
3699
  getCallerLocation: () => void 0,
@@ -7681,9 +7681,7 @@ async function getBaseBranchAncestor(remote = void 0) {
7681
7681
  if (git === null) {
7682
7682
  throw new Error("Not in a git repo");
7683
7683
  }
7684
- const { remote: remoteName, branch: baseBranch } = await getBaseBranch(
7685
- remote
7686
- );
7684
+ const { remote: remoteName, branch: baseBranch } = await getBaseBranch(remote);
7687
7685
  const isDirty = (await git.diffSummary()).files.length > 0;
7688
7686
  const head = isDirty ? "HEAD" : "HEAD^";
7689
7687
  try {
@@ -7732,7 +7730,21 @@ function truncateToByteLimit(s, byteLimit = 65536) {
7732
7730
  const truncated = encoded.subarray(0, byteLimit);
7733
7731
  return new TextDecoder().decode(truncated);
7734
7732
  }
7735
- async function getRepoStatus() {
7733
+ async function getRepoStatus(settings) {
7734
+ if (settings && settings.collect === "none") {
7735
+ return void 0;
7736
+ }
7737
+ const repo = await repoStatus();
7738
+ if (!repo || !settings || settings.collect === "all") {
7739
+ return repo;
7740
+ }
7741
+ let sanitized = {};
7742
+ settings.fields?.forEach((field) => {
7743
+ sanitized = { ...sanitized, [field]: repo[field] };
7744
+ });
7745
+ return sanitized;
7746
+ }
7747
+ async function repoStatus() {
7736
7748
  const git = await currentRepo();
7737
7749
  if (git === null) {
7738
7750
  return void 0;
@@ -7944,6 +7956,24 @@ var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
7944
7956
  SpanTypeAttribute2["TOOL"] = "tool";
7945
7957
  return SpanTypeAttribute2;
7946
7958
  })(SpanTypeAttribute || {});
7959
+ function mergeGitMetadataSettings(s1, s2) {
7960
+ var _a2;
7961
+ if (s1.collect === "all") {
7962
+ return s2;
7963
+ } else if (s2.collect === "all") {
7964
+ return s1;
7965
+ } else if (s1.collect === "none") {
7966
+ return s1;
7967
+ } else if (s2.collect === "none") {
7968
+ return s2;
7969
+ }
7970
+ const fields = ((_a2 = s1.fields) != null ? _a2 : []).filter((f) => {
7971
+ var _a22;
7972
+ return ((_a22 = s2.fields) != null ? _a22 : []).includes(f);
7973
+ });
7974
+ const collect = fields.length > 0 ? "some" : "none";
7975
+ return { collect, fields };
7976
+ }
7947
7977
 
7948
7978
  // src/util.ts
7949
7979
  var GLOBAL_PROJECT = "Global";
@@ -7998,7 +8028,7 @@ var NoopSpan = class {
7998
8028
  var NOOP_SPAN = new NoopSpan();
7999
8029
  var BraintrustState = class {
8000
8030
  constructor() {
8001
- this.apiUrl = null;
8031
+ this.appUrl = null;
8002
8032
  this.loginToken = null;
8003
8033
  this.orgId = null;
8004
8034
  this.orgName = null;
@@ -8014,21 +8044,22 @@ var BraintrustState = class {
8014
8044
  globalThis.__inherited_braintrust_state = this;
8015
8045
  }
8016
8046
  resetLoginInfo() {
8017
- this.apiUrl = null;
8047
+ this.appUrl = null;
8018
8048
  this.loginToken = null;
8019
8049
  this.orgId = null;
8020
8050
  this.orgName = null;
8021
8051
  this.logUrl = null;
8022
8052
  this.loggedIn = false;
8053
+ this.gitMetadataSettings = void 0;
8023
8054
  this._apiConn = null;
8024
8055
  this._logConn = null;
8025
8056
  }
8026
8057
  apiConn() {
8027
8058
  if (!this._apiConn) {
8028
- if (!this.apiUrl) {
8029
- throw new Error("Must initialize apiUrl before requesting apiConn");
8059
+ if (!this.appUrl) {
8060
+ throw new Error("Must initialize appUrl before requesting apiConn");
8030
8061
  }
8031
- this._apiConn = new HTTPConnection(this.apiUrl);
8062
+ this._apiConn = new HTTPConnection(this.appUrl);
8032
8063
  }
8033
8064
  return this._apiConn;
8034
8065
  }
@@ -8248,12 +8279,12 @@ var Logger = class {
8248
8279
  * Log a single event. The event will be batched and uploaded behind the scenes if `logOptions.asyncFlush` is true.
8249
8280
  *
8250
8281
  * @param event The event to log.
8251
- * @param event.input: The arguments that uniquely define a user input (an arbitrary, JSON serializable object).
8252
- * @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
8253
- * @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
8254
- * @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
8282
+ * @param event.input: (Optional) the arguments that uniquely define a user input (an arbitrary, JSON serializable object).
8283
+ * @param event.output: (Optional) the output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
8284
+ * @param event.expected: (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
8285
+ * @param event.scores: (Optional) a dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
8255
8286
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
8256
- * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end", "caller_functionname", "caller_filename", "caller_lineno".
8287
+ * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
8257
8288
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
8258
8289
  * @returns The `id` of the logged event.
8259
8290
  */
@@ -8307,7 +8338,9 @@ var Logger = class {
8307
8338
  };
8308
8339
  }
8309
8340
  /**
8310
- * Lower-level alternative to `traced`, which does not automatically end the span or mark it as current.
8341
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
8342
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
8343
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
8311
8344
  *
8312
8345
  * See `traced` for full details.
8313
8346
  */
@@ -8458,16 +8491,17 @@ function init(project, options = {}) {
8458
8491
  baseExperiment,
8459
8492
  isPublic,
8460
8493
  update,
8461
- apiUrl,
8494
+ appUrl,
8462
8495
  apiKey,
8463
8496
  orgName,
8464
- metadata
8497
+ metadata,
8498
+ gitMetadataSettings
8465
8499
  } = options || {};
8466
8500
  const lazyMetadata = (async () => {
8467
8501
  await login({
8468
8502
  orgName,
8469
8503
  apiKey,
8470
- apiUrl
8504
+ appUrl
8471
8505
  });
8472
8506
  const args = {
8473
8507
  project_name: project,
@@ -8482,9 +8516,20 @@ function init(project, options = {}) {
8482
8516
  if (update) {
8483
8517
  args["update"] = update;
8484
8518
  }
8485
- const repoStatus = await isomorph_default.getRepoStatus();
8486
- if (repoStatus) {
8487
- args["repo_info"] = repoStatus;
8519
+ let mergedGitMetadataSettings = {
8520
+ ..._state.gitMetadataSettings || {
8521
+ collect: "all"
8522
+ }
8523
+ };
8524
+ if (gitMetadataSettings) {
8525
+ mergedGitMetadataSettings = mergeGitMetadataSettings(
8526
+ mergedGitMetadataSettings,
8527
+ gitMetadataSettings
8528
+ );
8529
+ }
8530
+ const repoStatus2 = await isomorph_default.getRepoStatus(gitMetadataSettings);
8531
+ if (repoStatus2) {
8532
+ args["repo_info"] = repoStatus2;
8488
8533
  }
8489
8534
  if (baseExperiment) {
8490
8535
  args["base_experiment"] = baseExperiment;
@@ -8549,12 +8594,12 @@ function withLogger(callback, options = {}) {
8549
8594
  return callback(logger);
8550
8595
  }
8551
8596
  function initDataset(project, options = {}) {
8552
- const { dataset, description, version, apiUrl, apiKey, orgName } = options || {};
8597
+ const { dataset, description, version, appUrl, apiKey, orgName } = options || {};
8553
8598
  const lazyMetadata = (async () => {
8554
8599
  await login({
8555
8600
  orgName,
8556
8601
  apiKey,
8557
- apiUrl
8602
+ appUrl
8558
8603
  });
8559
8604
  const args = {
8560
8605
  org_id: _state.orgId,
@@ -8590,7 +8635,7 @@ function initLogger(options = {}) {
8590
8635
  projectName,
8591
8636
  projectId,
8592
8637
  asyncFlush,
8593
- apiUrl,
8638
+ appUrl,
8594
8639
  apiKey,
8595
8640
  orgName,
8596
8641
  forceLogin
@@ -8599,7 +8644,7 @@ function initLogger(options = {}) {
8599
8644
  await login({
8600
8645
  orgName,
8601
8646
  apiKey,
8602
- apiUrl,
8647
+ appUrl,
8603
8648
  forceLogin
8604
8649
  });
8605
8650
  const org_id = _state.orgId;
@@ -8645,7 +8690,7 @@ function initLogger(options = {}) {
8645
8690
  }
8646
8691
  async function login(options = {}) {
8647
8692
  const {
8648
- apiUrl = isomorph_default.getEnv("BRAINTRUST_API_URL") || "https://www.braintrustdata.com",
8693
+ appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
8649
8694
  apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
8650
8695
  orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
8651
8696
  } = options || {};
@@ -8654,11 +8699,11 @@ async function login(options = {}) {
8654
8699
  return;
8655
8700
  }
8656
8701
  _state.resetLoginInfo();
8657
- _state.apiUrl = apiUrl;
8702
+ _state.appUrl = appUrl;
8658
8703
  let conn = null;
8659
8704
  if (apiKey !== void 0) {
8660
8705
  const resp = await checkResponse(
8661
- await fetch(_urljoin(_state.apiUrl, `/api/apikey/login`), {
8706
+ await fetch(_urljoin(_state.appUrl, `/api/apikey/login`), {
8662
8707
  method: "POST",
8663
8708
  headers: {
8664
8709
  "Content-Type": "application/json"
@@ -8775,7 +8820,8 @@ function _check_org_info(org_info, org_name) {
8775
8820
  if (org_name === void 0 || org.name === org_name) {
8776
8821
  _state.orgId = org.id;
8777
8822
  _state.orgName = org.name;
8778
- _state.logUrl = isomorph_default.getEnv("BRAINTRUST_LOG_URL") ?? org.api_url;
8823
+ _state.logUrl = isomorph_default.getEnv("BRAINTRUST_API_URL") ?? org.api_url;
8824
+ _state.gitMetadataSettings = org.git_metadata || void 0;
8779
8825
  break;
8780
8826
  }
8781
8827
  }
@@ -8844,6 +8890,9 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
8844
8890
  "Exactly one of input or inputs (deprecated) must be specified. Prefer input."
8845
8891
  );
8846
8892
  }
8893
+ if (!event.output) {
8894
+ throw new Error("output must be specified");
8895
+ }
8847
8896
  if (!event.scores) {
8848
8897
  throw new Error("scores must be specified");
8849
8898
  }
@@ -8891,10 +8940,10 @@ var Experiment = class {
8891
8940
  * @param event The event to log.
8892
8941
  * @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
8893
8942
  * @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
8894
- * @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
8943
+ * @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
8895
8944
  * @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
8896
8945
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
8897
- * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end", "caller_functionname", "caller_filename", "caller_lineno".
8946
+ * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
8898
8947
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
8899
8948
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
8900
8949
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
@@ -8933,7 +8982,9 @@ var Experiment = class {
8933
8982
  };
8934
8983
  }
8935
8984
  /**
8936
- * Lower-level alternative to `traced`, which does not automatically end the span or mark it as current.
8985
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
8986
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
8987
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
8937
8988
  *
8938
8989
  * See `traced` for full details.
8939
8990
  */
@@ -8958,7 +9009,7 @@ var Experiment = class {
8958
9009
  let { summarizeScores = true, comparisonExperimentId = void 0 } = options || {};
8959
9010
  await this.bgLogger.flush();
8960
9011
  const state = await this.getState();
8961
- const projectUrl = `${state.apiUrl}/app/${encodeURIComponent(
9012
+ const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
8962
9013
  state.orgName
8963
9014
  )}/p/${encodeURIComponent((await this.project).name)}`;
8964
9015
  const experimentUrl = `${projectUrl}/${encodeURIComponent(
@@ -9054,9 +9105,9 @@ var SpanImpl = class _SpanImpl {
9054
9105
  })();
9055
9106
  this.internalData = {
9056
9107
  metrics: {
9057
- start: args.startTime ?? getCurrentUnixTimestamp(),
9058
- ...callerLocation
9108
+ start: args.startTime ?? getCurrentUnixTimestamp()
9059
9109
  },
9110
+ context: { ...callerLocation },
9060
9111
  span_attributes: { ...args.spanAttributes, name },
9061
9112
  created: (/* @__PURE__ */ new Date()).toISOString()
9062
9113
  };
@@ -9243,7 +9294,7 @@ var Dataset = class {
9243
9294
  let { summarizeData = true } = options || {};
9244
9295
  await this.bgLogger.flush();
9245
9296
  const state = await this.getState();
9246
- const projectUrl = `${state.apiUrl}/app/${encodeURIComponent(
9297
+ const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
9247
9298
  state.orgName
9248
9299
  )}/p/${encodeURIComponent((await this.project).name)}`;
9249
9300
  const datasetUrl = `${projectUrl}/d/${encodeURIComponent(await this.name)}`;
@@ -1,13 +1,4 @@
1
- export interface RepoStatus {
2
- commit?: string;
3
- branch?: string;
4
- tag?: string;
5
- dirty: boolean;
6
- author_name?: string;
7
- author_email?: string;
8
- commit_message?: string;
9
- commit_time?: string;
10
- }
1
+ import { GitMetadataSettings, RepoStatus } from "@braintrust/core";
11
2
  export interface CallerLocation {
12
3
  caller_functionname: string;
13
4
  caller_filename: string;
@@ -19,7 +10,7 @@ export interface IsoAsyncLocalStorage<T> {
19
10
  getStore(): T | undefined;
20
11
  }
21
12
  export interface Common {
22
- getRepoStatus: () => Promise<RepoStatus | undefined>;
13
+ getRepoStatus: (settings?: GitMetadataSettings) => Promise<RepoStatus | undefined>;
23
14
  getPastNAncestors: () => Promise<string[]>;
24
15
  getEnv: (name: string) => string | undefined;
25
16
  getCallerLocation: () => CallerLocation | undefined;
package/dist/logger.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /// <reference lib="dom" />
2
- import { IS_MERGE_FIELD, PARENT_ID_FIELD, Source, AUDIT_SOURCE_FIELD, AUDIT_METADATA_FIELD } from "@braintrust/core";
2
+ import { IS_MERGE_FIELD, PARENT_ID_FIELD, Source, AUDIT_SOURCE_FIELD, AUDIT_METADATA_FIELD, GitMetadataSettings } from "@braintrust/core";
3
3
  import { IsoAsyncLocalStorage } from "./isomorph";
4
4
  export type Metadata = Record<string, unknown>;
5
5
  export type SetCurrentArg = {
@@ -64,7 +64,9 @@ export interface Span {
64
64
  */
65
65
  traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
66
66
  /**
67
- * Lower-level alternative to `traced`, which does not automatically end the span or mark it as current. Be sure to end the span with `span.end()` when it has finished.
67
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
68
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
69
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
68
70
  *
69
71
  * See `traced` for full details.
70
72
  *
@@ -111,12 +113,13 @@ declare class BraintrustState {
111
113
  currentExperiment: Experiment | undefined;
112
114
  currentLogger: Logger<false> | undefined;
113
115
  currentSpan: IsoAsyncLocalStorage<Span>;
114
- apiUrl: string | null;
116
+ appUrl: string | null;
115
117
  loginToken: string | null;
116
118
  orgId: string | null;
117
119
  orgName: string | null;
118
120
  logUrl: string | null;
119
121
  loggedIn: boolean;
122
+ gitMetadataSettings?: GitMetadataSettings;
120
123
  private _apiConn;
121
124
  private _logConn;
122
125
  constructor();
@@ -176,12 +179,12 @@ export declare class Logger<IsAsyncFlush extends boolean> {
176
179
  * Log a single event. The event will be batched and uploaded behind the scenes if `logOptions.asyncFlush` is true.
177
180
  *
178
181
  * @param event The event to log.
179
- * @param event.input: The arguments that uniquely define a user input (an arbitrary, JSON serializable object).
180
- * @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
181
- * @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
182
- * @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
182
+ * @param event.input: (Optional) the arguments that uniquely define a user input (an arbitrary, JSON serializable object).
183
+ * @param event.output: (Optional) the output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
184
+ * @param event.expected: (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
185
+ * @param event.scores: (Optional) a dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
183
186
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
184
- * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end", "caller_functionname", "caller_filename", "caller_lineno".
187
+ * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
185
188
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
186
189
  * @returns The `id` of the logged event.
187
190
  */
@@ -194,7 +197,9 @@ export declare class Logger<IsAsyncFlush extends boolean> {
194
197
  traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): PromiseUnless<IsAsyncFlush, R>;
195
198
  private lazyParentIds;
196
199
  /**
197
- * Lower-level alternative to `traced`, which does not automatically end the span or mark it as current.
200
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
201
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
202
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
198
203
  *
199
204
  * See `traced` for full details.
200
205
  */
@@ -232,7 +237,7 @@ export type OtherExperimentLogFields = {
232
237
  datasetRecordId: string;
233
238
  };
234
239
  export type ExperimentLogPartialArgs = Partial<OtherExperimentLogFields> & Partial<InputField | InputsField>;
235
- export type ExperimentLogFullArgs = Partial<Omit<OtherExperimentLogFields, "scores">> & Required<Pick<OtherExperimentLogFields, "scores">> & Partial<InputField | InputsField> & Partial<IdField>;
240
+ export type ExperimentLogFullArgs = Partial<Omit<OtherExperimentLogFields, "output" | "scores">> & Required<Pick<OtherExperimentLogFields, "output" | "scores">> & Partial<InputField | InputsField> & Partial<IdField>;
236
241
  export type LogFeedbackFullArgs = IdField & Partial<Omit<OtherExperimentLogFields, "output" | "metrics" | "datasetRecordId"> & {
237
242
  comment: string;
238
243
  source: Source;
@@ -259,6 +264,7 @@ type ExperimentEvent = Partial<InputField> & Partial<OtherExperimentLogFields> &
259
264
  created: string;
260
265
  span_parents: string[];
261
266
  span_attributes: Record<string, unknown>;
267
+ context: Record<string, unknown>;
262
268
  [PARENT_ID_FIELD]: string;
263
269
  [AUDIT_SOURCE_FIELD]: Source;
264
270
  [AUDIT_METADATA_FIELD]?: Record<string, unknown>;
@@ -311,10 +317,11 @@ export type InitOptions = {
311
317
  update?: boolean;
312
318
  baseExperiment?: string;
313
319
  isPublic?: boolean;
314
- apiUrl?: string;
320
+ appUrl?: string;
315
321
  apiKey?: string;
316
322
  orgName?: string;
317
323
  metadata?: Metadata;
324
+ gitMetadataSettings?: GitMetadataSettings;
318
325
  setCurrent?: boolean;
319
326
  };
320
327
  /**
@@ -330,7 +337,7 @@ export type InitOptions = {
330
337
  * @param options.baseExperiment An optional experiment name to use as a base. If specified, the new experiment will be summarized and compared to this
331
338
  * experiment. Otherwise, it will pick an experiment by finding the closest ancestor on the default (e.g. main) branch.
332
339
  * @param options.isPublic An optional parameter to control whether the experiment is publicly visible to anybody with the link or privately visible to only members of the organization. Defaults to private.
333
- * @param options.apiUrl The URL of the Braintrust API. Defaults to https://www.braintrustdata.com.
340
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
334
341
  * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
335
342
  * key is specified, will prompt the user to login.
336
343
  * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
@@ -338,6 +345,7 @@ export type InitOptions = {
338
345
  * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
339
346
  * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
340
347
  * JSON-serializable type, but its keys must be strings.
348
+ * @param options.gitMetadataSettings (Optional) Settings for collecting git metadata. By default, will collect all git metadata fields allowed in org-level settings.
341
349
  * @param setCurrent If true (the default), set the global current-experiment to the newly-created one.
342
350
  * @returns The newly created Experiment.
343
351
  */
@@ -354,7 +362,7 @@ type InitDatasetOptions = {
354
362
  dataset?: string;
355
363
  description?: string;
356
364
  version?: string;
357
- apiUrl?: string;
365
+ appUrl?: string;
358
366
  apiKey?: string;
359
367
  orgName?: string;
360
368
  };
@@ -365,7 +373,7 @@ type InitDatasetOptions = {
365
373
  * @param options Additional options for configuring init().
366
374
  * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically.
367
375
  * @param options.description An optional description of the dataset.
368
- * @param options.apiUrl The URL of the Braintrust API. Defaults to https://www.braintrustdata.com.
376
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
369
377
  * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
370
378
  * key is specified, will prompt the user to login.
371
379
  * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
@@ -382,7 +390,7 @@ type AsyncFlushArg<IsAsyncFlush> = {
382
390
  type InitLoggerOptions<IsAsyncFlush> = {
383
391
  projectName?: string;
384
392
  projectId?: string;
385
- apiUrl?: string;
393
+ appUrl?: string;
386
394
  apiKey?: string;
387
395
  orgName?: string;
388
396
  forceLogin?: boolean;
@@ -395,7 +403,7 @@ type InitLoggerOptions<IsAsyncFlush> = {
395
403
  * @param options.projectName The name of the project to log into. If unspecified, will default to the Global project.
396
404
  * @param options.projectId The id of the project to log into. This takes precedence over projectName if specified.
397
405
  * @param options.asyncFlush If true, will log asynchronously in the background. Otherwise, will log synchronously. (false by default, to support serverless environments)
398
- * @param options.apiUrl The URL of the Braintrust API. Defaults to https://www.braintrustdata.com.
406
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
399
407
  * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
400
408
  * key is specified, will prompt the user to login.
401
409
  * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
@@ -409,14 +417,14 @@ export declare function initLogger<IsAsyncFlush extends boolean = false>(options
409
417
  * https://www.braintrustdata.com/app/token. This method is called automatically by `init()`.
410
418
  *
411
419
  * @param options Options for configuring login().
412
- * @param options.apiUrl The URL of the Braintrust API. Defaults to https://www.braintrustdata.com.
420
+ * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
413
421
  * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
414
422
  * key is specified, will prompt the user to login.
415
423
  * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
416
424
  * @param options.forceLogin Login again, even if you have already logged in (by default, this function will exit quickly if you have already logged in)
417
425
  */
418
426
  export declare function login(options?: {
419
- apiUrl?: string;
427
+ appUrl?: string;
420
428
  apiKey?: string;
421
429
  orgName?: string;
422
430
  forceLogin?: boolean;
@@ -470,7 +478,11 @@ export declare function getSpanParentObject<IsAsyncFlush extends boolean>(option
470
478
  */
471
479
  export declare function traced<IsAsyncFlush extends boolean = false, R = void>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg & AsyncFlushArg<IsAsyncFlush>): PromiseUnless<IsAsyncFlush, R>;
472
480
  /**
473
- * Lower-level alternative to `traced`, which does not automatically end the span or mark it as current. See `traced` for full details.
481
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
482
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
483
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
484
+ *
485
+ * See `traced` for full details.
474
486
  */
475
487
  export declare function startSpan<IsAsyncFlush extends boolean = false>(args?: StartSpanArgs & AsyncFlushArg<IsAsyncFlush>): Span;
476
488
  /**
@@ -502,10 +514,10 @@ export declare class Experiment {
502
514
  * @param event The event to log.
503
515
  * @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
504
516
  * @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
505
- * @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
517
+ * @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
506
518
  * @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
507
519
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
508
- * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end", "caller_functionname", "caller_filename", "caller_lineno".
520
+ * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
509
521
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
510
522
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
511
523
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
@@ -520,7 +532,9 @@ export declare class Experiment {
520
532
  traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
521
533
  private lazyParentIds;
522
534
  /**
523
- * Lower-level alternative to `traced`, which does not automatically end the span or mark it as current.
535
+ * Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
536
+ * where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
537
+ * so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
524
538
  *
525
539
  * See `traced` for full details.
526
540
  */