braintrust 0.0.97 → 0.0.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3648,7 +3648,6 @@ var require_pluralize = __commonJS({
3648
3648
  var src_exports = {};
3649
3649
  __export(src_exports, {
3650
3650
  BaseExperiment: () => BaseExperiment,
3651
- Dataset: () => Dataset,
3652
3651
  Eval: () => Eval,
3653
3652
  Experiment: () => Experiment,
3654
3653
  Logger: () => Logger,
@@ -3664,6 +3663,7 @@ __export(src_exports, {
3664
3663
  getSpanParentObject: () => getSpanParentObject,
3665
3664
  init: () => init,
3666
3665
  initDataset: () => initDataset,
3666
+ initExperiment: () => initExperiment,
3667
3667
  initLogger: () => initLogger,
3668
3668
  log: () => log,
3669
3669
  login: () => login,
@@ -3695,7 +3695,7 @@ var DefaultAsyncLocalStorage = class {
3695
3695
  }
3696
3696
  };
3697
3697
  var iso = {
3698
- getRepoStatus: async (_settings) => void 0,
3698
+ getRepoInfo: async (_settings) => void 0,
3699
3699
  getPastNAncestors: async () => [],
3700
3700
  getEnv: (_name) => void 0,
3701
3701
  getCallerLocation: () => void 0,
@@ -7734,11 +7734,11 @@ function truncateToByteLimit(s, byteLimit = 65536) {
7734
7734
  const truncated = encoded.subarray(0, byteLimit);
7735
7735
  return new TextDecoder().decode(truncated);
7736
7736
  }
7737
- async function getRepoStatus(settings) {
7737
+ async function getRepoInfo(settings) {
7738
7738
  if (settings && settings.collect === "none") {
7739
7739
  return void 0;
7740
7740
  }
7741
- const repo = await repoStatus();
7741
+ const repo = await repoInfo();
7742
7742
  if (!repo || !settings || settings.collect === "all") {
7743
7743
  return repo;
7744
7744
  }
@@ -7748,7 +7748,7 @@ async function getRepoStatus(settings) {
7748
7748
  });
7749
7749
  return sanitized;
7750
7750
  }
7751
- async function repoStatus() {
7751
+ async function repoInfo() {
7752
7752
  const git = await currentRepo();
7753
7753
  if (git === null) {
7754
7754
  return void 0;
@@ -7890,9 +7890,10 @@ function v4(options, buf, offset) {
7890
7890
  }
7891
7891
  var v4_default = v4;
7892
7892
 
7893
- // ../core/js/dist/index.mjs
7893
+ // ../core/js/dist/main/index.mjs
7894
7894
  var TRANSACTION_ID_FIELD = "_xact_id";
7895
7895
  var IS_MERGE_FIELD = "_is_merge";
7896
+ var MERGE_PATHS_FIELD = "_merge_paths";
7896
7897
  var AUDIT_SOURCE_FIELD = "_audit_source";
7897
7898
  var AUDIT_METADATA_FIELD = "_audit_metadata";
7898
7899
  var VALID_SOURCES = ["app", "api", "external"];
@@ -7951,6 +7952,54 @@ function mergeRowBatch(rows) {
7951
7952
  out.push(...Object.values(rowGroups));
7952
7953
  return out;
7953
7954
  }
7955
+ var DEFAULT_IS_LEGACY_DATASET = true;
7956
+ function ensureDatasetRecord(r, legacy) {
7957
+ if (legacy) {
7958
+ return ensureLegacyDatasetRecord(r);
7959
+ } else {
7960
+ return ensureNewDatasetRecord(r);
7961
+ }
7962
+ }
7963
+ function ensureLegacyDatasetRecord(r) {
7964
+ if ("output" in r) {
7965
+ return r;
7966
+ }
7967
+ const row = {
7968
+ ...r,
7969
+ output: r.expected
7970
+ };
7971
+ delete row.expected;
7972
+ return row;
7973
+ }
7974
+ function ensureNewDatasetRecord(r) {
7975
+ if ("expected" in r) {
7976
+ return r;
7977
+ }
7978
+ const row = {
7979
+ ...r,
7980
+ expected: r.output
7981
+ };
7982
+ delete row.output;
7983
+ return row;
7984
+ }
7985
+ function makeLegacyEvent(e) {
7986
+ if (!("dataset_id" in e) || !("expected" in e)) {
7987
+ return e;
7988
+ }
7989
+ const event = {
7990
+ ...e,
7991
+ output: e.expected
7992
+ };
7993
+ delete event.expected;
7994
+ if (MERGE_PATHS_FIELD in event) {
7995
+ for (const path2 of event[MERGE_PATHS_FIELD] || []) {
7996
+ if (path2.length > 0 && path2[0] === "expected") {
7997
+ path2[0] = "output";
7998
+ }
7999
+ }
8000
+ }
8001
+ return event;
8002
+ }
7954
8003
  var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
7955
8004
  SpanTypeAttribute2["LLM"] = "llm";
7956
8005
  SpanTypeAttribute2["SCORE"] = "score";
@@ -8004,10 +8053,11 @@ function isEmpty(a) {
8004
8053
  return a === void 0 || a === null;
8005
8054
  }
8006
8055
  var LazyValue = class {
8056
+ callable;
8057
+ value = {
8058
+ hasComputed: false
8059
+ };
8007
8060
  constructor(callable) {
8008
- this.value = {
8009
- hasComputed: false
8010
- };
8011
8061
  this.callable = callable;
8012
8062
  }
8013
8063
  async get() {
@@ -8021,8 +8071,11 @@ var LazyValue = class {
8021
8071
 
8022
8072
  // src/logger.ts
8023
8073
  var NoopSpan = class {
8074
+ id;
8075
+ span_id;
8076
+ root_span_id;
8077
+ kind = "span";
8024
8078
  constructor() {
8025
- this.kind = "span";
8026
8079
  this.id = "";
8027
8080
  this.span_id = "";
8028
8081
  this.root_span_id = "";
@@ -8046,15 +8099,22 @@ var NoopSpan = class {
8046
8099
  };
8047
8100
  var NOOP_SPAN = new NoopSpan();
8048
8101
  var BraintrustState = class {
8102
+ id;
8103
+ currentExperiment;
8104
+ // Note: the value of IsAsyncFlush doesn't really matter here, since we
8105
+ // (safely) dynamically cast it whenever retrieving the logger.
8106
+ currentLogger;
8107
+ currentSpan;
8108
+ appUrl = null;
8109
+ loginToken = null;
8110
+ orgId = null;
8111
+ orgName = null;
8112
+ logUrl = null;
8113
+ loggedIn = false;
8114
+ gitMetadataSettings;
8115
+ _apiConn = null;
8116
+ _logConn = null;
8049
8117
  constructor() {
8050
- this.appUrl = null;
8051
- this.loginToken = null;
8052
- this.orgId = null;
8053
- this.orgName = null;
8054
- this.logUrl = null;
8055
- this.loggedIn = false;
8056
- this._apiConn = null;
8057
- this._logConn = null;
8058
8118
  this.id = v4_default();
8059
8119
  this.currentExperiment = void 0;
8060
8120
  this.currentLogger = void 0;
@@ -8101,6 +8161,9 @@ function _internalSetInitialState() {
8101
8161
  }
8102
8162
  var _internalGetGlobalState = () => _state;
8103
8163
  var FailedHTTPResponse = class extends Error {
8164
+ status;
8165
+ text;
8166
+ data;
8104
8167
  constructor(status, text, data = null) {
8105
8168
  super(`${status}: ${text}`);
8106
8169
  this.status = status;
@@ -8120,6 +8183,9 @@ async function checkResponse(resp) {
8120
8183
  }
8121
8184
  }
8122
8185
  var HTTPConnection = class _HTTPConnection {
8186
+ base_url;
8187
+ token;
8188
+ headers;
8123
8189
  constructor(base_url) {
8124
8190
  this.base_url = base_url;
8125
8191
  this.token = null;
@@ -8271,9 +8337,13 @@ function logFeedbackImpl(bgLogger, parentIds, {
8271
8337
  }
8272
8338
  }
8273
8339
  var Logger = class {
8340
+ lazyMetadata;
8341
+ logOptions;
8342
+ bgLogger;
8343
+ lastStartTime;
8344
+ // For type identification.
8345
+ kind = "logger";
8274
8346
  constructor(lazyMetadata, logOptions = {}) {
8275
- // For type identification.
8276
- this.kind = "logger";
8277
8347
  this.lazyMetadata = lazyMetadata;
8278
8348
  this.logOptions = logOptions;
8279
8349
  const logConn = new LazyValue(
@@ -8307,9 +8377,19 @@ var Logger = class {
8307
8377
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
8308
8378
  * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
8309
8379
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
8380
+ * @param options Additional logging options
8381
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the logger, set this to true.
8310
8382
  * :returns: The `id` of the logged event.
8311
8383
  */
8312
- log(event) {
8384
+ log(event, options) {
8385
+ if (!options?.allowLogConcurrentWithActiveSpan) {
8386
+ const checkCurrentSpan = currentSpan();
8387
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
8388
+ throw new Error(
8389
+ "Cannot run toplevel Logger.log method while there is an active span. To log to the span, use Span.log"
8390
+ );
8391
+ }
8392
+ }
8313
8393
  const span = this.startSpan({ startTime: this.lastStartTime, event });
8314
8394
  this.lastStartTime = span.end();
8315
8395
  const ret = span.id;
@@ -8368,6 +8448,7 @@ var Logger = class {
8368
8448
  startSpan(args) {
8369
8449
  const { name, ...argsRest } = args ?? {};
8370
8450
  return new SpanImpl({
8451
+ parentObject: this,
8371
8452
  parentIds: new LazyValue(() => this.lazyParentIds()),
8372
8453
  bgLogger: this.bgLogger,
8373
8454
  name: name ?? "root",
@@ -8416,16 +8497,20 @@ var MaxRequestSize = 6 * 1024 * 1024;
8416
8497
  function constructJsonArray(items) {
8417
8498
  return `[${items.join(",")}]`;
8418
8499
  }
8500
+ function constructLogs3Data(items) {
8501
+ return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
8502
+ }
8419
8503
  var DefaultBatchSize = 100;
8420
8504
  var NumRetries = 3;
8421
8505
  function now() {
8422
8506
  return (/* @__PURE__ */ new Date()).getTime();
8423
8507
  }
8424
8508
  var BackgroundLogger = class {
8509
+ logConn;
8510
+ items = [];
8511
+ active_flush = Promise.resolve([]);
8512
+ active_flush_resolved = true;
8425
8513
  constructor(logConn) {
8426
- this.items = [];
8427
- this.active_flush = Promise.resolve([]);
8428
- this.active_flush_resolved = true;
8429
8514
  this.logConn = logConn;
8430
8515
  isomorph_default.processOn("beforeExit", async () => {
8431
8516
  await this.flush();
@@ -8474,11 +8559,20 @@ var BackgroundLogger = class {
8474
8559
  }
8475
8560
  postPromises.push(
8476
8561
  (async () => {
8477
- const itemsS = constructJsonArray(items);
8562
+ const dataStr = constructLogs3Data(items);
8478
8563
  for (let i = 0; i < NumRetries; i++) {
8479
8564
  const startTime = now();
8480
8565
  try {
8481
- return (await (await this.logConn.get()).post_json("logs", itemsS)).map((res) => res.id);
8566
+ try {
8567
+ return (await (await this.logConn.get()).post_json("logs3", dataStr)).ids.map((res) => res.id);
8568
+ } catch (e) {
8569
+ const legacyDataS = constructJsonArray(
8570
+ items.map(
8571
+ (r) => JSON.stringify(makeLegacyEvent(JSON.parse(r)))
8572
+ )
8573
+ );
8574
+ return (await (await this.logConn.get()).post_json("logs", legacyDataS)).map((res) => res.id);
8575
+ }
8482
8576
  } catch (e) {
8483
8577
  const retryingText = i + 1 === NumRetries ? "" : " Retrying";
8484
8578
  const errMsg = (() => {
@@ -8489,7 +8583,7 @@ var BackgroundLogger = class {
8489
8583
  }
8490
8584
  })();
8491
8585
  console.warn(
8492
- `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${itemsS.length}. Error: ${errMsg}.${retryingText}`
8586
+ `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}. Error: ${errMsg}.${retryingText}`
8493
8587
  );
8494
8588
  }
8495
8589
  }
@@ -8517,8 +8611,21 @@ var BackgroundLogger = class {
8517
8611
  }
8518
8612
  }
8519
8613
  };
8520
- function init(project, options = {}) {
8614
+ function init(projectOrOptions, optionalOptions) {
8615
+ const options = (() => {
8616
+ if (typeof projectOrOptions === "string") {
8617
+ return { ...optionalOptions, project: projectOrOptions };
8618
+ } else {
8619
+ if (optionalOptions !== void 0) {
8620
+ throw new Error(
8621
+ "Cannot specify options struct as both parameters. Must call either init(project, options) or init(options)."
8622
+ );
8623
+ }
8624
+ return projectOrOptions;
8625
+ }
8626
+ })();
8521
8627
  const {
8628
+ project,
8522
8629
  experiment,
8523
8630
  description,
8524
8631
  dataset,
@@ -8530,47 +8637,55 @@ function init(project, options = {}) {
8530
8637
  apiKey,
8531
8638
  orgName,
8532
8639
  metadata,
8533
- gitMetadataSettings
8534
- } = options || {};
8640
+ gitMetadataSettings,
8641
+ projectId,
8642
+ baseExperimentId,
8643
+ repoInfo: repoInfo2
8644
+ } = options;
8535
8645
  if (open && update) {
8536
8646
  throw new Error("Cannot open and update an experiment at the same time");
8537
8647
  }
8538
8648
  if (open || update) {
8539
8649
  if (isEmpty(experiment)) {
8540
8650
  const action = open ? "open" : "update";
8541
- throw new Error(`Cannot ${action} an experiment without specifying its name`);
8651
+ throw new Error(
8652
+ `Cannot ${action} an experiment without specifying its name`
8653
+ );
8542
8654
  }
8543
- const lazyMetadata2 = new LazyValue(async () => {
8544
- await login({
8545
- orgName,
8546
- apiKey,
8547
- appUrl
8548
- });
8549
- const args = {
8550
- project_name: project,
8551
- org_name: _state.orgName,
8552
- experiment_name: experiment
8553
- };
8554
- const response = await _state.apiConn().post_json("api/experiment/get", args);
8555
- if (response.length === 0) {
8556
- throw new Error(
8557
- `Experiment ${experiment} not found in project ${project}.`
8558
- );
8559
- }
8560
- const info = response[0];
8561
- return {
8562
- project: {
8563
- id: info.project_id,
8564
- name: "",
8565
- fullInfo: {}
8566
- },
8567
- experiment: {
8568
- id: info.id,
8569
- name: info.name,
8570
- fullInfo: info
8655
+ const lazyMetadata2 = new LazyValue(
8656
+ async () => {
8657
+ await login({
8658
+ orgName,
8659
+ apiKey,
8660
+ appUrl
8661
+ });
8662
+ const args = {
8663
+ project_name: project,
8664
+ project_id: projectId,
8665
+ org_name: _state.orgName,
8666
+ experiment_name: experiment
8667
+ };
8668
+ const response = await _state.apiConn().post_json("api/experiment/get", args);
8669
+ if (response.length === 0) {
8670
+ throw new Error(
8671
+ `Experiment ${experiment} not found in project ${projectId ?? project}.`
8672
+ );
8571
8673
  }
8572
- };
8573
- });
8674
+ const info = response[0];
8675
+ return {
8676
+ project: {
8677
+ id: info.project_id,
8678
+ name: "",
8679
+ fullInfo: {}
8680
+ },
8681
+ experiment: {
8682
+ id: info.id,
8683
+ name: info.name,
8684
+ fullInfo: info
8685
+ }
8686
+ };
8687
+ }
8688
+ );
8574
8689
  if (open) {
8575
8690
  return new ReadonlyExperiment(
8576
8691
  lazyMetadata2
@@ -8592,6 +8707,7 @@ function init(project, options = {}) {
8592
8707
  });
8593
8708
  const args = {
8594
8709
  project_name: project,
8710
+ project_id: projectId,
8595
8711
  org_id: _state.orgId
8596
8712
  };
8597
8713
  if (experiment) {
@@ -8600,22 +8716,29 @@ function init(project, options = {}) {
8600
8716
  if (description) {
8601
8717
  args["description"] = description;
8602
8718
  }
8603
- let mergedGitMetadataSettings = {
8604
- ..._state.gitMetadataSettings || {
8605
- collect: "all"
8719
+ const repoInfoArg = await (async () => {
8720
+ if (repoInfo2) {
8721
+ return repoInfo2;
8606
8722
  }
8607
- };
8608
- if (gitMetadataSettings) {
8609
- mergedGitMetadataSettings = mergeGitMetadataSettings(
8610
- mergedGitMetadataSettings,
8611
- gitMetadataSettings
8612
- );
8613
- }
8614
- const repoStatus2 = await isomorph_default.getRepoStatus(gitMetadataSettings);
8615
- if (repoStatus2) {
8616
- args["repo_info"] = repoStatus2;
8723
+ let mergedGitMetadataSettings = {
8724
+ ..._state.gitMetadataSettings || {
8725
+ collect: "all"
8726
+ }
8727
+ };
8728
+ if (gitMetadataSettings) {
8729
+ mergedGitMetadataSettings = mergeGitMetadataSettings(
8730
+ mergedGitMetadataSettings,
8731
+ gitMetadataSettings
8732
+ );
8733
+ }
8734
+ return await isomorph_default.getRepoInfo(mergedGitMetadataSettings);
8735
+ })();
8736
+ if (repoInfoArg) {
8737
+ args["repo_info"] = repoInfoArg;
8617
8738
  }
8618
- if (baseExperiment) {
8739
+ if (baseExperimentId) {
8740
+ args["base_exp_id"] = baseExperimentId;
8741
+ } else if (baseExperiment) {
8619
8742
  args["base_experiment"] = baseExperiment;
8620
8743
  } else {
8621
8744
  args["ancestor_commits"] = await isomorph_default.getPastNAncestors();
@@ -8666,6 +8789,21 @@ function init(project, options = {}) {
8666
8789
  }
8667
8790
  return ret;
8668
8791
  }
8792
+ function initExperiment(projectOrOptions, optionalOptions) {
8793
+ const options = (() => {
8794
+ if (typeof projectOrOptions === "string") {
8795
+ return { ...optionalOptions, project: projectOrOptions };
8796
+ } else {
8797
+ if (optionalOptions !== void 0) {
8798
+ throw new Error(
8799
+ "Cannot specify options struct as both parameters. Must call either init(project, options) or init(options)."
8800
+ );
8801
+ }
8802
+ return projectOrOptions;
8803
+ }
8804
+ })();
8805
+ return init(options);
8806
+ }
8669
8807
  function withExperiment(project, callback, options = {}) {
8670
8808
  console.warn(
8671
8809
  "withExperiment is deprecated and will be removed in a future version of braintrust. Simply create the experiment with `init`."
@@ -8680,8 +8818,30 @@ function withLogger(callback, options = {}) {
8680
8818
  const logger = initLogger(options);
8681
8819
  return callback(logger);
8682
8820
  }
8683
- function initDataset(project, options = {}) {
8684
- const { dataset, description, version, appUrl, apiKey, orgName } = options || {};
8821
+ function initDataset(projectOrOptions, optionalOptions) {
8822
+ const options = (() => {
8823
+ if (typeof projectOrOptions === "string") {
8824
+ return { ...optionalOptions, project: projectOrOptions };
8825
+ } else {
8826
+ if (optionalOptions !== void 0) {
8827
+ throw new Error(
8828
+ "Cannot specify options struct as both parameters. Must call either initDataset(project, options) or initDataset(options)."
8829
+ );
8830
+ }
8831
+ return projectOrOptions;
8832
+ }
8833
+ })();
8834
+ const {
8835
+ project,
8836
+ dataset,
8837
+ description,
8838
+ version,
8839
+ appUrl,
8840
+ apiKey,
8841
+ orgName,
8842
+ projectId,
8843
+ useOutput: legacy
8844
+ } = options;
8685
8845
  const lazyMetadata = new LazyValue(
8686
8846
  async () => {
8687
8847
  await login({
@@ -8692,6 +8852,7 @@ function initDataset(project, options = {}) {
8692
8852
  const args = {
8693
8853
  org_id: _state.orgId,
8694
8854
  project_name: project,
8855
+ project_id: projectId,
8695
8856
  dataset_name: dataset,
8696
8857
  description
8697
8858
  };
@@ -8710,7 +8871,7 @@ function initDataset(project, options = {}) {
8710
8871
  };
8711
8872
  }
8712
8873
  );
8713
- return new Dataset(lazyMetadata, version);
8874
+ return new Dataset(lazyMetadata, version, legacy);
8714
8875
  }
8715
8876
  function withDataset(project, callback, options = {}) {
8716
8877
  console.warn(
@@ -8780,15 +8941,30 @@ function initLogger(options = {}) {
8780
8941
  return ret;
8781
8942
  }
8782
8943
  async function login(options = {}) {
8944
+ let { forceLogin = false } = options || {};
8945
+ if (_state.loggedIn && !forceLogin) {
8946
+ let checkUpdatedParam2 = function(varname, arg, orig) {
8947
+ if (!isEmpty(arg) && !isEmpty(orig) && arg !== orig) {
8948
+ throw new Error(
8949
+ `Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
8950
+ );
8951
+ }
8952
+ };
8953
+ var checkUpdatedParam = checkUpdatedParam2;
8954
+ checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
8955
+ checkUpdatedParam2(
8956
+ "apiKey",
8957
+ options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
8958
+ _state.loginToken
8959
+ );
8960
+ checkUpdatedParam2("orgName", options.orgName, _state.orgName);
8961
+ return;
8962
+ }
8783
8963
  const {
8784
8964
  appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
8785
8965
  apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
8786
8966
  orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
8787
8967
  } = options || {};
8788
- let { forceLogin = false } = options || {};
8789
- if (_state.loggedIn && !forceLogin) {
8790
- return;
8791
- }
8792
8968
  _state.resetLoginInfo();
8793
8969
  _state.appUrl = appUrl;
8794
8970
  let conn = null;
@@ -8997,11 +9173,12 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
8997
9173
  return event;
8998
9174
  }
8999
9175
  var ObjectFetcher = class {
9000
- constructor(objectType, pinnedVersion) {
9176
+ constructor(objectType, pinnedVersion, mutateRecord) {
9001
9177
  this.objectType = objectType;
9002
9178
  this.pinnedVersion = pinnedVersion;
9003
- this._fetchedData = void 0;
9179
+ this.mutateRecord = mutateRecord;
9004
9180
  }
9181
+ _fetchedData = void 0;
9005
9182
  get id() {
9006
9183
  throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
9007
9184
  }
@@ -9020,12 +9197,24 @@ var ObjectFetcher = class {
9020
9197
  async fetchedData() {
9021
9198
  if (this._fetchedData === void 0) {
9022
9199
  const state = await this.getState();
9023
- const resp = await state.logConn().get(`object/${this.objectType}`, {
9024
- id: await this.id,
9025
- fmt: "json2",
9026
- version: this.pinnedVersion
9027
- });
9028
- this._fetchedData = await resp.json();
9200
+ let data = void 0;
9201
+ try {
9202
+ const resp = await state.logConn().get(`object3/${this.objectType}`, {
9203
+ id: await this.id,
9204
+ fmt: "json2",
9205
+ version: this.pinnedVersion,
9206
+ api_version: "2"
9207
+ });
9208
+ data = await resp.json();
9209
+ } catch (e) {
9210
+ const resp = await state.logConn().get(`object/${this.objectType}`, {
9211
+ id: await this.id,
9212
+ fmt: "json2",
9213
+ version: this.pinnedVersion
9214
+ });
9215
+ data = await resp.json();
9216
+ }
9217
+ this._fetchedData = this.mutateRecord ? data?.map(this.mutateRecord) : data;
9029
9218
  }
9030
9219
  return this._fetchedData || [];
9031
9220
  }
@@ -9049,10 +9238,14 @@ var ObjectFetcher = class {
9049
9238
  }
9050
9239
  };
9051
9240
  var Experiment = class extends ObjectFetcher {
9241
+ lazyMetadata;
9242
+ dataset;
9243
+ bgLogger;
9244
+ lastStartTime;
9245
+ // For type identification.
9246
+ kind = "experiment";
9052
9247
  constructor(lazyMetadata, dataset) {
9053
9248
  super("experiment", void 0);
9054
- // For type identification.
9055
- this.kind = "experiment";
9056
9249
  this.lazyMetadata = lazyMetadata;
9057
9250
  this.dataset = dataset;
9058
9251
  const logConn = new LazyValue(
@@ -9093,9 +9286,19 @@ var Experiment = class extends ObjectFetcher {
9093
9286
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
9094
9287
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
9095
9288
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
9289
+ * @param options Additional logging options
9290
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
9096
9291
  * :returns: The `id` of the logged event.
9097
9292
  */
9098
- log(event) {
9293
+ log(event, options) {
9294
+ if (!options?.allowLogConcurrentWithActiveSpan) {
9295
+ const checkCurrentSpan = currentSpan();
9296
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
9297
+ throw new Error(
9298
+ "Cannot run toplevel Experiment.log method while there is an active span. To log to the span, use Span.log"
9299
+ );
9300
+ }
9301
+ }
9099
9302
  event = validateAndSanitizeExperimentLogFullArgs(event, !!this.dataset);
9100
9303
  const span = this.startSpan({ startTime: this.lastStartTime, event });
9101
9304
  this.lastStartTime = span.end();
@@ -9137,6 +9340,7 @@ var Experiment = class extends ObjectFetcher {
9137
9340
  startSpan(args) {
9138
9341
  const { name, ...argsRest } = args ?? {};
9139
9342
  return new SpanImpl({
9343
+ parentObject: this,
9140
9344
  parentIds: new LazyValue(() => this.lazyParentIds()),
9141
9345
  bgLogger: this.bgLogger,
9142
9346
  name: name ?? "root",
@@ -9274,20 +9478,38 @@ var ReadonlyExperiment = class extends ObjectFetcher {
9274
9478
  if (record.root_span_id !== record.span_id) {
9275
9479
  continue;
9276
9480
  }
9277
- const { output, expected } = record;
9278
- yield {
9279
- input: record.input,
9280
- expected: expected ?? output
9281
- };
9481
+ const { output, expected: expectedRecord } = record;
9482
+ const expected = expectedRecord ?? output;
9483
+ if (isEmpty(expected)) {
9484
+ yield {
9485
+ input: record.input
9486
+ };
9487
+ } else {
9488
+ yield {
9489
+ input: record.input,
9490
+ expected
9491
+ };
9492
+ }
9282
9493
  }
9283
9494
  }
9284
9495
  };
9285
9496
  var executionCounter = 0;
9286
9497
  var SpanImpl = class _SpanImpl {
9498
+ bgLogger;
9499
+ // `internalData` contains fields that are not part of the "user-sanitized"
9500
+ // set of fields which we want to log in just one of the span rows.
9501
+ internalData;
9502
+ isMerge;
9503
+ loggedEndTime;
9504
+ // For internal use only.
9505
+ parentObject;
9506
+ // These fields are logged to every span row.
9507
+ parentIds;
9508
+ rowIds;
9509
+ kind = "span";
9287
9510
  // root_experiment should only be specified for a root span. parent_span
9288
9511
  // should only be specified for non-root spans.
9289
9512
  constructor(args) {
9290
- this.kind = "span";
9291
9513
  this.loggedEndTime = void 0;
9292
9514
  this.bgLogger = args.bgLogger;
9293
9515
  const callerLocation = isomorph_default.getCallerLocation();
@@ -9315,6 +9537,7 @@ var SpanImpl = class _SpanImpl {
9315
9537
  },
9316
9538
  created: (/* @__PURE__ */ new Date()).toISOString()
9317
9539
  };
9540
+ this.parentObject = args.parentObject;
9318
9541
  this.parentIds = args.parentIds;
9319
9542
  const id = args.event?.id ?? v4_default();
9320
9543
  const span_id = v4_default();
@@ -9386,6 +9609,7 @@ var SpanImpl = class _SpanImpl {
9386
9609
  }
9387
9610
  startSpan(args) {
9388
9611
  return new _SpanImpl({
9612
+ parentObject: this.parentObject,
9389
9613
  parentIds: this.parentIds,
9390
9614
  bgLogger: this.bgLogger,
9391
9615
  parentSpanInfo: {
@@ -9411,8 +9635,20 @@ var SpanImpl = class _SpanImpl {
9411
9635
  }
9412
9636
  };
9413
9637
  var Dataset = class extends ObjectFetcher {
9414
- constructor(lazyMetadata, pinnedVersion) {
9415
- super("dataset", pinnedVersion);
9638
+ lazyMetadata;
9639
+ bgLogger;
9640
+ constructor(lazyMetadata, pinnedVersion, legacy) {
9641
+ const isLegacyDataset = legacy ?? DEFAULT_IS_LEGACY_DATASET;
9642
+ if (isLegacyDataset) {
9643
+ console.warn(
9644
+ `Records will be fetched from this dataset in the legacy format, with the "expected" field renamed to "output". Please update your code to use "expected", and use \`braintrust.initDataset()\` with \`{ useOutput: false }\`, which will become the default in a future version of Braintrust.`
9645
+ );
9646
+ }
9647
+ super(
9648
+ "dataset",
9649
+ pinnedVersion,
9650
+ (r) => ensureDatasetRecord(r, isLegacyDataset)
9651
+ );
9416
9652
  this.lazyMetadata = lazyMetadata;
9417
9653
  const logConn = new LazyValue(
9418
9654
  () => this.getState().then((state) => state.logConn())
@@ -9444,19 +9680,21 @@ var Dataset = class extends ObjectFetcher {
9444
9680
  *
9445
9681
  * @param event The event to log.
9446
9682
  * @param event.input The argument that uniquely define an input case (an arbitrary, JSON serializable object).
9447
- * @param event.output The output of your application, including post-processing (an arbitrary, JSON serializable object).
9683
+ * @param event.expected The output of your application, including post-processing (an arbitrary, JSON serializable object).
9448
9684
  * @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
9449
9685
  * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
9450
9686
  * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
9451
9687
  * JSON-serializable type, but its keys must be strings.
9452
9688
  * @param event.id (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
9689
+ * @param event.output: (Deprecated) The output of your application. Use `expected` instead.
9453
9690
  * @returns The `id` of the logged record.
9454
9691
  */
9455
9692
  insert({
9456
9693
  input,
9457
- output,
9694
+ expected,
9458
9695
  metadata,
9459
- id
9696
+ id,
9697
+ output
9460
9698
  }) {
9461
9699
  if (metadata !== void 0) {
9462
9700
  for (const key of Object.keys(metadata)) {
@@ -9465,11 +9703,16 @@ var Dataset = class extends ObjectFetcher {
9465
9703
  }
9466
9704
  }
9467
9705
  }
9706
+ if (expected && output) {
9707
+ throw new Error(
9708
+ "Only one of expected or output (deprecated) can be specified. Prefer expected."
9709
+ );
9710
+ }
9468
9711
  const rowId = id || v4_default();
9469
9712
  const args = new LazyValue(async () => ({
9470
9713
  id: rowId,
9471
- inputs: input,
9472
- output,
9714
+ input,
9715
+ expected: expected === void 0 ? output : expected,
9473
9716
  project_id: (await this.project).id,
9474
9717
  dataset_id: await this.id,
9475
9718
  created: (/* @__PURE__ */ new Date()).toISOString(),
@@ -9540,7 +9783,7 @@ var Dataset = class extends ObjectFetcher {
9540
9783
 
9541
9784
  // src/node.ts
9542
9785
  function configureNode() {
9543
- isomorph_default.getRepoStatus = getRepoStatus;
9786
+ isomorph_default.getRepoInfo = getRepoInfo;
9544
9787
  isomorph_default.getPastNAncestors = getPastNAncestors;
9545
9788
  isomorph_default.getEnv = (name) => process.env[name];
9546
9789
  isomorph_default.getCallerLocation = getCallerLocation;
@@ -9565,8 +9808,9 @@ function fitNameToSpaces(name, length) {
9565
9808
  return padded.substring(0, length - 3) + "...";
9566
9809
  }
9567
9810
  var BarProgressReporter = class {
9811
+ multiBar;
9812
+ bars = {};
9568
9813
  constructor() {
9569
- this.bars = {};
9570
9814
  this.multiBar = new cliProgress.MultiBar(
9571
9815
  {
9572
9816
  clearOnComplete: false,
@@ -9602,7 +9846,7 @@ function makeEvalName(projectName, experimentName) {
9602
9846
  }
9603
9847
  return out;
9604
9848
  }
9605
- function initExperiment(projectName, options = {}) {
9849
+ function initExperiment2(projectName, options = {}) {
9606
9850
  return init(projectName, {
9607
9851
  ...options,
9608
9852
  setCurrent: false
@@ -9610,9 +9854,9 @@ function initExperiment(projectName, options = {}) {
9610
9854
  }
9611
9855
  globalThis._evals = {};
9612
9856
  async function Eval(name, evaluator) {
9613
- const evalName = makeEvalName(name, evaluator.experimentName);
9857
+ let evalName = makeEvalName(name, evaluator.experimentName);
9614
9858
  if (_evals[evalName]) {
9615
- throw new Error(`Evaluator ${evalName} already exists`);
9859
+ evalName = `${evalName}_${Object.keys(_evals).length}`;
9616
9860
  }
9617
9861
  if (globalThis._lazy_load) {
9618
9862
  _evals[evalName] = { evalName, projectName: name, ...evaluator };
@@ -9628,7 +9872,7 @@ async function Eval(name, evaluator) {
9628
9872
  }
9629
9873
  const progressReporter = new BarProgressReporter();
9630
9874
  try {
9631
- const experiment = initExperiment(name, {
9875
+ const experiment = initExperiment2(name, {
9632
9876
  experiment: evaluator.experimentName,
9633
9877
  metadata: evaluator.metadata,
9634
9878
  isPublic: evaluator.isPublic
@@ -9668,6 +9912,9 @@ function evaluateFilter(object, filter) {
9668
9912
  }
9669
9913
  return pattern.test(serializeJSONWithPlainString(key));
9670
9914
  }
9915
+ function scorerName(scorer, scorer_idx) {
9916
+ return scorer.name || `scorer_${scorer_idx}`;
9917
+ }
9671
9918
  async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9672
9919
  if (typeof evaluator.data === "string") {
9673
9920
  throw new Error("Unimplemented: string data paths");
@@ -9690,7 +9937,7 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9690
9937
  }
9691
9938
  name = baseExperiment.name;
9692
9939
  }
9693
- dataResult = initExperiment(evaluator.projectName, {
9940
+ dataResult = initExperiment2(evaluator.projectName, {
9694
9941
  experiment: name,
9695
9942
  open: true
9696
9943
  }).asDataset();
@@ -9711,11 +9958,13 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9711
9958
  );
9712
9959
  progressReporter.start(evaluator.evalName, data.length);
9713
9960
  const evals = data.map(async (datum) => {
9714
- let metadata = { ...datum.metadata };
9715
- let output = void 0;
9716
- let error2 = void 0;
9717
- let scores = {};
9718
9961
  const callback = async (rootSpan) => {
9962
+ let metadata = {
9963
+ ..."metadata" in datum ? datum.metadata : {}
9964
+ };
9965
+ let output = void 0;
9966
+ let error2 = void 0;
9967
+ let scores = {};
9719
9968
  try {
9720
9969
  const meta = (o) => metadata = { ...metadata, ...o };
9721
9970
  await rootSpan.traced(
@@ -9732,42 +9981,55 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9732
9981
  );
9733
9982
  rootSpan.log({ output });
9734
9983
  const scoringArgs = { ...datum, metadata, output };
9984
+ const scorerNames = evaluator.scores.map(scorerName);
9735
9985
  const scoreResults = await Promise.all(
9736
9986
  evaluator.scores.map(async (score, score_idx) => {
9737
- return rootSpan.traced(
9738
- async (span) => {
9739
- const scoreResult = score(scoringArgs);
9740
- const result = scoreResult instanceof Promise ? await scoreResult : scoreResult;
9741
- const {
9742
- metadata: resultMetadata,
9743
- name: _,
9744
- ...resultRest
9745
- } = result;
9746
- span.log({
9747
- output: resultRest,
9748
- metadata: resultMetadata
9749
- });
9750
- return result;
9751
- },
9752
- {
9753
- name: score.name || `scorer_${score_idx}`,
9754
- spanAttributes: {
9755
- type: SpanTypeAttribute.SCORE
9987
+ try {
9988
+ const result = await rootSpan.traced(
9989
+ async (span) => {
9990
+ const scoreResult = score(scoringArgs);
9991
+ const result2 = scoreResult instanceof Promise ? await scoreResult : scoreResult;
9992
+ const {
9993
+ metadata: resultMetadata,
9994
+ name: _,
9995
+ ...resultRest
9996
+ } = result2;
9997
+ span.log({
9998
+ output: resultRest,
9999
+ metadata: resultMetadata
10000
+ });
10001
+ return result2;
9756
10002
  },
9757
- event: { input: scoringArgs }
9758
- }
9759
- );
10003
+ {
10004
+ name: scorerNames[score_idx],
10005
+ spanAttributes: {
10006
+ type: SpanTypeAttribute.SCORE
10007
+ },
10008
+ event: { input: scoringArgs }
10009
+ }
10010
+ );
10011
+ return { kind: "score", value: result };
10012
+ } catch (e) {
10013
+ return { kind: "error", value: e };
10014
+ }
9760
10015
  })
9761
10016
  );
10017
+ const passingScorersAndResults = [];
10018
+ const failingScorersAndResults = [];
10019
+ scoreResults.forEach((result, i) => {
10020
+ const name = scorerNames[i];
10021
+ if (result.kind === "score") {
10022
+ passingScorersAndResults.push({ name, score: result.value });
10023
+ } else {
10024
+ failingScorersAndResults.push({ name, error: result.value });
10025
+ }
10026
+ });
9762
10027
  const scoreMetadata = {};
9763
- for (const scoreResult of scoreResults) {
10028
+ for (const { score: scoreResult } of passingScorersAndResults) {
9764
10029
  scores[scoreResult.name] = scoreResult.score;
9765
10030
  const metadata2 = {
9766
10031
  ...scoreResult.metadata
9767
10032
  };
9768
- if (scoreResult.error !== void 0) {
9769
- metadata2.error = scoreResult.error;
9770
- }
9771
10033
  if (Object.keys(metadata2).length > 0) {
9772
10034
  scoreMetadata[scoreResult.name] = metadata2;
9773
10035
  }
@@ -9776,6 +10038,21 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9776
10038
  meta({ scores: scoreMetadata });
9777
10039
  }
9778
10040
  rootSpan.log({ scores, metadata });
10041
+ if (failingScorersAndResults.length) {
10042
+ const scorerErrors = Object.fromEntries(
10043
+ failingScorersAndResults.map(({ name, error: error3 }) => [
10044
+ name,
10045
+ error3 instanceof Error ? error3.stack : `${error3}`
10046
+ ])
10047
+ );
10048
+ metadata["scorer_errors"] = scorerErrors;
10049
+ const names = Object.keys(scorerErrors).join(", ");
10050
+ const errors = failingScorersAndResults.map((item) => item.error);
10051
+ throw new AggregateError(
10052
+ errors,
10053
+ `Found exceptions for the following scorers: ${names}`
10054
+ );
10055
+ }
9779
10056
  } catch (e) {
9780
10057
  error2 = e;
9781
10058
  } finally {
@@ -9798,7 +10075,7 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9798
10075
  },
9799
10076
  event: {
9800
10077
  input: datum.input,
9801
- expected: datum.expected
10078
+ expected: "expected" in datum ? datum.expected : void 0
9802
10079
  }
9803
10080
  });
9804
10081
  }
@@ -9854,7 +10131,8 @@ function reportEvaluatorResult(evaluatorName, evaluatorResult, {
9854
10131
  if (!verbose && !jsonl) {
9855
10132
  console.error(warning("Add --verbose to see full stack traces."));
9856
10133
  }
9857
- } else if (summary) {
10134
+ }
10135
+ if (summary) {
9858
10136
  console.log(jsonl ? JSON.stringify(summary) : summary);
9859
10137
  } else {
9860
10138
  const scoresByName = {};
@@ -10082,6 +10360,9 @@ function wrapEmbeddings(create) {
10082
10360
  };
10083
10361
  }
10084
10362
  var WrapperStream = class {
10363
+ span;
10364
+ iter;
10365
+ startTime;
10085
10366
  constructor(span, startTime, iter) {
10086
10367
  this.span = span;
10087
10368
  this.iter = iter;
@@ -10118,7 +10399,6 @@ configureNode();
10118
10399
  // Annotate the CommonJS export names for ESM import in node:
10119
10400
  0 && (module.exports = {
10120
10401
  BaseExperiment,
10121
- Dataset,
10122
10402
  Eval,
10123
10403
  Experiment,
10124
10404
  Logger,
@@ -10134,6 +10414,7 @@ configureNode();
10134
10414
  getSpanParentObject,
10135
10415
  init,
10136
10416
  initDataset,
10417
+ initExperiment,
10137
10418
  initLogger,
10138
10419
  log,
10139
10420
  login,