braintrust 0.0.98 → 0.0.100

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1973,41 +1973,35 @@ var require_ansi_styles = __commonJS({
1973
1973
  }
1974
1974
  });
1975
1975
 
1976
- // ../../node_modules/.pnpm/has-flag@4.0.0/node_modules/has-flag/index.js
1976
+ // ../../node_modules/supports-color/node_modules/has-flag/index.js
1977
1977
  var require_has_flag2 = __commonJS({
1978
- "../../node_modules/.pnpm/has-flag@4.0.0/node_modules/has-flag/index.js"(exports, module2) {
1978
+ "../../node_modules/supports-color/node_modules/has-flag/index.js"(exports, module2) {
1979
1979
  "use strict";
1980
- module2.exports = (flag, argv = process.argv) => {
1980
+ module2.exports = (flag, argv) => {
1981
+ argv = argv || process.argv;
1981
1982
  const prefix = flag.startsWith("-") ? "" : flag.length === 1 ? "-" : "--";
1982
- const position = argv.indexOf(prefix + flag);
1983
- const terminatorPosition = argv.indexOf("--");
1984
- return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
1983
+ const pos = argv.indexOf(prefix + flag);
1984
+ const terminatorPos = argv.indexOf("--");
1985
+ return pos !== -1 && (terminatorPos === -1 ? true : pos < terminatorPos);
1985
1986
  };
1986
1987
  }
1987
1988
  });
1988
1989
 
1989
- // ../../node_modules/.pnpm/supports-color@7.2.0/node_modules/supports-color/index.js
1990
+ // ../../node_modules/supports-color/index.js
1990
1991
  var require_supports_color2 = __commonJS({
1991
- "../../node_modules/.pnpm/supports-color@7.2.0/node_modules/supports-color/index.js"(exports, module2) {
1992
+ "../../node_modules/supports-color/index.js"(exports, module2) {
1992
1993
  "use strict";
1993
1994
  var os = require("os");
1994
- var tty = require("tty");
1995
1995
  var hasFlag = require_has_flag2();
1996
- var { env } = process;
1996
+ var env = process.env;
1997
1997
  var forceColor;
1998
- if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false") || hasFlag("color=never")) {
1999
- forceColor = 0;
1998
+ if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false")) {
1999
+ forceColor = false;
2000
2000
  } else if (hasFlag("color") || hasFlag("colors") || hasFlag("color=true") || hasFlag("color=always")) {
2001
- forceColor = 1;
2001
+ forceColor = true;
2002
2002
  }
2003
2003
  if ("FORCE_COLOR" in env) {
2004
- if (env.FORCE_COLOR === "true") {
2005
- forceColor = 1;
2006
- } else if (env.FORCE_COLOR === "false") {
2007
- forceColor = 0;
2008
- } else {
2009
- forceColor = env.FORCE_COLOR.length === 0 ? 1 : Math.min(parseInt(env.FORCE_COLOR, 10), 3);
2010
- }
2004
+ forceColor = env.FORCE_COLOR.length === 0 || parseInt(env.FORCE_COLOR, 10) !== 0;
2011
2005
  }
2012
2006
  function translateLevel(level) {
2013
2007
  if (level === 0) {
@@ -2020,8 +2014,8 @@ var require_supports_color2 = __commonJS({
2020
2014
  has16m: level >= 3
2021
2015
  };
2022
2016
  }
2023
- function supportsColor(haveStream, streamIsTTY) {
2024
- if (forceColor === 0) {
2017
+ function supportsColor(stream) {
2018
+ if (forceColor === false) {
2025
2019
  return 0;
2026
2020
  }
2027
2021
  if (hasFlag("color=16m") || hasFlag("color=full") || hasFlag("color=truecolor")) {
@@ -2030,22 +2024,19 @@ var require_supports_color2 = __commonJS({
2030
2024
  if (hasFlag("color=256")) {
2031
2025
  return 2;
2032
2026
  }
2033
- if (haveStream && !streamIsTTY && forceColor === void 0) {
2027
+ if (stream && !stream.isTTY && forceColor !== true) {
2034
2028
  return 0;
2035
2029
  }
2036
- const min = forceColor || 0;
2037
- if (env.TERM === "dumb") {
2038
- return min;
2039
- }
2030
+ const min = forceColor ? 1 : 0;
2040
2031
  if (process.platform === "win32") {
2041
2032
  const osRelease = os.release().split(".");
2042
- if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
2033
+ if (Number(process.versions.node.split(".")[0]) >= 8 && Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
2043
2034
  return Number(osRelease[2]) >= 14931 ? 3 : 2;
2044
2035
  }
2045
2036
  return 1;
2046
2037
  }
2047
2038
  if ("CI" in env) {
2048
- if (["TRAVIS", "CIRCLECI", "APPVEYOR", "GITLAB_CI", "GITHUB_ACTIONS", "BUILDKITE"].some((sign) => sign in env) || env.CI_NAME === "codeship") {
2039
+ if (["TRAVIS", "CIRCLECI", "APPVEYOR", "GITLAB_CI"].some((sign) => sign in env) || env.CI_NAME === "codeship") {
2049
2040
  return 1;
2050
2041
  }
2051
2042
  return min;
@@ -2074,16 +2065,19 @@ var require_supports_color2 = __commonJS({
2074
2065
  if ("COLORTERM" in env) {
2075
2066
  return 1;
2076
2067
  }
2068
+ if (env.TERM === "dumb") {
2069
+ return min;
2070
+ }
2077
2071
  return min;
2078
2072
  }
2079
2073
  function getSupportLevel(stream) {
2080
- const level = supportsColor(stream, stream && stream.isTTY);
2074
+ const level = supportsColor(stream);
2081
2075
  return translateLevel(level);
2082
2076
  }
2083
2077
  module2.exports = {
2084
2078
  supportsColor: getSupportLevel,
2085
- stdout: translateLevel(supportsColor(true, tty.isatty(1))),
2086
- stderr: translateLevel(supportsColor(true, tty.isatty(2)))
2079
+ stdout: getSupportLevel(process.stdout),
2080
+ stderr: getSupportLevel(process.stderr)
2087
2081
  };
2088
2082
  }
2089
2083
  });
@@ -8053,10 +8047,11 @@ function isEmpty(a) {
8053
8047
  return a === void 0 || a === null;
8054
8048
  }
8055
8049
  var LazyValue = class {
8050
+ callable;
8051
+ value = {
8052
+ hasComputed: false
8053
+ };
8056
8054
  constructor(callable) {
8057
- this.value = {
8058
- hasComputed: false
8059
- };
8060
8055
  this.callable = callable;
8061
8056
  }
8062
8057
  async get() {
@@ -8070,8 +8065,11 @@ var LazyValue = class {
8070
8065
 
8071
8066
  // src/logger.ts
8072
8067
  var NoopSpan = class {
8068
+ id;
8069
+ span_id;
8070
+ root_span_id;
8071
+ kind = "span";
8073
8072
  constructor() {
8074
- this.kind = "span";
8075
8073
  this.id = "";
8076
8074
  this.span_id = "";
8077
8075
  this.root_span_id = "";
@@ -8095,15 +8093,22 @@ var NoopSpan = class {
8095
8093
  };
8096
8094
  var NOOP_SPAN = new NoopSpan();
8097
8095
  var BraintrustState = class {
8096
+ id;
8097
+ currentExperiment;
8098
+ // Note: the value of IsAsyncFlush doesn't really matter here, since we
8099
+ // (safely) dynamically cast it whenever retrieving the logger.
8100
+ currentLogger;
8101
+ currentSpan;
8102
+ appUrl = null;
8103
+ loginToken = null;
8104
+ orgId = null;
8105
+ orgName = null;
8106
+ logUrl = null;
8107
+ loggedIn = false;
8108
+ gitMetadataSettings;
8109
+ _apiConn = null;
8110
+ _logConn = null;
8098
8111
  constructor() {
8099
- this.appUrl = null;
8100
- this.loginToken = null;
8101
- this.orgId = null;
8102
- this.orgName = null;
8103
- this.logUrl = null;
8104
- this.loggedIn = false;
8105
- this._apiConn = null;
8106
- this._logConn = null;
8107
8112
  this.id = v4_default();
8108
8113
  this.currentExperiment = void 0;
8109
8114
  this.currentLogger = void 0;
@@ -8150,6 +8155,9 @@ function _internalSetInitialState() {
8150
8155
  }
8151
8156
  var _internalGetGlobalState = () => _state;
8152
8157
  var FailedHTTPResponse = class extends Error {
8158
+ status;
8159
+ text;
8160
+ data;
8153
8161
  constructor(status, text, data = null) {
8154
8162
  super(`${status}: ${text}`);
8155
8163
  this.status = status;
@@ -8169,6 +8177,9 @@ async function checkResponse(resp) {
8169
8177
  }
8170
8178
  }
8171
8179
  var HTTPConnection = class _HTTPConnection {
8180
+ base_url;
8181
+ token;
8182
+ headers;
8172
8183
  constructor(base_url) {
8173
8184
  this.base_url = base_url;
8174
8185
  this.token = null;
@@ -8320,9 +8331,13 @@ function logFeedbackImpl(bgLogger, parentIds, {
8320
8331
  }
8321
8332
  }
8322
8333
  var Logger = class {
8334
+ lazyMetadata;
8335
+ logOptions;
8336
+ bgLogger;
8337
+ lastStartTime;
8338
+ // For type identification.
8339
+ kind = "logger";
8323
8340
  constructor(lazyMetadata, logOptions = {}) {
8324
- // For type identification.
8325
- this.kind = "logger";
8326
8341
  this.lazyMetadata = lazyMetadata;
8327
8342
  this.logOptions = logOptions;
8328
8343
  const logConn = new LazyValue(
@@ -8356,9 +8371,19 @@ var Logger = class {
8356
8371
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
8357
8372
  * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
8358
8373
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
8374
+ * @param options Additional logging options
8375
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the logger, set this to true.
8359
8376
  * :returns: The `id` of the logged event.
8360
8377
  */
8361
- log(event) {
8378
+ log(event, options) {
8379
+ if (!options?.allowLogConcurrentWithActiveSpan) {
8380
+ const checkCurrentSpan = currentSpan();
8381
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
8382
+ throw new Error(
8383
+ "Cannot run toplevel Logger.log method while there is an active span. To log to the span, use Span.log"
8384
+ );
8385
+ }
8386
+ }
8362
8387
  const span = this.startSpan({ startTime: this.lastStartTime, event });
8363
8388
  this.lastStartTime = span.end();
8364
8389
  const ret = span.id;
@@ -8417,6 +8442,7 @@ var Logger = class {
8417
8442
  startSpan(args) {
8418
8443
  const { name, ...argsRest } = args ?? {};
8419
8444
  return new SpanImpl({
8445
+ parentObject: this,
8420
8446
  parentIds: new LazyValue(() => this.lazyParentIds()),
8421
8447
  bgLogger: this.bgLogger,
8422
8448
  name: name ?? "root",
@@ -8474,10 +8500,11 @@ function now() {
8474
8500
  return (/* @__PURE__ */ new Date()).getTime();
8475
8501
  }
8476
8502
  var BackgroundLogger = class {
8503
+ logConn;
8504
+ items = [];
8505
+ active_flush = Promise.resolve([]);
8506
+ active_flush_resolved = true;
8477
8507
  constructor(logConn) {
8478
- this.items = [];
8479
- this.active_flush = Promise.resolve([]);
8480
- this.active_flush_resolved = true;
8481
8508
  this.logConn = logConn;
8482
8509
  isomorph_default.processOn("beforeExit", async () => {
8483
8510
  await this.flush();
@@ -8526,12 +8553,12 @@ var BackgroundLogger = class {
8526
8553
  }
8527
8554
  postPromises.push(
8528
8555
  (async () => {
8529
- const dataS = constructLogs3Data(items);
8556
+ const dataStr = constructLogs3Data(items);
8530
8557
  for (let i = 0; i < NumRetries; i++) {
8531
8558
  const startTime = now();
8532
8559
  try {
8533
8560
  try {
8534
- return (await (await this.logConn.get()).post_json("logs3", dataS)).ids.map((res) => res.id);
8561
+ return (await (await this.logConn.get()).post_json("logs3", dataStr)).ids.map((res) => res.id);
8535
8562
  } catch (e) {
8536
8563
  const legacyDataS = constructJsonArray(
8537
8564
  items.map(
@@ -8550,7 +8577,7 @@ var BackgroundLogger = class {
8550
8577
  }
8551
8578
  })();
8552
8579
  console.warn(
8553
- `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataS.length}. Error: ${errMsg}.${retryingText}`
8580
+ `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}. Error: ${errMsg}.${retryingText}`
8554
8581
  );
8555
8582
  }
8556
8583
  }
@@ -8918,9 +8945,12 @@ async function login(options = {}) {
8918
8945
  }
8919
8946
  };
8920
8947
  var checkUpdatedParam = checkUpdatedParam2;
8921
- ;
8922
8948
  checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
8923
- checkUpdatedParam2("apiKey", options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0, _state.loginToken);
8949
+ checkUpdatedParam2(
8950
+ "apiKey",
8951
+ options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
8952
+ _state.loginToken
8953
+ );
8924
8954
  checkUpdatedParam2("orgName", options.orgName, _state.orgName);
8925
8955
  return;
8926
8956
  }
@@ -9141,8 +9171,8 @@ var ObjectFetcher = class {
9141
9171
  this.objectType = objectType;
9142
9172
  this.pinnedVersion = pinnedVersion;
9143
9173
  this.mutateRecord = mutateRecord;
9144
- this._fetchedData = void 0;
9145
9174
  }
9175
+ _fetchedData = void 0;
9146
9176
  get id() {
9147
9177
  throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
9148
9178
  }
@@ -9202,10 +9232,14 @@ var ObjectFetcher = class {
9202
9232
  }
9203
9233
  };
9204
9234
  var Experiment = class extends ObjectFetcher {
9235
+ lazyMetadata;
9236
+ dataset;
9237
+ bgLogger;
9238
+ lastStartTime;
9239
+ // For type identification.
9240
+ kind = "experiment";
9205
9241
  constructor(lazyMetadata, dataset) {
9206
9242
  super("experiment", void 0);
9207
- // For type identification.
9208
- this.kind = "experiment";
9209
9243
  this.lazyMetadata = lazyMetadata;
9210
9244
  this.dataset = dataset;
9211
9245
  const logConn = new LazyValue(
@@ -9246,9 +9280,19 @@ var Experiment = class extends ObjectFetcher {
9246
9280
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
9247
9281
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
9248
9282
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
9283
+ * @param options Additional logging options
9284
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
9249
9285
  * :returns: The `id` of the logged event.
9250
9286
  */
9251
- log(event) {
9287
+ log(event, options) {
9288
+ if (!options?.allowLogConcurrentWithActiveSpan) {
9289
+ const checkCurrentSpan = currentSpan();
9290
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
9291
+ throw new Error(
9292
+ "Cannot run toplevel Experiment.log method while there is an active span. To log to the span, use Span.log"
9293
+ );
9294
+ }
9295
+ }
9252
9296
  event = validateAndSanitizeExperimentLogFullArgs(event, !!this.dataset);
9253
9297
  const span = this.startSpan({ startTime: this.lastStartTime, event });
9254
9298
  this.lastStartTime = span.end();
@@ -9290,6 +9334,7 @@ var Experiment = class extends ObjectFetcher {
9290
9334
  startSpan(args) {
9291
9335
  const { name, ...argsRest } = args ?? {};
9292
9336
  return new SpanImpl({
9337
+ parentObject: this,
9293
9338
  parentIds: new LazyValue(() => this.lazyParentIds()),
9294
9339
  bgLogger: this.bgLogger,
9295
9340
  name: name ?? "root",
@@ -9444,10 +9489,21 @@ var ReadonlyExperiment = class extends ObjectFetcher {
9444
9489
  };
9445
9490
  var executionCounter = 0;
9446
9491
  var SpanImpl = class _SpanImpl {
9492
+ bgLogger;
9493
+ // `internalData` contains fields that are not part of the "user-sanitized"
9494
+ // set of fields which we want to log in just one of the span rows.
9495
+ internalData;
9496
+ isMerge;
9497
+ loggedEndTime;
9498
+ // For internal use only.
9499
+ parentObject;
9500
+ // These fields are logged to every span row.
9501
+ parentIds;
9502
+ rowIds;
9503
+ kind = "span";
9447
9504
  // root_experiment should only be specified for a root span. parent_span
9448
9505
  // should only be specified for non-root spans.
9449
9506
  constructor(args) {
9450
- this.kind = "span";
9451
9507
  this.loggedEndTime = void 0;
9452
9508
  this.bgLogger = args.bgLogger;
9453
9509
  const callerLocation = isomorph_default.getCallerLocation();
@@ -9475,6 +9531,7 @@ var SpanImpl = class _SpanImpl {
9475
9531
  },
9476
9532
  created: (/* @__PURE__ */ new Date()).toISOString()
9477
9533
  };
9534
+ this.parentObject = args.parentObject;
9478
9535
  this.parentIds = args.parentIds;
9479
9536
  const id = args.event?.id ?? v4_default();
9480
9537
  const span_id = v4_default();
@@ -9546,6 +9603,7 @@ var SpanImpl = class _SpanImpl {
9546
9603
  }
9547
9604
  startSpan(args) {
9548
9605
  return new _SpanImpl({
9606
+ parentObject: this.parentObject,
9549
9607
  parentIds: this.parentIds,
9550
9608
  bgLogger: this.bgLogger,
9551
9609
  parentSpanInfo: {
@@ -9571,6 +9629,8 @@ var SpanImpl = class _SpanImpl {
9571
9629
  }
9572
9630
  };
9573
9631
  var Dataset = class extends ObjectFetcher {
9632
+ lazyMetadata;
9633
+ bgLogger;
9574
9634
  constructor(lazyMetadata, pinnedVersion, legacy) {
9575
9635
  const isLegacyDataset = legacy ?? DEFAULT_IS_LEGACY_DATASET;
9576
9636
  if (isLegacyDataset) {
@@ -9742,8 +9802,9 @@ function fitNameToSpaces(name, length) {
9742
9802
  return padded.substring(0, length - 3) + "...";
9743
9803
  }
9744
9804
  var BarProgressReporter = class {
9805
+ multiBar;
9806
+ bars = {};
9745
9807
  constructor() {
9746
- this.bars = {};
9747
9808
  this.multiBar = new cliProgress.MultiBar(
9748
9809
  {
9749
9810
  clearOnComplete: false,
@@ -9787,9 +9848,9 @@ function initExperiment2(projectName, options = {}) {
9787
9848
  }
9788
9849
  globalThis._evals = {};
9789
9850
  async function Eval(name, evaluator) {
9790
- const evalName = makeEvalName(name, evaluator.experimentName);
9851
+ let evalName = makeEvalName(name, evaluator.experimentName);
9791
9852
  if (_evals[evalName]) {
9792
- throw new Error(`Evaluator ${evalName} already exists`);
9853
+ evalName = `${evalName}_${Object.keys(_evals).length}`;
9793
9854
  }
9794
9855
  if (globalThis._lazy_load) {
9795
9856
  _evals[evalName] = { evalName, projectName: name, ...evaluator };
@@ -9845,6 +9906,9 @@ function evaluateFilter(object, filter) {
9845
9906
  }
9846
9907
  return pattern.test(serializeJSONWithPlainString(key));
9847
9908
  }
9909
+ function scorerName(scorer, scorer_idx) {
9910
+ return scorer.name || `scorer_${scorer_idx}`;
9911
+ }
9848
9912
  async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9849
9913
  if (typeof evaluator.data === "string") {
9850
9914
  throw new Error("Unimplemented: string data paths");
@@ -9888,11 +9952,13 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9888
9952
  );
9889
9953
  progressReporter.start(evaluator.evalName, data.length);
9890
9954
  const evals = data.map(async (datum) => {
9891
- let metadata = { ..."metadata" in datum ? datum.metadata : {} };
9892
- let output = void 0;
9893
- let error2 = void 0;
9894
- let scores = {};
9895
9955
  const callback = async (rootSpan) => {
9956
+ let metadata = {
9957
+ ..."metadata" in datum ? datum.metadata : {}
9958
+ };
9959
+ let output = void 0;
9960
+ let error2 = void 0;
9961
+ let scores = {};
9896
9962
  try {
9897
9963
  const meta = (o) => metadata = { ...metadata, ...o };
9898
9964
  await rootSpan.traced(
@@ -9909,42 +9975,55 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9909
9975
  );
9910
9976
  rootSpan.log({ output });
9911
9977
  const scoringArgs = { ...datum, metadata, output };
9978
+ const scorerNames = evaluator.scores.map(scorerName);
9912
9979
  const scoreResults = await Promise.all(
9913
9980
  evaluator.scores.map(async (score, score_idx) => {
9914
- return rootSpan.traced(
9915
- async (span) => {
9916
- const scoreResult = score(scoringArgs);
9917
- const result = scoreResult instanceof Promise ? await scoreResult : scoreResult;
9918
- const {
9919
- metadata: resultMetadata,
9920
- name: _,
9921
- ...resultRest
9922
- } = result;
9923
- span.log({
9924
- output: resultRest,
9925
- metadata: resultMetadata
9926
- });
9927
- return result;
9928
- },
9929
- {
9930
- name: score.name || `scorer_${score_idx}`,
9931
- spanAttributes: {
9932
- type: SpanTypeAttribute.SCORE
9981
+ try {
9982
+ const result = await rootSpan.traced(
9983
+ async (span) => {
9984
+ const scoreResult = score(scoringArgs);
9985
+ const result2 = scoreResult instanceof Promise ? await scoreResult : scoreResult;
9986
+ const {
9987
+ metadata: resultMetadata,
9988
+ name: _,
9989
+ ...resultRest
9990
+ } = result2;
9991
+ span.log({
9992
+ output: resultRest,
9993
+ metadata: resultMetadata
9994
+ });
9995
+ return result2;
9933
9996
  },
9934
- event: { input: scoringArgs }
9935
- }
9936
- );
9997
+ {
9998
+ name: scorerNames[score_idx],
9999
+ spanAttributes: {
10000
+ type: SpanTypeAttribute.SCORE
10001
+ },
10002
+ event: { input: scoringArgs }
10003
+ }
10004
+ );
10005
+ return { kind: "score", value: result };
10006
+ } catch (e) {
10007
+ return { kind: "error", value: e };
10008
+ }
9937
10009
  })
9938
10010
  );
10011
+ const passingScorersAndResults = [];
10012
+ const failingScorersAndResults = [];
10013
+ scoreResults.forEach((result, i) => {
10014
+ const name = scorerNames[i];
10015
+ if (result.kind === "score") {
10016
+ passingScorersAndResults.push({ name, score: result.value });
10017
+ } else {
10018
+ failingScorersAndResults.push({ name, error: result.value });
10019
+ }
10020
+ });
9939
10021
  const scoreMetadata = {};
9940
- for (const scoreResult of scoreResults) {
10022
+ for (const { score: scoreResult } of passingScorersAndResults) {
9941
10023
  scores[scoreResult.name] = scoreResult.score;
9942
10024
  const metadata2 = {
9943
10025
  ...scoreResult.metadata
9944
10026
  };
9945
- if (scoreResult.error !== void 0) {
9946
- metadata2.error = scoreResult.error;
9947
- }
9948
10027
  if (Object.keys(metadata2).length > 0) {
9949
10028
  scoreMetadata[scoreResult.name] = metadata2;
9950
10029
  }
@@ -9953,6 +10032,21 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
9953
10032
  meta({ scores: scoreMetadata });
9954
10033
  }
9955
10034
  rootSpan.log({ scores, metadata });
10035
+ if (failingScorersAndResults.length) {
10036
+ const scorerErrors = Object.fromEntries(
10037
+ failingScorersAndResults.map(({ name, error: error3 }) => [
10038
+ name,
10039
+ error3 instanceof Error ? error3.stack : `${error3}`
10040
+ ])
10041
+ );
10042
+ metadata["scorer_errors"] = scorerErrors;
10043
+ const names = Object.keys(scorerErrors).join(", ");
10044
+ const errors = failingScorersAndResults.map((item) => item.error);
10045
+ throw new AggregateError(
10046
+ errors,
10047
+ `Found exceptions for the following scorers: ${names}`
10048
+ );
10049
+ }
9956
10050
  } catch (e) {
9957
10051
  error2 = e;
9958
10052
  } finally {
@@ -10031,7 +10125,8 @@ function reportEvaluatorResult(evaluatorName, evaluatorResult, {
10031
10125
  if (!verbose && !jsonl) {
10032
10126
  console.error(warning("Add --verbose to see full stack traces."));
10033
10127
  }
10034
- } else if (summary) {
10128
+ }
10129
+ if (summary) {
10035
10130
  console.log(jsonl ? JSON.stringify(summary) : summary);
10036
10131
  } else {
10037
10132
  const scoresByName = {};
@@ -10259,6 +10354,9 @@ function wrapEmbeddings(create) {
10259
10354
  };
10260
10355
  }
10261
10356
  var WrapperStream = class {
10357
+ span;
10358
+ iter;
10359
+ startTime;
10262
10360
  constructor(span, startTime, iter) {
10263
10361
  this.span = span;
10264
10362
  this.iter = iter;
package/dist/logger.d.ts CHANGED
@@ -186,9 +186,13 @@ export declare class Logger<IsAsyncFlush extends boolean> {
186
186
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
187
187
  * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
188
188
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
189
+ * @param options Additional logging options
190
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the logger, set this to true.
189
191
  * :returns: The `id` of the logged event.
190
192
  */
191
- log(event: Readonly<StartSpanEventArgs>): PromiseUnless<IsAsyncFlush, string>;
193
+ log(event: Readonly<StartSpanEventArgs>, options?: {
194
+ allowLogConcurrentWithActiveSpan?: boolean;
195
+ }): PromiseUnless<IsAsyncFlush, string>;
192
196
  /**
193
197
  * Create a new toplevel span underneath the logger. The name defaults to "root".
194
198
  *
@@ -501,9 +505,13 @@ export declare class Experiment extends ObjectFetcher<ExperimentEvent> {
501
505
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
502
506
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
503
507
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
508
+ * @param options Additional logging options
509
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
504
510
  * :returns: The `id` of the logged event.
505
511
  */
506
- log(event: Readonly<ExperimentLogFullArgs>): string;
512
+ log(event: Readonly<ExperimentLogFullArgs>, options?: {
513
+ allowLogConcurrentWithActiveSpan?: boolean;
514
+ }): string;
507
515
  /**
508
516
  * Create a new toplevel span underneath the experiment. The name defaults to "root".
509
517
  *
@@ -577,10 +585,12 @@ export declare class SpanImpl implements Span {
577
585
  private internalData;
578
586
  private isMerge;
579
587
  private loggedEndTime;
588
+ parentObject: Experiment | Logger<any>;
580
589
  private parentIds;
581
590
  private readonly rowIds;
582
591
  kind: "span";
583
592
  constructor(args: {
593
+ parentObject: Experiment | Logger<any>;
584
594
  parentIds: LazyValue<ParentExperimentIds | ParentProjectLogIds>;
585
595
  bgLogger: BackgroundLogger;
586
596
  } & Omit<StartSpanArgs, "parentId"> & ({