braintrust 0.0.98 → 0.0.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -233,10 +233,11 @@ function isEmpty(a) {
233
233
  return a === void 0 || a === null;
234
234
  }
235
235
  var LazyValue = class {
236
+ callable;
237
+ value = {
238
+ hasComputed: false
239
+ };
236
240
  constructor(callable) {
237
- this.value = {
238
- hasComputed: false
239
- };
240
241
  this.callable = callable;
241
242
  }
242
243
  async get() {
@@ -250,8 +251,11 @@ var LazyValue = class {
250
251
 
251
252
  // src/logger.ts
252
253
  var NoopSpan = class {
254
+ id;
255
+ span_id;
256
+ root_span_id;
257
+ kind = "span";
253
258
  constructor() {
254
- this.kind = "span";
255
259
  this.id = "";
256
260
  this.span_id = "";
257
261
  this.root_span_id = "";
@@ -275,15 +279,22 @@ var NoopSpan = class {
275
279
  };
276
280
  var NOOP_SPAN = new NoopSpan();
277
281
  var BraintrustState = class {
282
+ id;
283
+ currentExperiment;
284
+ // Note: the value of IsAsyncFlush doesn't really matter here, since we
285
+ // (safely) dynamically cast it whenever retrieving the logger.
286
+ currentLogger;
287
+ currentSpan;
288
+ appUrl = null;
289
+ loginToken = null;
290
+ orgId = null;
291
+ orgName = null;
292
+ logUrl = null;
293
+ loggedIn = false;
294
+ gitMetadataSettings;
295
+ _apiConn = null;
296
+ _logConn = null;
278
297
  constructor() {
279
- this.appUrl = null;
280
- this.loginToken = null;
281
- this.orgId = null;
282
- this.orgName = null;
283
- this.logUrl = null;
284
- this.loggedIn = false;
285
- this._apiConn = null;
286
- this._logConn = null;
287
298
  this.id = v4_default();
288
299
  this.currentExperiment = void 0;
289
300
  this.currentLogger = void 0;
@@ -330,6 +341,9 @@ function _internalSetInitialState() {
330
341
  }
331
342
  var _internalGetGlobalState = () => _state;
332
343
  var FailedHTTPResponse = class extends Error {
344
+ status;
345
+ text;
346
+ data;
333
347
  constructor(status, text, data = null) {
334
348
  super(`${status}: ${text}`);
335
349
  this.status = status;
@@ -349,6 +363,9 @@ async function checkResponse(resp) {
349
363
  }
350
364
  }
351
365
  var HTTPConnection = class _HTTPConnection {
366
+ base_url;
367
+ token;
368
+ headers;
352
369
  constructor(base_url) {
353
370
  this.base_url = base_url;
354
371
  this.token = null;
@@ -500,9 +517,13 @@ function logFeedbackImpl(bgLogger, parentIds, {
500
517
  }
501
518
  }
502
519
  var Logger = class {
520
+ lazyMetadata;
521
+ logOptions;
522
+ bgLogger;
523
+ lastStartTime;
524
+ // For type identification.
525
+ kind = "logger";
503
526
  constructor(lazyMetadata, logOptions = {}) {
504
- // For type identification.
505
- this.kind = "logger";
506
527
  this.lazyMetadata = lazyMetadata;
507
528
  this.logOptions = logOptions;
508
529
  const logConn = new LazyValue(
@@ -536,9 +557,19 @@ var Logger = class {
536
557
  * @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
537
558
  * @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
538
559
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
560
+ * @param options Additional logging options
561
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the logger, set this to true.
539
562
  * :returns: The `id` of the logged event.
540
563
  */
541
- log(event) {
564
+ log(event, options) {
565
+ if (!options?.allowLogConcurrentWithActiveSpan) {
566
+ const checkCurrentSpan = currentSpan();
567
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
568
+ throw new Error(
569
+ "Cannot run toplevel Logger.log method while there is an active span. To log to the span, use Span.log"
570
+ );
571
+ }
572
+ }
542
573
  const span = this.startSpan({ startTime: this.lastStartTime, event });
543
574
  this.lastStartTime = span.end();
544
575
  const ret = span.id;
@@ -597,6 +628,7 @@ var Logger = class {
597
628
  startSpan(args) {
598
629
  const { name, ...argsRest } = args ?? {};
599
630
  return new SpanImpl({
631
+ parentObject: this,
600
632
  parentIds: new LazyValue(() => this.lazyParentIds()),
601
633
  bgLogger: this.bgLogger,
602
634
  name: name ?? "root",
@@ -654,10 +686,11 @@ function now() {
654
686
  return (/* @__PURE__ */ new Date()).getTime();
655
687
  }
656
688
  var BackgroundLogger = class {
689
+ logConn;
690
+ items = [];
691
+ active_flush = Promise.resolve([]);
692
+ active_flush_resolved = true;
657
693
  constructor(logConn) {
658
- this.items = [];
659
- this.active_flush = Promise.resolve([]);
660
- this.active_flush_resolved = true;
661
694
  this.logConn = logConn;
662
695
  isomorph_default.processOn("beforeExit", async () => {
663
696
  await this.flush();
@@ -706,12 +739,12 @@ var BackgroundLogger = class {
706
739
  }
707
740
  postPromises.push(
708
741
  (async () => {
709
- const dataS = constructLogs3Data(items);
742
+ const dataStr = constructLogs3Data(items);
710
743
  for (let i = 0; i < NumRetries; i++) {
711
744
  const startTime = now();
712
745
  try {
713
746
  try {
714
- return (await (await this.logConn.get()).post_json("logs3", dataS)).ids.map((res) => res.id);
747
+ return (await (await this.logConn.get()).post_json("logs3", dataStr)).ids.map((res) => res.id);
715
748
  } catch (e) {
716
749
  const legacyDataS = constructJsonArray(
717
750
  items.map(
@@ -730,7 +763,7 @@ var BackgroundLogger = class {
730
763
  }
731
764
  })();
732
765
  console.warn(
733
- `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataS.length}. Error: ${errMsg}.${retryingText}`
766
+ `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}. Error: ${errMsg}.${retryingText}`
734
767
  );
735
768
  }
736
769
  }
@@ -1098,9 +1131,12 @@ async function login(options = {}) {
1098
1131
  }
1099
1132
  };
1100
1133
  var checkUpdatedParam = checkUpdatedParam2;
1101
- ;
1102
1134
  checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
1103
- checkUpdatedParam2("apiKey", options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0, _state.loginToken);
1135
+ checkUpdatedParam2(
1136
+ "apiKey",
1137
+ options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
1138
+ _state.loginToken
1139
+ );
1104
1140
  checkUpdatedParam2("orgName", options.orgName, _state.orgName);
1105
1141
  return;
1106
1142
  }
@@ -1321,8 +1357,8 @@ var ObjectFetcher = class {
1321
1357
  this.objectType = objectType;
1322
1358
  this.pinnedVersion = pinnedVersion;
1323
1359
  this.mutateRecord = mutateRecord;
1324
- this._fetchedData = void 0;
1325
1360
  }
1361
+ _fetchedData = void 0;
1326
1362
  get id() {
1327
1363
  throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
1328
1364
  }
@@ -1382,10 +1418,14 @@ var ObjectFetcher = class {
1382
1418
  }
1383
1419
  };
1384
1420
  var Experiment = class extends ObjectFetcher {
1421
+ lazyMetadata;
1422
+ dataset;
1423
+ bgLogger;
1424
+ lastStartTime;
1425
+ // For type identification.
1426
+ kind = "experiment";
1385
1427
  constructor(lazyMetadata, dataset) {
1386
1428
  super("experiment", void 0);
1387
- // For type identification.
1388
- this.kind = "experiment";
1389
1429
  this.lazyMetadata = lazyMetadata;
1390
1430
  this.dataset = dataset;
1391
1431
  const logConn = new LazyValue(
@@ -1426,9 +1466,19 @@ var Experiment = class extends ObjectFetcher {
1426
1466
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
1427
1467
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
1428
1468
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
1469
+ * @param options Additional logging options
1470
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
1429
1471
  * :returns: The `id` of the logged event.
1430
1472
  */
1431
- log(event) {
1473
+ log(event, options) {
1474
+ if (!options?.allowLogConcurrentWithActiveSpan) {
1475
+ const checkCurrentSpan = currentSpan();
1476
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
1477
+ throw new Error(
1478
+ "Cannot run toplevel Experiment.log method while there is an active span. To log to the span, use Span.log"
1479
+ );
1480
+ }
1481
+ }
1432
1482
  event = validateAndSanitizeExperimentLogFullArgs(event, !!this.dataset);
1433
1483
  const span = this.startSpan({ startTime: this.lastStartTime, event });
1434
1484
  this.lastStartTime = span.end();
@@ -1470,6 +1520,7 @@ var Experiment = class extends ObjectFetcher {
1470
1520
  startSpan(args) {
1471
1521
  const { name, ...argsRest } = args ?? {};
1472
1522
  return new SpanImpl({
1523
+ parentObject: this,
1473
1524
  parentIds: new LazyValue(() => this.lazyParentIds()),
1474
1525
  bgLogger: this.bgLogger,
1475
1526
  name: name ?? "root",
@@ -1624,10 +1675,21 @@ var ReadonlyExperiment = class extends ObjectFetcher {
1624
1675
  };
1625
1676
  var executionCounter = 0;
1626
1677
  var SpanImpl = class _SpanImpl {
1678
+ bgLogger;
1679
+ // `internalData` contains fields that are not part of the "user-sanitized"
1680
+ // set of fields which we want to log in just one of the span rows.
1681
+ internalData;
1682
+ isMerge;
1683
+ loggedEndTime;
1684
+ // For internal use only.
1685
+ parentObject;
1686
+ // These fields are logged to every span row.
1687
+ parentIds;
1688
+ rowIds;
1689
+ kind = "span";
1627
1690
  // root_experiment should only be specified for a root span. parent_span
1628
1691
  // should only be specified for non-root spans.
1629
1692
  constructor(args) {
1630
- this.kind = "span";
1631
1693
  this.loggedEndTime = void 0;
1632
1694
  this.bgLogger = args.bgLogger;
1633
1695
  const callerLocation = isomorph_default.getCallerLocation();
@@ -1655,6 +1717,7 @@ var SpanImpl = class _SpanImpl {
1655
1717
  },
1656
1718
  created: (/* @__PURE__ */ new Date()).toISOString()
1657
1719
  };
1720
+ this.parentObject = args.parentObject;
1658
1721
  this.parentIds = args.parentIds;
1659
1722
  const id = args.event?.id ?? v4_default();
1660
1723
  const span_id = v4_default();
@@ -1726,6 +1789,7 @@ var SpanImpl = class _SpanImpl {
1726
1789
  }
1727
1790
  startSpan(args) {
1728
1791
  return new _SpanImpl({
1792
+ parentObject: this.parentObject,
1729
1793
  parentIds: this.parentIds,
1730
1794
  bgLogger: this.bgLogger,
1731
1795
  parentSpanInfo: {
@@ -1751,6 +1815,8 @@ var SpanImpl = class _SpanImpl {
1751
1815
  }
1752
1816
  };
1753
1817
  var Dataset = class extends ObjectFetcher {
1818
+ lazyMetadata;
1819
+ bgLogger;
1754
1820
  constructor(lazyMetadata, pinnedVersion, legacy) {
1755
1821
  const isLegacyDataset = legacy ?? DEFAULT_IS_LEGACY_DATASET;
1756
1822
  if (isLegacyDataset) {
@@ -2111,6 +2177,9 @@ function wrapEmbeddings(create) {
2111
2177
  };
2112
2178
  }
2113
2179
  var WrapperStream = class {
2180
+ span;
2181
+ iter;
2182
+ startTime;
2114
2183
  constructor(span, startTime, iter) {
2115
2184
  this.span = span;
2116
2185
  this.iter = iter;
package/dist/cli.js CHANGED
@@ -9065,7 +9065,7 @@ var require_package = __commonJS({
9065
9065
  "package.json"(exports2, module2) {
9066
9066
  module2.exports = {
9067
9067
  name: "braintrust",
9068
- version: "0.0.98",
9068
+ version: "0.0.99",
9069
9069
  description: "SDK for integrating Braintrust",
9070
9070
  main: "./dist/index.js",
9071
9071
  browser: {
@@ -9108,7 +9108,7 @@ var require_package = __commonJS({
9108
9108
  typescript: "^5.3.3"
9109
9109
  },
9110
9110
  dependencies: {
9111
- "@braintrust/core": "^0.0.16",
9111
+ "@braintrust/core": "^0.0.17",
9112
9112
  argparse: "^2.0.1",
9113
9113
  chalk: "^4.1.2",
9114
9114
  "cli-progress": "^3.12.0",
@@ -10664,10 +10664,11 @@ function isEmpty(a) {
10664
10664
  return a === void 0 || a === null;
10665
10665
  }
10666
10666
  var LazyValue = class {
10667
+ callable;
10668
+ value = {
10669
+ hasComputed: false
10670
+ };
10667
10671
  constructor(callable) {
10668
- this.value = {
10669
- hasComputed: false
10670
- };
10671
10672
  this.callable = callable;
10672
10673
  }
10673
10674
  async get() {
@@ -10681,8 +10682,11 @@ var LazyValue = class {
10681
10682
 
10682
10683
  // src/logger.ts
10683
10684
  var NoopSpan = class {
10685
+ id;
10686
+ span_id;
10687
+ root_span_id;
10688
+ kind = "span";
10684
10689
  constructor() {
10685
- this.kind = "span";
10686
10690
  this.id = "";
10687
10691
  this.span_id = "";
10688
10692
  this.root_span_id = "";
@@ -10706,15 +10710,22 @@ var NoopSpan = class {
10706
10710
  };
10707
10711
  var NOOP_SPAN = new NoopSpan();
10708
10712
  var BraintrustState = class {
10713
+ id;
10714
+ currentExperiment;
10715
+ // Note: the value of IsAsyncFlush doesn't really matter here, since we
10716
+ // (safely) dynamically cast it whenever retrieving the logger.
10717
+ currentLogger;
10718
+ currentSpan;
10719
+ appUrl = null;
10720
+ loginToken = null;
10721
+ orgId = null;
10722
+ orgName = null;
10723
+ logUrl = null;
10724
+ loggedIn = false;
10725
+ gitMetadataSettings;
10726
+ _apiConn = null;
10727
+ _logConn = null;
10709
10728
  constructor() {
10710
- this.appUrl = null;
10711
- this.loginToken = null;
10712
- this.orgId = null;
10713
- this.orgName = null;
10714
- this.logUrl = null;
10715
- this.loggedIn = false;
10716
- this._apiConn = null;
10717
- this._logConn = null;
10718
10729
  this.id = v4_default();
10719
10730
  this.currentExperiment = void 0;
10720
10731
  this.currentLogger = void 0;
@@ -10761,6 +10772,9 @@ function _internalSetInitialState() {
10761
10772
  }
10762
10773
  var _internalGetGlobalState = () => _state;
10763
10774
  var FailedHTTPResponse = class extends Error {
10775
+ status;
10776
+ text;
10777
+ data;
10764
10778
  constructor(status, text, data = null) {
10765
10779
  super(`${status}: ${text}`);
10766
10780
  this.status = status;
@@ -10780,6 +10794,9 @@ async function checkResponse(resp) {
10780
10794
  }
10781
10795
  }
10782
10796
  var HTTPConnection = class _HTTPConnection {
10797
+ base_url;
10798
+ token;
10799
+ headers;
10783
10800
  constructor(base_url) {
10784
10801
  this.base_url = base_url;
10785
10802
  this.token = null;
@@ -10943,10 +10960,11 @@ function now() {
10943
10960
  return (/* @__PURE__ */ new Date()).getTime();
10944
10961
  }
10945
10962
  var BackgroundLogger = class {
10963
+ logConn;
10964
+ items = [];
10965
+ active_flush = Promise.resolve([]);
10966
+ active_flush_resolved = true;
10946
10967
  constructor(logConn) {
10947
- this.items = [];
10948
- this.active_flush = Promise.resolve([]);
10949
- this.active_flush_resolved = true;
10950
10968
  this.logConn = logConn;
10951
10969
  isomorph_default.processOn("beforeExit", async () => {
10952
10970
  await this.flush();
@@ -10995,12 +11013,12 @@ var BackgroundLogger = class {
10995
11013
  }
10996
11014
  postPromises.push(
10997
11015
  (async () => {
10998
- const dataS = constructLogs3Data(items);
11016
+ const dataStr = constructLogs3Data(items);
10999
11017
  for (let i = 0; i < NumRetries; i++) {
11000
11018
  const startTime = now();
11001
11019
  try {
11002
11020
  try {
11003
- return (await (await this.logConn.get()).post_json("logs3", dataS)).ids.map((res) => res.id);
11021
+ return (await (await this.logConn.get()).post_json("logs3", dataStr)).ids.map((res) => res.id);
11004
11022
  } catch (e) {
11005
11023
  const legacyDataS = constructJsonArray(
11006
11024
  items.map(
@@ -11019,7 +11037,7 @@ var BackgroundLogger = class {
11019
11037
  }
11020
11038
  })();
11021
11039
  console.warn(
11022
- `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataS.length}. Error: ${errMsg}.${retryingText}`
11040
+ `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}. Error: ${errMsg}.${retryingText}`
11023
11041
  );
11024
11042
  }
11025
11043
  }
@@ -11236,9 +11254,12 @@ async function login(options = {}) {
11236
11254
  }
11237
11255
  };
11238
11256
  var checkUpdatedParam = checkUpdatedParam2;
11239
- ;
11240
11257
  checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
11241
- checkUpdatedParam2("apiKey", options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0, _state.loginToken);
11258
+ checkUpdatedParam2(
11259
+ "apiKey",
11260
+ options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
11261
+ _state.loginToken
11262
+ );
11242
11263
  checkUpdatedParam2("orgName", options.orgName, _state.orgName);
11243
11264
  return;
11244
11265
  }
@@ -11279,6 +11300,9 @@ async function login(options = {}) {
11279
11300
  _state.loginToken = conn.token;
11280
11301
  _state.loggedIn = true;
11281
11302
  }
11303
+ function currentSpan() {
11304
+ return _state.currentSpan.getStore() ?? NOOP_SPAN;
11305
+ }
11282
11306
  function withCurrent(span, callback) {
11283
11307
  return _state.currentSpan.run(span, () => callback(span));
11284
11308
  }
@@ -11380,8 +11404,8 @@ var ObjectFetcher = class {
11380
11404
  this.objectType = objectType;
11381
11405
  this.pinnedVersion = pinnedVersion;
11382
11406
  this.mutateRecord = mutateRecord;
11383
- this._fetchedData = void 0;
11384
11407
  }
11408
+ _fetchedData = void 0;
11385
11409
  get id() {
11386
11410
  throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
11387
11411
  }
@@ -11441,10 +11465,14 @@ var ObjectFetcher = class {
11441
11465
  }
11442
11466
  };
11443
11467
  var Experiment = class extends ObjectFetcher {
11468
+ lazyMetadata;
11469
+ dataset;
11470
+ bgLogger;
11471
+ lastStartTime;
11472
+ // For type identification.
11473
+ kind = "experiment";
11444
11474
  constructor(lazyMetadata, dataset) {
11445
11475
  super("experiment", void 0);
11446
- // For type identification.
11447
- this.kind = "experiment";
11448
11476
  this.lazyMetadata = lazyMetadata;
11449
11477
  this.dataset = dataset;
11450
11478
  const logConn = new LazyValue(
@@ -11485,9 +11513,19 @@ var Experiment = class extends ObjectFetcher {
11485
11513
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
11486
11514
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
11487
11515
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
11516
+ * @param options Additional logging options
11517
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
11488
11518
  * :returns: The `id` of the logged event.
11489
11519
  */
11490
- log(event) {
11520
+ log(event, options) {
11521
+ if (!options?.allowLogConcurrentWithActiveSpan) {
11522
+ const checkCurrentSpan = currentSpan();
11523
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
11524
+ throw new Error(
11525
+ "Cannot run toplevel Experiment.log method while there is an active span. To log to the span, use Span.log"
11526
+ );
11527
+ }
11528
+ }
11491
11529
  event = validateAndSanitizeExperimentLogFullArgs(event, !!this.dataset);
11492
11530
  const span = this.startSpan({ startTime: this.lastStartTime, event });
11493
11531
  this.lastStartTime = span.end();
@@ -11529,6 +11567,7 @@ var Experiment = class extends ObjectFetcher {
11529
11567
  startSpan(args) {
11530
11568
  const { name, ...argsRest } = args ?? {};
11531
11569
  return new SpanImpl({
11570
+ parentObject: this,
11532
11571
  parentIds: new LazyValue(() => this.lazyParentIds()),
11533
11572
  bgLogger: this.bgLogger,
11534
11573
  name: name ?? "root",
@@ -11683,10 +11722,21 @@ var ReadonlyExperiment = class extends ObjectFetcher {
11683
11722
  };
11684
11723
  var executionCounter = 0;
11685
11724
  var SpanImpl = class _SpanImpl {
11725
+ bgLogger;
11726
+ // `internalData` contains fields that are not part of the "user-sanitized"
11727
+ // set of fields which we want to log in just one of the span rows.
11728
+ internalData;
11729
+ isMerge;
11730
+ loggedEndTime;
11731
+ // For internal use only.
11732
+ parentObject;
11733
+ // These fields are logged to every span row.
11734
+ parentIds;
11735
+ rowIds;
11736
+ kind = "span";
11686
11737
  // root_experiment should only be specified for a root span. parent_span
11687
11738
  // should only be specified for non-root spans.
11688
11739
  constructor(args) {
11689
- this.kind = "span";
11690
11740
  this.loggedEndTime = void 0;
11691
11741
  this.bgLogger = args.bgLogger;
11692
11742
  const callerLocation = isomorph_default.getCallerLocation();
@@ -11714,6 +11764,7 @@ var SpanImpl = class _SpanImpl {
11714
11764
  },
11715
11765
  created: (/* @__PURE__ */ new Date()).toISOString()
11716
11766
  };
11767
+ this.parentObject = args.parentObject;
11717
11768
  this.parentIds = args.parentIds;
11718
11769
  const id = args.event?.id ?? v4_default();
11719
11770
  const span_id = v4_default();
@@ -11785,6 +11836,7 @@ var SpanImpl = class _SpanImpl {
11785
11836
  }
11786
11837
  startSpan(args) {
11787
11838
  return new _SpanImpl({
11839
+ parentObject: this.parentObject,
11788
11840
  parentIds: this.parentIds,
11789
11841
  bgLogger: this.bgLogger,
11790
11842
  parentSpanInfo: {
@@ -11830,8 +11882,9 @@ var SimpleProgressReporter = class {
11830
11882
  }
11831
11883
  };
11832
11884
  var BarProgressReporter = class {
11885
+ multiBar;
11886
+ bars = {};
11833
11887
  constructor() {
11834
- this.bars = {};
11835
11888
  this.multiBar = new cliProgress.MultiBar(
11836
11889
  {
11837
11890
  clearOnComplete: false,
@@ -11973,6 +12026,9 @@ function evaluateFilter(object, filter2) {
11973
12026
  }
11974
12027
  return pattern.test(serializeJSONWithPlainString(key));
11975
12028
  }
12029
+ function scorerName(scorer, scorer_idx) {
12030
+ return scorer.name || `scorer_${scorer_idx}`;
12031
+ }
11976
12032
  async function runEvaluator(experiment, evaluator, progressReporter, filters) {
11977
12033
  if (typeof evaluator.data === "string") {
11978
12034
  throw new Error("Unimplemented: string data paths");
@@ -12016,11 +12072,13 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
12016
12072
  );
12017
12073
  progressReporter.start(evaluator.evalName, data.length);
12018
12074
  const evals = data.map(async (datum) => {
12019
- let metadata = { ..."metadata" in datum ? datum.metadata : {} };
12020
- let output = void 0;
12021
- let error2 = void 0;
12022
- let scores = {};
12023
12075
  const callback = async (rootSpan) => {
12076
+ let metadata = {
12077
+ ..."metadata" in datum ? datum.metadata : {}
12078
+ };
12079
+ let output = void 0;
12080
+ let error2 = void 0;
12081
+ let scores = {};
12024
12082
  try {
12025
12083
  const meta = (o) => metadata = { ...metadata, ...o };
12026
12084
  await rootSpan.traced(
@@ -12037,42 +12095,55 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
12037
12095
  );
12038
12096
  rootSpan.log({ output });
12039
12097
  const scoringArgs = { ...datum, metadata, output };
12098
+ const scorerNames = evaluator.scores.map(scorerName);
12040
12099
  const scoreResults = await Promise.all(
12041
12100
  evaluator.scores.map(async (score, score_idx) => {
12042
- return rootSpan.traced(
12043
- async (span) => {
12044
- const scoreResult = score(scoringArgs);
12045
- const result = scoreResult instanceof Promise ? await scoreResult : scoreResult;
12046
- const {
12047
- metadata: resultMetadata,
12048
- name: _,
12049
- ...resultRest
12050
- } = result;
12051
- span.log({
12052
- output: resultRest,
12053
- metadata: resultMetadata
12054
- });
12055
- return result;
12056
- },
12057
- {
12058
- name: score.name || `scorer_${score_idx}`,
12059
- spanAttributes: {
12060
- type: SpanTypeAttribute.SCORE
12101
+ try {
12102
+ const result = await rootSpan.traced(
12103
+ async (span) => {
12104
+ const scoreResult = score(scoringArgs);
12105
+ const result2 = scoreResult instanceof Promise ? await scoreResult : scoreResult;
12106
+ const {
12107
+ metadata: resultMetadata,
12108
+ name: _,
12109
+ ...resultRest
12110
+ } = result2;
12111
+ span.log({
12112
+ output: resultRest,
12113
+ metadata: resultMetadata
12114
+ });
12115
+ return result2;
12061
12116
  },
12062
- event: { input: scoringArgs }
12063
- }
12064
- );
12117
+ {
12118
+ name: scorerNames[score_idx],
12119
+ spanAttributes: {
12120
+ type: SpanTypeAttribute.SCORE
12121
+ },
12122
+ event: { input: scoringArgs }
12123
+ }
12124
+ );
12125
+ return { kind: "score", value: result };
12126
+ } catch (e) {
12127
+ return { kind: "error", value: e };
12128
+ }
12065
12129
  })
12066
12130
  );
12131
+ const passingScorersAndResults = [];
12132
+ const failingScorersAndResults = [];
12133
+ scoreResults.forEach((result, i) => {
12134
+ const name = scorerNames[i];
12135
+ if (result.kind === "score") {
12136
+ passingScorersAndResults.push({ name, score: result.value });
12137
+ } else {
12138
+ failingScorersAndResults.push({ name, error: result.value });
12139
+ }
12140
+ });
12067
12141
  const scoreMetadata = {};
12068
- for (const scoreResult of scoreResults) {
12142
+ for (const { score: scoreResult } of passingScorersAndResults) {
12069
12143
  scores[scoreResult.name] = scoreResult.score;
12070
12144
  const metadata2 = {
12071
12145
  ...scoreResult.metadata
12072
12146
  };
12073
- if (scoreResult.error !== void 0) {
12074
- metadata2.error = scoreResult.error;
12075
- }
12076
12147
  if (Object.keys(metadata2).length > 0) {
12077
12148
  scoreMetadata[scoreResult.name] = metadata2;
12078
12149
  }
@@ -12081,6 +12152,21 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
12081
12152
  meta({ scores: scoreMetadata });
12082
12153
  }
12083
12154
  rootSpan.log({ scores, metadata });
12155
+ if (failingScorersAndResults.length) {
12156
+ const scorerErrors = Object.fromEntries(
12157
+ failingScorersAndResults.map(({ name, error: error3 }) => [
12158
+ name,
12159
+ error3 instanceof Error ? error3.stack : `${error3}`
12160
+ ])
12161
+ );
12162
+ metadata["scorer_errors"] = scorerErrors;
12163
+ const names = Object.keys(scorerErrors).join(", ");
12164
+ const errors = failingScorersAndResults.map((item) => item.error);
12165
+ throw new AggregateError(
12166
+ errors,
12167
+ `Found exceptions for the following scorers: ${names}`
12168
+ );
12169
+ }
12084
12170
  } catch (e) {
12085
12171
  error2 = e;
12086
12172
  } finally {
@@ -12159,7 +12245,8 @@ function reportEvaluatorResult(evaluatorName, evaluatorResult, {
12159
12245
  if (!verbose && !jsonl) {
12160
12246
  console.error(warning("Add --verbose to see full stack traces."));
12161
12247
  }
12162
- } else if (summary) {
12248
+ }
12249
+ if (summary) {
12163
12250
  console.log(jsonl ? JSON.stringify(summary) : summary);
12164
12251
  } else {
12165
12252
  const scoresByName = {};
@@ -91,7 +91,7 @@ export declare function parseFilters(filters: string[]): Filter[];
91
91
  export declare function runEvaluator(experiment: Experiment | null, evaluator: EvaluatorDef<any, any, any | void, any | void>, progressReporter: ProgressReporter, filters: Filter[]): Promise<{
92
92
  results: {
93
93
  output: any;
94
- metadata: object;
94
+ metadata: Record<string, unknown>;
95
95
  scores: Record<string, number | null>;
96
96
  error: unknown;
97
97
  }[];