braintrust 0.0.97 → 0.0.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -9065,7 +9065,7 @@ var require_package = __commonJS({
9065
9065
  "package.json"(exports2, module2) {
9066
9066
  module2.exports = {
9067
9067
  name: "braintrust",
9068
- version: "0.0.97",
9068
+ version: "0.0.99",
9069
9069
  description: "SDK for integrating Braintrust",
9070
9070
  main: "./dist/index.js",
9071
9071
  browser: {
@@ -9108,7 +9108,7 @@ var require_package = __commonJS({
9108
9108
  typescript: "^5.3.3"
9109
9109
  },
9110
9110
  dependencies: {
9111
- "@braintrust/core": "^0.0.15",
9111
+ "@braintrust/core": "^0.0.17",
9112
9112
  argparse: "^2.0.1",
9113
9113
  chalk: "^4.1.2",
9114
9114
  "cli-progress": "^3.12.0",
@@ -10508,9 +10508,10 @@ var v4_default = v4;
10508
10508
  // src/cli.ts
10509
10509
  var import_pluralize2 = __toESM(require_pluralize());
10510
10510
 
10511
- // ../core/js/dist/index.mjs
10511
+ // ../core/js/dist/main/index.mjs
10512
10512
  var TRANSACTION_ID_FIELD = "_xact_id";
10513
10513
  var IS_MERGE_FIELD = "_is_merge";
10514
+ var MERGE_PATHS_FIELD = "_merge_paths";
10514
10515
  var AUDIT_SOURCE_FIELD = "_audit_source";
10515
10516
  var AUDIT_METADATA_FIELD = "_audit_metadata";
10516
10517
  var VALID_SOURCES = ["app", "api", "external"];
@@ -10569,6 +10570,24 @@ function mergeRowBatch(rows) {
10569
10570
  out.push(...Object.values(rowGroups));
10570
10571
  return out;
10571
10572
  }
10573
+ function makeLegacyEvent(e) {
10574
+ if (!("dataset_id" in e) || !("expected" in e)) {
10575
+ return e;
10576
+ }
10577
+ const event = {
10578
+ ...e,
10579
+ output: e.expected
10580
+ };
10581
+ delete event.expected;
10582
+ if (MERGE_PATHS_FIELD in event) {
10583
+ for (const path5 of event[MERGE_PATHS_FIELD] || []) {
10584
+ if (path5.length > 0 && path5[0] === "expected") {
10585
+ path5[0] = "output";
10586
+ }
10587
+ }
10588
+ }
10589
+ return event;
10590
+ }
10572
10591
  var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
10573
10592
  SpanTypeAttribute2["LLM"] = "llm";
10574
10593
  SpanTypeAttribute2["SCORE"] = "score";
@@ -10611,7 +10630,7 @@ var DefaultAsyncLocalStorage = class {
10611
10630
  }
10612
10631
  };
10613
10632
  var iso = {
10614
- getRepoStatus: async (_settings) => void 0,
10633
+ getRepoInfo: async (_settings) => void 0,
10615
10634
  getPastNAncestors: async () => [],
10616
10635
  getEnv: (_name) => void 0,
10617
10636
  getCallerLocation: () => void 0,
@@ -10645,10 +10664,11 @@ function isEmpty(a) {
10645
10664
  return a === void 0 || a === null;
10646
10665
  }
10647
10666
  var LazyValue = class {
10667
+ callable;
10668
+ value = {
10669
+ hasComputed: false
10670
+ };
10648
10671
  constructor(callable) {
10649
- this.value = {
10650
- hasComputed: false
10651
- };
10652
10672
  this.callable = callable;
10653
10673
  }
10654
10674
  async get() {
@@ -10662,8 +10682,11 @@ var LazyValue = class {
10662
10682
 
10663
10683
  // src/logger.ts
10664
10684
  var NoopSpan = class {
10685
+ id;
10686
+ span_id;
10687
+ root_span_id;
10688
+ kind = "span";
10665
10689
  constructor() {
10666
- this.kind = "span";
10667
10690
  this.id = "";
10668
10691
  this.span_id = "";
10669
10692
  this.root_span_id = "";
@@ -10687,15 +10710,22 @@ var NoopSpan = class {
10687
10710
  };
10688
10711
  var NOOP_SPAN = new NoopSpan();
10689
10712
  var BraintrustState = class {
10713
+ id;
10714
+ currentExperiment;
10715
+ // Note: the value of IsAsyncFlush doesn't really matter here, since we
10716
+ // (safely) dynamically cast it whenever retrieving the logger.
10717
+ currentLogger;
10718
+ currentSpan;
10719
+ appUrl = null;
10720
+ loginToken = null;
10721
+ orgId = null;
10722
+ orgName = null;
10723
+ logUrl = null;
10724
+ loggedIn = false;
10725
+ gitMetadataSettings;
10726
+ _apiConn = null;
10727
+ _logConn = null;
10690
10728
  constructor() {
10691
- this.appUrl = null;
10692
- this.loginToken = null;
10693
- this.orgId = null;
10694
- this.orgName = null;
10695
- this.logUrl = null;
10696
- this.loggedIn = false;
10697
- this._apiConn = null;
10698
- this._logConn = null;
10699
10729
  this.id = v4_default();
10700
10730
  this.currentExperiment = void 0;
10701
10731
  this.currentLogger = void 0;
@@ -10742,6 +10772,9 @@ function _internalSetInitialState() {
10742
10772
  }
10743
10773
  var _internalGetGlobalState = () => _state;
10744
10774
  var FailedHTTPResponse = class extends Error {
10775
+ status;
10776
+ text;
10777
+ data;
10745
10778
  constructor(status, text, data = null) {
10746
10779
  super(`${status}: ${text}`);
10747
10780
  this.status = status;
@@ -10761,6 +10794,9 @@ async function checkResponse(resp) {
10761
10794
  }
10762
10795
  }
10763
10796
  var HTTPConnection = class _HTTPConnection {
10797
+ base_url;
10798
+ token;
10799
+ headers;
10764
10800
  constructor(base_url) {
10765
10801
  this.base_url = base_url;
10766
10802
  this.token = null;
@@ -10915,16 +10951,20 @@ var MaxRequestSize = 6 * 1024 * 1024;
10915
10951
  function constructJsonArray(items) {
10916
10952
  return `[${items.join(",")}]`;
10917
10953
  }
10954
+ function constructLogs3Data(items) {
10955
+ return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
10956
+ }
10918
10957
  var DefaultBatchSize = 100;
10919
10958
  var NumRetries = 3;
10920
10959
  function now() {
10921
10960
  return (/* @__PURE__ */ new Date()).getTime();
10922
10961
  }
10923
10962
  var BackgroundLogger = class {
10963
+ logConn;
10964
+ items = [];
10965
+ active_flush = Promise.resolve([]);
10966
+ active_flush_resolved = true;
10924
10967
  constructor(logConn) {
10925
- this.items = [];
10926
- this.active_flush = Promise.resolve([]);
10927
- this.active_flush_resolved = true;
10928
10968
  this.logConn = logConn;
10929
10969
  isomorph_default.processOn("beforeExit", async () => {
10930
10970
  await this.flush();
@@ -10973,11 +11013,20 @@ var BackgroundLogger = class {
10973
11013
  }
10974
11014
  postPromises.push(
10975
11015
  (async () => {
10976
- const itemsS = constructJsonArray(items);
11016
+ const dataStr = constructLogs3Data(items);
10977
11017
  for (let i = 0; i < NumRetries; i++) {
10978
11018
  const startTime = now();
10979
11019
  try {
10980
- return (await (await this.logConn.get()).post_json("logs", itemsS)).map((res) => res.id);
11020
+ try {
11021
+ return (await (await this.logConn.get()).post_json("logs3", dataStr)).ids.map((res) => res.id);
11022
+ } catch (e) {
11023
+ const legacyDataS = constructJsonArray(
11024
+ items.map(
11025
+ (r) => JSON.stringify(makeLegacyEvent(JSON.parse(r)))
11026
+ )
11027
+ );
11028
+ return (await (await this.logConn.get()).post_json("logs", legacyDataS)).map((res) => res.id);
11029
+ }
10981
11030
  } catch (e) {
10982
11031
  const retryingText = i + 1 === NumRetries ? "" : " Retrying";
10983
11032
  const errMsg = (() => {
@@ -10988,7 +11037,7 @@ var BackgroundLogger = class {
10988
11037
  }
10989
11038
  })();
10990
11039
  console.warn(
10991
- `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${itemsS.length}. Error: ${errMsg}.${retryingText}`
11040
+ `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}. Error: ${errMsg}.${retryingText}`
10992
11041
  );
10993
11042
  }
10994
11043
  }
@@ -11016,8 +11065,21 @@ var BackgroundLogger = class {
11016
11065
  }
11017
11066
  }
11018
11067
  };
11019
- function init(project, options = {}) {
11068
+ function init(projectOrOptions, optionalOptions) {
11069
+ const options = (() => {
11070
+ if (typeof projectOrOptions === "string") {
11071
+ return { ...optionalOptions, project: projectOrOptions };
11072
+ } else {
11073
+ if (optionalOptions !== void 0) {
11074
+ throw new Error(
11075
+ "Cannot specify options struct as both parameters. Must call either init(project, options) or init(options)."
11076
+ );
11077
+ }
11078
+ return projectOrOptions;
11079
+ }
11080
+ })();
11020
11081
  const {
11082
+ project,
11021
11083
  experiment,
11022
11084
  description,
11023
11085
  dataset,
@@ -11029,47 +11091,55 @@ function init(project, options = {}) {
11029
11091
  apiKey,
11030
11092
  orgName,
11031
11093
  metadata,
11032
- gitMetadataSettings
11033
- } = options || {};
11094
+ gitMetadataSettings,
11095
+ projectId,
11096
+ baseExperimentId,
11097
+ repoInfo: repoInfo2
11098
+ } = options;
11034
11099
  if (open && update) {
11035
11100
  throw new Error("Cannot open and update an experiment at the same time");
11036
11101
  }
11037
11102
  if (open || update) {
11038
11103
  if (isEmpty(experiment)) {
11039
11104
  const action = open ? "open" : "update";
11040
- throw new Error(`Cannot ${action} an experiment without specifying its name`);
11105
+ throw new Error(
11106
+ `Cannot ${action} an experiment without specifying its name`
11107
+ );
11041
11108
  }
11042
- const lazyMetadata2 = new LazyValue(async () => {
11043
- await login({
11044
- orgName,
11045
- apiKey,
11046
- appUrl
11047
- });
11048
- const args = {
11049
- project_name: project,
11050
- org_name: _state.orgName,
11051
- experiment_name: experiment
11052
- };
11053
- const response = await _state.apiConn().post_json("api/experiment/get", args);
11054
- if (response.length === 0) {
11055
- throw new Error(
11056
- `Experiment ${experiment} not found in project ${project}.`
11057
- );
11058
- }
11059
- const info = response[0];
11060
- return {
11061
- project: {
11062
- id: info.project_id,
11063
- name: "",
11064
- fullInfo: {}
11065
- },
11066
- experiment: {
11067
- id: info.id,
11068
- name: info.name,
11069
- fullInfo: info
11109
+ const lazyMetadata2 = new LazyValue(
11110
+ async () => {
11111
+ await login({
11112
+ orgName,
11113
+ apiKey,
11114
+ appUrl
11115
+ });
11116
+ const args = {
11117
+ project_name: project,
11118
+ project_id: projectId,
11119
+ org_name: _state.orgName,
11120
+ experiment_name: experiment
11121
+ };
11122
+ const response = await _state.apiConn().post_json("api/experiment/get", args);
11123
+ if (response.length === 0) {
11124
+ throw new Error(
11125
+ `Experiment ${experiment} not found in project ${projectId ?? project}.`
11126
+ );
11070
11127
  }
11071
- };
11072
- });
11128
+ const info = response[0];
11129
+ return {
11130
+ project: {
11131
+ id: info.project_id,
11132
+ name: "",
11133
+ fullInfo: {}
11134
+ },
11135
+ experiment: {
11136
+ id: info.id,
11137
+ name: info.name,
11138
+ fullInfo: info
11139
+ }
11140
+ };
11141
+ }
11142
+ );
11073
11143
  if (open) {
11074
11144
  return new ReadonlyExperiment(
11075
11145
  lazyMetadata2
@@ -11091,6 +11161,7 @@ function init(project, options = {}) {
11091
11161
  });
11092
11162
  const args = {
11093
11163
  project_name: project,
11164
+ project_id: projectId,
11094
11165
  org_id: _state.orgId
11095
11166
  };
11096
11167
  if (experiment) {
@@ -11099,22 +11170,29 @@ function init(project, options = {}) {
11099
11170
  if (description) {
11100
11171
  args["description"] = description;
11101
11172
  }
11102
- let mergedGitMetadataSettings = {
11103
- ..._state.gitMetadataSettings || {
11104
- collect: "all"
11173
+ const repoInfoArg = await (async () => {
11174
+ if (repoInfo2) {
11175
+ return repoInfo2;
11105
11176
  }
11106
- };
11107
- if (gitMetadataSettings) {
11108
- mergedGitMetadataSettings = mergeGitMetadataSettings(
11109
- mergedGitMetadataSettings,
11110
- gitMetadataSettings
11111
- );
11112
- }
11113
- const repoStatus2 = await isomorph_default.getRepoStatus(gitMetadataSettings);
11114
- if (repoStatus2) {
11115
- args["repo_info"] = repoStatus2;
11177
+ let mergedGitMetadataSettings = {
11178
+ ..._state.gitMetadataSettings || {
11179
+ collect: "all"
11180
+ }
11181
+ };
11182
+ if (gitMetadataSettings) {
11183
+ mergedGitMetadataSettings = mergeGitMetadataSettings(
11184
+ mergedGitMetadataSettings,
11185
+ gitMetadataSettings
11186
+ );
11187
+ }
11188
+ return await isomorph_default.getRepoInfo(mergedGitMetadataSettings);
11189
+ })();
11190
+ if (repoInfoArg) {
11191
+ args["repo_info"] = repoInfoArg;
11116
11192
  }
11117
- if (baseExperiment) {
11193
+ if (baseExperimentId) {
11194
+ args["base_exp_id"] = baseExperimentId;
11195
+ } else if (baseExperiment) {
11118
11196
  args["base_experiment"] = baseExperiment;
11119
11197
  } else {
11120
11198
  args["ancestor_commits"] = await isomorph_default.getPastNAncestors();
@@ -11166,15 +11244,30 @@ function init(project, options = {}) {
11166
11244
  return ret;
11167
11245
  }
11168
11246
  async function login(options = {}) {
11247
+ let { forceLogin = false } = options || {};
11248
+ if (_state.loggedIn && !forceLogin) {
11249
+ let checkUpdatedParam2 = function(varname, arg, orig) {
11250
+ if (!isEmpty(arg) && !isEmpty(orig) && arg !== orig) {
11251
+ throw new Error(
11252
+ `Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
11253
+ );
11254
+ }
11255
+ };
11256
+ var checkUpdatedParam = checkUpdatedParam2;
11257
+ checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
11258
+ checkUpdatedParam2(
11259
+ "apiKey",
11260
+ options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
11261
+ _state.loginToken
11262
+ );
11263
+ checkUpdatedParam2("orgName", options.orgName, _state.orgName);
11264
+ return;
11265
+ }
11169
11266
  const {
11170
11267
  appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
11171
11268
  apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
11172
11269
  orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
11173
11270
  } = options || {};
11174
- let { forceLogin = false } = options || {};
11175
- if (_state.loggedIn && !forceLogin) {
11176
- return;
11177
- }
11178
11271
  _state.resetLoginInfo();
11179
11272
  _state.appUrl = appUrl;
11180
11273
  let conn = null;
@@ -11207,6 +11300,9 @@ async function login(options = {}) {
11207
11300
  _state.loginToken = conn.token;
11208
11301
  _state.loggedIn = true;
11209
11302
  }
11303
+ function currentSpan() {
11304
+ return _state.currentSpan.getStore() ?? NOOP_SPAN;
11305
+ }
11210
11306
  function withCurrent(span, callback) {
11211
11307
  return _state.currentSpan.run(span, () => callback(span));
11212
11308
  }
@@ -11304,11 +11400,12 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
11304
11400
  return event;
11305
11401
  }
11306
11402
  var ObjectFetcher = class {
11307
- constructor(objectType, pinnedVersion) {
11403
+ constructor(objectType, pinnedVersion, mutateRecord) {
11308
11404
  this.objectType = objectType;
11309
11405
  this.pinnedVersion = pinnedVersion;
11310
- this._fetchedData = void 0;
11406
+ this.mutateRecord = mutateRecord;
11311
11407
  }
11408
+ _fetchedData = void 0;
11312
11409
  get id() {
11313
11410
  throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
11314
11411
  }
@@ -11327,12 +11424,24 @@ var ObjectFetcher = class {
11327
11424
  async fetchedData() {
11328
11425
  if (this._fetchedData === void 0) {
11329
11426
  const state = await this.getState();
11330
- const resp = await state.logConn().get(`object/${this.objectType}`, {
11331
- id: await this.id,
11332
- fmt: "json2",
11333
- version: this.pinnedVersion
11334
- });
11335
- this._fetchedData = await resp.json();
11427
+ let data = void 0;
11428
+ try {
11429
+ const resp = await state.logConn().get(`object3/${this.objectType}`, {
11430
+ id: await this.id,
11431
+ fmt: "json2",
11432
+ version: this.pinnedVersion,
11433
+ api_version: "2"
11434
+ });
11435
+ data = await resp.json();
11436
+ } catch (e) {
11437
+ const resp = await state.logConn().get(`object/${this.objectType}`, {
11438
+ id: await this.id,
11439
+ fmt: "json2",
11440
+ version: this.pinnedVersion
11441
+ });
11442
+ data = await resp.json();
11443
+ }
11444
+ this._fetchedData = this.mutateRecord ? data?.map(this.mutateRecord) : data;
11336
11445
  }
11337
11446
  return this._fetchedData || [];
11338
11447
  }
@@ -11356,10 +11465,14 @@ var ObjectFetcher = class {
11356
11465
  }
11357
11466
  };
11358
11467
  var Experiment = class extends ObjectFetcher {
11468
+ lazyMetadata;
11469
+ dataset;
11470
+ bgLogger;
11471
+ lastStartTime;
11472
+ // For type identification.
11473
+ kind = "experiment";
11359
11474
  constructor(lazyMetadata, dataset) {
11360
11475
  super("experiment", void 0);
11361
- // For type identification.
11362
- this.kind = "experiment";
11363
11476
  this.lazyMetadata = lazyMetadata;
11364
11477
  this.dataset = dataset;
11365
11478
  const logConn = new LazyValue(
@@ -11400,9 +11513,19 @@ var Experiment = class extends ObjectFetcher {
11400
11513
  * @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
11401
11514
  * @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
11402
11515
  * @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
11516
+ * @param options Additional logging options
11517
+ * @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
11403
11518
  * :returns: The `id` of the logged event.
11404
11519
  */
11405
- log(event) {
11520
+ log(event, options) {
11521
+ if (!options?.allowLogConcurrentWithActiveSpan) {
11522
+ const checkCurrentSpan = currentSpan();
11523
+ if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
11524
+ throw new Error(
11525
+ "Cannot run toplevel Experiment.log method while there is an active span. To log to the span, use Span.log"
11526
+ );
11527
+ }
11528
+ }
11406
11529
  event = validateAndSanitizeExperimentLogFullArgs(event, !!this.dataset);
11407
11530
  const span = this.startSpan({ startTime: this.lastStartTime, event });
11408
11531
  this.lastStartTime = span.end();
@@ -11444,6 +11567,7 @@ var Experiment = class extends ObjectFetcher {
11444
11567
  startSpan(args) {
11445
11568
  const { name, ...argsRest } = args ?? {};
11446
11569
  return new SpanImpl({
11570
+ parentObject: this,
11447
11571
  parentIds: new LazyValue(() => this.lazyParentIds()),
11448
11572
  bgLogger: this.bgLogger,
11449
11573
  name: name ?? "root",
@@ -11581,20 +11705,38 @@ var ReadonlyExperiment = class extends ObjectFetcher {
11581
11705
  if (record.root_span_id !== record.span_id) {
11582
11706
  continue;
11583
11707
  }
11584
- const { output, expected } = record;
11585
- yield {
11586
- input: record.input,
11587
- expected: expected ?? output
11588
- };
11708
+ const { output, expected: expectedRecord } = record;
11709
+ const expected = expectedRecord ?? output;
11710
+ if (isEmpty(expected)) {
11711
+ yield {
11712
+ input: record.input
11713
+ };
11714
+ } else {
11715
+ yield {
11716
+ input: record.input,
11717
+ expected
11718
+ };
11719
+ }
11589
11720
  }
11590
11721
  }
11591
11722
  };
11592
11723
  var executionCounter = 0;
11593
11724
  var SpanImpl = class _SpanImpl {
11725
+ bgLogger;
11726
+ // `internalData` contains fields that are not part of the "user-sanitized"
11727
+ // set of fields which we want to log in just one of the span rows.
11728
+ internalData;
11729
+ isMerge;
11730
+ loggedEndTime;
11731
+ // For internal use only.
11732
+ parentObject;
11733
+ // These fields are logged to every span row.
11734
+ parentIds;
11735
+ rowIds;
11736
+ kind = "span";
11594
11737
  // root_experiment should only be specified for a root span. parent_span
11595
11738
  // should only be specified for non-root spans.
11596
11739
  constructor(args) {
11597
- this.kind = "span";
11598
11740
  this.loggedEndTime = void 0;
11599
11741
  this.bgLogger = args.bgLogger;
11600
11742
  const callerLocation = isomorph_default.getCallerLocation();
@@ -11622,6 +11764,7 @@ var SpanImpl = class _SpanImpl {
11622
11764
  },
11623
11765
  created: (/* @__PURE__ */ new Date()).toISOString()
11624
11766
  };
11767
+ this.parentObject = args.parentObject;
11625
11768
  this.parentIds = args.parentIds;
11626
11769
  const id = args.event?.id ?? v4_default();
11627
11770
  const span_id = v4_default();
@@ -11693,6 +11836,7 @@ var SpanImpl = class _SpanImpl {
11693
11836
  }
11694
11837
  startSpan(args) {
11695
11838
  return new _SpanImpl({
11839
+ parentObject: this.parentObject,
11696
11840
  parentIds: this.parentIds,
11697
11841
  bgLogger: this.bgLogger,
11698
11842
  parentSpanInfo: {
@@ -11738,8 +11882,9 @@ var SimpleProgressReporter = class {
11738
11882
  }
11739
11883
  };
11740
11884
  var BarProgressReporter = class {
11885
+ multiBar;
11886
+ bars = {};
11741
11887
  constructor() {
11742
- this.bars = {};
11743
11888
  this.multiBar = new cliProgress.MultiBar(
11744
11889
  {
11745
11890
  clearOnComplete: false,
@@ -11881,6 +12026,9 @@ function evaluateFilter(object, filter2) {
11881
12026
  }
11882
12027
  return pattern.test(serializeJSONWithPlainString(key));
11883
12028
  }
12029
+ function scorerName(scorer, scorer_idx) {
12030
+ return scorer.name || `scorer_${scorer_idx}`;
12031
+ }
11884
12032
  async function runEvaluator(experiment, evaluator, progressReporter, filters) {
11885
12033
  if (typeof evaluator.data === "string") {
11886
12034
  throw new Error("Unimplemented: string data paths");
@@ -11924,11 +12072,13 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
11924
12072
  );
11925
12073
  progressReporter.start(evaluator.evalName, data.length);
11926
12074
  const evals = data.map(async (datum) => {
11927
- let metadata = { ...datum.metadata };
11928
- let output = void 0;
11929
- let error2 = void 0;
11930
- let scores = {};
11931
12075
  const callback = async (rootSpan) => {
12076
+ let metadata = {
12077
+ ..."metadata" in datum ? datum.metadata : {}
12078
+ };
12079
+ let output = void 0;
12080
+ let error2 = void 0;
12081
+ let scores = {};
11932
12082
  try {
11933
12083
  const meta = (o) => metadata = { ...metadata, ...o };
11934
12084
  await rootSpan.traced(
@@ -11945,42 +12095,55 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
11945
12095
  );
11946
12096
  rootSpan.log({ output });
11947
12097
  const scoringArgs = { ...datum, metadata, output };
12098
+ const scorerNames = evaluator.scores.map(scorerName);
11948
12099
  const scoreResults = await Promise.all(
11949
12100
  evaluator.scores.map(async (score, score_idx) => {
11950
- return rootSpan.traced(
11951
- async (span) => {
11952
- const scoreResult = score(scoringArgs);
11953
- const result = scoreResult instanceof Promise ? await scoreResult : scoreResult;
11954
- const {
11955
- metadata: resultMetadata,
11956
- name: _,
11957
- ...resultRest
11958
- } = result;
11959
- span.log({
11960
- output: resultRest,
11961
- metadata: resultMetadata
11962
- });
11963
- return result;
11964
- },
11965
- {
11966
- name: score.name || `scorer_${score_idx}`,
11967
- spanAttributes: {
11968
- type: SpanTypeAttribute.SCORE
12101
+ try {
12102
+ const result = await rootSpan.traced(
12103
+ async (span) => {
12104
+ const scoreResult = score(scoringArgs);
12105
+ const result2 = scoreResult instanceof Promise ? await scoreResult : scoreResult;
12106
+ const {
12107
+ metadata: resultMetadata,
12108
+ name: _,
12109
+ ...resultRest
12110
+ } = result2;
12111
+ span.log({
12112
+ output: resultRest,
12113
+ metadata: resultMetadata
12114
+ });
12115
+ return result2;
11969
12116
  },
11970
- event: { input: scoringArgs }
11971
- }
11972
- );
12117
+ {
12118
+ name: scorerNames[score_idx],
12119
+ spanAttributes: {
12120
+ type: SpanTypeAttribute.SCORE
12121
+ },
12122
+ event: { input: scoringArgs }
12123
+ }
12124
+ );
12125
+ return { kind: "score", value: result };
12126
+ } catch (e) {
12127
+ return { kind: "error", value: e };
12128
+ }
11973
12129
  })
11974
12130
  );
12131
+ const passingScorersAndResults = [];
12132
+ const failingScorersAndResults = [];
12133
+ scoreResults.forEach((result, i) => {
12134
+ const name = scorerNames[i];
12135
+ if (result.kind === "score") {
12136
+ passingScorersAndResults.push({ name, score: result.value });
12137
+ } else {
12138
+ failingScorersAndResults.push({ name, error: result.value });
12139
+ }
12140
+ });
11975
12141
  const scoreMetadata = {};
11976
- for (const scoreResult of scoreResults) {
12142
+ for (const { score: scoreResult } of passingScorersAndResults) {
11977
12143
  scores[scoreResult.name] = scoreResult.score;
11978
12144
  const metadata2 = {
11979
12145
  ...scoreResult.metadata
11980
12146
  };
11981
- if (scoreResult.error !== void 0) {
11982
- metadata2.error = scoreResult.error;
11983
- }
11984
12147
  if (Object.keys(metadata2).length > 0) {
11985
12148
  scoreMetadata[scoreResult.name] = metadata2;
11986
12149
  }
@@ -11989,6 +12152,21 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
11989
12152
  meta({ scores: scoreMetadata });
11990
12153
  }
11991
12154
  rootSpan.log({ scores, metadata });
12155
+ if (failingScorersAndResults.length) {
12156
+ const scorerErrors = Object.fromEntries(
12157
+ failingScorersAndResults.map(({ name, error: error3 }) => [
12158
+ name,
12159
+ error3 instanceof Error ? error3.stack : `${error3}`
12160
+ ])
12161
+ );
12162
+ metadata["scorer_errors"] = scorerErrors;
12163
+ const names = Object.keys(scorerErrors).join(", ");
12164
+ const errors = failingScorersAndResults.map((item) => item.error);
12165
+ throw new AggregateError(
12166
+ errors,
12167
+ `Found exceptions for the following scorers: ${names}`
12168
+ );
12169
+ }
11992
12170
  } catch (e) {
11993
12171
  error2 = e;
11994
12172
  } finally {
@@ -12011,7 +12189,7 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
12011
12189
  },
12012
12190
  event: {
12013
12191
  input: datum.input,
12014
- expected: datum.expected
12192
+ expected: "expected" in datum ? datum.expected : void 0
12015
12193
  }
12016
12194
  });
12017
12195
  }
@@ -12067,7 +12245,8 @@ function reportEvaluatorResult(evaluatorName, evaluatorResult, {
12067
12245
  if (!verbose && !jsonl) {
12068
12246
  console.error(warning("Add --verbose to see full stack traces."));
12069
12247
  }
12070
- } else if (summary) {
12248
+ }
12249
+ if (summary) {
12071
12250
  console.log(jsonl ? JSON.stringify(summary) : summary);
12072
12251
  } else {
12073
12252
  const scoresByName = {};
@@ -16127,11 +16306,11 @@ function truncateToByteLimit(s, byteLimit = 65536) {
16127
16306
  const truncated = encoded.subarray(0, byteLimit);
16128
16307
  return new TextDecoder().decode(truncated);
16129
16308
  }
16130
- async function getRepoStatus(settings) {
16309
+ async function getRepoInfo(settings) {
16131
16310
  if (settings && settings.collect === "none") {
16132
16311
  return void 0;
16133
16312
  }
16134
- const repo = await repoStatus();
16313
+ const repo = await repoInfo();
16135
16314
  if (!repo || !settings || settings.collect === "all") {
16136
16315
  return repo;
16137
16316
  }
@@ -16141,7 +16320,7 @@ async function getRepoStatus(settings) {
16141
16320
  });
16142
16321
  return sanitized;
16143
16322
  }
16144
- async function repoStatus() {
16323
+ async function repoInfo() {
16145
16324
  const git = await currentRepo();
16146
16325
  if (git === null) {
16147
16326
  return void 0;
@@ -16238,7 +16417,7 @@ function getCallerLocation() {
16238
16417
 
16239
16418
  // src/node.ts
16240
16419
  function configureNode() {
16241
- isomorph_default.getRepoStatus = getRepoStatus;
16420
+ isomorph_default.getRepoInfo = getRepoInfo;
16242
16421
  isomorph_default.getPastNAncestors = getPastNAncestors;
16243
16422
  isomorph_default.getEnv = (name) => process.env[name];
16244
16423
  isomorph_default.getCallerLocation = getCallerLocation;