braintrust 0.0.80 → 0.0.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,7 @@
1
1
  import chalk from "chalk";
2
- import { Experiment, ExperimentSummary, InitOptions, Span } from "./logger";
2
+ import { Experiment, ExperimentSummary, Metadata, Span } from "./logger";
3
3
  import { Score } from "@braintrust/core";
4
4
  import { ProgressReporter } from "./progress";
5
- export type Metadata = Record<string, unknown>;
6
5
  export interface EvalCase<Input, Expected> {
7
6
  input: Input;
8
7
  expected?: Expected;
@@ -18,30 +17,25 @@ export type EvalScorerArgs<Input, Output, Expected> = EvalCase<Input, Expected>
18
17
  output: Output;
19
18
  };
20
19
  export type EvalScorer<Input, Output, Expected> = (args: EvalScorerArgs<Input, Output, Expected>) => Score | Promise<Score>;
21
- /**
22
- * Additional metadata for the eval definition, such as experiment name.
23
- */
24
- export interface EvalMetadata {
25
- experimentName?: string;
26
- }
27
- export declare function evalMetadataToInitOptions(metadata: EvalMetadata | undefined): InitOptions;
28
20
  /**
29
21
  * An evaluator is a collection of functions that can be used to evaluate a model.
30
22
  * It consists of:
31
23
  * - `data`, a function that returns a list of inputs, expected outputs, and metadata
32
24
  * - `task`, a function that takes an input and returns an output
33
25
  * - `scores`, a set of functions that take an input, output, and expected value and return a score
34
- * - `metadata`, optional additional metadata for the eval definition, such as experiment name.
26
+ * - `experimentName`, an optional name for the experiment.
35
27
  * - `trialCount`, the number of times to run the evaluator per input. This is useful for evaluating applications that
36
28
  * have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the
37
29
  * variance in the results.
30
+ * - `metadata`, optional additional metadata for the experiment.
38
31
  */
39
32
  export interface Evaluator<Input, Output, Expected> {
40
33
  data: EvalData<Input, Expected>;
41
34
  task: EvalTask<Input, Output>;
42
35
  scores: EvalScorer<Input, Output, Expected>[];
43
- metadata?: EvalMetadata;
36
+ experimentName?: string;
44
37
  trialCount?: number;
38
+ metadata?: Metadata;
45
39
  }
46
40
  export type EvaluatorDef<Input, Output, Expected> = {
47
41
  projectName: string;
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  /**
2
2
  * An isomorphic JS library for logging data to Braintrust. `braintrust` is distributed as a [library on NPM](https://www.npmjs.com/package/braintrust).
3
+ * It is also open source and available on [GitHub](https://github.com/braintrustdata/braintrust-sdk/tree/main/js).
3
4
  *
4
5
  * ### Quickstart
5
6
  *
@@ -42,5 +43,5 @@
42
43
  * @module braintrust
43
44
  */
44
45
  export * from "./logger";
45
- export { Evaluator, EvalTask, Eval, EvalMetadata, EvalScorerArgs, } from "./framework";
46
+ export { Evaluator, EvalTask, Eval, EvalScorerArgs } from "./framework";
46
47
  export * from "./oai";
package/dist/index.js CHANGED
@@ -3703,7 +3703,7 @@ var iso = {
3703
3703
  };
3704
3704
  var isomorph_default = iso;
3705
3705
 
3706
- // ../../node_modules/.pnpm/simple-git@3.19.1/node_modules/simple-git/dist/esm/index.js
3706
+ // ../../node_modules/.pnpm/simple-git@3.21.0/node_modules/simple-git/dist/esm/index.js
3707
3707
  var import_file_exists = __toESM(require_dist(), 1);
3708
3708
  var import_debug = __toESM(require_src(), 1);
3709
3709
  var import_child_process = require("child_process");
@@ -3774,6 +3774,11 @@ var __async = (__this, __arguments, generator) => {
3774
3774
  step((generator = generator.apply(__this, __arguments)).next());
3775
3775
  });
3776
3776
  };
3777
+ function pathspec(...paths) {
3778
+ const key = new String(paths);
3779
+ cache.set(key, paths);
3780
+ return key;
3781
+ }
3777
3782
  function isPathSpec(path2) {
3778
3783
  return path2 instanceof String && cache.has(path2);
3779
3784
  }
@@ -5222,6 +5227,19 @@ var init_commit = __esm({
5222
5227
  init_task();
5223
5228
  }
5224
5229
  });
5230
+ function first_commit_default() {
5231
+ return {
5232
+ firstCommit() {
5233
+ return this._runTask(straightThroughStringTask(["rev-list", "--max-parents=0", "HEAD"], true), trailingFunctionArgument(arguments));
5234
+ }
5235
+ };
5236
+ }
5237
+ var init_first_commit = __esm({
5238
+ "src/lib/tasks/first-commit.ts"() {
5239
+ init_utils();
5240
+ init_task();
5241
+ }
5242
+ });
5225
5243
  function hashObjectTask(filePath, write) {
5226
5244
  const commands = ["hash-object", filePath];
5227
5245
  if (write) {
@@ -5555,7 +5573,7 @@ function parseLogOptions(opt = {}, customArgs = []) {
5555
5573
  suffix.push(`${opt.from || ""}${rangeOperator}${opt.to || ""}`);
5556
5574
  }
5557
5575
  if (filterString(opt.file)) {
5558
- suffix.push("--follow", opt.file);
5576
+ command.push("--follow", pathspec(opt.file));
5559
5577
  }
5560
5578
  appendTaskOptions(userOptions(opt), command);
5561
5579
  return {
@@ -5592,6 +5610,7 @@ var excludeOptions;
5592
5610
  var init_log = __esm({
5593
5611
  "src/lib/tasks/log.ts"() {
5594
5612
  init_log_format();
5613
+ init_pathspec();
5595
5614
  init_parse_list_log_summary();
5596
5615
  init_utils();
5597
5616
  init_task();
@@ -6251,6 +6270,7 @@ var init_simple_git_api = __esm({
6251
6270
  init_checkout();
6252
6271
  init_commit();
6253
6272
  init_config();
6273
+ init_first_commit();
6254
6274
  init_grep();
6255
6275
  init_hash_object();
6256
6276
  init_init();
@@ -6324,7 +6344,7 @@ var init_simple_git_api = __esm({
6324
6344
  return this._runTask(statusTask(getTrailingOptions(arguments)), trailingFunctionArgument(arguments));
6325
6345
  }
6326
6346
  };
6327
- Object.assign(SimpleGitApi.prototype, checkout_default(), commit_default(), config_default(), grep_default(), log_default(), show_default(), version_default());
6347
+ Object.assign(SimpleGitApi.prototype, checkout_default(), commit_default(), config_default(), first_commit_default(), grep_default(), log_default(), show_default(), version_default());
6328
6348
  }
6329
6349
  });
6330
6350
  var scheduler_exports = {};
@@ -7793,7 +7813,7 @@ function getCallerLocation() {
7793
7813
  return void 0;
7794
7814
  }
7795
7815
 
7796
- // ../../node_modules/.pnpm/uuid@9.0.0/node_modules/uuid/dist/esm-node/rng.js
7816
+ // ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/rng.js
7797
7817
  var import_crypto = __toESM(require("crypto"));
7798
7818
  var rnds8Pool = new Uint8Array(256);
7799
7819
  var poolPtr = rnds8Pool.length;
@@ -7805,22 +7825,22 @@ function rng() {
7805
7825
  return rnds8Pool.slice(poolPtr, poolPtr += 16);
7806
7826
  }
7807
7827
 
7808
- // ../../node_modules/.pnpm/uuid@9.0.0/node_modules/uuid/dist/esm-node/stringify.js
7828
+ // ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/stringify.js
7809
7829
  var byteToHex = [];
7810
7830
  for (let i = 0; i < 256; ++i) {
7811
7831
  byteToHex.push((i + 256).toString(16).slice(1));
7812
7832
  }
7813
7833
  function unsafeStringify(arr, offset = 0) {
7814
- return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
7834
+ return byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]];
7815
7835
  }
7816
7836
 
7817
- // ../../node_modules/.pnpm/uuid@9.0.0/node_modules/uuid/dist/esm-node/native.js
7837
+ // ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/native.js
7818
7838
  var import_crypto2 = __toESM(require("crypto"));
7819
7839
  var native_default = {
7820
7840
  randomUUID: import_crypto2.default.randomUUID
7821
7841
  };
7822
7842
 
7823
- // ../../node_modules/.pnpm/uuid@9.0.0/node_modules/uuid/dist/esm-node/v4.js
7843
+ // ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/v4.js
7824
7844
  function v4(options, buf, offset) {
7825
7845
  if (native_default.randomUUID && !buf && !options) {
7826
7846
  return native_default.randomUUID();
@@ -7840,58 +7860,40 @@ function v4(options, buf, offset) {
7840
7860
  }
7841
7861
  var v4_default = v4;
7842
7862
 
7843
- // src/util.ts
7863
+ // ../core/js/dist/index.mjs
7844
7864
  var TRANSACTION_ID_FIELD = "_xact_id";
7845
7865
  var IS_MERGE_FIELD = "_is_merge";
7846
- var GLOBAL_PROJECT = "Global";
7847
- function runFinally(f, finallyF) {
7848
- let runSyncCleanup = true;
7849
- try {
7850
- const ret = f();
7851
- if (ret instanceof Promise) {
7852
- runSyncCleanup = false;
7853
- return ret.finally(finallyF);
7854
- } else {
7855
- return ret;
7856
- }
7857
- } finally {
7858
- if (runSyncCleanup) {
7859
- finallyF();
7860
- }
7861
- }
7862
- }
7863
7866
  function mergeDicts(mergeInto, mergeFrom) {
7864
7867
  for (const [k, mergeFromV] of Object.entries(mergeFrom)) {
7865
7868
  const mergeIntoV = mergeInto[k];
7866
7869
  if (mergeIntoV instanceof Object && !Array.isArray(mergeIntoV) && mergeFrom instanceof Object && !Array.isArray(mergeFromV)) {
7867
- mergeDicts(mergeIntoV, mergeFromV);
7870
+ mergeDicts(
7871
+ mergeIntoV,
7872
+ mergeFromV
7873
+ );
7868
7874
  } else {
7869
7875
  mergeInto[k] = mergeFromV;
7870
7876
  }
7871
7877
  }
7872
7878
  }
7873
- function getCurrentUnixTimestamp() {
7874
- return (/* @__PURE__ */ new Date()).getTime() / 1e3;
7875
- }
7876
-
7877
- // src/merge_row_batch.ts
7878
- var DATA_OBJECT_KEYS = [
7879
- "org_id",
7880
- "project_id",
7881
- "experiment_id",
7882
- "dataset_id",
7883
- "prompt_session_id",
7884
- "log_id"
7885
- ];
7886
- function generateUniqueRowKey(row) {
7887
- const coalesceEmpty = (field) => row[field] ?? "";
7888
- return DATA_OBJECT_KEYS.concat(["id"]).map(coalesceEmpty).join(":");
7879
+ function generateMergedRowKey(row) {
7880
+ return JSON.stringify(
7881
+ [
7882
+ "org_id",
7883
+ "project_id",
7884
+ "experiment_id",
7885
+ "dataset_id",
7886
+ "prompt_session_id",
7887
+ "log_id",
7888
+ "id"
7889
+ ].map((k) => row[k])
7890
+ );
7889
7891
  }
7890
7892
  function mergeRowBatch(rows) {
7891
7893
  const out = [];
7892
7894
  const remainingRows = [];
7893
7895
  for (const row of rows) {
7894
- if (row["id"] === void 0) {
7896
+ if (row.id === void 0) {
7895
7897
  out.push(row);
7896
7898
  } else {
7897
7899
  remainingRows.push(row);
@@ -7899,7 +7901,7 @@ function mergeRowBatch(rows) {
7899
7901
  }
7900
7902
  const rowGroups = {};
7901
7903
  for (const row of remainingRows) {
7902
- const key = generateUniqueRowKey(row);
7904
+ const key = generateMergedRowKey(row);
7903
7905
  const existingRow = rowGroups[key];
7904
7906
  if (existingRow !== void 0 && row[IS_MERGE_FIELD]) {
7905
7907
  const preserveNoMerge = !existingRow[IS_MERGE_FIELD];
@@ -7915,6 +7917,28 @@ function mergeRowBatch(rows) {
7915
7917
  return out;
7916
7918
  }
7917
7919
 
7920
+ // src/util.ts
7921
+ var GLOBAL_PROJECT = "Global";
7922
+ function runFinally(f, finallyF) {
7923
+ let runSyncCleanup = true;
7924
+ try {
7925
+ const ret = f();
7926
+ if (ret instanceof Promise) {
7927
+ runSyncCleanup = false;
7928
+ return ret.finally(finallyF);
7929
+ } else {
7930
+ return ret;
7931
+ }
7932
+ } finally {
7933
+ if (runSyncCleanup) {
7934
+ finallyF();
7935
+ }
7936
+ }
7937
+ }
7938
+ function getCurrentUnixTimestamp() {
7939
+ return (/* @__PURE__ */ new Date()).getTime() / 1e3;
7940
+ }
7941
+
7918
7942
  // src/logger.ts
7919
7943
  var NoopSpan = class {
7920
7944
  constructor() {
@@ -8325,7 +8349,8 @@ async function init(project, options = {}) {
8325
8349
  apiUrl,
8326
8350
  apiKey,
8327
8351
  orgName,
8328
- disableCache
8352
+ disableCache,
8353
+ metadata
8329
8354
  } = options || {};
8330
8355
  await login({
8331
8356
  orgName,
@@ -8339,7 +8364,8 @@ async function init(project, options = {}) {
8339
8364
  dataset,
8340
8365
  update,
8341
8366
  baseExperiment,
8342
- isPublic
8367
+ isPublic,
8368
+ metadata
8343
8369
  });
8344
8370
  }
8345
8371
  async function withExperiment(project, callback, options = {}) {
@@ -8637,12 +8663,14 @@ async function _initExperiment(projectName, {
8637
8663
  dataset,
8638
8664
  update,
8639
8665
  baseExperiment,
8640
- isPublic
8666
+ isPublic,
8667
+ metadata
8641
8668
  } = {
8642
8669
  experimentName: void 0,
8643
8670
  description: void 0,
8644
8671
  baseExperiment: void 0,
8645
- isPublic: false
8672
+ isPublic: false,
8673
+ metadata: void 0
8646
8674
  }) {
8647
8675
  const args = {
8648
8676
  project_name: projectName,
@@ -8673,6 +8701,9 @@ async function _initExperiment(projectName, {
8673
8701
  if (isPublic !== void 0) {
8674
8702
  args["public"] = isPublic;
8675
8703
  }
8704
+ if (metadata) {
8705
+ args["metadata"] = metadata;
8706
+ }
8676
8707
  let response = null;
8677
8708
  while (true) {
8678
8709
  try {
@@ -9219,19 +9250,16 @@ var BarProgressReporter = class {
9219
9250
 
9220
9251
  // src/framework.ts
9221
9252
  var import_pluralize = __toESM(require_pluralize());
9222
- function evalMetadataToInitOptions(metadata) {
9223
- return { experiment: metadata?.experimentName };
9224
- }
9225
- function makeEvalName(projectName, metadata) {
9253
+ function makeEvalName(projectName, experimentName) {
9226
9254
  let out = projectName;
9227
- if (metadata?.experimentName) {
9228
- out += ` [experimentName=${metadata.experimentName}]`;
9255
+ if (experimentName) {
9256
+ out += ` [experimentName=${experimentName}]`;
9229
9257
  }
9230
9258
  return out;
9231
9259
  }
9232
9260
  globalThis._evals = {};
9233
9261
  async function Eval(name, evaluator) {
9234
- const evalName = makeEvalName(name, evaluator.metadata);
9262
+ const evalName = makeEvalName(name, evaluator.experimentName);
9235
9263
  if (_evals[evalName]) {
9236
9264
  throw new Error(`Evaluator ${evalName} already exists`);
9237
9265
  }
@@ -9241,7 +9269,6 @@ async function Eval(name, evaluator) {
9241
9269
  }
9242
9270
  const progressReporter = new BarProgressReporter();
9243
9271
  try {
9244
- const { metadata } = evaluator;
9245
9272
  return await withExperiment(
9246
9273
  name,
9247
9274
  async (experiment) => {
@@ -9258,7 +9285,10 @@ async function Eval(name, evaluator) {
9258
9285
  reportEvaluatorResult(name, ret, true);
9259
9286
  return ret.summary;
9260
9287
  },
9261
- evalMetadataToInitOptions(metadata)
9288
+ {
9289
+ experiment: evaluator.experimentName,
9290
+ metadata: evaluator.metadata
9291
+ }
9262
9292
  );
9263
9293
  } finally {
9264
9294
  progressReporter.stop();
package/dist/logger.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  /// <reference lib="dom" />
2
+ import { IS_MERGE_FIELD } from "@braintrust/core";
2
3
  import { IsoAsyncLocalStorage } from "./isomorph";
3
- import { IS_MERGE_FIELD } from "./util";
4
+ export type Metadata = Record<string, unknown>;
4
5
  export type SetCurrentArg = {
5
6
  setCurrent?: boolean;
6
7
  };
@@ -249,6 +250,7 @@ export type InitOptions = {
249
250
  apiKey?: string;
250
251
  orgName?: string;
251
252
  disableCache?: boolean;
253
+ metadata?: Metadata;
252
254
  };
253
255
  /**
254
256
  * Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
@@ -270,6 +272,10 @@ export type InitOptions = {
270
272
  * key is specified, will prompt the user to login.
271
273
  * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
272
274
  * @param options.disableCache Do not use cached login information.
275
+ * @param options.metadata (Optional) A dictionary with additional data about the test example, model outputs, or just
276
+ * about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
277
+ * `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
278
+ * JSON-serializable type, but its keys must be strings.
273
279
  * @returns The newly created Experiment.
274
280
  */
275
281
  export declare function init(project: string, options?: Readonly<InitOptions>): Promise<Experiment>;