braintrust 0.0.80 → 0.0.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +14 -14
- package/dist/browser.js +57 -46
- package/dist/cli.js +122 -89
- package/dist/framework.d.ts +5 -11
- package/dist/index.d.ts +2 -1
- package/dist/index.js +88 -58
- package/dist/logger.d.ts +7 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/util.d.ts +0 -3
- package/package.json +13 -13
- package/dist/merge_row_batch.d.ts +0 -1
package/dist/framework.d.ts
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
|
-
import { Experiment, ExperimentSummary,
|
|
2
|
+
import { Experiment, ExperimentSummary, Metadata, Span } from "./logger";
|
|
3
3
|
import { Score } from "@braintrust/core";
|
|
4
4
|
import { ProgressReporter } from "./progress";
|
|
5
|
-
export type Metadata = Record<string, unknown>;
|
|
6
5
|
export interface EvalCase<Input, Expected> {
|
|
7
6
|
input: Input;
|
|
8
7
|
expected?: Expected;
|
|
@@ -18,30 +17,25 @@ export type EvalScorerArgs<Input, Output, Expected> = EvalCase<Input, Expected>
|
|
|
18
17
|
output: Output;
|
|
19
18
|
};
|
|
20
19
|
export type EvalScorer<Input, Output, Expected> = (args: EvalScorerArgs<Input, Output, Expected>) => Score | Promise<Score>;
|
|
21
|
-
/**
|
|
22
|
-
* Additional metadata for the eval definition, such as experiment name.
|
|
23
|
-
*/
|
|
24
|
-
export interface EvalMetadata {
|
|
25
|
-
experimentName?: string;
|
|
26
|
-
}
|
|
27
|
-
export declare function evalMetadataToInitOptions(metadata: EvalMetadata | undefined): InitOptions;
|
|
28
20
|
/**
|
|
29
21
|
* An evaluator is a collection of functions that can be used to evaluate a model.
|
|
30
22
|
* It consists of:
|
|
31
23
|
* - `data`, a function that returns a list of inputs, expected outputs, and metadata
|
|
32
24
|
* - `task`, a function that takes an input and returns an output
|
|
33
25
|
* - `scores`, a set of functions that take an input, output, and expected value and return a score
|
|
34
|
-
* - `
|
|
26
|
+
* - `experimentName`, an optional name for the experiment.
|
|
35
27
|
* - `trialCount`, the number of times to run the evaluator per input. This is useful for evaluating applications that
|
|
36
28
|
* have non-deterministic behavior and gives you both a stronger aggregate measure and a sense of the
|
|
37
29
|
* variance in the results.
|
|
30
|
+
* - `metadata`, optional additional metadata for the experiment.
|
|
38
31
|
*/
|
|
39
32
|
export interface Evaluator<Input, Output, Expected> {
|
|
40
33
|
data: EvalData<Input, Expected>;
|
|
41
34
|
task: EvalTask<Input, Output>;
|
|
42
35
|
scores: EvalScorer<Input, Output, Expected>[];
|
|
43
|
-
|
|
36
|
+
experimentName?: string;
|
|
44
37
|
trialCount?: number;
|
|
38
|
+
metadata?: Metadata;
|
|
45
39
|
}
|
|
46
40
|
export type EvaluatorDef<Input, Output, Expected> = {
|
|
47
41
|
projectName: string;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* An isomorphic JS library for logging data to Braintrust. `braintrust` is distributed as a [library on NPM](https://www.npmjs.com/package/braintrust).
|
|
3
|
+
* It is also open source and available on [GitHub](https://github.com/braintrustdata/braintrust-sdk/tree/main/js).
|
|
3
4
|
*
|
|
4
5
|
* ### Quickstart
|
|
5
6
|
*
|
|
@@ -42,5 +43,5 @@
|
|
|
42
43
|
* @module braintrust
|
|
43
44
|
*/
|
|
44
45
|
export * from "./logger";
|
|
45
|
-
export { Evaluator, EvalTask, Eval,
|
|
46
|
+
export { Evaluator, EvalTask, Eval, EvalScorerArgs } from "./framework";
|
|
46
47
|
export * from "./oai";
|
package/dist/index.js
CHANGED
|
@@ -3703,7 +3703,7 @@ var iso = {
|
|
|
3703
3703
|
};
|
|
3704
3704
|
var isomorph_default = iso;
|
|
3705
3705
|
|
|
3706
|
-
// ../../node_modules/.pnpm/simple-git@3.
|
|
3706
|
+
// ../../node_modules/.pnpm/simple-git@3.21.0/node_modules/simple-git/dist/esm/index.js
|
|
3707
3707
|
var import_file_exists = __toESM(require_dist(), 1);
|
|
3708
3708
|
var import_debug = __toESM(require_src(), 1);
|
|
3709
3709
|
var import_child_process = require("child_process");
|
|
@@ -3774,6 +3774,11 @@ var __async = (__this, __arguments, generator) => {
|
|
|
3774
3774
|
step((generator = generator.apply(__this, __arguments)).next());
|
|
3775
3775
|
});
|
|
3776
3776
|
};
|
|
3777
|
+
function pathspec(...paths) {
|
|
3778
|
+
const key = new String(paths);
|
|
3779
|
+
cache.set(key, paths);
|
|
3780
|
+
return key;
|
|
3781
|
+
}
|
|
3777
3782
|
function isPathSpec(path2) {
|
|
3778
3783
|
return path2 instanceof String && cache.has(path2);
|
|
3779
3784
|
}
|
|
@@ -5222,6 +5227,19 @@ var init_commit = __esm({
|
|
|
5222
5227
|
init_task();
|
|
5223
5228
|
}
|
|
5224
5229
|
});
|
|
5230
|
+
function first_commit_default() {
|
|
5231
|
+
return {
|
|
5232
|
+
firstCommit() {
|
|
5233
|
+
return this._runTask(straightThroughStringTask(["rev-list", "--max-parents=0", "HEAD"], true), trailingFunctionArgument(arguments));
|
|
5234
|
+
}
|
|
5235
|
+
};
|
|
5236
|
+
}
|
|
5237
|
+
var init_first_commit = __esm({
|
|
5238
|
+
"src/lib/tasks/first-commit.ts"() {
|
|
5239
|
+
init_utils();
|
|
5240
|
+
init_task();
|
|
5241
|
+
}
|
|
5242
|
+
});
|
|
5225
5243
|
function hashObjectTask(filePath, write) {
|
|
5226
5244
|
const commands = ["hash-object", filePath];
|
|
5227
5245
|
if (write) {
|
|
@@ -5555,7 +5573,7 @@ function parseLogOptions(opt = {}, customArgs = []) {
|
|
|
5555
5573
|
suffix.push(`${opt.from || ""}${rangeOperator}${opt.to || ""}`);
|
|
5556
5574
|
}
|
|
5557
5575
|
if (filterString(opt.file)) {
|
|
5558
|
-
|
|
5576
|
+
command.push("--follow", pathspec(opt.file));
|
|
5559
5577
|
}
|
|
5560
5578
|
appendTaskOptions(userOptions(opt), command);
|
|
5561
5579
|
return {
|
|
@@ -5592,6 +5610,7 @@ var excludeOptions;
|
|
|
5592
5610
|
var init_log = __esm({
|
|
5593
5611
|
"src/lib/tasks/log.ts"() {
|
|
5594
5612
|
init_log_format();
|
|
5613
|
+
init_pathspec();
|
|
5595
5614
|
init_parse_list_log_summary();
|
|
5596
5615
|
init_utils();
|
|
5597
5616
|
init_task();
|
|
@@ -6251,6 +6270,7 @@ var init_simple_git_api = __esm({
|
|
|
6251
6270
|
init_checkout();
|
|
6252
6271
|
init_commit();
|
|
6253
6272
|
init_config();
|
|
6273
|
+
init_first_commit();
|
|
6254
6274
|
init_grep();
|
|
6255
6275
|
init_hash_object();
|
|
6256
6276
|
init_init();
|
|
@@ -6324,7 +6344,7 @@ var init_simple_git_api = __esm({
|
|
|
6324
6344
|
return this._runTask(statusTask(getTrailingOptions(arguments)), trailingFunctionArgument(arguments));
|
|
6325
6345
|
}
|
|
6326
6346
|
};
|
|
6327
|
-
Object.assign(SimpleGitApi.prototype, checkout_default(), commit_default(), config_default(), grep_default(), log_default(), show_default(), version_default());
|
|
6347
|
+
Object.assign(SimpleGitApi.prototype, checkout_default(), commit_default(), config_default(), first_commit_default(), grep_default(), log_default(), show_default(), version_default());
|
|
6328
6348
|
}
|
|
6329
6349
|
});
|
|
6330
6350
|
var scheduler_exports = {};
|
|
@@ -7793,7 +7813,7 @@ function getCallerLocation() {
|
|
|
7793
7813
|
return void 0;
|
|
7794
7814
|
}
|
|
7795
7815
|
|
|
7796
|
-
// ../../node_modules/.pnpm/uuid@9.0.
|
|
7816
|
+
// ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/rng.js
|
|
7797
7817
|
var import_crypto = __toESM(require("crypto"));
|
|
7798
7818
|
var rnds8Pool = new Uint8Array(256);
|
|
7799
7819
|
var poolPtr = rnds8Pool.length;
|
|
@@ -7805,22 +7825,22 @@ function rng() {
|
|
|
7805
7825
|
return rnds8Pool.slice(poolPtr, poolPtr += 16);
|
|
7806
7826
|
}
|
|
7807
7827
|
|
|
7808
|
-
// ../../node_modules/.pnpm/uuid@9.0.
|
|
7828
|
+
// ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/stringify.js
|
|
7809
7829
|
var byteToHex = [];
|
|
7810
7830
|
for (let i = 0; i < 256; ++i) {
|
|
7811
7831
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
7812
7832
|
}
|
|
7813
7833
|
function unsafeStringify(arr, offset = 0) {
|
|
7814
|
-
return
|
|
7834
|
+
return byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]];
|
|
7815
7835
|
}
|
|
7816
7836
|
|
|
7817
|
-
// ../../node_modules/.pnpm/uuid@9.0.
|
|
7837
|
+
// ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/native.js
|
|
7818
7838
|
var import_crypto2 = __toESM(require("crypto"));
|
|
7819
7839
|
var native_default = {
|
|
7820
7840
|
randomUUID: import_crypto2.default.randomUUID
|
|
7821
7841
|
};
|
|
7822
7842
|
|
|
7823
|
-
// ../../node_modules/.pnpm/uuid@9.0.
|
|
7843
|
+
// ../../node_modules/.pnpm/uuid@9.0.1/node_modules/uuid/dist/esm-node/v4.js
|
|
7824
7844
|
function v4(options, buf, offset) {
|
|
7825
7845
|
if (native_default.randomUUID && !buf && !options) {
|
|
7826
7846
|
return native_default.randomUUID();
|
|
@@ -7840,58 +7860,40 @@ function v4(options, buf, offset) {
|
|
|
7840
7860
|
}
|
|
7841
7861
|
var v4_default = v4;
|
|
7842
7862
|
|
|
7843
|
-
//
|
|
7863
|
+
// ../core/js/dist/index.mjs
|
|
7844
7864
|
var TRANSACTION_ID_FIELD = "_xact_id";
|
|
7845
7865
|
var IS_MERGE_FIELD = "_is_merge";
|
|
7846
|
-
var GLOBAL_PROJECT = "Global";
|
|
7847
|
-
function runFinally(f, finallyF) {
|
|
7848
|
-
let runSyncCleanup = true;
|
|
7849
|
-
try {
|
|
7850
|
-
const ret = f();
|
|
7851
|
-
if (ret instanceof Promise) {
|
|
7852
|
-
runSyncCleanup = false;
|
|
7853
|
-
return ret.finally(finallyF);
|
|
7854
|
-
} else {
|
|
7855
|
-
return ret;
|
|
7856
|
-
}
|
|
7857
|
-
} finally {
|
|
7858
|
-
if (runSyncCleanup) {
|
|
7859
|
-
finallyF();
|
|
7860
|
-
}
|
|
7861
|
-
}
|
|
7862
|
-
}
|
|
7863
7866
|
function mergeDicts(mergeInto, mergeFrom) {
|
|
7864
7867
|
for (const [k, mergeFromV] of Object.entries(mergeFrom)) {
|
|
7865
7868
|
const mergeIntoV = mergeInto[k];
|
|
7866
7869
|
if (mergeIntoV instanceof Object && !Array.isArray(mergeIntoV) && mergeFrom instanceof Object && !Array.isArray(mergeFromV)) {
|
|
7867
|
-
mergeDicts(
|
|
7870
|
+
mergeDicts(
|
|
7871
|
+
mergeIntoV,
|
|
7872
|
+
mergeFromV
|
|
7873
|
+
);
|
|
7868
7874
|
} else {
|
|
7869
7875
|
mergeInto[k] = mergeFromV;
|
|
7870
7876
|
}
|
|
7871
7877
|
}
|
|
7872
7878
|
}
|
|
7873
|
-
function
|
|
7874
|
-
return
|
|
7875
|
-
|
|
7876
|
-
|
|
7877
|
-
|
|
7878
|
-
|
|
7879
|
-
|
|
7880
|
-
|
|
7881
|
-
|
|
7882
|
-
|
|
7883
|
-
|
|
7884
|
-
|
|
7885
|
-
];
|
|
7886
|
-
function generateUniqueRowKey(row) {
|
|
7887
|
-
const coalesceEmpty = (field) => row[field] ?? "";
|
|
7888
|
-
return DATA_OBJECT_KEYS.concat(["id"]).map(coalesceEmpty).join(":");
|
|
7879
|
+
function generateMergedRowKey(row) {
|
|
7880
|
+
return JSON.stringify(
|
|
7881
|
+
[
|
|
7882
|
+
"org_id",
|
|
7883
|
+
"project_id",
|
|
7884
|
+
"experiment_id",
|
|
7885
|
+
"dataset_id",
|
|
7886
|
+
"prompt_session_id",
|
|
7887
|
+
"log_id",
|
|
7888
|
+
"id"
|
|
7889
|
+
].map((k) => row[k])
|
|
7890
|
+
);
|
|
7889
7891
|
}
|
|
7890
7892
|
function mergeRowBatch(rows) {
|
|
7891
7893
|
const out = [];
|
|
7892
7894
|
const remainingRows = [];
|
|
7893
7895
|
for (const row of rows) {
|
|
7894
|
-
if (row
|
|
7896
|
+
if (row.id === void 0) {
|
|
7895
7897
|
out.push(row);
|
|
7896
7898
|
} else {
|
|
7897
7899
|
remainingRows.push(row);
|
|
@@ -7899,7 +7901,7 @@ function mergeRowBatch(rows) {
|
|
|
7899
7901
|
}
|
|
7900
7902
|
const rowGroups = {};
|
|
7901
7903
|
for (const row of remainingRows) {
|
|
7902
|
-
const key =
|
|
7904
|
+
const key = generateMergedRowKey(row);
|
|
7903
7905
|
const existingRow = rowGroups[key];
|
|
7904
7906
|
if (existingRow !== void 0 && row[IS_MERGE_FIELD]) {
|
|
7905
7907
|
const preserveNoMerge = !existingRow[IS_MERGE_FIELD];
|
|
@@ -7915,6 +7917,28 @@ function mergeRowBatch(rows) {
|
|
|
7915
7917
|
return out;
|
|
7916
7918
|
}
|
|
7917
7919
|
|
|
7920
|
+
// src/util.ts
|
|
7921
|
+
var GLOBAL_PROJECT = "Global";
|
|
7922
|
+
function runFinally(f, finallyF) {
|
|
7923
|
+
let runSyncCleanup = true;
|
|
7924
|
+
try {
|
|
7925
|
+
const ret = f();
|
|
7926
|
+
if (ret instanceof Promise) {
|
|
7927
|
+
runSyncCleanup = false;
|
|
7928
|
+
return ret.finally(finallyF);
|
|
7929
|
+
} else {
|
|
7930
|
+
return ret;
|
|
7931
|
+
}
|
|
7932
|
+
} finally {
|
|
7933
|
+
if (runSyncCleanup) {
|
|
7934
|
+
finallyF();
|
|
7935
|
+
}
|
|
7936
|
+
}
|
|
7937
|
+
}
|
|
7938
|
+
function getCurrentUnixTimestamp() {
|
|
7939
|
+
return (/* @__PURE__ */ new Date()).getTime() / 1e3;
|
|
7940
|
+
}
|
|
7941
|
+
|
|
7918
7942
|
// src/logger.ts
|
|
7919
7943
|
var NoopSpan = class {
|
|
7920
7944
|
constructor() {
|
|
@@ -8325,7 +8349,8 @@ async function init(project, options = {}) {
|
|
|
8325
8349
|
apiUrl,
|
|
8326
8350
|
apiKey,
|
|
8327
8351
|
orgName,
|
|
8328
|
-
disableCache
|
|
8352
|
+
disableCache,
|
|
8353
|
+
metadata
|
|
8329
8354
|
} = options || {};
|
|
8330
8355
|
await login({
|
|
8331
8356
|
orgName,
|
|
@@ -8339,7 +8364,8 @@ async function init(project, options = {}) {
|
|
|
8339
8364
|
dataset,
|
|
8340
8365
|
update,
|
|
8341
8366
|
baseExperiment,
|
|
8342
|
-
isPublic
|
|
8367
|
+
isPublic,
|
|
8368
|
+
metadata
|
|
8343
8369
|
});
|
|
8344
8370
|
}
|
|
8345
8371
|
async function withExperiment(project, callback, options = {}) {
|
|
@@ -8637,12 +8663,14 @@ async function _initExperiment(projectName, {
|
|
|
8637
8663
|
dataset,
|
|
8638
8664
|
update,
|
|
8639
8665
|
baseExperiment,
|
|
8640
|
-
isPublic
|
|
8666
|
+
isPublic,
|
|
8667
|
+
metadata
|
|
8641
8668
|
} = {
|
|
8642
8669
|
experimentName: void 0,
|
|
8643
8670
|
description: void 0,
|
|
8644
8671
|
baseExperiment: void 0,
|
|
8645
|
-
isPublic: false
|
|
8672
|
+
isPublic: false,
|
|
8673
|
+
metadata: void 0
|
|
8646
8674
|
}) {
|
|
8647
8675
|
const args = {
|
|
8648
8676
|
project_name: projectName,
|
|
@@ -8673,6 +8701,9 @@ async function _initExperiment(projectName, {
|
|
|
8673
8701
|
if (isPublic !== void 0) {
|
|
8674
8702
|
args["public"] = isPublic;
|
|
8675
8703
|
}
|
|
8704
|
+
if (metadata) {
|
|
8705
|
+
args["metadata"] = metadata;
|
|
8706
|
+
}
|
|
8676
8707
|
let response = null;
|
|
8677
8708
|
while (true) {
|
|
8678
8709
|
try {
|
|
@@ -9219,19 +9250,16 @@ var BarProgressReporter = class {
|
|
|
9219
9250
|
|
|
9220
9251
|
// src/framework.ts
|
|
9221
9252
|
var import_pluralize = __toESM(require_pluralize());
|
|
9222
|
-
function
|
|
9223
|
-
return { experiment: metadata?.experimentName };
|
|
9224
|
-
}
|
|
9225
|
-
function makeEvalName(projectName, metadata) {
|
|
9253
|
+
function makeEvalName(projectName, experimentName) {
|
|
9226
9254
|
let out = projectName;
|
|
9227
|
-
if (
|
|
9228
|
-
out += ` [experimentName=${
|
|
9255
|
+
if (experimentName) {
|
|
9256
|
+
out += ` [experimentName=${experimentName}]`;
|
|
9229
9257
|
}
|
|
9230
9258
|
return out;
|
|
9231
9259
|
}
|
|
9232
9260
|
globalThis._evals = {};
|
|
9233
9261
|
async function Eval(name, evaluator) {
|
|
9234
|
-
const evalName = makeEvalName(name, evaluator.
|
|
9262
|
+
const evalName = makeEvalName(name, evaluator.experimentName);
|
|
9235
9263
|
if (_evals[evalName]) {
|
|
9236
9264
|
throw new Error(`Evaluator ${evalName} already exists`);
|
|
9237
9265
|
}
|
|
@@ -9241,7 +9269,6 @@ async function Eval(name, evaluator) {
|
|
|
9241
9269
|
}
|
|
9242
9270
|
const progressReporter = new BarProgressReporter();
|
|
9243
9271
|
try {
|
|
9244
|
-
const { metadata } = evaluator;
|
|
9245
9272
|
return await withExperiment(
|
|
9246
9273
|
name,
|
|
9247
9274
|
async (experiment) => {
|
|
@@ -9258,7 +9285,10 @@ async function Eval(name, evaluator) {
|
|
|
9258
9285
|
reportEvaluatorResult(name, ret, true);
|
|
9259
9286
|
return ret.summary;
|
|
9260
9287
|
},
|
|
9261
|
-
|
|
9288
|
+
{
|
|
9289
|
+
experiment: evaluator.experimentName,
|
|
9290
|
+
metadata: evaluator.metadata
|
|
9291
|
+
}
|
|
9262
9292
|
);
|
|
9263
9293
|
} finally {
|
|
9264
9294
|
progressReporter.stop();
|
package/dist/logger.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/// <reference lib="dom" />
|
|
2
|
+
import { IS_MERGE_FIELD } from "@braintrust/core";
|
|
2
3
|
import { IsoAsyncLocalStorage } from "./isomorph";
|
|
3
|
-
|
|
4
|
+
export type Metadata = Record<string, unknown>;
|
|
4
5
|
export type SetCurrentArg = {
|
|
5
6
|
setCurrent?: boolean;
|
|
6
7
|
};
|
|
@@ -249,6 +250,7 @@ export type InitOptions = {
|
|
|
249
250
|
apiKey?: string;
|
|
250
251
|
orgName?: string;
|
|
251
252
|
disableCache?: boolean;
|
|
253
|
+
metadata?: Metadata;
|
|
252
254
|
};
|
|
253
255
|
/**
|
|
254
256
|
* Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
|
|
@@ -270,6 +272,10 @@ export type InitOptions = {
|
|
|
270
272
|
* key is specified, will prompt the user to login.
|
|
271
273
|
* @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
|
|
272
274
|
* @param options.disableCache Do not use cached login information.
|
|
275
|
+
* @param options.metadata (Optional) A dictionary with additional data about the test example, model outputs, or just
|
|
276
|
+
* about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
|
|
277
|
+
* `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
|
|
278
|
+
* JSON-serializable type, but its keys must be strings.
|
|
273
279
|
* @returns The newly created Experiment.
|
|
274
280
|
*/
|
|
275
281
|
export declare function init(project: string, options?: Readonly<InitOptions>): Promise<Experiment>;
|