braintrust 0.0.94 → 0.0.96
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +164 -94
- package/dist/cli.js +200 -227
- package/dist/framework.d.ts +21 -8
- package/dist/index.d.ts +1 -1
- package/dist/index.js +220 -104
- package/dist/logger.d.ts +50 -45
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -9065,7 +9065,7 @@ var require_package = __commonJS({
|
|
|
9065
9065
|
"package.json"(exports2, module2) {
|
|
9066
9066
|
module2.exports = {
|
|
9067
9067
|
name: "braintrust",
|
|
9068
|
-
version: "0.0.
|
|
9068
|
+
version: "0.0.96",
|
|
9069
9069
|
description: "SDK for integrating Braintrust",
|
|
9070
9070
|
main: "./dist/index.js",
|
|
9071
9071
|
browser: {
|
|
@@ -9108,7 +9108,7 @@ var require_package = __commonJS({
|
|
|
9108
9108
|
typescript: "^5.3.3"
|
|
9109
9109
|
},
|
|
9110
9110
|
dependencies: {
|
|
9111
|
-
"@braintrust/core": "^0.0.
|
|
9111
|
+
"@braintrust/core": "^0.0.14",
|
|
9112
9112
|
argparse: "^2.0.1",
|
|
9113
9113
|
chalk: "^4.1.2",
|
|
9114
9114
|
"cli-progress": "^3.12.0",
|
|
@@ -9128,7 +9128,7 @@ var esbuild = __toESM(require("esbuild"));
|
|
|
9128
9128
|
var import_fs = __toESM(require("fs"));
|
|
9129
9129
|
var import_os = __toESM(require("os"));
|
|
9130
9130
|
var import_path = __toESM(require("path"));
|
|
9131
|
-
var
|
|
9131
|
+
var import_util3 = __toESM(require("util"));
|
|
9132
9132
|
var fsWalk = __toESM(require_out3());
|
|
9133
9133
|
|
|
9134
9134
|
// ../../node_modules/.pnpm/minimatch@9.0.3/node_modules/minimatch/dist/mjs/index.js
|
|
@@ -11023,6 +11023,7 @@ function init(project, options = {}) {
|
|
|
11023
11023
|
dataset,
|
|
11024
11024
|
baseExperiment,
|
|
11025
11025
|
isPublic,
|
|
11026
|
+
open,
|
|
11026
11027
|
update,
|
|
11027
11028
|
appUrl,
|
|
11028
11029
|
apiKey,
|
|
@@ -11030,6 +11031,41 @@ function init(project, options = {}) {
|
|
|
11030
11031
|
metadata,
|
|
11031
11032
|
gitMetadataSettings
|
|
11032
11033
|
} = options || {};
|
|
11034
|
+
if (open) {
|
|
11035
|
+
if (isEmpty(experiment)) {
|
|
11036
|
+
throw new Error("Cannot open an experiment without specifying its name");
|
|
11037
|
+
}
|
|
11038
|
+
if (update) {
|
|
11039
|
+
throw new Error("Cannot open and update an experiment at the same time");
|
|
11040
|
+
}
|
|
11041
|
+
const lazyMetadata2 = new LazyValue(async () => {
|
|
11042
|
+
await login({
|
|
11043
|
+
orgName,
|
|
11044
|
+
apiKey,
|
|
11045
|
+
appUrl
|
|
11046
|
+
});
|
|
11047
|
+
const args = {
|
|
11048
|
+
project_name: project,
|
|
11049
|
+
org_name: _state.orgName,
|
|
11050
|
+
experiment_name: experiment
|
|
11051
|
+
};
|
|
11052
|
+
const response = await _state.apiConn().post_json("api/experiment/get", args);
|
|
11053
|
+
if (response.length === 0) {
|
|
11054
|
+
throw new Error(
|
|
11055
|
+
`Experiment ${experiment} not found in project ${project}.`
|
|
11056
|
+
);
|
|
11057
|
+
}
|
|
11058
|
+
const info = response[0];
|
|
11059
|
+
return {
|
|
11060
|
+
id: info.id,
|
|
11061
|
+
name: info.name,
|
|
11062
|
+
fullInfo: info
|
|
11063
|
+
};
|
|
11064
|
+
});
|
|
11065
|
+
return new ReadonlyExperiment(
|
|
11066
|
+
lazyMetadata2
|
|
11067
|
+
);
|
|
11068
|
+
}
|
|
11033
11069
|
const lazyMetadata = new LazyValue(
|
|
11034
11070
|
async () => {
|
|
11035
11071
|
await login({
|
|
@@ -11071,7 +11107,7 @@ function init(project, options = {}) {
|
|
|
11071
11107
|
args["ancestor_commits"] = await isomorph_default.getPastNAncestors();
|
|
11072
11108
|
}
|
|
11073
11109
|
if (dataset !== void 0) {
|
|
11074
|
-
args["dataset_id"] = dataset.id;
|
|
11110
|
+
args["dataset_id"] = await dataset.id;
|
|
11075
11111
|
args["dataset_version"] = await dataset.version();
|
|
11076
11112
|
}
|
|
11077
11113
|
if (isPublic !== void 0) {
|
|
@@ -11234,15 +11270,15 @@ function validateAndSanitizeExperimentLogPartialArgs(event) {
|
|
|
11234
11270
|
}
|
|
11235
11271
|
}
|
|
11236
11272
|
function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
11237
|
-
if ("input" in event && event.input && "inputs" in event && event.inputs || !("input" in event) && !("inputs" in event)) {
|
|
11273
|
+
if ("input" in event && !isEmpty(event.input) && "inputs" in event && !isEmpty(event.inputs) || !("input" in event) && !("inputs" in event)) {
|
|
11238
11274
|
throw new Error(
|
|
11239
11275
|
"Exactly one of input or inputs (deprecated) must be specified. Prefer input."
|
|
11240
11276
|
);
|
|
11241
11277
|
}
|
|
11242
|
-
if (
|
|
11278
|
+
if (isEmpty(event.output)) {
|
|
11243
11279
|
throw new Error("output must be specified");
|
|
11244
11280
|
}
|
|
11245
|
-
if (
|
|
11281
|
+
if (isEmpty(event.scores)) {
|
|
11246
11282
|
throw new Error("scores must be specified");
|
|
11247
11283
|
}
|
|
11248
11284
|
if (hasDataset && event.datasetRecordId === void 0) {
|
|
@@ -11254,8 +11290,61 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
|
11254
11290
|
}
|
|
11255
11291
|
return event;
|
|
11256
11292
|
}
|
|
11257
|
-
var
|
|
11293
|
+
var ObjectFetcher = class {
|
|
11294
|
+
constructor(objectType, pinnedVersion) {
|
|
11295
|
+
this.objectType = objectType;
|
|
11296
|
+
this.pinnedVersion = pinnedVersion;
|
|
11297
|
+
this._fetchedData = void 0;
|
|
11298
|
+
}
|
|
11299
|
+
get id() {
|
|
11300
|
+
throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
|
|
11301
|
+
}
|
|
11302
|
+
async getState() {
|
|
11303
|
+
throw new Error("ObjectFetcher subclasses must have a 'getState' method");
|
|
11304
|
+
}
|
|
11305
|
+
async *fetch() {
|
|
11306
|
+
const records = await this.fetchedData();
|
|
11307
|
+
for (const record of records) {
|
|
11308
|
+
yield record;
|
|
11309
|
+
}
|
|
11310
|
+
}
|
|
11311
|
+
[Symbol.iterator]() {
|
|
11312
|
+
return this.fetch();
|
|
11313
|
+
}
|
|
11314
|
+
async fetchedData() {
|
|
11315
|
+
if (this._fetchedData === void 0) {
|
|
11316
|
+
const state = await this.getState();
|
|
11317
|
+
const resp = await state.logConn().get(`object/${this.objectType}`, {
|
|
11318
|
+
id: await this.id,
|
|
11319
|
+
fmt: "json2",
|
|
11320
|
+
version: this.pinnedVersion
|
|
11321
|
+
});
|
|
11322
|
+
this._fetchedData = await resp.json();
|
|
11323
|
+
}
|
|
11324
|
+
return this._fetchedData || [];
|
|
11325
|
+
}
|
|
11326
|
+
clearCache() {
|
|
11327
|
+
this._fetchedData = void 0;
|
|
11328
|
+
}
|
|
11329
|
+
async version() {
|
|
11330
|
+
if (this.pinnedVersion !== void 0) {
|
|
11331
|
+
return this.pinnedVersion;
|
|
11332
|
+
} else {
|
|
11333
|
+
const fetchedData = await this.fetchedData();
|
|
11334
|
+
let maxVersion = void 0;
|
|
11335
|
+
for (const record of fetchedData) {
|
|
11336
|
+
const xactId = record[TRANSACTION_ID_FIELD];
|
|
11337
|
+
if (maxVersion === void 0 || (xactId ?? xactId > maxVersion)) {
|
|
11338
|
+
maxVersion = xactId;
|
|
11339
|
+
}
|
|
11340
|
+
}
|
|
11341
|
+
return maxVersion;
|
|
11342
|
+
}
|
|
11343
|
+
}
|
|
11344
|
+
};
|
|
11345
|
+
var Experiment = class extends ObjectFetcher {
|
|
11258
11346
|
constructor(lazyMetadata, dataset) {
|
|
11347
|
+
super("experiment", void 0);
|
|
11259
11348
|
// For type identification.
|
|
11260
11349
|
this.kind = "experiment";
|
|
11261
11350
|
this.lazyMetadata = lazyMetadata;
|
|
@@ -11348,6 +11437,26 @@ var Experiment = class {
|
|
|
11348
11437
|
...argsRest
|
|
11349
11438
|
});
|
|
11350
11439
|
}
|
|
11440
|
+
async fetchBaseExperiment() {
|
|
11441
|
+
const state = await this.getState();
|
|
11442
|
+
const conn = state.apiConn();
|
|
11443
|
+
try {
|
|
11444
|
+
const resp = await conn.post("/api/base_experiment/get_id", {
|
|
11445
|
+
id: await this.id
|
|
11446
|
+
});
|
|
11447
|
+
const base = await resp.json();
|
|
11448
|
+
return {
|
|
11449
|
+
id: base["base_exp_id"],
|
|
11450
|
+
name: base["base_exp_name"]
|
|
11451
|
+
};
|
|
11452
|
+
} catch (e) {
|
|
11453
|
+
if (e instanceof FailedHTTPResponse && e.status === 400) {
|
|
11454
|
+
return null;
|
|
11455
|
+
} else {
|
|
11456
|
+
throw e;
|
|
11457
|
+
}
|
|
11458
|
+
}
|
|
11459
|
+
}
|
|
11351
11460
|
/**
|
|
11352
11461
|
* Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
|
|
11353
11462
|
*
|
|
@@ -11371,14 +11480,10 @@ var Experiment = class {
|
|
|
11371
11480
|
let comparisonExperimentName = void 0;
|
|
11372
11481
|
if (summarizeScores) {
|
|
11373
11482
|
if (comparisonExperimentId === void 0) {
|
|
11374
|
-
const
|
|
11375
|
-
|
|
11376
|
-
|
|
11377
|
-
|
|
11378
|
-
const base_experiments = await resp.json();
|
|
11379
|
-
if (base_experiments.length > 0) {
|
|
11380
|
-
comparisonExperimentId = base_experiments[0]["base_exp_id"];
|
|
11381
|
-
comparisonExperimentName = base_experiments[0]["base_exp_name"];
|
|
11483
|
+
const baseExperiment = await this.fetchBaseExperiment();
|
|
11484
|
+
if (baseExperiment !== null) {
|
|
11485
|
+
comparisonExperimentId = baseExperiment.id;
|
|
11486
|
+
comparisonExperimentName = baseExperiment.name;
|
|
11382
11487
|
}
|
|
11383
11488
|
}
|
|
11384
11489
|
if (comparisonExperimentId !== void 0) {
|
|
@@ -11438,6 +11543,40 @@ var Experiment = class {
|
|
|
11438
11543
|
return this.id;
|
|
11439
11544
|
}
|
|
11440
11545
|
};
|
|
11546
|
+
var ReadonlyExperiment = class extends ObjectFetcher {
|
|
11547
|
+
constructor(lazyMetadata) {
|
|
11548
|
+
super("experiment", void 0);
|
|
11549
|
+
this.lazyMetadata = lazyMetadata;
|
|
11550
|
+
}
|
|
11551
|
+
get id() {
|
|
11552
|
+
return (async () => {
|
|
11553
|
+
return (await this.lazyMetadata.get()).id;
|
|
11554
|
+
})();
|
|
11555
|
+
}
|
|
11556
|
+
get name() {
|
|
11557
|
+
return (async () => {
|
|
11558
|
+
return (await this.lazyMetadata.get()).name;
|
|
11559
|
+
})();
|
|
11560
|
+
}
|
|
11561
|
+
async getState() {
|
|
11562
|
+
await this.lazyMetadata.get();
|
|
11563
|
+
return _state;
|
|
11564
|
+
}
|
|
11565
|
+
async *asDataset() {
|
|
11566
|
+
const records = this.fetch();
|
|
11567
|
+
for await (const record of records) {
|
|
11568
|
+
if (record.root_span_id !== record.span_id) {
|
|
11569
|
+
continue;
|
|
11570
|
+
}
|
|
11571
|
+
const { output, expected } = record;
|
|
11572
|
+
yield {
|
|
11573
|
+
input: record.input,
|
|
11574
|
+
expected: expected ?? output
|
|
11575
|
+
};
|
|
11576
|
+
}
|
|
11577
|
+
}
|
|
11578
|
+
};
|
|
11579
|
+
var executionCounter = 0;
|
|
11441
11580
|
var SpanImpl = class _SpanImpl {
|
|
11442
11581
|
// root_experiment should only be specified for a root span. parent_span
|
|
11443
11582
|
// should only be specified for non-root spans.
|
|
@@ -11463,7 +11602,11 @@ var SpanImpl = class _SpanImpl {
|
|
|
11463
11602
|
start: args.startTime ?? getCurrentUnixTimestamp()
|
|
11464
11603
|
},
|
|
11465
11604
|
context: { ...callerLocation },
|
|
11466
|
-
span_attributes: {
|
|
11605
|
+
span_attributes: {
|
|
11606
|
+
...args.spanAttributes,
|
|
11607
|
+
name,
|
|
11608
|
+
exec_counter: executionCounter++
|
|
11609
|
+
},
|
|
11467
11610
|
created: (/* @__PURE__ */ new Date()).toISOString()
|
|
11468
11611
|
};
|
|
11469
11612
|
this.parentIds = args.parentIds;
|
|
@@ -11561,209 +11704,6 @@ var SpanImpl = class _SpanImpl {
|
|
|
11561
11704
|
return this.end(args);
|
|
11562
11705
|
}
|
|
11563
11706
|
};
|
|
11564
|
-
var Dataset = class {
|
|
11565
|
-
constructor(lazyMetadata, pinnedVersion) {
|
|
11566
|
-
this._fetchedData = void 0;
|
|
11567
|
-
this.lazyMetadata = lazyMetadata;
|
|
11568
|
-
this.pinnedVersion = pinnedVersion;
|
|
11569
|
-
const logConn = new LazyValue(
|
|
11570
|
-
() => this.getState().then((state) => state.logConn())
|
|
11571
|
-
);
|
|
11572
|
-
this.bgLogger = new BackgroundLogger(logConn);
|
|
11573
|
-
}
|
|
11574
|
-
get id() {
|
|
11575
|
-
return (async () => {
|
|
11576
|
-
return (await this.lazyMetadata.get()).dataset.id;
|
|
11577
|
-
})();
|
|
11578
|
-
}
|
|
11579
|
-
get name() {
|
|
11580
|
-
return (async () => {
|
|
11581
|
-
return (await this.lazyMetadata.get()).dataset.name;
|
|
11582
|
-
})();
|
|
11583
|
-
}
|
|
11584
|
-
get project() {
|
|
11585
|
-
return (async () => {
|
|
11586
|
-
return (await this.lazyMetadata.get()).project;
|
|
11587
|
-
})();
|
|
11588
|
-
}
|
|
11589
|
-
async getState() {
|
|
11590
|
-
await this.lazyMetadata.get();
|
|
11591
|
-
return _state;
|
|
11592
|
-
}
|
|
11593
|
-
/**
|
|
11594
|
-
* Insert a single record to the dataset. The record will be batched and uploaded behind the scenes. If you pass in an `id`,
|
|
11595
|
-
* and a record with that `id` already exists, it will be overwritten (upsert).
|
|
11596
|
-
*
|
|
11597
|
-
* @param event The event to log.
|
|
11598
|
-
* @param event.input The argument that uniquely define an input case (an arbitrary, JSON serializable object).
|
|
11599
|
-
* @param event.output The output of your application, including post-processing (an arbitrary, JSON serializable object).
|
|
11600
|
-
* @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
|
|
11601
|
-
* about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
|
|
11602
|
-
* `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
|
|
11603
|
-
* JSON-serializable type, but its keys must be strings.
|
|
11604
|
-
* @param event.id (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
|
|
11605
|
-
* @returns The `id` of the logged record.
|
|
11606
|
-
*/
|
|
11607
|
-
insert({
|
|
11608
|
-
input,
|
|
11609
|
-
output,
|
|
11610
|
-
metadata,
|
|
11611
|
-
id
|
|
11612
|
-
}) {
|
|
11613
|
-
if (metadata !== void 0) {
|
|
11614
|
-
for (const key of Object.keys(metadata)) {
|
|
11615
|
-
if (typeof key !== "string") {
|
|
11616
|
-
throw new Error("metadata keys must be strings");
|
|
11617
|
-
}
|
|
11618
|
-
}
|
|
11619
|
-
}
|
|
11620
|
-
const rowId = id || v4_default();
|
|
11621
|
-
const args = new LazyValue(async () => ({
|
|
11622
|
-
id: rowId,
|
|
11623
|
-
inputs: input,
|
|
11624
|
-
output,
|
|
11625
|
-
project_id: (await this.project).id,
|
|
11626
|
-
dataset_id: await this.id,
|
|
11627
|
-
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
11628
|
-
metadata
|
|
11629
|
-
}));
|
|
11630
|
-
this.bgLogger.log([args]);
|
|
11631
|
-
return rowId;
|
|
11632
|
-
}
|
|
11633
|
-
delete(id) {
|
|
11634
|
-
const args = new LazyValue(async () => ({
|
|
11635
|
-
id,
|
|
11636
|
-
project_id: (await this.project).id,
|
|
11637
|
-
dataset_id: await this.id,
|
|
11638
|
-
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
11639
|
-
_object_delete: true
|
|
11640
|
-
}));
|
|
11641
|
-
this.bgLogger.log([args]);
|
|
11642
|
-
return id;
|
|
11643
|
-
}
|
|
11644
|
-
/**
|
|
11645
|
-
* Summarize the dataset, including high level metrics about its size and other metadata.
|
|
11646
|
-
* @param summarizeData Whether to summarize the data. If false, only the metadata will be returned.
|
|
11647
|
-
* @returns `DatasetSummary`
|
|
11648
|
-
* @returns A summary of the dataset.
|
|
11649
|
-
*/
|
|
11650
|
-
async summarize(options = {}) {
|
|
11651
|
-
let { summarizeData = true } = options || {};
|
|
11652
|
-
await this.bgLogger.flush();
|
|
11653
|
-
const state = await this.getState();
|
|
11654
|
-
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
11655
|
-
state.orgName
|
|
11656
|
-
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
11657
|
-
const datasetUrl = `${projectUrl}/d/${encodeURIComponent(await this.name)}`;
|
|
11658
|
-
let dataSummary = void 0;
|
|
11659
|
-
if (summarizeData) {
|
|
11660
|
-
dataSummary = await state.logConn().get_json(
|
|
11661
|
-
"dataset-summary",
|
|
11662
|
-
{
|
|
11663
|
-
dataset_id: await this.id
|
|
11664
|
-
},
|
|
11665
|
-
3
|
|
11666
|
-
);
|
|
11667
|
-
}
|
|
11668
|
-
return {
|
|
11669
|
-
projectName: (await this.project).name,
|
|
11670
|
-
datasetName: await this.name,
|
|
11671
|
-
projectUrl,
|
|
11672
|
-
datasetUrl,
|
|
11673
|
-
dataSummary
|
|
11674
|
-
};
|
|
11675
|
-
}
|
|
11676
|
-
/**
|
|
11677
|
-
* Fetch all records in the dataset.
|
|
11678
|
-
*
|
|
11679
|
-
* @example
|
|
11680
|
-
* ```
|
|
11681
|
-
* // Use an async iterator to fetch all records in the dataset.
|
|
11682
|
-
* for await (const record of dataset.fetch()) {
|
|
11683
|
-
* console.log(record);
|
|
11684
|
-
* }
|
|
11685
|
-
*
|
|
11686
|
-
* // You can also iterate over the dataset directly.
|
|
11687
|
-
* for await (const record of dataset) {
|
|
11688
|
-
* console.log(record);
|
|
11689
|
-
* }
|
|
11690
|
-
* ```
|
|
11691
|
-
*
|
|
11692
|
-
* @returns An iterator over the dataset's records.
|
|
11693
|
-
*/
|
|
11694
|
-
async *fetch() {
|
|
11695
|
-
const records = await this.fetchedData();
|
|
11696
|
-
for (const record of records) {
|
|
11697
|
-
yield {
|
|
11698
|
-
id: record.id,
|
|
11699
|
-
input: record.input && JSON.parse(record.input),
|
|
11700
|
-
output: record.input && JSON.parse(record.output),
|
|
11701
|
-
metadata: record.metadata && JSON.parse(record.metadata)
|
|
11702
|
-
};
|
|
11703
|
-
}
|
|
11704
|
-
this.clearCache();
|
|
11705
|
-
}
|
|
11706
|
-
/**
|
|
11707
|
-
* Fetch all records in the dataset.
|
|
11708
|
-
*
|
|
11709
|
-
* @example
|
|
11710
|
-
* ```
|
|
11711
|
-
* // Use an async iterator to fetch all records in the dataset.
|
|
11712
|
-
* for await (const record of dataset) {
|
|
11713
|
-
* console.log(record);
|
|
11714
|
-
* }
|
|
11715
|
-
* ```
|
|
11716
|
-
*/
|
|
11717
|
-
[Symbol.asyncIterator]() {
|
|
11718
|
-
return this.fetch();
|
|
11719
|
-
}
|
|
11720
|
-
async fetchedData() {
|
|
11721
|
-
if (this._fetchedData === void 0) {
|
|
11722
|
-
const state = await this.getState();
|
|
11723
|
-
const resp = await state.logConn().get("object/dataset", {
|
|
11724
|
-
id: await this.id,
|
|
11725
|
-
fmt: "json",
|
|
11726
|
-
version: this.pinnedVersion
|
|
11727
|
-
});
|
|
11728
|
-
const text = await resp.text();
|
|
11729
|
-
this._fetchedData = text.split("\n").filter((x) => x.trim() !== "").map((x) => JSON.parse(x));
|
|
11730
|
-
}
|
|
11731
|
-
return this._fetchedData || [];
|
|
11732
|
-
}
|
|
11733
|
-
clearCache() {
|
|
11734
|
-
this._fetchedData = void 0;
|
|
11735
|
-
}
|
|
11736
|
-
async version() {
|
|
11737
|
-
if (this.pinnedVersion !== void 0) {
|
|
11738
|
-
return this.pinnedVersion;
|
|
11739
|
-
} else {
|
|
11740
|
-
const fetchedData = await this.fetchedData();
|
|
11741
|
-
let maxVersion = void 0;
|
|
11742
|
-
for (const record of fetchedData) {
|
|
11743
|
-
const xactId = record[TRANSACTION_ID_FIELD];
|
|
11744
|
-
if (maxVersion === void 0 || (xactId ?? xactId > maxVersion)) {
|
|
11745
|
-
maxVersion = xactId;
|
|
11746
|
-
}
|
|
11747
|
-
}
|
|
11748
|
-
return maxVersion;
|
|
11749
|
-
}
|
|
11750
|
-
}
|
|
11751
|
-
/**
|
|
11752
|
-
* Flush any pending rows to the server.
|
|
11753
|
-
*/
|
|
11754
|
-
async flush() {
|
|
11755
|
-
return await this.bgLogger.flush();
|
|
11756
|
-
}
|
|
11757
|
-
/**
|
|
11758
|
-
* This function is deprecated. You can simply remove it from your code.
|
|
11759
|
-
*/
|
|
11760
|
-
async close() {
|
|
11761
|
-
console.warn(
|
|
11762
|
-
"close is deprecated and will be removed in a future version of braintrust. It is now a no-op and can be removed"
|
|
11763
|
-
);
|
|
11764
|
-
return this.id;
|
|
11765
|
-
}
|
|
11766
|
-
};
|
|
11767
11707
|
|
|
11768
11708
|
// src/progress.ts
|
|
11769
11709
|
var cliProgress = __toESM(require_cli_progress());
|
|
@@ -11880,6 +11820,12 @@ var GlobalPaths = findGlobalPaths();
|
|
|
11880
11820
|
// src/framework.ts
|
|
11881
11821
|
var import_chalk = __toESM(require_source());
|
|
11882
11822
|
var import_pluralize = __toESM(require_pluralize());
|
|
11823
|
+
function initExperiment(projectName, options = {}) {
|
|
11824
|
+
return init(projectName, {
|
|
11825
|
+
...options,
|
|
11826
|
+
setCurrent: false
|
|
11827
|
+
});
|
|
11828
|
+
}
|
|
11883
11829
|
globalThis._evals = {};
|
|
11884
11830
|
function serializeJSONWithPlainString(v) {
|
|
11885
11831
|
if (typeof v === "string") {
|
|
@@ -11926,18 +11872,45 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
11926
11872
|
if (typeof evaluator.data === "string") {
|
|
11927
11873
|
throw new Error("Unimplemented: string data paths");
|
|
11928
11874
|
}
|
|
11929
|
-
|
|
11930
|
-
|
|
11875
|
+
let dataResult = typeof evaluator.data === "function" ? evaluator.data() : evaluator.data;
|
|
11876
|
+
if ("_type" in dataResult) {
|
|
11877
|
+
if (dataResult._type !== "BaseExperiment") {
|
|
11878
|
+
throw new Error("Invalid _type");
|
|
11879
|
+
}
|
|
11880
|
+
if (!experiment) {
|
|
11881
|
+
throw new Error(
|
|
11882
|
+
"Cannot use BaseExperiment() without connecting to Braintrust (you most likely set --no-send-logs)"
|
|
11883
|
+
);
|
|
11884
|
+
}
|
|
11885
|
+
let name = dataResult.name;
|
|
11886
|
+
if (isEmpty(name)) {
|
|
11887
|
+
const baseExperiment = await experiment.fetchBaseExperiment();
|
|
11888
|
+
if (!baseExperiment) {
|
|
11889
|
+
throw new Error("BaseExperiment() failed to fetch base experiment");
|
|
11890
|
+
}
|
|
11891
|
+
name = baseExperiment.name;
|
|
11892
|
+
}
|
|
11893
|
+
dataResult = initExperiment(evaluator.projectName, {
|
|
11894
|
+
experiment: name,
|
|
11895
|
+
open: true
|
|
11896
|
+
}).asDataset();
|
|
11897
|
+
}
|
|
11898
|
+
let data = [];
|
|
11931
11899
|
if (dataResult instanceof Promise) {
|
|
11932
11900
|
data = await dataResult;
|
|
11901
|
+
} else if (Symbol.asyncIterator in dataResult) {
|
|
11902
|
+
data = [];
|
|
11903
|
+
for await (const d of dataResult) {
|
|
11904
|
+
data.push(d);
|
|
11905
|
+
}
|
|
11933
11906
|
} else {
|
|
11934
11907
|
data = dataResult;
|
|
11935
11908
|
}
|
|
11936
|
-
data = data.filter((d) => filters.every((f) => evaluateFilter(d, f)))
|
|
11937
|
-
progressReporter.start(evaluator.evalName, data.length);
|
|
11938
|
-
const evals = data.flatMap(
|
|
11909
|
+
data = data.filter((d) => filters.every((f) => evaluateFilter(d, f))).flatMap(
|
|
11939
11910
|
(datum) => [...Array(evaluator.trialCount ?? 1).keys()].map(() => datum)
|
|
11940
|
-
)
|
|
11911
|
+
);
|
|
11912
|
+
progressReporter.start(evaluator.evalName, data.length);
|
|
11913
|
+
const evals = data.map(async (datum) => {
|
|
11941
11914
|
let metadata = { ...datum.metadata };
|
|
11942
11915
|
let output = void 0;
|
|
11943
11916
|
let error2 = void 0;
|
|
@@ -16512,7 +16485,7 @@ async function collectFiles(inputPath) {
|
|
|
16512
16485
|
files.push(inputPath);
|
|
16513
16486
|
}
|
|
16514
16487
|
} else {
|
|
16515
|
-
const walked = await
|
|
16488
|
+
const walked = await import_util3.default.promisify(fsWalk.walk)(inputPath, {
|
|
16516
16489
|
deepFilter: (entry) => {
|
|
16517
16490
|
return checkMatch(entry.path, null, EXCLUDE);
|
|
16518
16491
|
},
|
package/dist/framework.d.ts
CHANGED
|
@@ -1,13 +1,26 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
|
-
import { Experiment, ExperimentSummary, Metadata, Span } from "./logger";
|
|
2
|
+
import { Experiment, ExperimentSummary, Metadata, Span, EvalCase } from "./logger";
|
|
3
3
|
import { Score } from "@braintrust/core";
|
|
4
4
|
import { ProgressReporter } from "./progress";
|
|
5
|
-
export
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
}
|
|
10
|
-
|
|
5
|
+
export type BaseExperiment<Input, Expected> = {
|
|
6
|
+
_type: "BaseExperiment";
|
|
7
|
+
_phantom?: [Input, Expected];
|
|
8
|
+
name?: string;
|
|
9
|
+
};
|
|
10
|
+
/**
|
|
11
|
+
* Use this to specify that the dataset should actually be the data from a previous (base) experiment.
|
|
12
|
+
* If you do not specify a name, Braintrust will automatically figure out the best base experiment to
|
|
13
|
+
* use based on your git history (or fall back to timestamps).
|
|
14
|
+
*
|
|
15
|
+
* @param options
|
|
16
|
+
* @param options.name The name of the base experiment to use. If unspecified, Braintrust will automatically figure out the best base
|
|
17
|
+
* using your git history (or fall back to timestamps).
|
|
18
|
+
* @returns
|
|
19
|
+
*/
|
|
20
|
+
export declare function BaseExperiment<Input = unknown, Expected = unknown>(options?: {
|
|
21
|
+
name?: string;
|
|
22
|
+
}): BaseExperiment<Input, Expected>;
|
|
23
|
+
export type EvalData<Input, Expected> = EvalCase<Input, Expected>[] | (() => EvalCase<Input, Expected>[]) | (() => Promise<EvalCase<Input, Expected>[]>) | AsyncGenerator<EvalCase<Input, Expected>> | BaseExperiment<Input, Expected> | (() => BaseExperiment<Input, Expected>);
|
|
11
24
|
export type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
|
|
12
25
|
export interface EvalHooks {
|
|
13
26
|
meta: (info: Record<string, unknown>) => void;
|
|
@@ -60,7 +73,7 @@ declare global {
|
|
|
60
73
|
var _evals: EvaluatorFile;
|
|
61
74
|
var _lazy_load: boolean;
|
|
62
75
|
}
|
|
63
|
-
export declare function Eval<Input, Output, Expected>(name: string, evaluator: Evaluator<Input, Output, Expected>): Promise<
|
|
76
|
+
export declare function Eval<Input, Output, Expected>(name: string, evaluator: Evaluator<Input, Output, Expected>): Promise<ExperimentSummary>;
|
|
64
77
|
export declare function getLoadedEvals(): EvaluatorFile;
|
|
65
78
|
export interface Filter {
|
|
66
79
|
path: string[];
|
package/dist/index.d.ts
CHANGED