braintrust 0.0.97 → 0.0.98
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +257 -80
- package/dist/cli.js +169 -77
- package/dist/framework.d.ts +18 -18
- package/dist/gitutil.d.ts +2 -2
- package/dist/index.d.ts +1 -0
- package/dist/index.js +267 -90
- package/dist/isomorph.d.ts +2 -2
- package/dist/logger.d.ts +77 -132
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -9065,7 +9065,7 @@ var require_package = __commonJS({
|
|
|
9065
9065
|
"package.json"(exports2, module2) {
|
|
9066
9066
|
module2.exports = {
|
|
9067
9067
|
name: "braintrust",
|
|
9068
|
-
version: "0.0.97",
|
|
9068
|
+
version: "0.0.98",
|
|
9069
9069
|
description: "SDK for integrating Braintrust",
|
|
9070
9070
|
main: "./dist/index.js",
|
|
9071
9071
|
browser: {
|
|
@@ -9108,7 +9108,7 @@ var require_package = __commonJS({
|
|
|
9108
9108
|
typescript: "^5.3.3"
|
|
9109
9109
|
},
|
|
9110
9110
|
dependencies: {
|
|
9111
|
-
"@braintrust/core": "^0.0.
|
|
9111
|
+
"@braintrust/core": "^0.0.16",
|
|
9112
9112
|
argparse: "^2.0.1",
|
|
9113
9113
|
chalk: "^4.1.2",
|
|
9114
9114
|
"cli-progress": "^3.12.0",
|
|
@@ -10508,9 +10508,10 @@ var v4_default = v4;
|
|
|
10508
10508
|
// src/cli.ts
|
|
10509
10509
|
var import_pluralize2 = __toESM(require_pluralize());
|
|
10510
10510
|
|
|
10511
|
-
// ../core/js/dist/index.mjs
|
|
10511
|
+
// ../core/js/dist/main/index.mjs
|
|
10512
10512
|
var TRANSACTION_ID_FIELD = "_xact_id";
|
|
10513
10513
|
var IS_MERGE_FIELD = "_is_merge";
|
|
10514
|
+
var MERGE_PATHS_FIELD = "_merge_paths";
|
|
10514
10515
|
var AUDIT_SOURCE_FIELD = "_audit_source";
|
|
10515
10516
|
var AUDIT_METADATA_FIELD = "_audit_metadata";
|
|
10516
10517
|
var VALID_SOURCES = ["app", "api", "external"];
|
|
@@ -10569,6 +10570,24 @@ function mergeRowBatch(rows) {
|
|
|
10569
10570
|
out.push(...Object.values(rowGroups));
|
|
10570
10571
|
return out;
|
|
10571
10572
|
}
|
|
10573
|
+
function makeLegacyEvent(e) {
|
|
10574
|
+
if (!("dataset_id" in e) || !("expected" in e)) {
|
|
10575
|
+
return e;
|
|
10576
|
+
}
|
|
10577
|
+
const event = {
|
|
10578
|
+
...e,
|
|
10579
|
+
output: e.expected
|
|
10580
|
+
};
|
|
10581
|
+
delete event.expected;
|
|
10582
|
+
if (MERGE_PATHS_FIELD in event) {
|
|
10583
|
+
for (const path5 of event[MERGE_PATHS_FIELD] || []) {
|
|
10584
|
+
if (path5.length > 0 && path5[0] === "expected") {
|
|
10585
|
+
path5[0] = "output";
|
|
10586
|
+
}
|
|
10587
|
+
}
|
|
10588
|
+
}
|
|
10589
|
+
return event;
|
|
10590
|
+
}
|
|
10572
10591
|
var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
|
|
10573
10592
|
SpanTypeAttribute2["LLM"] = "llm";
|
|
10574
10593
|
SpanTypeAttribute2["SCORE"] = "score";
|
|
@@ -10611,7 +10630,7 @@ var DefaultAsyncLocalStorage = class {
|
|
|
10611
10630
|
}
|
|
10612
10631
|
};
|
|
10613
10632
|
var iso = {
|
|
10614
|
-
|
|
10633
|
+
getRepoInfo: async (_settings) => void 0,
|
|
10615
10634
|
getPastNAncestors: async () => [],
|
|
10616
10635
|
getEnv: (_name) => void 0,
|
|
10617
10636
|
getCallerLocation: () => void 0,
|
|
@@ -10915,6 +10934,9 @@ var MaxRequestSize = 6 * 1024 * 1024;
|
|
|
10915
10934
|
function constructJsonArray(items) {
|
|
10916
10935
|
return `[${items.join(",")}]`;
|
|
10917
10936
|
}
|
|
10937
|
+
function constructLogs3Data(items) {
|
|
10938
|
+
return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
|
|
10939
|
+
}
|
|
10918
10940
|
var DefaultBatchSize = 100;
|
|
10919
10941
|
var NumRetries = 3;
|
|
10920
10942
|
function now() {
|
|
@@ -10973,11 +10995,20 @@ var BackgroundLogger = class {
|
|
|
10973
10995
|
}
|
|
10974
10996
|
postPromises.push(
|
|
10975
10997
|
(async () => {
|
|
10976
|
-
const
|
|
10998
|
+
const dataS = constructLogs3Data(items);
|
|
10977
10999
|
for (let i = 0; i < NumRetries; i++) {
|
|
10978
11000
|
const startTime = now();
|
|
10979
11001
|
try {
|
|
10980
|
-
|
|
11002
|
+
try {
|
|
11003
|
+
return (await (await this.logConn.get()).post_json("logs3", dataS)).ids.map((res) => res.id);
|
|
11004
|
+
} catch (e) {
|
|
11005
|
+
const legacyDataS = constructJsonArray(
|
|
11006
|
+
items.map(
|
|
11007
|
+
(r) => JSON.stringify(makeLegacyEvent(JSON.parse(r)))
|
|
11008
|
+
)
|
|
11009
|
+
);
|
|
11010
|
+
return (await (await this.logConn.get()).post_json("logs", legacyDataS)).map((res) => res.id);
|
|
11011
|
+
}
|
|
10981
11012
|
} catch (e) {
|
|
10982
11013
|
const retryingText = i + 1 === NumRetries ? "" : " Retrying";
|
|
10983
11014
|
const errMsg = (() => {
|
|
@@ -10988,7 +11019,7 @@ var BackgroundLogger = class {
|
|
|
10988
11019
|
}
|
|
10989
11020
|
})();
|
|
10990
11021
|
console.warn(
|
|
10991
|
-
`log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${
|
|
11022
|
+
`log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataS.length}. Error: ${errMsg}.${retryingText}`
|
|
10992
11023
|
);
|
|
10993
11024
|
}
|
|
10994
11025
|
}
|
|
@@ -11016,8 +11047,21 @@ var BackgroundLogger = class {
|
|
|
11016
11047
|
}
|
|
11017
11048
|
}
|
|
11018
11049
|
};
|
|
11019
|
-
function init(project, options = {}) {
|
|
11050
|
+
function init(projectOrOptions, optionalOptions) {
|
|
11051
|
+
const options = (() => {
|
|
11052
|
+
if (typeof projectOrOptions === "string") {
|
|
11053
|
+
return { ...optionalOptions, project: projectOrOptions };
|
|
11054
|
+
} else {
|
|
11055
|
+
if (optionalOptions !== void 0) {
|
|
11056
|
+
throw new Error(
|
|
11057
|
+
"Cannot specify options struct as both parameters. Must call either init(project, options) or init(options)."
|
|
11058
|
+
);
|
|
11059
|
+
}
|
|
11060
|
+
return projectOrOptions;
|
|
11061
|
+
}
|
|
11062
|
+
})();
|
|
11020
11063
|
const {
|
|
11064
|
+
project,
|
|
11021
11065
|
experiment,
|
|
11022
11066
|
description,
|
|
11023
11067
|
dataset,
|
|
@@ -11029,47 +11073,55 @@ function init(project, options = {}) {
|
|
|
11029
11073
|
apiKey,
|
|
11030
11074
|
orgName,
|
|
11031
11075
|
metadata,
|
|
11032
|
-
gitMetadataSettings
|
|
11033
|
-
|
|
11076
|
+
gitMetadataSettings,
|
|
11077
|
+
projectId,
|
|
11078
|
+
baseExperimentId,
|
|
11079
|
+
repoInfo: repoInfo2
|
|
11080
|
+
} = options;
|
|
11034
11081
|
if (open && update) {
|
|
11035
11082
|
throw new Error("Cannot open and update an experiment at the same time");
|
|
11036
11083
|
}
|
|
11037
11084
|
if (open || update) {
|
|
11038
11085
|
if (isEmpty(experiment)) {
|
|
11039
11086
|
const action = open ? "open" : "update";
|
|
11040
|
-
throw new Error(
|
|
11087
|
+
throw new Error(
|
|
11088
|
+
`Cannot ${action} an experiment without specifying its name`
|
|
11089
|
+
);
|
|
11041
11090
|
}
|
|
11042
|
-
const lazyMetadata2 = new LazyValue(
|
|
11043
|
-
|
|
11044
|
-
|
|
11045
|
-
|
|
11046
|
-
|
|
11047
|
-
|
|
11048
|
-
|
|
11049
|
-
|
|
11050
|
-
|
|
11051
|
-
|
|
11052
|
-
|
|
11053
|
-
|
|
11054
|
-
|
|
11055
|
-
|
|
11056
|
-
|
|
11057
|
-
|
|
11058
|
-
|
|
11059
|
-
|
|
11060
|
-
return {
|
|
11061
|
-
project: {
|
|
11062
|
-
id: info.project_id,
|
|
11063
|
-
name: "",
|
|
11064
|
-
fullInfo: {}
|
|
11065
|
-
},
|
|
11066
|
-
experiment: {
|
|
11067
|
-
id: info.id,
|
|
11068
|
-
name: info.name,
|
|
11069
|
-
fullInfo: info
|
|
11091
|
+
const lazyMetadata2 = new LazyValue(
|
|
11092
|
+
async () => {
|
|
11093
|
+
await login({
|
|
11094
|
+
orgName,
|
|
11095
|
+
apiKey,
|
|
11096
|
+
appUrl
|
|
11097
|
+
});
|
|
11098
|
+
const args = {
|
|
11099
|
+
project_name: project,
|
|
11100
|
+
project_id: projectId,
|
|
11101
|
+
org_name: _state.orgName,
|
|
11102
|
+
experiment_name: experiment
|
|
11103
|
+
};
|
|
11104
|
+
const response = await _state.apiConn().post_json("api/experiment/get", args);
|
|
11105
|
+
if (response.length === 0) {
|
|
11106
|
+
throw new Error(
|
|
11107
|
+
`Experiment ${experiment} not found in project ${projectId ?? project}.`
|
|
11108
|
+
);
|
|
11070
11109
|
}
|
|
11071
|
-
|
|
11072
|
-
|
|
11110
|
+
const info = response[0];
|
|
11111
|
+
return {
|
|
11112
|
+
project: {
|
|
11113
|
+
id: info.project_id,
|
|
11114
|
+
name: "",
|
|
11115
|
+
fullInfo: {}
|
|
11116
|
+
},
|
|
11117
|
+
experiment: {
|
|
11118
|
+
id: info.id,
|
|
11119
|
+
name: info.name,
|
|
11120
|
+
fullInfo: info
|
|
11121
|
+
}
|
|
11122
|
+
};
|
|
11123
|
+
}
|
|
11124
|
+
);
|
|
11073
11125
|
if (open) {
|
|
11074
11126
|
return new ReadonlyExperiment(
|
|
11075
11127
|
lazyMetadata2
|
|
@@ -11091,6 +11143,7 @@ function init(project, options = {}) {
|
|
|
11091
11143
|
});
|
|
11092
11144
|
const args = {
|
|
11093
11145
|
project_name: project,
|
|
11146
|
+
project_id: projectId,
|
|
11094
11147
|
org_id: _state.orgId
|
|
11095
11148
|
};
|
|
11096
11149
|
if (experiment) {
|
|
@@ -11099,22 +11152,29 @@ function init(project, options = {}) {
|
|
|
11099
11152
|
if (description) {
|
|
11100
11153
|
args["description"] = description;
|
|
11101
11154
|
}
|
|
11102
|
-
|
|
11103
|
-
|
|
11104
|
-
|
|
11155
|
+
const repoInfoArg = await (async () => {
|
|
11156
|
+
if (repoInfo2) {
|
|
11157
|
+
return repoInfo2;
|
|
11105
11158
|
}
|
|
11106
|
-
|
|
11107
|
-
|
|
11108
|
-
|
|
11109
|
-
|
|
11110
|
-
|
|
11111
|
-
)
|
|
11112
|
-
|
|
11113
|
-
|
|
11114
|
-
|
|
11115
|
-
|
|
11159
|
+
let mergedGitMetadataSettings = {
|
|
11160
|
+
..._state.gitMetadataSettings || {
|
|
11161
|
+
collect: "all"
|
|
11162
|
+
}
|
|
11163
|
+
};
|
|
11164
|
+
if (gitMetadataSettings) {
|
|
11165
|
+
mergedGitMetadataSettings = mergeGitMetadataSettings(
|
|
11166
|
+
mergedGitMetadataSettings,
|
|
11167
|
+
gitMetadataSettings
|
|
11168
|
+
);
|
|
11169
|
+
}
|
|
11170
|
+
return await isomorph_default.getRepoInfo(mergedGitMetadataSettings);
|
|
11171
|
+
})();
|
|
11172
|
+
if (repoInfoArg) {
|
|
11173
|
+
args["repo_info"] = repoInfoArg;
|
|
11116
11174
|
}
|
|
11117
|
-
if (baseExperiment) {
|
|
11175
|
+
if (baseExperimentId) {
|
|
11176
|
+
args["base_exp_id"] = baseExperimentId;
|
|
11177
|
+
} else if (baseExperiment) {
|
|
11118
11178
|
args["base_experiment"] = baseExperiment;
|
|
11119
11179
|
} else {
|
|
11120
11180
|
args["ancestor_commits"] = await isomorph_default.getPastNAncestors();
|
|
@@ -11166,15 +11226,27 @@ function init(project, options = {}) {
|
|
|
11166
11226
|
return ret;
|
|
11167
11227
|
}
|
|
11168
11228
|
async function login(options = {}) {
|
|
11229
|
+
let { forceLogin = false } = options || {};
|
|
11230
|
+
if (_state.loggedIn && !forceLogin) {
|
|
11231
|
+
let checkUpdatedParam2 = function(varname, arg, orig) {
|
|
11232
|
+
if (!isEmpty(arg) && !isEmpty(orig) && arg !== orig) {
|
|
11233
|
+
throw new Error(
|
|
11234
|
+
`Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
|
|
11235
|
+
);
|
|
11236
|
+
}
|
|
11237
|
+
};
|
|
11238
|
+
var checkUpdatedParam = checkUpdatedParam2;
|
|
11239
|
+
;
|
|
11240
|
+
checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
|
|
11241
|
+
checkUpdatedParam2("apiKey", options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0, _state.loginToken);
|
|
11242
|
+
checkUpdatedParam2("orgName", options.orgName, _state.orgName);
|
|
11243
|
+
return;
|
|
11244
|
+
}
|
|
11169
11245
|
const {
|
|
11170
11246
|
appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
|
|
11171
11247
|
apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
|
|
11172
11248
|
orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
|
|
11173
11249
|
} = options || {};
|
|
11174
|
-
let { forceLogin = false } = options || {};
|
|
11175
|
-
if (_state.loggedIn && !forceLogin) {
|
|
11176
|
-
return;
|
|
11177
|
-
}
|
|
11178
11250
|
_state.resetLoginInfo();
|
|
11179
11251
|
_state.appUrl = appUrl;
|
|
11180
11252
|
let conn = null;
|
|
@@ -11304,9 +11376,10 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
|
11304
11376
|
return event;
|
|
11305
11377
|
}
|
|
11306
11378
|
var ObjectFetcher = class {
|
|
11307
|
-
constructor(objectType, pinnedVersion) {
|
|
11379
|
+
constructor(objectType, pinnedVersion, mutateRecord) {
|
|
11308
11380
|
this.objectType = objectType;
|
|
11309
11381
|
this.pinnedVersion = pinnedVersion;
|
|
11382
|
+
this.mutateRecord = mutateRecord;
|
|
11310
11383
|
this._fetchedData = void 0;
|
|
11311
11384
|
}
|
|
11312
11385
|
get id() {
|
|
@@ -11327,12 +11400,24 @@ var ObjectFetcher = class {
|
|
|
11327
11400
|
async fetchedData() {
|
|
11328
11401
|
if (this._fetchedData === void 0) {
|
|
11329
11402
|
const state = await this.getState();
|
|
11330
|
-
|
|
11331
|
-
|
|
11332
|
-
|
|
11333
|
-
|
|
11334
|
-
|
|
11335
|
-
|
|
11403
|
+
let data = void 0;
|
|
11404
|
+
try {
|
|
11405
|
+
const resp = await state.logConn().get(`object3/${this.objectType}`, {
|
|
11406
|
+
id: await this.id,
|
|
11407
|
+
fmt: "json2",
|
|
11408
|
+
version: this.pinnedVersion,
|
|
11409
|
+
api_version: "2"
|
|
11410
|
+
});
|
|
11411
|
+
data = await resp.json();
|
|
11412
|
+
} catch (e) {
|
|
11413
|
+
const resp = await state.logConn().get(`object/${this.objectType}`, {
|
|
11414
|
+
id: await this.id,
|
|
11415
|
+
fmt: "json2",
|
|
11416
|
+
version: this.pinnedVersion
|
|
11417
|
+
});
|
|
11418
|
+
data = await resp.json();
|
|
11419
|
+
}
|
|
11420
|
+
this._fetchedData = this.mutateRecord ? data?.map(this.mutateRecord) : data;
|
|
11336
11421
|
}
|
|
11337
11422
|
return this._fetchedData || [];
|
|
11338
11423
|
}
|
|
@@ -11581,11 +11666,18 @@ var ReadonlyExperiment = class extends ObjectFetcher {
|
|
|
11581
11666
|
if (record.root_span_id !== record.span_id) {
|
|
11582
11667
|
continue;
|
|
11583
11668
|
}
|
|
11584
|
-
const { output, expected } = record;
|
|
11585
|
-
|
|
11586
|
-
|
|
11587
|
-
|
|
11588
|
-
|
|
11669
|
+
const { output, expected: expectedRecord } = record;
|
|
11670
|
+
const expected = expectedRecord ?? output;
|
|
11671
|
+
if (isEmpty(expected)) {
|
|
11672
|
+
yield {
|
|
11673
|
+
input: record.input
|
|
11674
|
+
};
|
|
11675
|
+
} else {
|
|
11676
|
+
yield {
|
|
11677
|
+
input: record.input,
|
|
11678
|
+
expected
|
|
11679
|
+
};
|
|
11680
|
+
}
|
|
11589
11681
|
}
|
|
11590
11682
|
}
|
|
11591
11683
|
};
|
|
@@ -11924,7 +12016,7 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
11924
12016
|
);
|
|
11925
12017
|
progressReporter.start(evaluator.evalName, data.length);
|
|
11926
12018
|
const evals = data.map(async (datum) => {
|
|
11927
|
-
let metadata = { ...datum.metadata };
|
|
12019
|
+
let metadata = { ..."metadata" in datum ? datum.metadata : {} };
|
|
11928
12020
|
let output = void 0;
|
|
11929
12021
|
let error2 = void 0;
|
|
11930
12022
|
let scores = {};
|
|
@@ -12011,7 +12103,7 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
12011
12103
|
},
|
|
12012
12104
|
event: {
|
|
12013
12105
|
input: datum.input,
|
|
12014
|
-
expected: datum.expected
|
|
12106
|
+
expected: "expected" in datum ? datum.expected : void 0
|
|
12015
12107
|
}
|
|
12016
12108
|
});
|
|
12017
12109
|
}
|
|
@@ -16127,11 +16219,11 @@ function truncateToByteLimit(s, byteLimit = 65536) {
|
|
|
16127
16219
|
const truncated = encoded.subarray(0, byteLimit);
|
|
16128
16220
|
return new TextDecoder().decode(truncated);
|
|
16129
16221
|
}
|
|
16130
|
-
async function getRepoStatus(settings) {
|
|
16222
|
+
async function getRepoInfo(settings) {
|
|
16131
16223
|
if (settings && settings.collect === "none") {
|
|
16132
16224
|
return void 0;
|
|
16133
16225
|
}
|
|
16134
|
-
const repo = await
|
|
16226
|
+
const repo = await repoInfo();
|
|
16135
16227
|
if (!repo || !settings || settings.collect === "all") {
|
|
16136
16228
|
return repo;
|
|
16137
16229
|
}
|
|
@@ -16141,7 +16233,7 @@ async function getRepoStatus(settings) {
|
|
|
16141
16233
|
});
|
|
16142
16234
|
return sanitized;
|
|
16143
16235
|
}
|
|
16144
|
-
async function
|
|
16236
|
+
async function repoInfo() {
|
|
16145
16237
|
const git = await currentRepo();
|
|
16146
16238
|
if (git === null) {
|
|
16147
16239
|
return void 0;
|
|
@@ -16238,7 +16330,7 @@ function getCallerLocation() {
|
|
|
16238
16330
|
|
|
16239
16331
|
// src/node.ts
|
|
16240
16332
|
function configureNode() {
|
|
16241
|
-
isomorph_default.
|
|
16333
|
+
isomorph_default.getRepoInfo = getRepoInfo;
|
|
16242
16334
|
isomorph_default.getPastNAncestors = getPastNAncestors;
|
|
16243
16335
|
isomorph_default.getEnv = (name) => process.env[name];
|
|
16244
16336
|
isomorph_default.getCallerLocation = getCallerLocation;
|
package/dist/framework.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
|
-
import { Experiment, ExperimentSummary,
|
|
2
|
+
import { Experiment, ExperimentSummary, Span, EvalCase, BaseMetadata, DefaultMetadataType } from "./logger";
|
|
3
3
|
import { Score } from "@braintrust/core";
|
|
4
4
|
import { ProgressReporter } from "./progress";
|
|
5
|
-
export type BaseExperiment<Input, Expected> = {
|
|
5
|
+
export type BaseExperiment<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = {
|
|
6
6
|
_type: "BaseExperiment";
|
|
7
|
-
_phantom?: [Input, Expected];
|
|
7
|
+
_phantom?: [Input, Expected, Metadata];
|
|
8
8
|
name?: string;
|
|
9
9
|
};
|
|
10
10
|
/**
|
|
@@ -17,24 +17,24 @@ export type BaseExperiment<Input, Expected> = {
|
|
|
17
17
|
* using your git history (or fall back to timestamps).
|
|
18
18
|
* @returns
|
|
19
19
|
*/
|
|
20
|
-
export declare function BaseExperiment<Input = unknown, Expected = unknown>(options?: {
|
|
20
|
+
export declare function BaseExperiment<Input = unknown, Expected = unknown, Metadata extends BaseMetadata = DefaultMetadataType>(options?: {
|
|
21
21
|
name?: string;
|
|
22
|
-
}): BaseExperiment<Input, Expected>;
|
|
23
|
-
export type EvalData<Input, Expected> = EvalCase<Input, Expected>[] | (() => EvalCase<Input, Expected>[]) | (() => Promise<EvalCase<Input, Expected>[]>) | AsyncGenerator<EvalCase<Input, Expected>> | BaseExperiment<Input, Expected> | (() => BaseExperiment<Input, Expected>);
|
|
22
|
+
}): BaseExperiment<Input, Expected, Metadata>;
|
|
23
|
+
export type EvalData<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata>[] | (() => EvalCase<Input, Expected, Metadata>[]) | (() => Promise<EvalCase<Input, Expected, Metadata>[]>) | AsyncGenerator<EvalCase<Input, Expected, Metadata>> | AsyncIterable<EvalCase<Input, Expected, Metadata>> | BaseExperiment<Input, Expected, Metadata> | (() => BaseExperiment<Input, Expected, Metadata>);
|
|
24
24
|
export type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
|
|
25
25
|
export interface EvalHooks {
|
|
26
26
|
meta: (info: Record<string, unknown>) => void;
|
|
27
27
|
span: Span;
|
|
28
28
|
}
|
|
29
|
-
export type EvalScorerArgs<Input, Output, Expected> = EvalCase<Input, Expected> & {
|
|
29
|
+
export type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
|
|
30
30
|
output: Output;
|
|
31
31
|
};
|
|
32
|
-
export type EvalScorer<Input, Output, Expected> = (args: EvalScorerArgs<Input, Output, Expected>) => Score | Promise<Score>;
|
|
33
|
-
export interface Evaluator<Input, Output, Expected> {
|
|
32
|
+
export type EvalScorer<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = (args: EvalScorerArgs<Input, Output, Expected, Metadata>) => Score | Promise<Score>;
|
|
33
|
+
export interface Evaluator<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> {
|
|
34
34
|
/**
|
|
35
35
|
* A function that returns a list of inputs, expected outputs, and metadata.
|
|
36
36
|
*/
|
|
37
|
-
data: EvalData<Input, Expected>;
|
|
37
|
+
data: EvalData<Input, Expected, Metadata>;
|
|
38
38
|
/**
|
|
39
39
|
* A function that takes an input and returns an output.
|
|
40
40
|
*/
|
|
@@ -42,7 +42,7 @@ export interface Evaluator<Input, Output, Expected> {
|
|
|
42
42
|
/**
|
|
43
43
|
* A set of functions that take an input, output, and expected value and return a score.
|
|
44
44
|
*/
|
|
45
|
-
scores: EvalScorer<Input, Output, Expected>[];
|
|
45
|
+
scores: EvalScorer<Input, Output, Expected, Metadata>[];
|
|
46
46
|
/**
|
|
47
47
|
* An optional name for the experiment.
|
|
48
48
|
*/
|
|
@@ -56,24 +56,24 @@ export interface Evaluator<Input, Output, Expected> {
|
|
|
56
56
|
/**
|
|
57
57
|
* Optional additional metadata for the experiment.
|
|
58
58
|
*/
|
|
59
|
-
metadata?:
|
|
59
|
+
metadata?: Record<string, unknown>;
|
|
60
60
|
/**
|
|
61
61
|
* Whether the experiment should be public. Defaults to false.
|
|
62
62
|
*/
|
|
63
63
|
isPublic?: boolean;
|
|
64
64
|
}
|
|
65
|
-
export type EvaluatorDef<Input, Output, Expected> = {
|
|
65
|
+
export type EvaluatorDef<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = {
|
|
66
66
|
projectName: string;
|
|
67
67
|
evalName: string;
|
|
68
|
-
} & Evaluator<Input, Output, Expected>;
|
|
68
|
+
} & Evaluator<Input, Output, Expected, Metadata>;
|
|
69
69
|
export type EvaluatorFile = {
|
|
70
|
-
[evalName: string]: EvaluatorDef<any, any, any>;
|
|
70
|
+
[evalName: string]: EvaluatorDef<any, any, any, any>;
|
|
71
71
|
};
|
|
72
72
|
declare global {
|
|
73
73
|
var _evals: EvaluatorFile;
|
|
74
74
|
var _lazy_load: boolean;
|
|
75
75
|
}
|
|
76
|
-
export declare function Eval<Input, Output, Expected>(name: string, evaluator: Evaluator<Input, Output, Expected>): Promise<ExperimentSummary>;
|
|
76
|
+
export declare function Eval<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType>(name: string, evaluator: Evaluator<Input, Output, Expected, Metadata>): Promise<ExperimentSummary>;
|
|
77
77
|
export declare function getLoadedEvals(): EvaluatorFile;
|
|
78
78
|
export interface Filter {
|
|
79
79
|
path: string[];
|
|
@@ -88,10 +88,10 @@ export declare function deserializePlainStringAsJSON(s: string): {
|
|
|
88
88
|
error: unknown;
|
|
89
89
|
};
|
|
90
90
|
export declare function parseFilters(filters: string[]): Filter[];
|
|
91
|
-
export declare function runEvaluator(experiment: Experiment | null, evaluator: EvaluatorDef<
|
|
91
|
+
export declare function runEvaluator(experiment: Experiment | null, evaluator: EvaluatorDef<any, any, any | void, any | void>, progressReporter: ProgressReporter, filters: Filter[]): Promise<{
|
|
92
92
|
results: {
|
|
93
93
|
output: any;
|
|
94
|
-
metadata:
|
|
94
|
+
metadata: object;
|
|
95
95
|
scores: Record<string, number | null>;
|
|
96
96
|
error: unknown;
|
|
97
97
|
}[];
|
package/dist/gitutil.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { GitMetadataSettings,
|
|
1
|
+
import { GitMetadataSettings, RepoInfo } from "@braintrust/core";
|
|
2
2
|
/**
|
|
3
3
|
* Information about the current HEAD of the repo.
|
|
4
4
|
*/
|
|
5
5
|
export declare function currentRepo(): Promise<import("simple-git").SimpleGit | null>;
|
|
6
6
|
export declare function getPastNAncestors(n?: number, remote?: string | undefined): Promise<string[]>;
|
|
7
|
-
export declare function
|
|
7
|
+
export declare function getRepoInfo(settings?: GitMetadataSettings): Promise<RepoInfo | undefined>;
|
package/dist/index.d.ts
CHANGED
|
@@ -45,3 +45,4 @@
|
|
|
45
45
|
export * from "./logger";
|
|
46
46
|
export { BaseExperiment, Evaluator, EvalTask, Eval, EvalScorerArgs, } from "./framework";
|
|
47
47
|
export * from "./oai";
|
|
48
|
+
export { ParentExperimentIds, ParentProjectLogIds, IdField, InputField, InputsField, OtherExperimentLogFields, ExperimentLogPartialArgs, ExperimentLogFullArgs, LogFeedbackFullArgs, LogCommentFullArgs, CommentEvent, DatasetRecord, } from "@braintrust/core";
|