braintrust 0.0.98 → 0.0.100
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +97 -28
- package/dist/cli.js +172 -91
- package/dist/framework.d.ts +1 -1
- package/dist/index.js +192 -94
- package/dist/logger.d.ts +12 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -1973,41 +1973,35 @@ var require_ansi_styles = __commonJS({
|
|
|
1973
1973
|
}
|
|
1974
1974
|
});
|
|
1975
1975
|
|
|
1976
|
-
// ../../node_modules
|
|
1976
|
+
// ../../node_modules/supports-color/node_modules/has-flag/index.js
|
|
1977
1977
|
var require_has_flag2 = __commonJS({
|
|
1978
|
-
"../../node_modules
|
|
1978
|
+
"../../node_modules/supports-color/node_modules/has-flag/index.js"(exports, module2) {
|
|
1979
1979
|
"use strict";
|
|
1980
|
-
module2.exports = (flag, argv
|
|
1980
|
+
module2.exports = (flag, argv) => {
|
|
1981
|
+
argv = argv || process.argv;
|
|
1981
1982
|
const prefix = flag.startsWith("-") ? "" : flag.length === 1 ? "-" : "--";
|
|
1982
|
-
const
|
|
1983
|
-
const
|
|
1984
|
-
return
|
|
1983
|
+
const pos = argv.indexOf(prefix + flag);
|
|
1984
|
+
const terminatorPos = argv.indexOf("--");
|
|
1985
|
+
return pos !== -1 && (terminatorPos === -1 ? true : pos < terminatorPos);
|
|
1985
1986
|
};
|
|
1986
1987
|
}
|
|
1987
1988
|
});
|
|
1988
1989
|
|
|
1989
|
-
// ../../node_modules
|
|
1990
|
+
// ../../node_modules/supports-color/index.js
|
|
1990
1991
|
var require_supports_color2 = __commonJS({
|
|
1991
|
-
"../../node_modules
|
|
1992
|
+
"../../node_modules/supports-color/index.js"(exports, module2) {
|
|
1992
1993
|
"use strict";
|
|
1993
1994
|
var os = require("os");
|
|
1994
|
-
var tty = require("tty");
|
|
1995
1995
|
var hasFlag = require_has_flag2();
|
|
1996
|
-
var
|
|
1996
|
+
var env = process.env;
|
|
1997
1997
|
var forceColor;
|
|
1998
|
-
if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false")
|
|
1999
|
-
forceColor =
|
|
1998
|
+
if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false")) {
|
|
1999
|
+
forceColor = false;
|
|
2000
2000
|
} else if (hasFlag("color") || hasFlag("colors") || hasFlag("color=true") || hasFlag("color=always")) {
|
|
2001
|
-
forceColor =
|
|
2001
|
+
forceColor = true;
|
|
2002
2002
|
}
|
|
2003
2003
|
if ("FORCE_COLOR" in env) {
|
|
2004
|
-
|
|
2005
|
-
forceColor = 1;
|
|
2006
|
-
} else if (env.FORCE_COLOR === "false") {
|
|
2007
|
-
forceColor = 0;
|
|
2008
|
-
} else {
|
|
2009
|
-
forceColor = env.FORCE_COLOR.length === 0 ? 1 : Math.min(parseInt(env.FORCE_COLOR, 10), 3);
|
|
2010
|
-
}
|
|
2004
|
+
forceColor = env.FORCE_COLOR.length === 0 || parseInt(env.FORCE_COLOR, 10) !== 0;
|
|
2011
2005
|
}
|
|
2012
2006
|
function translateLevel(level) {
|
|
2013
2007
|
if (level === 0) {
|
|
@@ -2020,8 +2014,8 @@ var require_supports_color2 = __commonJS({
|
|
|
2020
2014
|
has16m: level >= 3
|
|
2021
2015
|
};
|
|
2022
2016
|
}
|
|
2023
|
-
function supportsColor(
|
|
2024
|
-
if (forceColor ===
|
|
2017
|
+
function supportsColor(stream) {
|
|
2018
|
+
if (forceColor === false) {
|
|
2025
2019
|
return 0;
|
|
2026
2020
|
}
|
|
2027
2021
|
if (hasFlag("color=16m") || hasFlag("color=full") || hasFlag("color=truecolor")) {
|
|
@@ -2030,22 +2024,19 @@ var require_supports_color2 = __commonJS({
|
|
|
2030
2024
|
if (hasFlag("color=256")) {
|
|
2031
2025
|
return 2;
|
|
2032
2026
|
}
|
|
2033
|
-
if (
|
|
2027
|
+
if (stream && !stream.isTTY && forceColor !== true) {
|
|
2034
2028
|
return 0;
|
|
2035
2029
|
}
|
|
2036
|
-
const min = forceColor
|
|
2037
|
-
if (env.TERM === "dumb") {
|
|
2038
|
-
return min;
|
|
2039
|
-
}
|
|
2030
|
+
const min = forceColor ? 1 : 0;
|
|
2040
2031
|
if (process.platform === "win32") {
|
|
2041
2032
|
const osRelease = os.release().split(".");
|
|
2042
|
-
if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
|
|
2033
|
+
if (Number(process.versions.node.split(".")[0]) >= 8 && Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
|
|
2043
2034
|
return Number(osRelease[2]) >= 14931 ? 3 : 2;
|
|
2044
2035
|
}
|
|
2045
2036
|
return 1;
|
|
2046
2037
|
}
|
|
2047
2038
|
if ("CI" in env) {
|
|
2048
|
-
if (["TRAVIS", "CIRCLECI", "APPVEYOR", "GITLAB_CI"
|
|
2039
|
+
if (["TRAVIS", "CIRCLECI", "APPVEYOR", "GITLAB_CI"].some((sign) => sign in env) || env.CI_NAME === "codeship") {
|
|
2049
2040
|
return 1;
|
|
2050
2041
|
}
|
|
2051
2042
|
return min;
|
|
@@ -2074,16 +2065,19 @@ var require_supports_color2 = __commonJS({
|
|
|
2074
2065
|
if ("COLORTERM" in env) {
|
|
2075
2066
|
return 1;
|
|
2076
2067
|
}
|
|
2068
|
+
if (env.TERM === "dumb") {
|
|
2069
|
+
return min;
|
|
2070
|
+
}
|
|
2077
2071
|
return min;
|
|
2078
2072
|
}
|
|
2079
2073
|
function getSupportLevel(stream) {
|
|
2080
|
-
const level = supportsColor(stream
|
|
2074
|
+
const level = supportsColor(stream);
|
|
2081
2075
|
return translateLevel(level);
|
|
2082
2076
|
}
|
|
2083
2077
|
module2.exports = {
|
|
2084
2078
|
supportsColor: getSupportLevel,
|
|
2085
|
-
stdout:
|
|
2086
|
-
stderr:
|
|
2079
|
+
stdout: getSupportLevel(process.stdout),
|
|
2080
|
+
stderr: getSupportLevel(process.stderr)
|
|
2087
2081
|
};
|
|
2088
2082
|
}
|
|
2089
2083
|
});
|
|
@@ -8053,10 +8047,11 @@ function isEmpty(a) {
|
|
|
8053
8047
|
return a === void 0 || a === null;
|
|
8054
8048
|
}
|
|
8055
8049
|
var LazyValue = class {
|
|
8050
|
+
callable;
|
|
8051
|
+
value = {
|
|
8052
|
+
hasComputed: false
|
|
8053
|
+
};
|
|
8056
8054
|
constructor(callable) {
|
|
8057
|
-
this.value = {
|
|
8058
|
-
hasComputed: false
|
|
8059
|
-
};
|
|
8060
8055
|
this.callable = callable;
|
|
8061
8056
|
}
|
|
8062
8057
|
async get() {
|
|
@@ -8070,8 +8065,11 @@ var LazyValue = class {
|
|
|
8070
8065
|
|
|
8071
8066
|
// src/logger.ts
|
|
8072
8067
|
var NoopSpan = class {
|
|
8068
|
+
id;
|
|
8069
|
+
span_id;
|
|
8070
|
+
root_span_id;
|
|
8071
|
+
kind = "span";
|
|
8073
8072
|
constructor() {
|
|
8074
|
-
this.kind = "span";
|
|
8075
8073
|
this.id = "";
|
|
8076
8074
|
this.span_id = "";
|
|
8077
8075
|
this.root_span_id = "";
|
|
@@ -8095,15 +8093,22 @@ var NoopSpan = class {
|
|
|
8095
8093
|
};
|
|
8096
8094
|
var NOOP_SPAN = new NoopSpan();
|
|
8097
8095
|
var BraintrustState = class {
|
|
8096
|
+
id;
|
|
8097
|
+
currentExperiment;
|
|
8098
|
+
// Note: the value of IsAsyncFlush doesn't really matter here, since we
|
|
8099
|
+
// (safely) dynamically cast it whenever retrieving the logger.
|
|
8100
|
+
currentLogger;
|
|
8101
|
+
currentSpan;
|
|
8102
|
+
appUrl = null;
|
|
8103
|
+
loginToken = null;
|
|
8104
|
+
orgId = null;
|
|
8105
|
+
orgName = null;
|
|
8106
|
+
logUrl = null;
|
|
8107
|
+
loggedIn = false;
|
|
8108
|
+
gitMetadataSettings;
|
|
8109
|
+
_apiConn = null;
|
|
8110
|
+
_logConn = null;
|
|
8098
8111
|
constructor() {
|
|
8099
|
-
this.appUrl = null;
|
|
8100
|
-
this.loginToken = null;
|
|
8101
|
-
this.orgId = null;
|
|
8102
|
-
this.orgName = null;
|
|
8103
|
-
this.logUrl = null;
|
|
8104
|
-
this.loggedIn = false;
|
|
8105
|
-
this._apiConn = null;
|
|
8106
|
-
this._logConn = null;
|
|
8107
8112
|
this.id = v4_default();
|
|
8108
8113
|
this.currentExperiment = void 0;
|
|
8109
8114
|
this.currentLogger = void 0;
|
|
@@ -8150,6 +8155,9 @@ function _internalSetInitialState() {
|
|
|
8150
8155
|
}
|
|
8151
8156
|
var _internalGetGlobalState = () => _state;
|
|
8152
8157
|
var FailedHTTPResponse = class extends Error {
|
|
8158
|
+
status;
|
|
8159
|
+
text;
|
|
8160
|
+
data;
|
|
8153
8161
|
constructor(status, text, data = null) {
|
|
8154
8162
|
super(`${status}: ${text}`);
|
|
8155
8163
|
this.status = status;
|
|
@@ -8169,6 +8177,9 @@ async function checkResponse(resp) {
|
|
|
8169
8177
|
}
|
|
8170
8178
|
}
|
|
8171
8179
|
var HTTPConnection = class _HTTPConnection {
|
|
8180
|
+
base_url;
|
|
8181
|
+
token;
|
|
8182
|
+
headers;
|
|
8172
8183
|
constructor(base_url) {
|
|
8173
8184
|
this.base_url = base_url;
|
|
8174
8185
|
this.token = null;
|
|
@@ -8320,9 +8331,13 @@ function logFeedbackImpl(bgLogger, parentIds, {
|
|
|
8320
8331
|
}
|
|
8321
8332
|
}
|
|
8322
8333
|
var Logger = class {
|
|
8334
|
+
lazyMetadata;
|
|
8335
|
+
logOptions;
|
|
8336
|
+
bgLogger;
|
|
8337
|
+
lastStartTime;
|
|
8338
|
+
// For type identification.
|
|
8339
|
+
kind = "logger";
|
|
8323
8340
|
constructor(lazyMetadata, logOptions = {}) {
|
|
8324
|
-
// For type identification.
|
|
8325
|
-
this.kind = "logger";
|
|
8326
8341
|
this.lazyMetadata = lazyMetadata;
|
|
8327
8342
|
this.logOptions = logOptions;
|
|
8328
8343
|
const logConn = new LazyValue(
|
|
@@ -8356,9 +8371,19 @@ var Logger = class {
|
|
|
8356
8371
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
8357
8372
|
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
8358
8373
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
8374
|
+
* @param options Additional logging options
|
|
8375
|
+
* @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the logger, set this to true.
|
|
8359
8376
|
* :returns: The `id` of the logged event.
|
|
8360
8377
|
*/
|
|
8361
|
-
log(event) {
|
|
8378
|
+
log(event, options) {
|
|
8379
|
+
if (!options?.allowLogConcurrentWithActiveSpan) {
|
|
8380
|
+
const checkCurrentSpan = currentSpan();
|
|
8381
|
+
if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
|
|
8382
|
+
throw new Error(
|
|
8383
|
+
"Cannot run toplevel Logger.log method while there is an active span. To log to the span, use Span.log"
|
|
8384
|
+
);
|
|
8385
|
+
}
|
|
8386
|
+
}
|
|
8362
8387
|
const span = this.startSpan({ startTime: this.lastStartTime, event });
|
|
8363
8388
|
this.lastStartTime = span.end();
|
|
8364
8389
|
const ret = span.id;
|
|
@@ -8417,6 +8442,7 @@ var Logger = class {
|
|
|
8417
8442
|
startSpan(args) {
|
|
8418
8443
|
const { name, ...argsRest } = args ?? {};
|
|
8419
8444
|
return new SpanImpl({
|
|
8445
|
+
parentObject: this,
|
|
8420
8446
|
parentIds: new LazyValue(() => this.lazyParentIds()),
|
|
8421
8447
|
bgLogger: this.bgLogger,
|
|
8422
8448
|
name: name ?? "root",
|
|
@@ -8474,10 +8500,11 @@ function now() {
|
|
|
8474
8500
|
return (/* @__PURE__ */ new Date()).getTime();
|
|
8475
8501
|
}
|
|
8476
8502
|
var BackgroundLogger = class {
|
|
8503
|
+
logConn;
|
|
8504
|
+
items = [];
|
|
8505
|
+
active_flush = Promise.resolve([]);
|
|
8506
|
+
active_flush_resolved = true;
|
|
8477
8507
|
constructor(logConn) {
|
|
8478
|
-
this.items = [];
|
|
8479
|
-
this.active_flush = Promise.resolve([]);
|
|
8480
|
-
this.active_flush_resolved = true;
|
|
8481
8508
|
this.logConn = logConn;
|
|
8482
8509
|
isomorph_default.processOn("beforeExit", async () => {
|
|
8483
8510
|
await this.flush();
|
|
@@ -8526,12 +8553,12 @@ var BackgroundLogger = class {
|
|
|
8526
8553
|
}
|
|
8527
8554
|
postPromises.push(
|
|
8528
8555
|
(async () => {
|
|
8529
|
-
const
|
|
8556
|
+
const dataStr = constructLogs3Data(items);
|
|
8530
8557
|
for (let i = 0; i < NumRetries; i++) {
|
|
8531
8558
|
const startTime = now();
|
|
8532
8559
|
try {
|
|
8533
8560
|
try {
|
|
8534
|
-
return (await (await this.logConn.get()).post_json("logs3",
|
|
8561
|
+
return (await (await this.logConn.get()).post_json("logs3", dataStr)).ids.map((res) => res.id);
|
|
8535
8562
|
} catch (e) {
|
|
8536
8563
|
const legacyDataS = constructJsonArray(
|
|
8537
8564
|
items.map(
|
|
@@ -8550,7 +8577,7 @@ var BackgroundLogger = class {
|
|
|
8550
8577
|
}
|
|
8551
8578
|
})();
|
|
8552
8579
|
console.warn(
|
|
8553
|
-
`log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${
|
|
8580
|
+
`log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}. Error: ${errMsg}.${retryingText}`
|
|
8554
8581
|
);
|
|
8555
8582
|
}
|
|
8556
8583
|
}
|
|
@@ -8918,9 +8945,12 @@ async function login(options = {}) {
|
|
|
8918
8945
|
}
|
|
8919
8946
|
};
|
|
8920
8947
|
var checkUpdatedParam = checkUpdatedParam2;
|
|
8921
|
-
;
|
|
8922
8948
|
checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
|
|
8923
|
-
checkUpdatedParam2(
|
|
8949
|
+
checkUpdatedParam2(
|
|
8950
|
+
"apiKey",
|
|
8951
|
+
options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
|
|
8952
|
+
_state.loginToken
|
|
8953
|
+
);
|
|
8924
8954
|
checkUpdatedParam2("orgName", options.orgName, _state.orgName);
|
|
8925
8955
|
return;
|
|
8926
8956
|
}
|
|
@@ -9141,8 +9171,8 @@ var ObjectFetcher = class {
|
|
|
9141
9171
|
this.objectType = objectType;
|
|
9142
9172
|
this.pinnedVersion = pinnedVersion;
|
|
9143
9173
|
this.mutateRecord = mutateRecord;
|
|
9144
|
-
this._fetchedData = void 0;
|
|
9145
9174
|
}
|
|
9175
|
+
_fetchedData = void 0;
|
|
9146
9176
|
get id() {
|
|
9147
9177
|
throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
|
|
9148
9178
|
}
|
|
@@ -9202,10 +9232,14 @@ var ObjectFetcher = class {
|
|
|
9202
9232
|
}
|
|
9203
9233
|
};
|
|
9204
9234
|
var Experiment = class extends ObjectFetcher {
|
|
9235
|
+
lazyMetadata;
|
|
9236
|
+
dataset;
|
|
9237
|
+
bgLogger;
|
|
9238
|
+
lastStartTime;
|
|
9239
|
+
// For type identification.
|
|
9240
|
+
kind = "experiment";
|
|
9205
9241
|
constructor(lazyMetadata, dataset) {
|
|
9206
9242
|
super("experiment", void 0);
|
|
9207
|
-
// For type identification.
|
|
9208
|
-
this.kind = "experiment";
|
|
9209
9243
|
this.lazyMetadata = lazyMetadata;
|
|
9210
9244
|
this.dataset = dataset;
|
|
9211
9245
|
const logConn = new LazyValue(
|
|
@@ -9246,9 +9280,19 @@ var Experiment = class extends ObjectFetcher {
|
|
|
9246
9280
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
9247
9281
|
* @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
9248
9282
|
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
9283
|
+
* @param options Additional logging options
|
|
9284
|
+
* @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
|
|
9249
9285
|
* :returns: The `id` of the logged event.
|
|
9250
9286
|
*/
|
|
9251
|
-
log(event) {
|
|
9287
|
+
log(event, options) {
|
|
9288
|
+
if (!options?.allowLogConcurrentWithActiveSpan) {
|
|
9289
|
+
const checkCurrentSpan = currentSpan();
|
|
9290
|
+
if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
|
|
9291
|
+
throw new Error(
|
|
9292
|
+
"Cannot run toplevel Experiment.log method while there is an active span. To log to the span, use Span.log"
|
|
9293
|
+
);
|
|
9294
|
+
}
|
|
9295
|
+
}
|
|
9252
9296
|
event = validateAndSanitizeExperimentLogFullArgs(event, !!this.dataset);
|
|
9253
9297
|
const span = this.startSpan({ startTime: this.lastStartTime, event });
|
|
9254
9298
|
this.lastStartTime = span.end();
|
|
@@ -9290,6 +9334,7 @@ var Experiment = class extends ObjectFetcher {
|
|
|
9290
9334
|
startSpan(args) {
|
|
9291
9335
|
const { name, ...argsRest } = args ?? {};
|
|
9292
9336
|
return new SpanImpl({
|
|
9337
|
+
parentObject: this,
|
|
9293
9338
|
parentIds: new LazyValue(() => this.lazyParentIds()),
|
|
9294
9339
|
bgLogger: this.bgLogger,
|
|
9295
9340
|
name: name ?? "root",
|
|
@@ -9444,10 +9489,21 @@ var ReadonlyExperiment = class extends ObjectFetcher {
|
|
|
9444
9489
|
};
|
|
9445
9490
|
var executionCounter = 0;
|
|
9446
9491
|
var SpanImpl = class _SpanImpl {
|
|
9492
|
+
bgLogger;
|
|
9493
|
+
// `internalData` contains fields that are not part of the "user-sanitized"
|
|
9494
|
+
// set of fields which we want to log in just one of the span rows.
|
|
9495
|
+
internalData;
|
|
9496
|
+
isMerge;
|
|
9497
|
+
loggedEndTime;
|
|
9498
|
+
// For internal use only.
|
|
9499
|
+
parentObject;
|
|
9500
|
+
// These fields are logged to every span row.
|
|
9501
|
+
parentIds;
|
|
9502
|
+
rowIds;
|
|
9503
|
+
kind = "span";
|
|
9447
9504
|
// root_experiment should only be specified for a root span. parent_span
|
|
9448
9505
|
// should only be specified for non-root spans.
|
|
9449
9506
|
constructor(args) {
|
|
9450
|
-
this.kind = "span";
|
|
9451
9507
|
this.loggedEndTime = void 0;
|
|
9452
9508
|
this.bgLogger = args.bgLogger;
|
|
9453
9509
|
const callerLocation = isomorph_default.getCallerLocation();
|
|
@@ -9475,6 +9531,7 @@ var SpanImpl = class _SpanImpl {
|
|
|
9475
9531
|
},
|
|
9476
9532
|
created: (/* @__PURE__ */ new Date()).toISOString()
|
|
9477
9533
|
};
|
|
9534
|
+
this.parentObject = args.parentObject;
|
|
9478
9535
|
this.parentIds = args.parentIds;
|
|
9479
9536
|
const id = args.event?.id ?? v4_default();
|
|
9480
9537
|
const span_id = v4_default();
|
|
@@ -9546,6 +9603,7 @@ var SpanImpl = class _SpanImpl {
|
|
|
9546
9603
|
}
|
|
9547
9604
|
startSpan(args) {
|
|
9548
9605
|
return new _SpanImpl({
|
|
9606
|
+
parentObject: this.parentObject,
|
|
9549
9607
|
parentIds: this.parentIds,
|
|
9550
9608
|
bgLogger: this.bgLogger,
|
|
9551
9609
|
parentSpanInfo: {
|
|
@@ -9571,6 +9629,8 @@ var SpanImpl = class _SpanImpl {
|
|
|
9571
9629
|
}
|
|
9572
9630
|
};
|
|
9573
9631
|
var Dataset = class extends ObjectFetcher {
|
|
9632
|
+
lazyMetadata;
|
|
9633
|
+
bgLogger;
|
|
9574
9634
|
constructor(lazyMetadata, pinnedVersion, legacy) {
|
|
9575
9635
|
const isLegacyDataset = legacy ?? DEFAULT_IS_LEGACY_DATASET;
|
|
9576
9636
|
if (isLegacyDataset) {
|
|
@@ -9742,8 +9802,9 @@ function fitNameToSpaces(name, length) {
|
|
|
9742
9802
|
return padded.substring(0, length - 3) + "...";
|
|
9743
9803
|
}
|
|
9744
9804
|
var BarProgressReporter = class {
|
|
9805
|
+
multiBar;
|
|
9806
|
+
bars = {};
|
|
9745
9807
|
constructor() {
|
|
9746
|
-
this.bars = {};
|
|
9747
9808
|
this.multiBar = new cliProgress.MultiBar(
|
|
9748
9809
|
{
|
|
9749
9810
|
clearOnComplete: false,
|
|
@@ -9787,9 +9848,9 @@ function initExperiment2(projectName, options = {}) {
|
|
|
9787
9848
|
}
|
|
9788
9849
|
globalThis._evals = {};
|
|
9789
9850
|
async function Eval(name, evaluator) {
|
|
9790
|
-
|
|
9851
|
+
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
9791
9852
|
if (_evals[evalName]) {
|
|
9792
|
-
|
|
9853
|
+
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
9793
9854
|
}
|
|
9794
9855
|
if (globalThis._lazy_load) {
|
|
9795
9856
|
_evals[evalName] = { evalName, projectName: name, ...evaluator };
|
|
@@ -9845,6 +9906,9 @@ function evaluateFilter(object, filter) {
|
|
|
9845
9906
|
}
|
|
9846
9907
|
return pattern.test(serializeJSONWithPlainString(key));
|
|
9847
9908
|
}
|
|
9909
|
+
function scorerName(scorer, scorer_idx) {
|
|
9910
|
+
return scorer.name || `scorer_${scorer_idx}`;
|
|
9911
|
+
}
|
|
9848
9912
|
async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
9849
9913
|
if (typeof evaluator.data === "string") {
|
|
9850
9914
|
throw new Error("Unimplemented: string data paths");
|
|
@@ -9888,11 +9952,13 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9888
9952
|
);
|
|
9889
9953
|
progressReporter.start(evaluator.evalName, data.length);
|
|
9890
9954
|
const evals = data.map(async (datum) => {
|
|
9891
|
-
let metadata = { ..."metadata" in datum ? datum.metadata : {} };
|
|
9892
|
-
let output = void 0;
|
|
9893
|
-
let error2 = void 0;
|
|
9894
|
-
let scores = {};
|
|
9895
9955
|
const callback = async (rootSpan) => {
|
|
9956
|
+
let metadata = {
|
|
9957
|
+
..."metadata" in datum ? datum.metadata : {}
|
|
9958
|
+
};
|
|
9959
|
+
let output = void 0;
|
|
9960
|
+
let error2 = void 0;
|
|
9961
|
+
let scores = {};
|
|
9896
9962
|
try {
|
|
9897
9963
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
9898
9964
|
await rootSpan.traced(
|
|
@@ -9909,42 +9975,55 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9909
9975
|
);
|
|
9910
9976
|
rootSpan.log({ output });
|
|
9911
9977
|
const scoringArgs = { ...datum, metadata, output };
|
|
9978
|
+
const scorerNames = evaluator.scores.map(scorerName);
|
|
9912
9979
|
const scoreResults = await Promise.all(
|
|
9913
9980
|
evaluator.scores.map(async (score, score_idx) => {
|
|
9914
|
-
|
|
9915
|
-
|
|
9916
|
-
|
|
9917
|
-
|
|
9918
|
-
|
|
9919
|
-
|
|
9920
|
-
|
|
9921
|
-
|
|
9922
|
-
|
|
9923
|
-
|
|
9924
|
-
|
|
9925
|
-
|
|
9926
|
-
|
|
9927
|
-
|
|
9928
|
-
|
|
9929
|
-
{
|
|
9930
|
-
name: score.name || `scorer_${score_idx}`,
|
|
9931
|
-
spanAttributes: {
|
|
9932
|
-
type: SpanTypeAttribute.SCORE
|
|
9981
|
+
try {
|
|
9982
|
+
const result = await rootSpan.traced(
|
|
9983
|
+
async (span) => {
|
|
9984
|
+
const scoreResult = score(scoringArgs);
|
|
9985
|
+
const result2 = scoreResult instanceof Promise ? await scoreResult : scoreResult;
|
|
9986
|
+
const {
|
|
9987
|
+
metadata: resultMetadata,
|
|
9988
|
+
name: _,
|
|
9989
|
+
...resultRest
|
|
9990
|
+
} = result2;
|
|
9991
|
+
span.log({
|
|
9992
|
+
output: resultRest,
|
|
9993
|
+
metadata: resultMetadata
|
|
9994
|
+
});
|
|
9995
|
+
return result2;
|
|
9933
9996
|
},
|
|
9934
|
-
|
|
9935
|
-
|
|
9936
|
-
|
|
9997
|
+
{
|
|
9998
|
+
name: scorerNames[score_idx],
|
|
9999
|
+
spanAttributes: {
|
|
10000
|
+
type: SpanTypeAttribute.SCORE
|
|
10001
|
+
},
|
|
10002
|
+
event: { input: scoringArgs }
|
|
10003
|
+
}
|
|
10004
|
+
);
|
|
10005
|
+
return { kind: "score", value: result };
|
|
10006
|
+
} catch (e) {
|
|
10007
|
+
return { kind: "error", value: e };
|
|
10008
|
+
}
|
|
9937
10009
|
})
|
|
9938
10010
|
);
|
|
10011
|
+
const passingScorersAndResults = [];
|
|
10012
|
+
const failingScorersAndResults = [];
|
|
10013
|
+
scoreResults.forEach((result, i) => {
|
|
10014
|
+
const name = scorerNames[i];
|
|
10015
|
+
if (result.kind === "score") {
|
|
10016
|
+
passingScorersAndResults.push({ name, score: result.value });
|
|
10017
|
+
} else {
|
|
10018
|
+
failingScorersAndResults.push({ name, error: result.value });
|
|
10019
|
+
}
|
|
10020
|
+
});
|
|
9939
10021
|
const scoreMetadata = {};
|
|
9940
|
-
for (const scoreResult of
|
|
10022
|
+
for (const { score: scoreResult } of passingScorersAndResults) {
|
|
9941
10023
|
scores[scoreResult.name] = scoreResult.score;
|
|
9942
10024
|
const metadata2 = {
|
|
9943
10025
|
...scoreResult.metadata
|
|
9944
10026
|
};
|
|
9945
|
-
if (scoreResult.error !== void 0) {
|
|
9946
|
-
metadata2.error = scoreResult.error;
|
|
9947
|
-
}
|
|
9948
10027
|
if (Object.keys(metadata2).length > 0) {
|
|
9949
10028
|
scoreMetadata[scoreResult.name] = metadata2;
|
|
9950
10029
|
}
|
|
@@ -9953,6 +10032,21 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9953
10032
|
meta({ scores: scoreMetadata });
|
|
9954
10033
|
}
|
|
9955
10034
|
rootSpan.log({ scores, metadata });
|
|
10035
|
+
if (failingScorersAndResults.length) {
|
|
10036
|
+
const scorerErrors = Object.fromEntries(
|
|
10037
|
+
failingScorersAndResults.map(({ name, error: error3 }) => [
|
|
10038
|
+
name,
|
|
10039
|
+
error3 instanceof Error ? error3.stack : `${error3}`
|
|
10040
|
+
])
|
|
10041
|
+
);
|
|
10042
|
+
metadata["scorer_errors"] = scorerErrors;
|
|
10043
|
+
const names = Object.keys(scorerErrors).join(", ");
|
|
10044
|
+
const errors = failingScorersAndResults.map((item) => item.error);
|
|
10045
|
+
throw new AggregateError(
|
|
10046
|
+
errors,
|
|
10047
|
+
`Found exceptions for the following scorers: ${names}`
|
|
10048
|
+
);
|
|
10049
|
+
}
|
|
9956
10050
|
} catch (e) {
|
|
9957
10051
|
error2 = e;
|
|
9958
10052
|
} finally {
|
|
@@ -10031,7 +10125,8 @@ function reportEvaluatorResult(evaluatorName, evaluatorResult, {
|
|
|
10031
10125
|
if (!verbose && !jsonl) {
|
|
10032
10126
|
console.error(warning("Add --verbose to see full stack traces."));
|
|
10033
10127
|
}
|
|
10034
|
-
}
|
|
10128
|
+
}
|
|
10129
|
+
if (summary) {
|
|
10035
10130
|
console.log(jsonl ? JSON.stringify(summary) : summary);
|
|
10036
10131
|
} else {
|
|
10037
10132
|
const scoresByName = {};
|
|
@@ -10259,6 +10354,9 @@ function wrapEmbeddings(create) {
|
|
|
10259
10354
|
};
|
|
10260
10355
|
}
|
|
10261
10356
|
var WrapperStream = class {
|
|
10357
|
+
span;
|
|
10358
|
+
iter;
|
|
10359
|
+
startTime;
|
|
10262
10360
|
constructor(span, startTime, iter) {
|
|
10263
10361
|
this.span = span;
|
|
10264
10362
|
this.iter = iter;
|
package/dist/logger.d.ts
CHANGED
|
@@ -186,9 +186,13 @@ export declare class Logger<IsAsyncFlush extends boolean> {
|
|
|
186
186
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
187
187
|
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
188
188
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
189
|
+
* @param options Additional logging options
|
|
190
|
+
* @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the logger, set this to true.
|
|
189
191
|
* :returns: The `id` of the logged event.
|
|
190
192
|
*/
|
|
191
|
-
log(event: Readonly<StartSpanEventArgs
|
|
193
|
+
log(event: Readonly<StartSpanEventArgs>, options?: {
|
|
194
|
+
allowLogConcurrentWithActiveSpan?: boolean;
|
|
195
|
+
}): PromiseUnless<IsAsyncFlush, string>;
|
|
192
196
|
/**
|
|
193
197
|
* Create a new toplevel span underneath the logger. The name defaults to "root".
|
|
194
198
|
*
|
|
@@ -501,9 +505,13 @@ export declare class Experiment extends ObjectFetcher<ExperimentEvent> {
|
|
|
501
505
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
502
506
|
* @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
503
507
|
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
508
|
+
* @param options Additional logging options
|
|
509
|
+
* @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
|
|
504
510
|
* :returns: The `id` of the logged event.
|
|
505
511
|
*/
|
|
506
|
-
log(event: Readonly<ExperimentLogFullArgs
|
|
512
|
+
log(event: Readonly<ExperimentLogFullArgs>, options?: {
|
|
513
|
+
allowLogConcurrentWithActiveSpan?: boolean;
|
|
514
|
+
}): string;
|
|
507
515
|
/**
|
|
508
516
|
* Create a new toplevel span underneath the experiment. The name defaults to "root".
|
|
509
517
|
*
|
|
@@ -577,10 +585,12 @@ export declare class SpanImpl implements Span {
|
|
|
577
585
|
private internalData;
|
|
578
586
|
private isMerge;
|
|
579
587
|
private loggedEndTime;
|
|
588
|
+
parentObject: Experiment | Logger<any>;
|
|
580
589
|
private parentIds;
|
|
581
590
|
private readonly rowIds;
|
|
582
591
|
kind: "span";
|
|
583
592
|
constructor(args: {
|
|
593
|
+
parentObject: Experiment | Logger<any>;
|
|
584
594
|
parentIds: LazyValue<ParentExperimentIds | ParentProjectLogIds>;
|
|
585
595
|
bgLogger: BackgroundLogger;
|
|
586
596
|
} & Omit<StartSpanArgs, "parentId"> & ({
|