braintrust 0.0.97 → 0.0.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +349 -103
- package/dist/cli.js +307 -128
- package/dist/framework.d.ts +18 -18
- package/dist/gitutil.d.ts +2 -2
- package/dist/index.d.ts +1 -0
- package/dist/index.js +427 -146
- package/dist/isomorph.d.ts +2 -2
- package/dist/logger.d.ts +89 -134
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -3648,7 +3648,6 @@ var require_pluralize = __commonJS({
|
|
|
3648
3648
|
var src_exports = {};
|
|
3649
3649
|
__export(src_exports, {
|
|
3650
3650
|
BaseExperiment: () => BaseExperiment,
|
|
3651
|
-
Dataset: () => Dataset,
|
|
3652
3651
|
Eval: () => Eval,
|
|
3653
3652
|
Experiment: () => Experiment,
|
|
3654
3653
|
Logger: () => Logger,
|
|
@@ -3664,6 +3663,7 @@ __export(src_exports, {
|
|
|
3664
3663
|
getSpanParentObject: () => getSpanParentObject,
|
|
3665
3664
|
init: () => init,
|
|
3666
3665
|
initDataset: () => initDataset,
|
|
3666
|
+
initExperiment: () => initExperiment,
|
|
3667
3667
|
initLogger: () => initLogger,
|
|
3668
3668
|
log: () => log,
|
|
3669
3669
|
login: () => login,
|
|
@@ -3695,7 +3695,7 @@ var DefaultAsyncLocalStorage = class {
|
|
|
3695
3695
|
}
|
|
3696
3696
|
};
|
|
3697
3697
|
var iso = {
|
|
3698
|
-
|
|
3698
|
+
getRepoInfo: async (_settings) => void 0,
|
|
3699
3699
|
getPastNAncestors: async () => [],
|
|
3700
3700
|
getEnv: (_name) => void 0,
|
|
3701
3701
|
getCallerLocation: () => void 0,
|
|
@@ -7734,11 +7734,11 @@ function truncateToByteLimit(s, byteLimit = 65536) {
|
|
|
7734
7734
|
const truncated = encoded.subarray(0, byteLimit);
|
|
7735
7735
|
return new TextDecoder().decode(truncated);
|
|
7736
7736
|
}
|
|
7737
|
-
async function
|
|
7737
|
+
async function getRepoInfo(settings) {
|
|
7738
7738
|
if (settings && settings.collect === "none") {
|
|
7739
7739
|
return void 0;
|
|
7740
7740
|
}
|
|
7741
|
-
const repo = await
|
|
7741
|
+
const repo = await repoInfo();
|
|
7742
7742
|
if (!repo || !settings || settings.collect === "all") {
|
|
7743
7743
|
return repo;
|
|
7744
7744
|
}
|
|
@@ -7748,7 +7748,7 @@ async function getRepoStatus(settings) {
|
|
|
7748
7748
|
});
|
|
7749
7749
|
return sanitized;
|
|
7750
7750
|
}
|
|
7751
|
-
async function
|
|
7751
|
+
async function repoInfo() {
|
|
7752
7752
|
const git = await currentRepo();
|
|
7753
7753
|
if (git === null) {
|
|
7754
7754
|
return void 0;
|
|
@@ -7890,9 +7890,10 @@ function v4(options, buf, offset) {
|
|
|
7890
7890
|
}
|
|
7891
7891
|
var v4_default = v4;
|
|
7892
7892
|
|
|
7893
|
-
// ../core/js/dist/index.mjs
|
|
7893
|
+
// ../core/js/dist/main/index.mjs
|
|
7894
7894
|
var TRANSACTION_ID_FIELD = "_xact_id";
|
|
7895
7895
|
var IS_MERGE_FIELD = "_is_merge";
|
|
7896
|
+
var MERGE_PATHS_FIELD = "_merge_paths";
|
|
7896
7897
|
var AUDIT_SOURCE_FIELD = "_audit_source";
|
|
7897
7898
|
var AUDIT_METADATA_FIELD = "_audit_metadata";
|
|
7898
7899
|
var VALID_SOURCES = ["app", "api", "external"];
|
|
@@ -7951,6 +7952,54 @@ function mergeRowBatch(rows) {
|
|
|
7951
7952
|
out.push(...Object.values(rowGroups));
|
|
7952
7953
|
return out;
|
|
7953
7954
|
}
|
|
7955
|
+
var DEFAULT_IS_LEGACY_DATASET = true;
|
|
7956
|
+
function ensureDatasetRecord(r, legacy) {
|
|
7957
|
+
if (legacy) {
|
|
7958
|
+
return ensureLegacyDatasetRecord(r);
|
|
7959
|
+
} else {
|
|
7960
|
+
return ensureNewDatasetRecord(r);
|
|
7961
|
+
}
|
|
7962
|
+
}
|
|
7963
|
+
function ensureLegacyDatasetRecord(r) {
|
|
7964
|
+
if ("output" in r) {
|
|
7965
|
+
return r;
|
|
7966
|
+
}
|
|
7967
|
+
const row = {
|
|
7968
|
+
...r,
|
|
7969
|
+
output: r.expected
|
|
7970
|
+
};
|
|
7971
|
+
delete row.expected;
|
|
7972
|
+
return row;
|
|
7973
|
+
}
|
|
7974
|
+
function ensureNewDatasetRecord(r) {
|
|
7975
|
+
if ("expected" in r) {
|
|
7976
|
+
return r;
|
|
7977
|
+
}
|
|
7978
|
+
const row = {
|
|
7979
|
+
...r,
|
|
7980
|
+
expected: r.output
|
|
7981
|
+
};
|
|
7982
|
+
delete row.output;
|
|
7983
|
+
return row;
|
|
7984
|
+
}
|
|
7985
|
+
function makeLegacyEvent(e) {
|
|
7986
|
+
if (!("dataset_id" in e) || !("expected" in e)) {
|
|
7987
|
+
return e;
|
|
7988
|
+
}
|
|
7989
|
+
const event = {
|
|
7990
|
+
...e,
|
|
7991
|
+
output: e.expected
|
|
7992
|
+
};
|
|
7993
|
+
delete event.expected;
|
|
7994
|
+
if (MERGE_PATHS_FIELD in event) {
|
|
7995
|
+
for (const path2 of event[MERGE_PATHS_FIELD] || []) {
|
|
7996
|
+
if (path2.length > 0 && path2[0] === "expected") {
|
|
7997
|
+
path2[0] = "output";
|
|
7998
|
+
}
|
|
7999
|
+
}
|
|
8000
|
+
}
|
|
8001
|
+
return event;
|
|
8002
|
+
}
|
|
7954
8003
|
var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
|
|
7955
8004
|
SpanTypeAttribute2["LLM"] = "llm";
|
|
7956
8005
|
SpanTypeAttribute2["SCORE"] = "score";
|
|
@@ -8004,10 +8053,11 @@ function isEmpty(a) {
|
|
|
8004
8053
|
return a === void 0 || a === null;
|
|
8005
8054
|
}
|
|
8006
8055
|
var LazyValue = class {
|
|
8056
|
+
callable;
|
|
8057
|
+
value = {
|
|
8058
|
+
hasComputed: false
|
|
8059
|
+
};
|
|
8007
8060
|
constructor(callable) {
|
|
8008
|
-
this.value = {
|
|
8009
|
-
hasComputed: false
|
|
8010
|
-
};
|
|
8011
8061
|
this.callable = callable;
|
|
8012
8062
|
}
|
|
8013
8063
|
async get() {
|
|
@@ -8021,8 +8071,11 @@ var LazyValue = class {
|
|
|
8021
8071
|
|
|
8022
8072
|
// src/logger.ts
|
|
8023
8073
|
var NoopSpan = class {
|
|
8074
|
+
id;
|
|
8075
|
+
span_id;
|
|
8076
|
+
root_span_id;
|
|
8077
|
+
kind = "span";
|
|
8024
8078
|
constructor() {
|
|
8025
|
-
this.kind = "span";
|
|
8026
8079
|
this.id = "";
|
|
8027
8080
|
this.span_id = "";
|
|
8028
8081
|
this.root_span_id = "";
|
|
@@ -8046,15 +8099,22 @@ var NoopSpan = class {
|
|
|
8046
8099
|
};
|
|
8047
8100
|
var NOOP_SPAN = new NoopSpan();
|
|
8048
8101
|
var BraintrustState = class {
|
|
8102
|
+
id;
|
|
8103
|
+
currentExperiment;
|
|
8104
|
+
// Note: the value of IsAsyncFlush doesn't really matter here, since we
|
|
8105
|
+
// (safely) dynamically cast it whenever retrieving the logger.
|
|
8106
|
+
currentLogger;
|
|
8107
|
+
currentSpan;
|
|
8108
|
+
appUrl = null;
|
|
8109
|
+
loginToken = null;
|
|
8110
|
+
orgId = null;
|
|
8111
|
+
orgName = null;
|
|
8112
|
+
logUrl = null;
|
|
8113
|
+
loggedIn = false;
|
|
8114
|
+
gitMetadataSettings;
|
|
8115
|
+
_apiConn = null;
|
|
8116
|
+
_logConn = null;
|
|
8049
8117
|
constructor() {
|
|
8050
|
-
this.appUrl = null;
|
|
8051
|
-
this.loginToken = null;
|
|
8052
|
-
this.orgId = null;
|
|
8053
|
-
this.orgName = null;
|
|
8054
|
-
this.logUrl = null;
|
|
8055
|
-
this.loggedIn = false;
|
|
8056
|
-
this._apiConn = null;
|
|
8057
|
-
this._logConn = null;
|
|
8058
8118
|
this.id = v4_default();
|
|
8059
8119
|
this.currentExperiment = void 0;
|
|
8060
8120
|
this.currentLogger = void 0;
|
|
@@ -8101,6 +8161,9 @@ function _internalSetInitialState() {
|
|
|
8101
8161
|
}
|
|
8102
8162
|
var _internalGetGlobalState = () => _state;
|
|
8103
8163
|
var FailedHTTPResponse = class extends Error {
|
|
8164
|
+
status;
|
|
8165
|
+
text;
|
|
8166
|
+
data;
|
|
8104
8167
|
constructor(status, text, data = null) {
|
|
8105
8168
|
super(`${status}: ${text}`);
|
|
8106
8169
|
this.status = status;
|
|
@@ -8120,6 +8183,9 @@ async function checkResponse(resp) {
|
|
|
8120
8183
|
}
|
|
8121
8184
|
}
|
|
8122
8185
|
var HTTPConnection = class _HTTPConnection {
|
|
8186
|
+
base_url;
|
|
8187
|
+
token;
|
|
8188
|
+
headers;
|
|
8123
8189
|
constructor(base_url) {
|
|
8124
8190
|
this.base_url = base_url;
|
|
8125
8191
|
this.token = null;
|
|
@@ -8271,9 +8337,13 @@ function logFeedbackImpl(bgLogger, parentIds, {
|
|
|
8271
8337
|
}
|
|
8272
8338
|
}
|
|
8273
8339
|
var Logger = class {
|
|
8340
|
+
lazyMetadata;
|
|
8341
|
+
logOptions;
|
|
8342
|
+
bgLogger;
|
|
8343
|
+
lastStartTime;
|
|
8344
|
+
// For type identification.
|
|
8345
|
+
kind = "logger";
|
|
8274
8346
|
constructor(lazyMetadata, logOptions = {}) {
|
|
8275
|
-
// For type identification.
|
|
8276
|
-
this.kind = "logger";
|
|
8277
8347
|
this.lazyMetadata = lazyMetadata;
|
|
8278
8348
|
this.logOptions = logOptions;
|
|
8279
8349
|
const logConn = new LazyValue(
|
|
@@ -8307,9 +8377,19 @@ var Logger = class {
|
|
|
8307
8377
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
8308
8378
|
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
8309
8379
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
8380
|
+
* @param options Additional logging options
|
|
8381
|
+
* @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the logger, set this to true.
|
|
8310
8382
|
* :returns: The `id` of the logged event.
|
|
8311
8383
|
*/
|
|
8312
|
-
log(event) {
|
|
8384
|
+
log(event, options) {
|
|
8385
|
+
if (!options?.allowLogConcurrentWithActiveSpan) {
|
|
8386
|
+
const checkCurrentSpan = currentSpan();
|
|
8387
|
+
if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
|
|
8388
|
+
throw new Error(
|
|
8389
|
+
"Cannot run toplevel Logger.log method while there is an active span. To log to the span, use Span.log"
|
|
8390
|
+
);
|
|
8391
|
+
}
|
|
8392
|
+
}
|
|
8313
8393
|
const span = this.startSpan({ startTime: this.lastStartTime, event });
|
|
8314
8394
|
this.lastStartTime = span.end();
|
|
8315
8395
|
const ret = span.id;
|
|
@@ -8368,6 +8448,7 @@ var Logger = class {
|
|
|
8368
8448
|
startSpan(args) {
|
|
8369
8449
|
const { name, ...argsRest } = args ?? {};
|
|
8370
8450
|
return new SpanImpl({
|
|
8451
|
+
parentObject: this,
|
|
8371
8452
|
parentIds: new LazyValue(() => this.lazyParentIds()),
|
|
8372
8453
|
bgLogger: this.bgLogger,
|
|
8373
8454
|
name: name ?? "root",
|
|
@@ -8416,16 +8497,20 @@ var MaxRequestSize = 6 * 1024 * 1024;
|
|
|
8416
8497
|
function constructJsonArray(items) {
|
|
8417
8498
|
return `[${items.join(",")}]`;
|
|
8418
8499
|
}
|
|
8500
|
+
function constructLogs3Data(items) {
|
|
8501
|
+
return `{"rows": ${constructJsonArray(items)}, "api_version": 2}`;
|
|
8502
|
+
}
|
|
8419
8503
|
var DefaultBatchSize = 100;
|
|
8420
8504
|
var NumRetries = 3;
|
|
8421
8505
|
function now() {
|
|
8422
8506
|
return (/* @__PURE__ */ new Date()).getTime();
|
|
8423
8507
|
}
|
|
8424
8508
|
var BackgroundLogger = class {
|
|
8509
|
+
logConn;
|
|
8510
|
+
items = [];
|
|
8511
|
+
active_flush = Promise.resolve([]);
|
|
8512
|
+
active_flush_resolved = true;
|
|
8425
8513
|
constructor(logConn) {
|
|
8426
|
-
this.items = [];
|
|
8427
|
-
this.active_flush = Promise.resolve([]);
|
|
8428
|
-
this.active_flush_resolved = true;
|
|
8429
8514
|
this.logConn = logConn;
|
|
8430
8515
|
isomorph_default.processOn("beforeExit", async () => {
|
|
8431
8516
|
await this.flush();
|
|
@@ -8474,11 +8559,20 @@ var BackgroundLogger = class {
|
|
|
8474
8559
|
}
|
|
8475
8560
|
postPromises.push(
|
|
8476
8561
|
(async () => {
|
|
8477
|
-
const
|
|
8562
|
+
const dataStr = constructLogs3Data(items);
|
|
8478
8563
|
for (let i = 0; i < NumRetries; i++) {
|
|
8479
8564
|
const startTime = now();
|
|
8480
8565
|
try {
|
|
8481
|
-
|
|
8566
|
+
try {
|
|
8567
|
+
return (await (await this.logConn.get()).post_json("logs3", dataStr)).ids.map((res) => res.id);
|
|
8568
|
+
} catch (e) {
|
|
8569
|
+
const legacyDataS = constructJsonArray(
|
|
8570
|
+
items.map(
|
|
8571
|
+
(r) => JSON.stringify(makeLegacyEvent(JSON.parse(r)))
|
|
8572
|
+
)
|
|
8573
|
+
);
|
|
8574
|
+
return (await (await this.logConn.get()).post_json("logs", legacyDataS)).map((res) => res.id);
|
|
8575
|
+
}
|
|
8482
8576
|
} catch (e) {
|
|
8483
8577
|
const retryingText = i + 1 === NumRetries ? "" : " Retrying";
|
|
8484
8578
|
const errMsg = (() => {
|
|
@@ -8489,7 +8583,7 @@ var BackgroundLogger = class {
|
|
|
8489
8583
|
}
|
|
8490
8584
|
})();
|
|
8491
8585
|
console.warn(
|
|
8492
|
-
`log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${
|
|
8586
|
+
`log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}. Error: ${errMsg}.${retryingText}`
|
|
8493
8587
|
);
|
|
8494
8588
|
}
|
|
8495
8589
|
}
|
|
@@ -8517,8 +8611,21 @@ var BackgroundLogger = class {
|
|
|
8517
8611
|
}
|
|
8518
8612
|
}
|
|
8519
8613
|
};
|
|
8520
|
-
function init(
|
|
8614
|
+
function init(projectOrOptions, optionalOptions) {
|
|
8615
|
+
const options = (() => {
|
|
8616
|
+
if (typeof projectOrOptions === "string") {
|
|
8617
|
+
return { ...optionalOptions, project: projectOrOptions };
|
|
8618
|
+
} else {
|
|
8619
|
+
if (optionalOptions !== void 0) {
|
|
8620
|
+
throw new Error(
|
|
8621
|
+
"Cannot specify options struct as both parameters. Must call either init(project, options) or init(options)."
|
|
8622
|
+
);
|
|
8623
|
+
}
|
|
8624
|
+
return projectOrOptions;
|
|
8625
|
+
}
|
|
8626
|
+
})();
|
|
8521
8627
|
const {
|
|
8628
|
+
project,
|
|
8522
8629
|
experiment,
|
|
8523
8630
|
description,
|
|
8524
8631
|
dataset,
|
|
@@ -8530,47 +8637,55 @@ function init(project, options = {}) {
|
|
|
8530
8637
|
apiKey,
|
|
8531
8638
|
orgName,
|
|
8532
8639
|
metadata,
|
|
8533
|
-
gitMetadataSettings
|
|
8534
|
-
|
|
8640
|
+
gitMetadataSettings,
|
|
8641
|
+
projectId,
|
|
8642
|
+
baseExperimentId,
|
|
8643
|
+
repoInfo: repoInfo2
|
|
8644
|
+
} = options;
|
|
8535
8645
|
if (open && update) {
|
|
8536
8646
|
throw new Error("Cannot open and update an experiment at the same time");
|
|
8537
8647
|
}
|
|
8538
8648
|
if (open || update) {
|
|
8539
8649
|
if (isEmpty(experiment)) {
|
|
8540
8650
|
const action = open ? "open" : "update";
|
|
8541
|
-
throw new Error(
|
|
8651
|
+
throw new Error(
|
|
8652
|
+
`Cannot ${action} an experiment without specifying its name`
|
|
8653
|
+
);
|
|
8542
8654
|
}
|
|
8543
|
-
const lazyMetadata2 = new LazyValue(
|
|
8544
|
-
|
|
8545
|
-
|
|
8546
|
-
|
|
8547
|
-
|
|
8548
|
-
|
|
8549
|
-
|
|
8550
|
-
|
|
8551
|
-
|
|
8552
|
-
|
|
8553
|
-
|
|
8554
|
-
|
|
8555
|
-
|
|
8556
|
-
|
|
8557
|
-
|
|
8558
|
-
|
|
8559
|
-
|
|
8560
|
-
|
|
8561
|
-
return {
|
|
8562
|
-
project: {
|
|
8563
|
-
id: info.project_id,
|
|
8564
|
-
name: "",
|
|
8565
|
-
fullInfo: {}
|
|
8566
|
-
},
|
|
8567
|
-
experiment: {
|
|
8568
|
-
id: info.id,
|
|
8569
|
-
name: info.name,
|
|
8570
|
-
fullInfo: info
|
|
8655
|
+
const lazyMetadata2 = new LazyValue(
|
|
8656
|
+
async () => {
|
|
8657
|
+
await login({
|
|
8658
|
+
orgName,
|
|
8659
|
+
apiKey,
|
|
8660
|
+
appUrl
|
|
8661
|
+
});
|
|
8662
|
+
const args = {
|
|
8663
|
+
project_name: project,
|
|
8664
|
+
project_id: projectId,
|
|
8665
|
+
org_name: _state.orgName,
|
|
8666
|
+
experiment_name: experiment
|
|
8667
|
+
};
|
|
8668
|
+
const response = await _state.apiConn().post_json("api/experiment/get", args);
|
|
8669
|
+
if (response.length === 0) {
|
|
8670
|
+
throw new Error(
|
|
8671
|
+
`Experiment ${experiment} not found in project ${projectId ?? project}.`
|
|
8672
|
+
);
|
|
8571
8673
|
}
|
|
8572
|
-
|
|
8573
|
-
|
|
8674
|
+
const info = response[0];
|
|
8675
|
+
return {
|
|
8676
|
+
project: {
|
|
8677
|
+
id: info.project_id,
|
|
8678
|
+
name: "",
|
|
8679
|
+
fullInfo: {}
|
|
8680
|
+
},
|
|
8681
|
+
experiment: {
|
|
8682
|
+
id: info.id,
|
|
8683
|
+
name: info.name,
|
|
8684
|
+
fullInfo: info
|
|
8685
|
+
}
|
|
8686
|
+
};
|
|
8687
|
+
}
|
|
8688
|
+
);
|
|
8574
8689
|
if (open) {
|
|
8575
8690
|
return new ReadonlyExperiment(
|
|
8576
8691
|
lazyMetadata2
|
|
@@ -8592,6 +8707,7 @@ function init(project, options = {}) {
|
|
|
8592
8707
|
});
|
|
8593
8708
|
const args = {
|
|
8594
8709
|
project_name: project,
|
|
8710
|
+
project_id: projectId,
|
|
8595
8711
|
org_id: _state.orgId
|
|
8596
8712
|
};
|
|
8597
8713
|
if (experiment) {
|
|
@@ -8600,22 +8716,29 @@ function init(project, options = {}) {
|
|
|
8600
8716
|
if (description) {
|
|
8601
8717
|
args["description"] = description;
|
|
8602
8718
|
}
|
|
8603
|
-
|
|
8604
|
-
|
|
8605
|
-
|
|
8719
|
+
const repoInfoArg = await (async () => {
|
|
8720
|
+
if (repoInfo2) {
|
|
8721
|
+
return repoInfo2;
|
|
8606
8722
|
}
|
|
8607
|
-
|
|
8608
|
-
|
|
8609
|
-
|
|
8610
|
-
|
|
8611
|
-
|
|
8612
|
-
)
|
|
8613
|
-
|
|
8614
|
-
|
|
8615
|
-
|
|
8616
|
-
|
|
8723
|
+
let mergedGitMetadataSettings = {
|
|
8724
|
+
..._state.gitMetadataSettings || {
|
|
8725
|
+
collect: "all"
|
|
8726
|
+
}
|
|
8727
|
+
};
|
|
8728
|
+
if (gitMetadataSettings) {
|
|
8729
|
+
mergedGitMetadataSettings = mergeGitMetadataSettings(
|
|
8730
|
+
mergedGitMetadataSettings,
|
|
8731
|
+
gitMetadataSettings
|
|
8732
|
+
);
|
|
8733
|
+
}
|
|
8734
|
+
return await isomorph_default.getRepoInfo(mergedGitMetadataSettings);
|
|
8735
|
+
})();
|
|
8736
|
+
if (repoInfoArg) {
|
|
8737
|
+
args["repo_info"] = repoInfoArg;
|
|
8617
8738
|
}
|
|
8618
|
-
if (
|
|
8739
|
+
if (baseExperimentId) {
|
|
8740
|
+
args["base_exp_id"] = baseExperimentId;
|
|
8741
|
+
} else if (baseExperiment) {
|
|
8619
8742
|
args["base_experiment"] = baseExperiment;
|
|
8620
8743
|
} else {
|
|
8621
8744
|
args["ancestor_commits"] = await isomorph_default.getPastNAncestors();
|
|
@@ -8666,6 +8789,21 @@ function init(project, options = {}) {
|
|
|
8666
8789
|
}
|
|
8667
8790
|
return ret;
|
|
8668
8791
|
}
|
|
8792
|
+
function initExperiment(projectOrOptions, optionalOptions) {
|
|
8793
|
+
const options = (() => {
|
|
8794
|
+
if (typeof projectOrOptions === "string") {
|
|
8795
|
+
return { ...optionalOptions, project: projectOrOptions };
|
|
8796
|
+
} else {
|
|
8797
|
+
if (optionalOptions !== void 0) {
|
|
8798
|
+
throw new Error(
|
|
8799
|
+
"Cannot specify options struct as both parameters. Must call either init(project, options) or init(options)."
|
|
8800
|
+
);
|
|
8801
|
+
}
|
|
8802
|
+
return projectOrOptions;
|
|
8803
|
+
}
|
|
8804
|
+
})();
|
|
8805
|
+
return init(options);
|
|
8806
|
+
}
|
|
8669
8807
|
function withExperiment(project, callback, options = {}) {
|
|
8670
8808
|
console.warn(
|
|
8671
8809
|
"withExperiment is deprecated and will be removed in a future version of braintrust. Simply create the experiment with `init`."
|
|
@@ -8680,8 +8818,30 @@ function withLogger(callback, options = {}) {
|
|
|
8680
8818
|
const logger = initLogger(options);
|
|
8681
8819
|
return callback(logger);
|
|
8682
8820
|
}
|
|
8683
|
-
function initDataset(
|
|
8684
|
-
const
|
|
8821
|
+
function initDataset(projectOrOptions, optionalOptions) {
|
|
8822
|
+
const options = (() => {
|
|
8823
|
+
if (typeof projectOrOptions === "string") {
|
|
8824
|
+
return { ...optionalOptions, project: projectOrOptions };
|
|
8825
|
+
} else {
|
|
8826
|
+
if (optionalOptions !== void 0) {
|
|
8827
|
+
throw new Error(
|
|
8828
|
+
"Cannot specify options struct as both parameters. Must call either initDataset(project, options) or initDataset(options)."
|
|
8829
|
+
);
|
|
8830
|
+
}
|
|
8831
|
+
return projectOrOptions;
|
|
8832
|
+
}
|
|
8833
|
+
})();
|
|
8834
|
+
const {
|
|
8835
|
+
project,
|
|
8836
|
+
dataset,
|
|
8837
|
+
description,
|
|
8838
|
+
version,
|
|
8839
|
+
appUrl,
|
|
8840
|
+
apiKey,
|
|
8841
|
+
orgName,
|
|
8842
|
+
projectId,
|
|
8843
|
+
useOutput: legacy
|
|
8844
|
+
} = options;
|
|
8685
8845
|
const lazyMetadata = new LazyValue(
|
|
8686
8846
|
async () => {
|
|
8687
8847
|
await login({
|
|
@@ -8692,6 +8852,7 @@ function initDataset(project, options = {}) {
|
|
|
8692
8852
|
const args = {
|
|
8693
8853
|
org_id: _state.orgId,
|
|
8694
8854
|
project_name: project,
|
|
8855
|
+
project_id: projectId,
|
|
8695
8856
|
dataset_name: dataset,
|
|
8696
8857
|
description
|
|
8697
8858
|
};
|
|
@@ -8710,7 +8871,7 @@ function initDataset(project, options = {}) {
|
|
|
8710
8871
|
};
|
|
8711
8872
|
}
|
|
8712
8873
|
);
|
|
8713
|
-
return new Dataset(lazyMetadata, version);
|
|
8874
|
+
return new Dataset(lazyMetadata, version, legacy);
|
|
8714
8875
|
}
|
|
8715
8876
|
function withDataset(project, callback, options = {}) {
|
|
8716
8877
|
console.warn(
|
|
@@ -8780,15 +8941,30 @@ function initLogger(options = {}) {
|
|
|
8780
8941
|
return ret;
|
|
8781
8942
|
}
|
|
8782
8943
|
async function login(options = {}) {
|
|
8944
|
+
let { forceLogin = false } = options || {};
|
|
8945
|
+
if (_state.loggedIn && !forceLogin) {
|
|
8946
|
+
let checkUpdatedParam2 = function(varname, arg, orig) {
|
|
8947
|
+
if (!isEmpty(arg) && !isEmpty(orig) && arg !== orig) {
|
|
8948
|
+
throw new Error(
|
|
8949
|
+
`Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
|
|
8950
|
+
);
|
|
8951
|
+
}
|
|
8952
|
+
};
|
|
8953
|
+
var checkUpdatedParam = checkUpdatedParam2;
|
|
8954
|
+
checkUpdatedParam2("appUrl", options.appUrl, _state.appUrl);
|
|
8955
|
+
checkUpdatedParam2(
|
|
8956
|
+
"apiKey",
|
|
8957
|
+
options.apiKey ? HTTPConnection.sanitize_token(options.apiKey) : void 0,
|
|
8958
|
+
_state.loginToken
|
|
8959
|
+
);
|
|
8960
|
+
checkUpdatedParam2("orgName", options.orgName, _state.orgName);
|
|
8961
|
+
return;
|
|
8962
|
+
}
|
|
8783
8963
|
const {
|
|
8784
8964
|
appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
|
|
8785
8965
|
apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
|
|
8786
8966
|
orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
|
|
8787
8967
|
} = options || {};
|
|
8788
|
-
let { forceLogin = false } = options || {};
|
|
8789
|
-
if (_state.loggedIn && !forceLogin) {
|
|
8790
|
-
return;
|
|
8791
|
-
}
|
|
8792
8968
|
_state.resetLoginInfo();
|
|
8793
8969
|
_state.appUrl = appUrl;
|
|
8794
8970
|
let conn = null;
|
|
@@ -8997,11 +9173,12 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
|
8997
9173
|
return event;
|
|
8998
9174
|
}
|
|
8999
9175
|
var ObjectFetcher = class {
|
|
9000
|
-
constructor(objectType, pinnedVersion) {
|
|
9176
|
+
constructor(objectType, pinnedVersion, mutateRecord) {
|
|
9001
9177
|
this.objectType = objectType;
|
|
9002
9178
|
this.pinnedVersion = pinnedVersion;
|
|
9003
|
-
this.
|
|
9179
|
+
this.mutateRecord = mutateRecord;
|
|
9004
9180
|
}
|
|
9181
|
+
_fetchedData = void 0;
|
|
9005
9182
|
get id() {
|
|
9006
9183
|
throw new Error("ObjectFetcher subclasses must have an 'id' attribute");
|
|
9007
9184
|
}
|
|
@@ -9020,12 +9197,24 @@ var ObjectFetcher = class {
|
|
|
9020
9197
|
async fetchedData() {
|
|
9021
9198
|
if (this._fetchedData === void 0) {
|
|
9022
9199
|
const state = await this.getState();
|
|
9023
|
-
|
|
9024
|
-
|
|
9025
|
-
|
|
9026
|
-
|
|
9027
|
-
|
|
9028
|
-
|
|
9200
|
+
let data = void 0;
|
|
9201
|
+
try {
|
|
9202
|
+
const resp = await state.logConn().get(`object3/${this.objectType}`, {
|
|
9203
|
+
id: await this.id,
|
|
9204
|
+
fmt: "json2",
|
|
9205
|
+
version: this.pinnedVersion,
|
|
9206
|
+
api_version: "2"
|
|
9207
|
+
});
|
|
9208
|
+
data = await resp.json();
|
|
9209
|
+
} catch (e) {
|
|
9210
|
+
const resp = await state.logConn().get(`object/${this.objectType}`, {
|
|
9211
|
+
id: await this.id,
|
|
9212
|
+
fmt: "json2",
|
|
9213
|
+
version: this.pinnedVersion
|
|
9214
|
+
});
|
|
9215
|
+
data = await resp.json();
|
|
9216
|
+
}
|
|
9217
|
+
this._fetchedData = this.mutateRecord ? data?.map(this.mutateRecord) : data;
|
|
9029
9218
|
}
|
|
9030
9219
|
return this._fetchedData || [];
|
|
9031
9220
|
}
|
|
@@ -9049,10 +9238,14 @@ var ObjectFetcher = class {
|
|
|
9049
9238
|
}
|
|
9050
9239
|
};
|
|
9051
9240
|
var Experiment = class extends ObjectFetcher {
|
|
9241
|
+
lazyMetadata;
|
|
9242
|
+
dataset;
|
|
9243
|
+
bgLogger;
|
|
9244
|
+
lastStartTime;
|
|
9245
|
+
// For type identification.
|
|
9246
|
+
kind = "experiment";
|
|
9052
9247
|
constructor(lazyMetadata, dataset) {
|
|
9053
9248
|
super("experiment", void 0);
|
|
9054
|
-
// For type identification.
|
|
9055
|
-
this.kind = "experiment";
|
|
9056
9249
|
this.lazyMetadata = lazyMetadata;
|
|
9057
9250
|
this.dataset = dataset;
|
|
9058
9251
|
const logConn = new LazyValue(
|
|
@@ -9093,9 +9286,19 @@ var Experiment = class extends ObjectFetcher {
|
|
|
9093
9286
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
9094
9287
|
* @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
9095
9288
|
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
9289
|
+
* @param options Additional logging options
|
|
9290
|
+
* @param options.allowLogConcurrentWithActiveSpan in rare cases where you need to log at the top level separately from an active span on the experiment, set this to true.
|
|
9096
9291
|
* :returns: The `id` of the logged event.
|
|
9097
9292
|
*/
|
|
9098
|
-
log(event) {
|
|
9293
|
+
log(event, options) {
|
|
9294
|
+
if (!options?.allowLogConcurrentWithActiveSpan) {
|
|
9295
|
+
const checkCurrentSpan = currentSpan();
|
|
9296
|
+
if (checkCurrentSpan instanceof SpanImpl && checkCurrentSpan.parentObject === this) {
|
|
9297
|
+
throw new Error(
|
|
9298
|
+
"Cannot run toplevel Experiment.log method while there is an active span. To log to the span, use Span.log"
|
|
9299
|
+
);
|
|
9300
|
+
}
|
|
9301
|
+
}
|
|
9099
9302
|
event = validateAndSanitizeExperimentLogFullArgs(event, !!this.dataset);
|
|
9100
9303
|
const span = this.startSpan({ startTime: this.lastStartTime, event });
|
|
9101
9304
|
this.lastStartTime = span.end();
|
|
@@ -9137,6 +9340,7 @@ var Experiment = class extends ObjectFetcher {
|
|
|
9137
9340
|
startSpan(args) {
|
|
9138
9341
|
const { name, ...argsRest } = args ?? {};
|
|
9139
9342
|
return new SpanImpl({
|
|
9343
|
+
parentObject: this,
|
|
9140
9344
|
parentIds: new LazyValue(() => this.lazyParentIds()),
|
|
9141
9345
|
bgLogger: this.bgLogger,
|
|
9142
9346
|
name: name ?? "root",
|
|
@@ -9274,20 +9478,38 @@ var ReadonlyExperiment = class extends ObjectFetcher {
|
|
|
9274
9478
|
if (record.root_span_id !== record.span_id) {
|
|
9275
9479
|
continue;
|
|
9276
9480
|
}
|
|
9277
|
-
const { output, expected } = record;
|
|
9278
|
-
|
|
9279
|
-
|
|
9280
|
-
|
|
9281
|
-
|
|
9481
|
+
const { output, expected: expectedRecord } = record;
|
|
9482
|
+
const expected = expectedRecord ?? output;
|
|
9483
|
+
if (isEmpty(expected)) {
|
|
9484
|
+
yield {
|
|
9485
|
+
input: record.input
|
|
9486
|
+
};
|
|
9487
|
+
} else {
|
|
9488
|
+
yield {
|
|
9489
|
+
input: record.input,
|
|
9490
|
+
expected
|
|
9491
|
+
};
|
|
9492
|
+
}
|
|
9282
9493
|
}
|
|
9283
9494
|
}
|
|
9284
9495
|
};
|
|
9285
9496
|
var executionCounter = 0;
|
|
9286
9497
|
var SpanImpl = class _SpanImpl {
|
|
9498
|
+
bgLogger;
|
|
9499
|
+
// `internalData` contains fields that are not part of the "user-sanitized"
|
|
9500
|
+
// set of fields which we want to log in just one of the span rows.
|
|
9501
|
+
internalData;
|
|
9502
|
+
isMerge;
|
|
9503
|
+
loggedEndTime;
|
|
9504
|
+
// For internal use only.
|
|
9505
|
+
parentObject;
|
|
9506
|
+
// These fields are logged to every span row.
|
|
9507
|
+
parentIds;
|
|
9508
|
+
rowIds;
|
|
9509
|
+
kind = "span";
|
|
9287
9510
|
// root_experiment should only be specified for a root span. parent_span
|
|
9288
9511
|
// should only be specified for non-root spans.
|
|
9289
9512
|
constructor(args) {
|
|
9290
|
-
this.kind = "span";
|
|
9291
9513
|
this.loggedEndTime = void 0;
|
|
9292
9514
|
this.bgLogger = args.bgLogger;
|
|
9293
9515
|
const callerLocation = isomorph_default.getCallerLocation();
|
|
@@ -9315,6 +9537,7 @@ var SpanImpl = class _SpanImpl {
|
|
|
9315
9537
|
},
|
|
9316
9538
|
created: (/* @__PURE__ */ new Date()).toISOString()
|
|
9317
9539
|
};
|
|
9540
|
+
this.parentObject = args.parentObject;
|
|
9318
9541
|
this.parentIds = args.parentIds;
|
|
9319
9542
|
const id = args.event?.id ?? v4_default();
|
|
9320
9543
|
const span_id = v4_default();
|
|
@@ -9386,6 +9609,7 @@ var SpanImpl = class _SpanImpl {
|
|
|
9386
9609
|
}
|
|
9387
9610
|
startSpan(args) {
|
|
9388
9611
|
return new _SpanImpl({
|
|
9612
|
+
parentObject: this.parentObject,
|
|
9389
9613
|
parentIds: this.parentIds,
|
|
9390
9614
|
bgLogger: this.bgLogger,
|
|
9391
9615
|
parentSpanInfo: {
|
|
@@ -9411,8 +9635,20 @@ var SpanImpl = class _SpanImpl {
|
|
|
9411
9635
|
}
|
|
9412
9636
|
};
|
|
9413
9637
|
var Dataset = class extends ObjectFetcher {
|
|
9414
|
-
|
|
9415
|
-
|
|
9638
|
+
lazyMetadata;
|
|
9639
|
+
bgLogger;
|
|
9640
|
+
constructor(lazyMetadata, pinnedVersion, legacy) {
|
|
9641
|
+
const isLegacyDataset = legacy ?? DEFAULT_IS_LEGACY_DATASET;
|
|
9642
|
+
if (isLegacyDataset) {
|
|
9643
|
+
console.warn(
|
|
9644
|
+
`Records will be fetched from this dataset in the legacy format, with the "expected" field renamed to "output". Please update your code to use "expected", and use \`braintrust.initDataset()\` with \`{ useOutput: false }\`, which will become the default in a future version of Braintrust.`
|
|
9645
|
+
);
|
|
9646
|
+
}
|
|
9647
|
+
super(
|
|
9648
|
+
"dataset",
|
|
9649
|
+
pinnedVersion,
|
|
9650
|
+
(r) => ensureDatasetRecord(r, isLegacyDataset)
|
|
9651
|
+
);
|
|
9416
9652
|
this.lazyMetadata = lazyMetadata;
|
|
9417
9653
|
const logConn = new LazyValue(
|
|
9418
9654
|
() => this.getState().then((state) => state.logConn())
|
|
@@ -9444,19 +9680,21 @@ var Dataset = class extends ObjectFetcher {
|
|
|
9444
9680
|
*
|
|
9445
9681
|
* @param event The event to log.
|
|
9446
9682
|
* @param event.input The argument that uniquely define an input case (an arbitrary, JSON serializable object).
|
|
9447
|
-
* @param event.
|
|
9683
|
+
* @param event.expected The output of your application, including post-processing (an arbitrary, JSON serializable object).
|
|
9448
9684
|
* @param event.metadata (Optional) a dictionary with additional data about the test example, model outputs, or just
|
|
9449
9685
|
* about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
|
|
9450
9686
|
* `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
|
|
9451
9687
|
* JSON-serializable type, but its keys must be strings.
|
|
9452
9688
|
* @param event.id (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
|
|
9689
|
+
* @param event.output: (Deprecated) The output of your application. Use `expected` instead.
|
|
9453
9690
|
* @returns The `id` of the logged record.
|
|
9454
9691
|
*/
|
|
9455
9692
|
insert({
|
|
9456
9693
|
input,
|
|
9457
|
-
|
|
9694
|
+
expected,
|
|
9458
9695
|
metadata,
|
|
9459
|
-
id
|
|
9696
|
+
id,
|
|
9697
|
+
output
|
|
9460
9698
|
}) {
|
|
9461
9699
|
if (metadata !== void 0) {
|
|
9462
9700
|
for (const key of Object.keys(metadata)) {
|
|
@@ -9465,11 +9703,16 @@ var Dataset = class extends ObjectFetcher {
|
|
|
9465
9703
|
}
|
|
9466
9704
|
}
|
|
9467
9705
|
}
|
|
9706
|
+
if (expected && output) {
|
|
9707
|
+
throw new Error(
|
|
9708
|
+
"Only one of expected or output (deprecated) can be specified. Prefer expected."
|
|
9709
|
+
);
|
|
9710
|
+
}
|
|
9468
9711
|
const rowId = id || v4_default();
|
|
9469
9712
|
const args = new LazyValue(async () => ({
|
|
9470
9713
|
id: rowId,
|
|
9471
|
-
|
|
9472
|
-
output,
|
|
9714
|
+
input,
|
|
9715
|
+
expected: expected === void 0 ? output : expected,
|
|
9473
9716
|
project_id: (await this.project).id,
|
|
9474
9717
|
dataset_id: await this.id,
|
|
9475
9718
|
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -9540,7 +9783,7 @@ var Dataset = class extends ObjectFetcher {
|
|
|
9540
9783
|
|
|
9541
9784
|
// src/node.ts
|
|
9542
9785
|
function configureNode() {
|
|
9543
|
-
isomorph_default.
|
|
9786
|
+
isomorph_default.getRepoInfo = getRepoInfo;
|
|
9544
9787
|
isomorph_default.getPastNAncestors = getPastNAncestors;
|
|
9545
9788
|
isomorph_default.getEnv = (name) => process.env[name];
|
|
9546
9789
|
isomorph_default.getCallerLocation = getCallerLocation;
|
|
@@ -9565,8 +9808,9 @@ function fitNameToSpaces(name, length) {
|
|
|
9565
9808
|
return padded.substring(0, length - 3) + "...";
|
|
9566
9809
|
}
|
|
9567
9810
|
var BarProgressReporter = class {
|
|
9811
|
+
multiBar;
|
|
9812
|
+
bars = {};
|
|
9568
9813
|
constructor() {
|
|
9569
|
-
this.bars = {};
|
|
9570
9814
|
this.multiBar = new cliProgress.MultiBar(
|
|
9571
9815
|
{
|
|
9572
9816
|
clearOnComplete: false,
|
|
@@ -9602,7 +9846,7 @@ function makeEvalName(projectName, experimentName) {
|
|
|
9602
9846
|
}
|
|
9603
9847
|
return out;
|
|
9604
9848
|
}
|
|
9605
|
-
function
|
|
9849
|
+
function initExperiment2(projectName, options = {}) {
|
|
9606
9850
|
return init(projectName, {
|
|
9607
9851
|
...options,
|
|
9608
9852
|
setCurrent: false
|
|
@@ -9610,9 +9854,9 @@ function initExperiment(projectName, options = {}) {
|
|
|
9610
9854
|
}
|
|
9611
9855
|
globalThis._evals = {};
|
|
9612
9856
|
async function Eval(name, evaluator) {
|
|
9613
|
-
|
|
9857
|
+
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
9614
9858
|
if (_evals[evalName]) {
|
|
9615
|
-
|
|
9859
|
+
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
9616
9860
|
}
|
|
9617
9861
|
if (globalThis._lazy_load) {
|
|
9618
9862
|
_evals[evalName] = { evalName, projectName: name, ...evaluator };
|
|
@@ -9628,7 +9872,7 @@ async function Eval(name, evaluator) {
|
|
|
9628
9872
|
}
|
|
9629
9873
|
const progressReporter = new BarProgressReporter();
|
|
9630
9874
|
try {
|
|
9631
|
-
const experiment =
|
|
9875
|
+
const experiment = initExperiment2(name, {
|
|
9632
9876
|
experiment: evaluator.experimentName,
|
|
9633
9877
|
metadata: evaluator.metadata,
|
|
9634
9878
|
isPublic: evaluator.isPublic
|
|
@@ -9668,6 +9912,9 @@ function evaluateFilter(object, filter) {
|
|
|
9668
9912
|
}
|
|
9669
9913
|
return pattern.test(serializeJSONWithPlainString(key));
|
|
9670
9914
|
}
|
|
9915
|
+
function scorerName(scorer, scorer_idx) {
|
|
9916
|
+
return scorer.name || `scorer_${scorer_idx}`;
|
|
9917
|
+
}
|
|
9671
9918
|
async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
9672
9919
|
if (typeof evaluator.data === "string") {
|
|
9673
9920
|
throw new Error("Unimplemented: string data paths");
|
|
@@ -9690,7 +9937,7 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9690
9937
|
}
|
|
9691
9938
|
name = baseExperiment.name;
|
|
9692
9939
|
}
|
|
9693
|
-
dataResult =
|
|
9940
|
+
dataResult = initExperiment2(evaluator.projectName, {
|
|
9694
9941
|
experiment: name,
|
|
9695
9942
|
open: true
|
|
9696
9943
|
}).asDataset();
|
|
@@ -9711,11 +9958,13 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9711
9958
|
);
|
|
9712
9959
|
progressReporter.start(evaluator.evalName, data.length);
|
|
9713
9960
|
const evals = data.map(async (datum) => {
|
|
9714
|
-
let metadata = { ...datum.metadata };
|
|
9715
|
-
let output = void 0;
|
|
9716
|
-
let error2 = void 0;
|
|
9717
|
-
let scores = {};
|
|
9718
9961
|
const callback = async (rootSpan) => {
|
|
9962
|
+
let metadata = {
|
|
9963
|
+
..."metadata" in datum ? datum.metadata : {}
|
|
9964
|
+
};
|
|
9965
|
+
let output = void 0;
|
|
9966
|
+
let error2 = void 0;
|
|
9967
|
+
let scores = {};
|
|
9719
9968
|
try {
|
|
9720
9969
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
9721
9970
|
await rootSpan.traced(
|
|
@@ -9732,42 +9981,55 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9732
9981
|
);
|
|
9733
9982
|
rootSpan.log({ output });
|
|
9734
9983
|
const scoringArgs = { ...datum, metadata, output };
|
|
9984
|
+
const scorerNames = evaluator.scores.map(scorerName);
|
|
9735
9985
|
const scoreResults = await Promise.all(
|
|
9736
9986
|
evaluator.scores.map(async (score, score_idx) => {
|
|
9737
|
-
|
|
9738
|
-
|
|
9739
|
-
|
|
9740
|
-
|
|
9741
|
-
|
|
9742
|
-
|
|
9743
|
-
|
|
9744
|
-
|
|
9745
|
-
|
|
9746
|
-
|
|
9747
|
-
|
|
9748
|
-
|
|
9749
|
-
|
|
9750
|
-
|
|
9751
|
-
|
|
9752
|
-
{
|
|
9753
|
-
name: score.name || `scorer_${score_idx}`,
|
|
9754
|
-
spanAttributes: {
|
|
9755
|
-
type: SpanTypeAttribute.SCORE
|
|
9987
|
+
try {
|
|
9988
|
+
const result = await rootSpan.traced(
|
|
9989
|
+
async (span) => {
|
|
9990
|
+
const scoreResult = score(scoringArgs);
|
|
9991
|
+
const result2 = scoreResult instanceof Promise ? await scoreResult : scoreResult;
|
|
9992
|
+
const {
|
|
9993
|
+
metadata: resultMetadata,
|
|
9994
|
+
name: _,
|
|
9995
|
+
...resultRest
|
|
9996
|
+
} = result2;
|
|
9997
|
+
span.log({
|
|
9998
|
+
output: resultRest,
|
|
9999
|
+
metadata: resultMetadata
|
|
10000
|
+
});
|
|
10001
|
+
return result2;
|
|
9756
10002
|
},
|
|
9757
|
-
|
|
9758
|
-
|
|
9759
|
-
|
|
10003
|
+
{
|
|
10004
|
+
name: scorerNames[score_idx],
|
|
10005
|
+
spanAttributes: {
|
|
10006
|
+
type: SpanTypeAttribute.SCORE
|
|
10007
|
+
},
|
|
10008
|
+
event: { input: scoringArgs }
|
|
10009
|
+
}
|
|
10010
|
+
);
|
|
10011
|
+
return { kind: "score", value: result };
|
|
10012
|
+
} catch (e) {
|
|
10013
|
+
return { kind: "error", value: e };
|
|
10014
|
+
}
|
|
9760
10015
|
})
|
|
9761
10016
|
);
|
|
10017
|
+
const passingScorersAndResults = [];
|
|
10018
|
+
const failingScorersAndResults = [];
|
|
10019
|
+
scoreResults.forEach((result, i) => {
|
|
10020
|
+
const name = scorerNames[i];
|
|
10021
|
+
if (result.kind === "score") {
|
|
10022
|
+
passingScorersAndResults.push({ name, score: result.value });
|
|
10023
|
+
} else {
|
|
10024
|
+
failingScorersAndResults.push({ name, error: result.value });
|
|
10025
|
+
}
|
|
10026
|
+
});
|
|
9762
10027
|
const scoreMetadata = {};
|
|
9763
|
-
for (const scoreResult of
|
|
10028
|
+
for (const { score: scoreResult } of passingScorersAndResults) {
|
|
9764
10029
|
scores[scoreResult.name] = scoreResult.score;
|
|
9765
10030
|
const metadata2 = {
|
|
9766
10031
|
...scoreResult.metadata
|
|
9767
10032
|
};
|
|
9768
|
-
if (scoreResult.error !== void 0) {
|
|
9769
|
-
metadata2.error = scoreResult.error;
|
|
9770
|
-
}
|
|
9771
10033
|
if (Object.keys(metadata2).length > 0) {
|
|
9772
10034
|
scoreMetadata[scoreResult.name] = metadata2;
|
|
9773
10035
|
}
|
|
@@ -9776,6 +10038,21 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9776
10038
|
meta({ scores: scoreMetadata });
|
|
9777
10039
|
}
|
|
9778
10040
|
rootSpan.log({ scores, metadata });
|
|
10041
|
+
if (failingScorersAndResults.length) {
|
|
10042
|
+
const scorerErrors = Object.fromEntries(
|
|
10043
|
+
failingScorersAndResults.map(({ name, error: error3 }) => [
|
|
10044
|
+
name,
|
|
10045
|
+
error3 instanceof Error ? error3.stack : `${error3}`
|
|
10046
|
+
])
|
|
10047
|
+
);
|
|
10048
|
+
metadata["scorer_errors"] = scorerErrors;
|
|
10049
|
+
const names = Object.keys(scorerErrors).join(", ");
|
|
10050
|
+
const errors = failingScorersAndResults.map((item) => item.error);
|
|
10051
|
+
throw new AggregateError(
|
|
10052
|
+
errors,
|
|
10053
|
+
`Found exceptions for the following scorers: ${names}`
|
|
10054
|
+
);
|
|
10055
|
+
}
|
|
9779
10056
|
} catch (e) {
|
|
9780
10057
|
error2 = e;
|
|
9781
10058
|
} finally {
|
|
@@ -9798,7 +10075,7 @@ async function runEvaluator(experiment, evaluator, progressReporter, filters) {
|
|
|
9798
10075
|
},
|
|
9799
10076
|
event: {
|
|
9800
10077
|
input: datum.input,
|
|
9801
|
-
expected: datum.expected
|
|
10078
|
+
expected: "expected" in datum ? datum.expected : void 0
|
|
9802
10079
|
}
|
|
9803
10080
|
});
|
|
9804
10081
|
}
|
|
@@ -9854,7 +10131,8 @@ function reportEvaluatorResult(evaluatorName, evaluatorResult, {
|
|
|
9854
10131
|
if (!verbose && !jsonl) {
|
|
9855
10132
|
console.error(warning("Add --verbose to see full stack traces."));
|
|
9856
10133
|
}
|
|
9857
|
-
}
|
|
10134
|
+
}
|
|
10135
|
+
if (summary) {
|
|
9858
10136
|
console.log(jsonl ? JSON.stringify(summary) : summary);
|
|
9859
10137
|
} else {
|
|
9860
10138
|
const scoresByName = {};
|
|
@@ -10082,6 +10360,9 @@ function wrapEmbeddings(create) {
|
|
|
10082
10360
|
};
|
|
10083
10361
|
}
|
|
10084
10362
|
var WrapperStream = class {
|
|
10363
|
+
span;
|
|
10364
|
+
iter;
|
|
10365
|
+
startTime;
|
|
10085
10366
|
constructor(span, startTime, iter) {
|
|
10086
10367
|
this.span = span;
|
|
10087
10368
|
this.iter = iter;
|
|
@@ -10118,7 +10399,6 @@ configureNode();
|
|
|
10118
10399
|
// Annotate the CommonJS export names for ESM import in node:
|
|
10119
10400
|
0 && (module.exports = {
|
|
10120
10401
|
BaseExperiment,
|
|
10121
|
-
Dataset,
|
|
10122
10402
|
Eval,
|
|
10123
10403
|
Experiment,
|
|
10124
10404
|
Logger,
|
|
@@ -10134,6 +10414,7 @@ configureNode();
|
|
|
10134
10414
|
getSpanParentObject,
|
|
10135
10415
|
init,
|
|
10136
10416
|
initDataset,
|
|
10417
|
+
initExperiment,
|
|
10137
10418
|
initLogger,
|
|
10138
10419
|
log,
|
|
10139
10420
|
login,
|