braintrust 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/dist/index.d.mts +30 -25
- package/dev/dist/index.d.ts +30 -25
- package/dev/dist/index.js +336 -325
- package/dev/dist/index.mjs +259 -248
- package/dist/browser.d.mts +2313 -322
- package/dist/browser.d.ts +2313 -322
- package/dist/browser.js +2819 -198
- package/dist/browser.mjs +2681 -60
- package/dist/cli.js +358 -160
- package/dist/index.d.mts +254 -284
- package/dist/index.d.ts +254 -284
- package/dist/index.js +484 -470
- package/dist/index.mjs +335 -321
- package/package.json +6 -2
package/dev/dist/index.mjs
CHANGED
|
@@ -26,7 +26,9 @@ var iso = {
|
|
|
26
26
|
getCallerLocation: () => void 0,
|
|
27
27
|
newAsyncLocalStorage: () => new DefaultAsyncLocalStorage(),
|
|
28
28
|
processOn: (_0, _1) => {
|
|
29
|
-
}
|
|
29
|
+
},
|
|
30
|
+
basename: (filepath) => filepath.split(/[\\/]/).pop() || filepath,
|
|
31
|
+
writeln: (text) => console.log(text)
|
|
30
32
|
};
|
|
31
33
|
var isomorph_default = iso;
|
|
32
34
|
|
|
@@ -119,7 +121,7 @@ async function getPastNAncestors(n = 1e3, remote = void 0) {
|
|
|
119
121
|
return [];
|
|
120
122
|
}
|
|
121
123
|
const commits = await git.log({ from: ancestor, to: "HEAD", maxCount: n });
|
|
122
|
-
return commits.all.map((c) => c.hash);
|
|
124
|
+
return commits.all.slice(0, n).map((c) => c.hash);
|
|
123
125
|
}
|
|
124
126
|
async function attempt(fn) {
|
|
125
127
|
try {
|
|
@@ -962,11 +964,11 @@ function mergeDictsWithPaths({
|
|
|
962
964
|
function mergeDictsWithPathsHelper({
|
|
963
965
|
mergeInto,
|
|
964
966
|
mergeFrom,
|
|
965
|
-
path:
|
|
967
|
+
path: path2,
|
|
966
968
|
mergePaths
|
|
967
969
|
}) {
|
|
968
970
|
Object.entries(mergeFrom).forEach(([k, mergeFromV]) => {
|
|
969
|
-
const fullPath =
|
|
971
|
+
const fullPath = path2.concat([k]);
|
|
970
972
|
const fullPathSerialized = JSON.stringify(fullPath);
|
|
971
973
|
const mergeIntoV = recordFind(mergeInto, k);
|
|
972
974
|
if (isObject(mergeIntoV) && isObject(mergeFromV) && !mergePaths.has(fullPathSerialized)) {
|
|
@@ -997,9 +999,9 @@ function mapAt(m, k) {
|
|
|
997
999
|
function recordFind(m, k) {
|
|
998
1000
|
return m[k];
|
|
999
1001
|
}
|
|
1000
|
-
function getObjValueByPath(row,
|
|
1002
|
+
function getObjValueByPath(row, path2) {
|
|
1001
1003
|
let curr = row;
|
|
1002
|
-
for (const p of
|
|
1004
|
+
for (const p of path2) {
|
|
1003
1005
|
if (!isObjectOrArray(curr)) {
|
|
1004
1006
|
return null;
|
|
1005
1007
|
}
|
|
@@ -1274,6 +1276,93 @@ function _urljoin(...parts) {
|
|
|
1274
1276
|
(x, i) => x.replace(/^\//, "").replace(i < parts.length - 1 ? /\/$/ : "", "")
|
|
1275
1277
|
).filter((x) => x.trim() !== "").join("/");
|
|
1276
1278
|
}
|
|
1279
|
+
function slugify(text, options) {
|
|
1280
|
+
if (typeof text !== "string") {
|
|
1281
|
+
throw new Error("slugify: string argument expected");
|
|
1282
|
+
}
|
|
1283
|
+
const charMap = {
|
|
1284
|
+
// Currency and symbols
|
|
1285
|
+
$: "dollar",
|
|
1286
|
+
"%": "percent",
|
|
1287
|
+
"&": "and",
|
|
1288
|
+
// Latin characters
|
|
1289
|
+
\u00C0: "A",
|
|
1290
|
+
\u00C1: "A",
|
|
1291
|
+
\u00C2: "A",
|
|
1292
|
+
\u00C3: "A",
|
|
1293
|
+
\u00C4: "A",
|
|
1294
|
+
\u00C5: "A",
|
|
1295
|
+
\u00C6: "AE",
|
|
1296
|
+
\u00C7: "C",
|
|
1297
|
+
\u00C8: "E",
|
|
1298
|
+
\u00C9: "E",
|
|
1299
|
+
\u00CA: "E",
|
|
1300
|
+
\u00CB: "E",
|
|
1301
|
+
\u00CC: "I",
|
|
1302
|
+
\u00CD: "I",
|
|
1303
|
+
\u00CE: "I",
|
|
1304
|
+
\u00CF: "I",
|
|
1305
|
+
\u00D1: "N",
|
|
1306
|
+
\u00D2: "O",
|
|
1307
|
+
\u00D3: "O",
|
|
1308
|
+
\u00D4: "O",
|
|
1309
|
+
\u00D5: "O",
|
|
1310
|
+
\u00D6: "O",
|
|
1311
|
+
\u00D8: "O",
|
|
1312
|
+
\u00D9: "U",
|
|
1313
|
+
\u00DA: "U",
|
|
1314
|
+
\u00DB: "U",
|
|
1315
|
+
\u00DC: "U",
|
|
1316
|
+
\u00DD: "Y",
|
|
1317
|
+
\u00E0: "a",
|
|
1318
|
+
\u00E1: "a",
|
|
1319
|
+
\u00E2: "a",
|
|
1320
|
+
\u00E3: "a",
|
|
1321
|
+
\u00E4: "a",
|
|
1322
|
+
\u00E5: "a",
|
|
1323
|
+
\u00E6: "ae",
|
|
1324
|
+
\u00E7: "c",
|
|
1325
|
+
\u00E8: "e",
|
|
1326
|
+
\u00E9: "e",
|
|
1327
|
+
\u00EA: "e",
|
|
1328
|
+
\u00EB: "e",
|
|
1329
|
+
\u00EC: "i",
|
|
1330
|
+
\u00ED: "i",
|
|
1331
|
+
\u00EE: "i",
|
|
1332
|
+
\u00EF: "i",
|
|
1333
|
+
\u00F1: "n",
|
|
1334
|
+
\u00F2: "o",
|
|
1335
|
+
\u00F3: "o",
|
|
1336
|
+
\u00F4: "o",
|
|
1337
|
+
\u00F5: "o",
|
|
1338
|
+
\u00F6: "o",
|
|
1339
|
+
\u00F8: "o",
|
|
1340
|
+
\u00F9: "u",
|
|
1341
|
+
\u00FA: "u",
|
|
1342
|
+
\u00FB: "u",
|
|
1343
|
+
\u00FC: "u",
|
|
1344
|
+
\u00FD: "y",
|
|
1345
|
+
\u00FF: "y"
|
|
1346
|
+
};
|
|
1347
|
+
const replacement = "-";
|
|
1348
|
+
const trim = options?.trim !== false;
|
|
1349
|
+
let slug = text.normalize().split("").reduce((result, ch) => {
|
|
1350
|
+
const mapped = charMap[ch] || ch;
|
|
1351
|
+
const appendChar = mapped === replacement ? " " : mapped;
|
|
1352
|
+
return result + appendChar.replace(/[^\w\s$*_+~.()'"!\-:@]+/g, "");
|
|
1353
|
+
}, "");
|
|
1354
|
+
if (options?.strict) {
|
|
1355
|
+
slug = slug.replace(/[^A-Za-z0-9\s]/g, "");
|
|
1356
|
+
}
|
|
1357
|
+
if (trim) {
|
|
1358
|
+
slug = slug.trim();
|
|
1359
|
+
}
|
|
1360
|
+
slug = slug.replace(/\s+/g, replacement);
|
|
1361
|
+
if (options?.lower) {
|
|
1362
|
+
slug = slug.toLowerCase();
|
|
1363
|
+
}
|
|
1364
|
+
return slug;
|
|
1365
|
+
}
|
|
1277
1366
|
|
|
1278
1367
|
// util/span_identifier_v4.ts
|
|
1279
1368
|
import { z as z4 } from "zod/v3";
|
|
@@ -2925,9 +3014,9 @@ var BraintrustStream = class _BraintrustStream {
|
|
|
2925
3014
|
reader.releaseLock();
|
|
2926
3015
|
return { done: true, value: void 0 };
|
|
2927
3016
|
},
|
|
2928
|
-
async throw(
|
|
3017
|
+
async throw(error) {
|
|
2929
3018
|
reader.releaseLock();
|
|
2930
|
-
throw
|
|
3019
|
+
throw error;
|
|
2931
3020
|
}
|
|
2932
3021
|
};
|
|
2933
3022
|
}
|
|
@@ -3241,10 +3330,10 @@ var DiskCache = class {
|
|
|
3241
3330
|
return;
|
|
3242
3331
|
}
|
|
3243
3332
|
const stats = await Promise.all(
|
|
3244
|
-
paths.map(async (
|
|
3245
|
-
const stat2 = await isomorph_default.stat(
|
|
3333
|
+
paths.map(async (path2) => {
|
|
3334
|
+
const stat2 = await isomorph_default.stat(path2);
|
|
3246
3335
|
return {
|
|
3247
|
-
path:
|
|
3336
|
+
path: path2,
|
|
3248
3337
|
mtime: stat2.mtime.getTime()
|
|
3249
3338
|
};
|
|
3250
3339
|
})
|
|
@@ -3385,7 +3474,7 @@ function runCatchFinally(f, catchF, finallyF) {
|
|
|
3385
3474
|
function getCurrentUnixTimestamp() {
|
|
3386
3475
|
return (/* @__PURE__ */ new Date()).getTime() / 1e3;
|
|
3387
3476
|
}
|
|
3388
|
-
function
|
|
3477
|
+
function isEmpty2(a) {
|
|
3389
3478
|
return a === void 0 || a === null;
|
|
3390
3479
|
}
|
|
3391
3480
|
var LazyValue = class {
|
|
@@ -3510,8 +3599,8 @@ var MaskingError = class {
|
|
|
3510
3599
|
function applyMaskingToField(maskingFunction, data, fieldName) {
|
|
3511
3600
|
try {
|
|
3512
3601
|
return maskingFunction(data);
|
|
3513
|
-
} catch (
|
|
3514
|
-
const errorType =
|
|
3602
|
+
} catch (error) {
|
|
3603
|
+
const errorType = error instanceof Error ? error.constructor.name : "Error";
|
|
3515
3604
|
if (fieldName === "scores" || fieldName === "metrics") {
|
|
3516
3605
|
return new MaskingError(fieldName, errorType);
|
|
3517
3606
|
}
|
|
@@ -3798,7 +3887,7 @@ var BraintrustState = class _BraintrustState {
|
|
|
3798
3887
|
const newState = await loginToState({
|
|
3799
3888
|
...this.loginParams,
|
|
3800
3889
|
...Object.fromEntries(
|
|
3801
|
-
Object.entries(loginParams).filter(([k, v]) => !
|
|
3890
|
+
Object.entries(loginParams).filter(([k, v]) => !isEmpty2(v))
|
|
3802
3891
|
)
|
|
3803
3892
|
});
|
|
3804
3893
|
this.copyLoginInfo(newState);
|
|
@@ -3963,9 +4052,9 @@ var HTTPConnection = class _HTTPConnection {
|
|
|
3963
4052
|
this.headers["Authorization"] = `Bearer ${this.token}`;
|
|
3964
4053
|
}
|
|
3965
4054
|
}
|
|
3966
|
-
async get(
|
|
4055
|
+
async get(path2, params = void 0, config) {
|
|
3967
4056
|
const { headers, ...rest } = config || {};
|
|
3968
|
-
const url = new URL(_urljoin(this.base_url,
|
|
4057
|
+
const url = new URL(_urljoin(this.base_url, path2));
|
|
3969
4058
|
url.search = new URLSearchParams(
|
|
3970
4059
|
params ? Object.entries(params).filter(([_, v]) => v !== void 0).flatMap(
|
|
3971
4060
|
([k, v]) => v !== void 0 ? typeof v === "string" ? [[k, v]] : v.map((x) => [k, x]) : []
|
|
@@ -3986,13 +4075,13 @@ var HTTPConnection = class _HTTPConnection {
|
|
|
3986
4075
|
})
|
|
3987
4076
|
);
|
|
3988
4077
|
}
|
|
3989
|
-
async post(
|
|
4078
|
+
async post(path2, params, config) {
|
|
3990
4079
|
const { headers, ...rest } = config || {};
|
|
3991
4080
|
const this_fetch = this.fetch;
|
|
3992
4081
|
const this_base_url = this.base_url;
|
|
3993
4082
|
const this_headers = this.headers;
|
|
3994
4083
|
return await checkResponse(
|
|
3995
|
-
await this_fetch(_urljoin(this_base_url,
|
|
4084
|
+
await this_fetch(_urljoin(this_base_url, path2), {
|
|
3996
4085
|
method: "POST",
|
|
3997
4086
|
headers: {
|
|
3998
4087
|
Accept: "application/json",
|
|
@@ -4144,12 +4233,12 @@ var Attachment = class extends BaseAttachment {
|
|
|
4144
4233
|
signedUrl: z8.string().url(),
|
|
4145
4234
|
headers: z8.record(z8.string())
|
|
4146
4235
|
}).parse(await metadataResponse.json()));
|
|
4147
|
-
} catch (
|
|
4148
|
-
if (
|
|
4149
|
-
const errorStr = JSON.stringify(
|
|
4236
|
+
} catch (error) {
|
|
4237
|
+
if (error instanceof ZodError) {
|
|
4238
|
+
const errorStr = JSON.stringify(error.flatten());
|
|
4150
4239
|
throw new Error(`Invalid response from API server: ${errorStr}`);
|
|
4151
4240
|
}
|
|
4152
|
-
throw
|
|
4241
|
+
throw error;
|
|
4153
4242
|
}
|
|
4154
4243
|
addAzureBlobHeaders(headers, signedUrl);
|
|
4155
4244
|
let objectStoreResponse;
|
|
@@ -4161,13 +4250,13 @@ var Attachment = class extends BaseAttachment {
|
|
|
4161
4250
|
body: data
|
|
4162
4251
|
})
|
|
4163
4252
|
);
|
|
4164
|
-
} catch (
|
|
4165
|
-
if (
|
|
4253
|
+
} catch (error) {
|
|
4254
|
+
if (error instanceof FailedHTTPResponse) {
|
|
4166
4255
|
throw new Error(
|
|
4167
|
-
`Failed to upload attachment to object store: ${
|
|
4256
|
+
`Failed to upload attachment to object store: ${error.status} ${error.text} ${error.data}`
|
|
4168
4257
|
);
|
|
4169
4258
|
}
|
|
4170
|
-
throw
|
|
4259
|
+
throw error;
|
|
4171
4260
|
}
|
|
4172
4261
|
return { signedUrl, metadataResponse, objectStoreResponse };
|
|
4173
4262
|
};
|
|
@@ -4179,9 +4268,9 @@ var Attachment = class extends BaseAttachment {
|
|
|
4179
4268
|
const orgId = state.orgId ?? "";
|
|
4180
4269
|
try {
|
|
4181
4270
|
await doUpload(conn, orgId);
|
|
4182
|
-
} catch (
|
|
4271
|
+
} catch (error) {
|
|
4183
4272
|
status.upload_status = "error";
|
|
4184
|
-
status.error_message =
|
|
4273
|
+
status.error_message = error instanceof Error ? error.message : JSON.stringify(error);
|
|
4185
4274
|
}
|
|
4186
4275
|
const requestParams = {
|
|
4187
4276
|
key: this.reference.key,
|
|
@@ -4313,8 +4402,8 @@ var ReadonlyAttachment = class {
|
|
|
4313
4402
|
}
|
|
4314
4403
|
const objResponse = await fetch(downloadUrl);
|
|
4315
4404
|
if (objResponse.status !== 200) {
|
|
4316
|
-
const
|
|
4317
|
-
throw new Error(`Couldn't download attachment: ${
|
|
4405
|
+
const error = await objResponse.text();
|
|
4406
|
+
throw new Error(`Couldn't download attachment: ${error}`);
|
|
4318
4407
|
}
|
|
4319
4408
|
return await objResponse.blob();
|
|
4320
4409
|
};
|
|
@@ -4334,7 +4423,7 @@ function logFeedbackImpl(state, parentObjectType, parentObjectId, {
|
|
|
4334
4423
|
if (!VALID_SOURCES.includes(source)) {
|
|
4335
4424
|
throw new Error(`source must be one of ${VALID_SOURCES}`);
|
|
4336
4425
|
}
|
|
4337
|
-
if (
|
|
4426
|
+
if (isEmpty2(scores) && isEmpty2(expected) && isEmpty2(tags) && isEmpty2(comment)) {
|
|
4338
4427
|
throw new Error(
|
|
4339
4428
|
"At least one of scores, expected, tags, or comment must be specified"
|
|
4340
4429
|
);
|
|
@@ -4347,7 +4436,7 @@ function logFeedbackImpl(state, parentObjectType, parentObjectId, {
|
|
|
4347
4436
|
});
|
|
4348
4437
|
let { metadata, ...updateEvent } = deepCopyEvent(validatedEvent);
|
|
4349
4438
|
updateEvent = Object.fromEntries(
|
|
4350
|
-
Object.entries(updateEvent).filter(([_, v]) => !
|
|
4439
|
+
Object.entries(updateEvent).filter(([_, v]) => !isEmpty2(v))
|
|
4351
4440
|
);
|
|
4352
4441
|
const parentIds = async () => new SpanComponentsV3({
|
|
4353
4442
|
object_type: parentObjectType,
|
|
@@ -4366,7 +4455,7 @@ function logFeedbackImpl(state, parentObjectType, parentObjectId, {
|
|
|
4366
4455
|
});
|
|
4367
4456
|
state.bgLogger().log([record]);
|
|
4368
4457
|
}
|
|
4369
|
-
if (!
|
|
4458
|
+
if (!isEmpty2(comment)) {
|
|
4370
4459
|
const record = new LazyValue(async () => {
|
|
4371
4460
|
return {
|
|
4372
4461
|
id: uuidv42(),
|
|
@@ -4931,8 +5020,8 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
4931
5020
|
if (result.upload_status === "error") {
|
|
4932
5021
|
throw new Error(result.error_message);
|
|
4933
5022
|
}
|
|
4934
|
-
} catch (
|
|
4935
|
-
attachmentErrors.push(
|
|
5023
|
+
} catch (error) {
|
|
5024
|
+
attachmentErrors.push(error);
|
|
4936
5025
|
}
|
|
4937
5026
|
}
|
|
4938
5027
|
if (attachmentErrors.length === 1) {
|
|
@@ -5015,22 +5104,22 @@ var HTTPBackgroundLogger = class _HTTPBackgroundLogger {
|
|
|
5015
5104
|
}
|
|
5016
5105
|
for (let i = 0; i < this.numTries; i++) {
|
|
5017
5106
|
const startTime = now();
|
|
5018
|
-
let
|
|
5107
|
+
let error = void 0;
|
|
5019
5108
|
try {
|
|
5020
5109
|
await conn.post_json("logs3", dataStr);
|
|
5021
5110
|
} catch (e) {
|
|
5022
|
-
|
|
5111
|
+
error = e;
|
|
5023
5112
|
}
|
|
5024
|
-
if (
|
|
5113
|
+
if (error === void 0) {
|
|
5025
5114
|
return;
|
|
5026
5115
|
}
|
|
5027
5116
|
const isRetrying = i + 1 < this.numTries;
|
|
5028
5117
|
const retryingText = isRetrying ? "" : " Retrying";
|
|
5029
5118
|
const errorText = (() => {
|
|
5030
|
-
if (
|
|
5031
|
-
return `${
|
|
5119
|
+
if (error instanceof FailedHTTPResponse) {
|
|
5120
|
+
return `${error.status} (${error.text}): ${error.data}`;
|
|
5032
5121
|
} else {
|
|
5033
|
-
return `${
|
|
5122
|
+
return `${error}`;
|
|
5034
5123
|
}
|
|
5035
5124
|
})();
|
|
5036
5125
|
const errMsg = `log request failed. Elapsed time: ${(now() - startTime) / 1e3} seconds. Payload size: ${dataStr.length}.${retryingText}
|
|
@@ -5206,7 +5295,7 @@ function init(projectOrOptions, optionalOptions) {
|
|
|
5206
5295
|
const state = stateArg ?? _globalState;
|
|
5207
5296
|
state.enforceQueueSizeLimit(false);
|
|
5208
5297
|
if (open) {
|
|
5209
|
-
if (
|
|
5298
|
+
if (isEmpty2(experiment)) {
|
|
5210
5299
|
throw new Error(`Cannot open an experiment without specifying its name`);
|
|
5211
5300
|
}
|
|
5212
5301
|
const lazyMetadata2 = new LazyValue(
|
|
@@ -5409,7 +5498,7 @@ async function computeLoggerMetadata(state, {
|
|
|
5409
5498
|
}) {
|
|
5410
5499
|
await state.login({});
|
|
5411
5500
|
const org_id = state.orgId;
|
|
5412
|
-
if (
|
|
5501
|
+
if (isEmpty2(project_id)) {
|
|
5413
5502
|
const response = await state.appConn().post_json("api/project/register", {
|
|
5414
5503
|
project_name: project_name || GLOBAL_PROJECT,
|
|
5415
5504
|
org_id
|
|
@@ -5422,7 +5511,7 @@ async function computeLoggerMetadata(state, {
|
|
|
5422
5511
|
fullInfo: response.project
|
|
5423
5512
|
}
|
|
5424
5513
|
};
|
|
5425
|
-
} else if (
|
|
5514
|
+
} else if (isEmpty2(project_name)) {
|
|
5426
5515
|
const response = await state.appConn().get_json("api/project", {
|
|
5427
5516
|
id: project_id
|
|
5428
5517
|
});
|
|
@@ -5445,7 +5534,7 @@ async function login(options = {}) {
|
|
|
5445
5534
|
const { forceLogin = false } = options || {};
|
|
5446
5535
|
if (_globalState.loggedIn && !forceLogin) {
|
|
5447
5536
|
let checkUpdatedParam2 = function(varname, arg, orig) {
|
|
5448
|
-
if (!
|
|
5537
|
+
if (!isEmpty2(arg) && !isEmpty2(orig) && arg !== orig) {
|
|
5449
5538
|
throw new Error(
|
|
5450
5539
|
`Re-logging in with different ${varname} (${arg}) than original (${orig}). To force re-login, pass \`forceLogin: true\``
|
|
5451
5540
|
);
|
|
@@ -5563,14 +5652,14 @@ function getSpanParentObject(options) {
|
|
|
5563
5652
|
}
|
|
5564
5653
|
return NOOP_SPAN;
|
|
5565
5654
|
}
|
|
5566
|
-
function logError(span,
|
|
5655
|
+
function logError(span, error) {
|
|
5567
5656
|
let errorMessage = "<error>";
|
|
5568
5657
|
let stackTrace = "";
|
|
5569
|
-
if (
|
|
5570
|
-
errorMessage =
|
|
5571
|
-
stackTrace =
|
|
5658
|
+
if (error instanceof Error) {
|
|
5659
|
+
errorMessage = error.message;
|
|
5660
|
+
stackTrace = error.stack || "";
|
|
5572
5661
|
} else {
|
|
5573
|
-
errorMessage = String(
|
|
5662
|
+
errorMessage = String(error);
|
|
5574
5663
|
}
|
|
5575
5664
|
span.log({ error: `${errorMessage}
|
|
5576
5665
|
|
|
@@ -5832,15 +5921,15 @@ async function resolveAttachmentsToBase64(event, state) {
|
|
|
5832
5921
|
return event;
|
|
5833
5922
|
}
|
|
5834
5923
|
function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
5835
|
-
if ("input" in event && !
|
|
5924
|
+
if ("input" in event && !isEmpty2(event.input) && "inputs" in event && !isEmpty2(event.inputs) || !("input" in event) && !("inputs" in event)) {
|
|
5836
5925
|
throw new Error(
|
|
5837
5926
|
"Exactly one of input or inputs (deprecated) must be specified. Prefer input."
|
|
5838
5927
|
);
|
|
5839
5928
|
}
|
|
5840
|
-
if (
|
|
5929
|
+
if (isEmpty2(event.output)) {
|
|
5841
5930
|
throw new Error("output must be specified");
|
|
5842
5931
|
}
|
|
5843
|
-
if (
|
|
5932
|
+
if (isEmpty2(event.scores)) {
|
|
5844
5933
|
throw new Error("scores must be specified");
|
|
5845
5934
|
}
|
|
5846
5935
|
if (hasDataset && event.datasetRecordId === void 0) {
|
|
@@ -6900,7 +6989,7 @@ function renderMessage(render, message) {
|
|
|
6900
6989
|
return {
|
|
6901
6990
|
...message,
|
|
6902
6991
|
..."content" in message ? {
|
|
6903
|
-
content:
|
|
6992
|
+
content: isEmpty2(message.content) ? void 0 : typeof message.content === "string" ? render(message.content) : message.content.map((c) => {
|
|
6904
6993
|
switch (c.type) {
|
|
6905
6994
|
case "text":
|
|
6906
6995
|
return { ...c, text: render(c.text) };
|
|
@@ -6937,7 +7026,7 @@ function renderMessage(render, message) {
|
|
|
6937
7026
|
})
|
|
6938
7027
|
} : {},
|
|
6939
7028
|
..."tool_calls" in message ? {
|
|
6940
|
-
tool_calls:
|
|
7029
|
+
tool_calls: isEmpty2(message.tool_calls) ? void 0 : message.tool_calls.map((t) => {
|
|
6941
7030
|
return {
|
|
6942
7031
|
type: t.type,
|
|
6943
7032
|
id: render(t.id),
|
|
@@ -7086,11 +7175,11 @@ var Prompt2 = class _Prompt {
|
|
|
7086
7175
|
([k, _v]) => !BRAINTRUST_PARAMS.includes(k)
|
|
7087
7176
|
)
|
|
7088
7177
|
),
|
|
7089
|
-
...!
|
|
7178
|
+
...!isEmpty2(this.options.model) ? {
|
|
7090
7179
|
model: this.options.model
|
|
7091
7180
|
} : {}
|
|
7092
7181
|
};
|
|
7093
|
-
if (!("model" in params) ||
|
|
7182
|
+
if (!("model" in params) || isEmpty2(params.model)) {
|
|
7094
7183
|
throw new Error(
|
|
7095
7184
|
"No model specified. Either specify it in the prompt or as a default"
|
|
7096
7185
|
);
|
|
@@ -7254,6 +7343,8 @@ function configureNode() {
|
|
|
7254
7343
|
isomorph_default.processOn = (event, handler) => {
|
|
7255
7344
|
process.on(event, handler);
|
|
7256
7345
|
};
|
|
7346
|
+
isomorph_default.basename = path.basename;
|
|
7347
|
+
isomorph_default.writeln = (text) => process.stdout.write(text + "\n");
|
|
7257
7348
|
isomorph_default.pathJoin = path.join;
|
|
7258
7349
|
isomorph_default.pathDirname = path.dirname;
|
|
7259
7350
|
isomorph_default.mkdir = fs.mkdir;
|
|
@@ -7331,9 +7422,9 @@ function handlePromise(promise, callback) {
|
|
|
7331
7422
|
invokeCallback(callback, err && (err instanceof Error || err.message) ? err : new Error(err));
|
|
7332
7423
|
});
|
|
7333
7424
|
}
|
|
7334
|
-
function invokeCallback(callback,
|
|
7425
|
+
function invokeCallback(callback, error, value) {
|
|
7335
7426
|
try {
|
|
7336
|
-
callback(
|
|
7427
|
+
callback(error, value);
|
|
7337
7428
|
} catch (err) {
|
|
7338
7429
|
setImmediate$1((e) => {
|
|
7339
7430
|
throw e;
|
|
@@ -8255,7 +8346,7 @@ function sortBy(coll, iteratee, callback) {
|
|
|
8255
8346
|
}
|
|
8256
8347
|
var sortBy$1 = awaitify(sortBy, 3);
|
|
8257
8348
|
function tryEach(tasks, callback) {
|
|
8258
|
-
var
|
|
8349
|
+
var error = null;
|
|
8259
8350
|
var result;
|
|
8260
8351
|
return eachSeries$1(tasks, (task, taskCb) => {
|
|
8261
8352
|
wrapAsync(task)((err, ...args) => {
|
|
@@ -8265,10 +8356,10 @@ function tryEach(tasks, callback) {
|
|
|
8265
8356
|
} else {
|
|
8266
8357
|
result = args;
|
|
8267
8358
|
}
|
|
8268
|
-
|
|
8359
|
+
error = err;
|
|
8269
8360
|
taskCb(err ? null : {});
|
|
8270
8361
|
});
|
|
8271
|
-
}, () => callback(
|
|
8362
|
+
}, () => callback(error, result));
|
|
8272
8363
|
}
|
|
8273
8364
|
var tryEach$1 = awaitify(tryEach);
|
|
8274
8365
|
function whilst(test, iteratee, callback) {
|
|
@@ -8311,53 +8402,16 @@ function waterfall(tasks, callback) {
|
|
|
8311
8402
|
}
|
|
8312
8403
|
var waterfall$1 = awaitify(waterfall);
|
|
8313
8404
|
|
|
8314
|
-
// src/
|
|
8315
|
-
|
|
8316
|
-
|
|
8317
|
-
|
|
8318
|
-
import pluralize from "pluralize";
|
|
8319
|
-
import Table from "cli-table3";
|
|
8320
|
-
|
|
8321
|
-
// src/progress.ts
|
|
8322
|
-
import chalk from "chalk";
|
|
8323
|
-
import * as cliProgress from "cli-progress";
|
|
8324
|
-
var MAX_NAME_LENGTH = 40;
|
|
8325
|
-
function fitNameToSpaces(name, length) {
|
|
8326
|
-
const padded = name.padEnd(length);
|
|
8327
|
-
if (padded.length <= length) {
|
|
8328
|
-
return padded;
|
|
8329
|
-
}
|
|
8330
|
-
return padded.substring(0, length - 3) + "...";
|
|
8331
|
-
}
|
|
8332
|
-
var BarProgressReporter = class {
|
|
8333
|
-
multiBar;
|
|
8334
|
-
bars = {};
|
|
8335
|
-
constructor() {
|
|
8336
|
-
this.multiBar = new cliProgress.MultiBar(
|
|
8337
|
-
{
|
|
8338
|
-
// clearOnComplete: true,
|
|
8339
|
-
format: `${chalk.blueBright("{bar}")} ${chalk.blue("{evaluator}")} {percentage}% ${chalk.gray("{value}/{total} {eta_formatted}")}`,
|
|
8340
|
-
// autopadding: true,
|
|
8341
|
-
hideCursor: true,
|
|
8342
|
-
barsize: 10
|
|
8343
|
-
},
|
|
8344
|
-
cliProgress.Presets.shades_grey
|
|
8345
|
-
);
|
|
8346
|
-
}
|
|
8347
|
-
start(name, total) {
|
|
8348
|
-
const bar = this.multiBar.create(total, 0);
|
|
8349
|
-
this.bars[name] = bar;
|
|
8405
|
+
// src/reporters/progress.ts
|
|
8406
|
+
var SimpleProgressReporter = class {
|
|
8407
|
+
start(name, _total) {
|
|
8408
|
+
console.log(`Running evaluator ${name}`);
|
|
8350
8409
|
}
|
|
8351
8410
|
stop() {
|
|
8352
|
-
this.multiBar.stop();
|
|
8353
8411
|
}
|
|
8354
|
-
increment(
|
|
8355
|
-
this.bars[name].increment({
|
|
8356
|
-
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
8357
|
-
});
|
|
8412
|
+
increment(_name) {
|
|
8358
8413
|
}
|
|
8359
|
-
setTotal(
|
|
8360
|
-
this.bars[name].setTotal(total);
|
|
8414
|
+
setTotal(_name, _total) {
|
|
8361
8415
|
}
|
|
8362
8416
|
};
|
|
8363
8417
|
|
|
@@ -8365,9 +8419,8 @@ var BarProgressReporter = class {
|
|
|
8365
8419
|
import { z as z10 } from "zod/v3";
|
|
8366
8420
|
|
|
8367
8421
|
// src/framework2.ts
|
|
8368
|
-
import path2 from "path";
|
|
8369
|
-
import slugifyLib from "slugify";
|
|
8370
8422
|
import { z as z9 } from "zod/v3";
|
|
8423
|
+
var currentFilename = typeof __filename !== "undefined" ? __filename : "unknown";
|
|
8371
8424
|
var ProjectBuilder = class {
|
|
8372
8425
|
create(opts) {
|
|
8373
8426
|
return new Project2(opts);
|
|
@@ -8439,12 +8492,12 @@ var ToolBuilder = class {
|
|
|
8439
8492
|
const { handler, name, slug, parameters, returns, ...rest } = opts;
|
|
8440
8493
|
let resolvedName = name ?? handler.name;
|
|
8441
8494
|
if (resolvedName.trim().length === 0) {
|
|
8442
|
-
resolvedName = `Tool ${
|
|
8495
|
+
resolvedName = `Tool ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
8443
8496
|
}
|
|
8444
8497
|
const tool = new CodeFunction(this.project, {
|
|
8445
8498
|
handler,
|
|
8446
8499
|
name: resolvedName,
|
|
8447
|
-
slug: slug ??
|
|
8500
|
+
slug: slug ?? slugify(resolvedName, { lower: true, strict: true }),
|
|
8448
8501
|
type: "tool",
|
|
8449
8502
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/consistent-type-assertions
|
|
8450
8503
|
parameters,
|
|
@@ -8468,9 +8521,9 @@ var ScorerBuilder = class {
|
|
|
8468
8521
|
resolvedName = opts.handler.name;
|
|
8469
8522
|
}
|
|
8470
8523
|
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
8471
|
-
resolvedName = `Scorer ${
|
|
8524
|
+
resolvedName = `Scorer ${isomorph_default.basename(currentFilename)} ${this.taskCounter}`;
|
|
8472
8525
|
}
|
|
8473
|
-
const slug = opts.slug ??
|
|
8526
|
+
const slug = opts.slug ?? slugify(resolvedName, { lower: true, strict: true });
|
|
8474
8527
|
if ("handler" in opts) {
|
|
8475
8528
|
const scorer = new CodeFunction(this.project, {
|
|
8476
8529
|
...opts,
|
|
@@ -8641,7 +8694,7 @@ var PromptBuilder = class {
|
|
|
8641
8694
|
rawTools.push(tool);
|
|
8642
8695
|
}
|
|
8643
8696
|
}
|
|
8644
|
-
const slug = opts.slug ??
|
|
8697
|
+
const slug = opts.slug ?? slugify(opts.name, { lower: true, strict: true });
|
|
8645
8698
|
const promptData = promptDefinitionToPromptData(opts, rawTools);
|
|
8646
8699
|
const promptRow = {
|
|
8647
8700
|
id: opts.id,
|
|
@@ -8766,8 +8819,11 @@ var EvalResultWithSummary = class {
|
|
|
8766
8819
|
this.summary = summary;
|
|
8767
8820
|
this.results = results;
|
|
8768
8821
|
}
|
|
8822
|
+
/**
|
|
8823
|
+
* @deprecated Use `summary` instead.
|
|
8824
|
+
*/
|
|
8769
8825
|
toString() {
|
|
8770
|
-
return
|
|
8826
|
+
return JSON.stringify(this.summary);
|
|
8771
8827
|
}
|
|
8772
8828
|
[Symbol.for("nodejs.util.inspect.custom")]() {
|
|
8773
8829
|
return `EvalResultWithSummary(summary="...", results=[...])`;
|
|
@@ -8820,7 +8876,7 @@ function _initializeSpanContext() {
|
|
|
8820
8876
|
globalThis._spanContext = { currentSpan, withCurrent, startSpan, NOOP_SPAN };
|
|
8821
8877
|
}
|
|
8822
8878
|
async function Eval(name, evaluator, reporterOrOpts) {
|
|
8823
|
-
const options =
|
|
8879
|
+
const options = isEmpty2(reporterOrOpts) ? {} : typeof reporterOrOpts === "string" ? { reporter: reporterOrOpts } : "name" in reporterOrOpts ? { reporter: reporterOrOpts } : reporterOrOpts;
|
|
8824
8880
|
let evalName = makeEvalName(name, evaluator.experimentName);
|
|
8825
8881
|
if (globalThis._evals.evaluators[evalName]) {
|
|
8826
8882
|
evalName = `${evalName}_${Object.keys(_evals).length}`;
|
|
@@ -8846,7 +8902,7 @@ async function Eval(name, evaluator, reporterOrOpts) {
|
|
|
8846
8902
|
[]
|
|
8847
8903
|
);
|
|
8848
8904
|
}
|
|
8849
|
-
const progressReporter = options.progress ?? new
|
|
8905
|
+
const progressReporter = options.progress ?? new SimpleProgressReporter();
|
|
8850
8906
|
const shouldCollectResults = options.returnResults ?? true;
|
|
8851
8907
|
if (typeof options.reporter === "string") {
|
|
8852
8908
|
throw new Error(
|
|
@@ -8936,8 +8992,8 @@ function serializeJSONWithPlainString(v) {
|
|
|
8936
8992
|
}
|
|
8937
8993
|
}
|
|
8938
8994
|
function evaluateFilter(object, filter2) {
|
|
8939
|
-
const { path:
|
|
8940
|
-
const key =
|
|
8995
|
+
const { path: path2, pattern } = filter2;
|
|
8996
|
+
const key = path2.reduce(
|
|
8941
8997
|
(acc, p) => typeof acc === "object" && acc !== null ? (
|
|
8942
8998
|
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
|
|
8943
8999
|
acc[p]
|
|
@@ -8979,7 +9035,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
8979
9035
|
);
|
|
8980
9036
|
}
|
|
8981
9037
|
let name = dataResult.name;
|
|
8982
|
-
if (
|
|
9038
|
+
if (isEmpty2(name)) {
|
|
8983
9039
|
const baseExperiment = await experiment.fetchBaseExperiment();
|
|
8984
9040
|
if (!baseExperiment) {
|
|
8985
9041
|
throw new Error("BaseExperiment() failed to fetch base experiment");
|
|
@@ -9048,7 +9104,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9048
9104
|
};
|
|
9049
9105
|
const expected = "expected" in datum ? datum.expected : void 0;
|
|
9050
9106
|
let output = void 0;
|
|
9051
|
-
let
|
|
9107
|
+
let error = void 0;
|
|
9052
9108
|
let tags = [...datum.tags ?? []];
|
|
9053
9109
|
const scores = {};
|
|
9054
9110
|
const scorerNames = evaluator.scores.map(scorerName);
|
|
@@ -9112,7 +9168,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9112
9168
|
}
|
|
9113
9169
|
if (Array.isArray(scoreValue)) {
|
|
9114
9170
|
for (const s of scoreValue) {
|
|
9115
|
-
if (!(typeof s === "object" && !
|
|
9171
|
+
if (!(typeof s === "object" && !isEmpty2(s))) {
|
|
9116
9172
|
throw new Error(
|
|
9117
9173
|
`When returning an array of scores, each score must be a non-empty object. Got: ${JSON.stringify(
|
|
9118
9174
|
s
|
|
@@ -9121,7 +9177,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9121
9177
|
}
|
|
9122
9178
|
}
|
|
9123
9179
|
}
|
|
9124
|
-
const results2 = Array.isArray(scoreValue) ? scoreValue : typeof scoreValue === "object" && !
|
|
9180
|
+
const results2 = Array.isArray(scoreValue) ? scoreValue : typeof scoreValue === "object" && !isEmpty2(scoreValue) ? [scoreValue] : [
|
|
9125
9181
|
{
|
|
9126
9182
|
name: scorerNames[score_idx],
|
|
9127
9183
|
score: scoreValue
|
|
@@ -9179,9 +9235,9 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9179
9235
|
unhandledScores = null;
|
|
9180
9236
|
if (failingScorersAndResults.length) {
|
|
9181
9237
|
const scorerErrors = Object.fromEntries(
|
|
9182
|
-
failingScorersAndResults.map(({ name, error:
|
|
9238
|
+
failingScorersAndResults.map(({ name, error: error2 }) => [
|
|
9183
9239
|
name,
|
|
9184
|
-
|
|
9240
|
+
error2 instanceof Error ? error2.stack : `${error2}`
|
|
9185
9241
|
])
|
|
9186
9242
|
);
|
|
9187
9243
|
metadata["scorer_errors"] = scorerErrors;
|
|
@@ -9198,7 +9254,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9198
9254
|
}
|
|
9199
9255
|
} catch (e) {
|
|
9200
9256
|
logError(rootSpan, e);
|
|
9201
|
-
|
|
9257
|
+
error = e;
|
|
9202
9258
|
} finally {
|
|
9203
9259
|
progressReporter.increment(evaluator.evalName);
|
|
9204
9260
|
}
|
|
@@ -9221,7 +9277,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9221
9277
|
tags: tags.length ? tags : void 0,
|
|
9222
9278
|
metadata,
|
|
9223
9279
|
scores: mergedScores,
|
|
9224
|
-
error
|
|
9280
|
+
error,
|
|
9225
9281
|
origin: baseEvent.event?.origin
|
|
9226
9282
|
});
|
|
9227
9283
|
}
|
|
@@ -9268,7 +9324,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9268
9324
|
}
|
|
9269
9325
|
let timeoutId;
|
|
9270
9326
|
let abortHandler;
|
|
9271
|
-
const rejectOnce = (
|
|
9327
|
+
const rejectOnce = (error) => {
|
|
9272
9328
|
if (cancelled) {
|
|
9273
9329
|
return;
|
|
9274
9330
|
}
|
|
@@ -9280,7 +9336,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9280
9336
|
if (abortHandler && evaluator.signal) {
|
|
9281
9337
|
evaluator.signal.removeEventListener("abort", abortHandler);
|
|
9282
9338
|
}
|
|
9283
|
-
reject2(
|
|
9339
|
+
reject2(error);
|
|
9284
9340
|
};
|
|
9285
9341
|
if (evaluator.timeout) {
|
|
9286
9342
|
timeoutId = setTimeout(() => {
|
|
@@ -9307,7 +9363,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9307
9363
|
} catch (e) {
|
|
9308
9364
|
q.kill();
|
|
9309
9365
|
if (e instanceof InternalAbortError) {
|
|
9310
|
-
if (
|
|
9366
|
+
if (isomorph_default.getEnv("BRAINTRUST_VERBOSE")) {
|
|
9311
9367
|
console.warn("Evaluator cancelled:", e.message);
|
|
9312
9368
|
}
|
|
9313
9369
|
}
|
|
@@ -9327,8 +9383,7 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
9327
9383
|
collectResults ? collectedResults : []
|
|
9328
9384
|
);
|
|
9329
9385
|
}
|
|
9330
|
-
var
|
|
9331
|
-
var warning = chalk2.yellow;
|
|
9386
|
+
var warning = (text) => `Warning: ${text}`;
|
|
9332
9387
|
function logError2(e, verbose) {
|
|
9333
9388
|
if (!verbose) {
|
|
9334
9389
|
console.error(`${e}`);
|
|
@@ -9377,11 +9432,7 @@ function reportFailures(evaluator, failingResults, { verbose, jsonl }) {
|
|
|
9377
9432
|
if (failingResults.length > 0) {
|
|
9378
9433
|
console.error(
|
|
9379
9434
|
warning(
|
|
9380
|
-
`Evaluator ${evaluator.evalName} failed with ${
|
|
9381
|
-
"error",
|
|
9382
|
-
failingResults.length,
|
|
9383
|
-
true
|
|
9384
|
-
)}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
|
|
9435
|
+
`Evaluator ${evaluator.evalName} failed with ${failingResults.length} error${failingResults.length === 1 ? "" : "s"}. This evaluation ("${evaluator.evalName}") will not be fully logged.`
|
|
9385
9436
|
)
|
|
9386
9437
|
);
|
|
9387
9438
|
if (jsonl) {
|
|
@@ -9413,124 +9464,84 @@ var defaultReporter = {
|
|
|
9413
9464
|
if (failingResults.length > 0) {
|
|
9414
9465
|
reportFailures(evaluator, failingResults, { verbose, jsonl });
|
|
9415
9466
|
}
|
|
9416
|
-
|
|
9417
|
-
|
|
9418
|
-
|
|
9419
|
-
|
|
9467
|
+
if (jsonl) {
|
|
9468
|
+
isomorph_default.writeln(JSON.stringify(summary));
|
|
9469
|
+
} else {
|
|
9470
|
+
isomorph_default.writeln("Experiment summary");
|
|
9471
|
+
isomorph_default.writeln("==================");
|
|
9472
|
+
if (summary.comparisonExperimentName) {
|
|
9473
|
+
isomorph_default.writeln(
|
|
9474
|
+
`${summary.comparisonExperimentName} (baseline) <- ${summary.experimentName} (comparison)`
|
|
9475
|
+
);
|
|
9476
|
+
isomorph_default.writeln("");
|
|
9477
|
+
}
|
|
9478
|
+
const hasScores = Object.keys(summary.scores).length > 0;
|
|
9479
|
+
const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
|
|
9480
|
+
const hasComparison = !!summary.comparisonExperimentName;
|
|
9481
|
+
if (hasScores || hasMetrics) {
|
|
9482
|
+
if (hasComparison) {
|
|
9483
|
+
isomorph_default.writeln(
|
|
9484
|
+
"Name Value Change Improvements Regressions"
|
|
9485
|
+
);
|
|
9486
|
+
isomorph_default.writeln(
|
|
9487
|
+
"----------------------------------------------------------------"
|
|
9488
|
+
);
|
|
9489
|
+
}
|
|
9490
|
+
for (const score of Object.values(summary.scores)) {
|
|
9491
|
+
const scorePercent = (score.score * 100).toFixed(2);
|
|
9492
|
+
const scoreValue = `${scorePercent}%`;
|
|
9493
|
+
if (hasComparison) {
|
|
9494
|
+
let diffString = "-";
|
|
9495
|
+
if (!isEmpty2(score.diff)) {
|
|
9496
|
+
const diffPercent = (score.diff * 100).toFixed(2);
|
|
9497
|
+
const diffSign = score.diff > 0 ? "+" : "";
|
|
9498
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
9499
|
+
}
|
|
9500
|
+
const improvements = score.improvements > 0 ? score.improvements.toString() : "-";
|
|
9501
|
+
const regressions = score.regressions > 0 ? score.regressions.toString() : "-";
|
|
9502
|
+
isomorph_default.writeln(
|
|
9503
|
+
`${score.name.padEnd(18)} ${scoreValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
9504
|
+
);
|
|
9505
|
+
} else {
|
|
9506
|
+
isomorph_default.writeln(`${score.name.padEnd(20)} ${scoreValue.padStart(15)}`);
|
|
9507
|
+
}
|
|
9508
|
+
}
|
|
9509
|
+
for (const metric of Object.values(summary.metrics ?? {})) {
|
|
9510
|
+
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
9511
|
+
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
9512
|
+
const metricValue = metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`;
|
|
9513
|
+
if (hasComparison) {
|
|
9514
|
+
let diffString = "-";
|
|
9515
|
+
if (!isEmpty2(metric.diff)) {
|
|
9516
|
+
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
9517
|
+
const diffSign = metric.diff > 0 ? "+" : "";
|
|
9518
|
+
diffString = `${diffSign}${diffPercent}%`;
|
|
9519
|
+
}
|
|
9520
|
+
const improvements = metric.improvements > 0 ? metric.improvements.toString() : "-";
|
|
9521
|
+
const regressions = metric.regressions > 0 ? metric.regressions.toString() : "-";
|
|
9522
|
+
isomorph_default.writeln(
|
|
9523
|
+
`${metric.name.padEnd(18)} ${metricValue.padStart(10)} ${diffString.padStart(10)} ${improvements.padStart(12)} ${regressions.padStart(11)}`
|
|
9524
|
+
);
|
|
9525
|
+
} else {
|
|
9526
|
+
isomorph_default.writeln(
|
|
9527
|
+
`${metric.name.padEnd(20)} ${metricValue.padStart(15)}`
|
|
9528
|
+
);
|
|
9529
|
+
}
|
|
9530
|
+
}
|
|
9531
|
+
}
|
|
9532
|
+
if (summary.experimentUrl) {
|
|
9533
|
+
isomorph_default.writeln("");
|
|
9534
|
+
isomorph_default.writeln(`View results for ${summary.experimentName}`);
|
|
9535
|
+
isomorph_default.writeln(`See results at ${summary.experimentUrl}`);
|
|
9536
|
+
}
|
|
9537
|
+
}
|
|
9538
|
+
isomorph_default.writeln("");
|
|
9420
9539
|
return failingResults.length === 0;
|
|
9421
9540
|
},
|
|
9422
9541
|
async reportRun(evalReports) {
|
|
9423
9542
|
return evalReports.every((r) => r);
|
|
9424
9543
|
}
|
|
9425
9544
|
};
|
|
9426
|
-
function formatExperimentSummary(summary) {
|
|
9427
|
-
let comparisonLine = "";
|
|
9428
|
-
if (summary.comparisonExperimentName) {
|
|
9429
|
-
comparisonLine = `${summary.comparisonExperimentName} ${chalk2.gray("(baseline)")} \u2190 ${summary.experimentName} ${chalk2.gray("(comparison)")}
|
|
9430
|
-
|
|
9431
|
-
`;
|
|
9432
|
-
}
|
|
9433
|
-
const tableParts = [];
|
|
9434
|
-
const hasScores = Object.keys(summary.scores).length > 0;
|
|
9435
|
-
const hasMetrics = Object.keys(summary.metrics ?? {}).length > 0;
|
|
9436
|
-
const hasComparison = !!summary.comparisonExperimentName;
|
|
9437
|
-
if (hasScores || hasMetrics) {
|
|
9438
|
-
const headers = [chalk2.gray("Name"), chalk2.gray("Value")];
|
|
9439
|
-
if (hasComparison) {
|
|
9440
|
-
headers.push(
|
|
9441
|
-
chalk2.gray("Change"),
|
|
9442
|
-
chalk2.gray("Improvements"),
|
|
9443
|
-
chalk2.gray("Regressions")
|
|
9444
|
-
);
|
|
9445
|
-
}
|
|
9446
|
-
const combinedTable = new Table({
|
|
9447
|
-
head: hasComparison ? headers : [],
|
|
9448
|
-
style: { head: [], "padding-left": 0, "padding-right": 0, border: [] },
|
|
9449
|
-
chars: {
|
|
9450
|
-
top: "",
|
|
9451
|
-
"top-mid": "",
|
|
9452
|
-
"top-left": "",
|
|
9453
|
-
"top-right": "",
|
|
9454
|
-
bottom: "",
|
|
9455
|
-
"bottom-mid": "",
|
|
9456
|
-
"bottom-left": "",
|
|
9457
|
-
"bottom-right": "",
|
|
9458
|
-
left: "",
|
|
9459
|
-
"left-mid": "",
|
|
9460
|
-
mid: "",
|
|
9461
|
-
"mid-mid": "",
|
|
9462
|
-
right: "",
|
|
9463
|
-
"right-mid": "",
|
|
9464
|
-
middle: " "
|
|
9465
|
-
},
|
|
9466
|
-
colWidths: hasComparison ? [18, 10, 10, 13, 12] : [20, 15],
|
|
9467
|
-
colAligns: hasComparison ? ["left", "right", "right", "right", "right"] : ["left", "right"],
|
|
9468
|
-
wordWrap: false
|
|
9469
|
-
});
|
|
9470
|
-
const scoreValues = Object.values(summary.scores);
|
|
9471
|
-
for (let i = 0; i < scoreValues.length; i++) {
|
|
9472
|
-
const score = scoreValues[i];
|
|
9473
|
-
const scorePercent = (score.score * 100).toFixed(2);
|
|
9474
|
-
const scoreValue = chalk2.white(`${scorePercent}%`);
|
|
9475
|
-
let diffString = "";
|
|
9476
|
-
if (!isEmpty(score.diff)) {
|
|
9477
|
-
const diffPercent = (score.diff * 100).toFixed(2);
|
|
9478
|
-
const diffSign = score.diff > 0 ? "+" : "";
|
|
9479
|
-
const diffColor = score.diff > 0 ? chalk2.green : chalk2.red;
|
|
9480
|
-
diffString = diffColor(`${diffSign}${diffPercent}%`);
|
|
9481
|
-
} else {
|
|
9482
|
-
diffString = chalk2.gray("-");
|
|
9483
|
-
}
|
|
9484
|
-
const improvements = score.improvements > 0 ? chalk2.dim.green(score.improvements) : chalk2.gray("-");
|
|
9485
|
-
const regressions = score.regressions > 0 ? chalk2.dim.red(score.regressions) : chalk2.gray("-");
|
|
9486
|
-
const row = [`${chalk2.blue("\u25EF")} ${score.name}`, scoreValue];
|
|
9487
|
-
if (hasComparison) {
|
|
9488
|
-
row.push(diffString, improvements, regressions);
|
|
9489
|
-
}
|
|
9490
|
-
combinedTable.push(row);
|
|
9491
|
-
}
|
|
9492
|
-
const metricValues = Object.values(summary.metrics ?? {});
|
|
9493
|
-
for (let i = 0; i < metricValues.length; i++) {
|
|
9494
|
-
const metric = metricValues[i];
|
|
9495
|
-
const fractionDigits = Number.isInteger(metric.metric) ? 0 : 2;
|
|
9496
|
-
const formattedValue = metric.metric.toFixed(fractionDigits);
|
|
9497
|
-
const metricValue = chalk2.white(
|
|
9498
|
-
metric.unit === "$" ? `${metric.unit}${formattedValue}` : `${formattedValue}${metric.unit}`
|
|
9499
|
-
);
|
|
9500
|
-
let diffString = "";
|
|
9501
|
-
if (!isEmpty(metric.diff)) {
|
|
9502
|
-
const diffPercent = (metric.diff * 100).toFixed(2);
|
|
9503
|
-
const diffSign = metric.diff > 0 ? "+" : "";
|
|
9504
|
-
const diffColor = metric.diff > 0 ? chalk2.green : chalk2.red;
|
|
9505
|
-
diffString = diffColor(`${diffSign}${diffPercent}%`);
|
|
9506
|
-
} else {
|
|
9507
|
-
diffString = chalk2.gray("-");
|
|
9508
|
-
}
|
|
9509
|
-
const improvements = metric.improvements > 0 ? chalk2.dim.green(metric.improvements) : chalk2.gray("-");
|
|
9510
|
-
const regressions = metric.regressions > 0 ? chalk2.dim.red(metric.regressions) : chalk2.gray("-");
|
|
9511
|
-
const row = [`${chalk2.magenta("\u25EF")} ${metric.name}`, metricValue];
|
|
9512
|
-
if (hasComparison) {
|
|
9513
|
-
row.push(diffString, improvements, regressions);
|
|
9514
|
-
}
|
|
9515
|
-
combinedTable.push(row);
|
|
9516
|
-
}
|
|
9517
|
-
tableParts.push(combinedTable.toString());
|
|
9518
|
-
}
|
|
9519
|
-
const content = [comparisonLine, ...tableParts].filter(Boolean).join("\n");
|
|
9520
|
-
const footer = summary.experimentUrl ? terminalLink(
|
|
9521
|
-
`View results for ${summary.experimentName}`,
|
|
9522
|
-
summary.experimentUrl,
|
|
9523
|
-
{ fallback: () => `See results at ${summary.experimentUrl}` }
|
|
9524
|
-
) : "";
|
|
9525
|
-
const boxContent = [content, footer].filter(Boolean).join("\n\n");
|
|
9526
|
-
return "\n" + boxen(boxContent, {
|
|
9527
|
-
title: chalk2.gray("Experiment summary"),
|
|
9528
|
-
titleAlignment: "left",
|
|
9529
|
-
padding: 0.5,
|
|
9530
|
-
borderColor: "gray",
|
|
9531
|
-
borderStyle: "round"
|
|
9532
|
-
});
|
|
9533
|
-
}
|
|
9534
9545
|
|
|
9535
9546
|
// dev/errorHandler.ts
|
|
9536
9547
|
import { z as z11 } from "zod/v3";
|