braintrust 0.0.91 → 0.0.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +70 -31
- package/dist/cli.js +82 -33
- package/dist/gitutil.d.ts +2 -22
- package/dist/index.js +88 -37
- package/dist/isomorph.d.ts +2 -11
- package/dist/logger.d.ts +36 -22
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -2
package/dist/browser.js
CHANGED
|
@@ -12,7 +12,7 @@ var DefaultAsyncLocalStorage = class {
|
|
|
12
12
|
}
|
|
13
13
|
};
|
|
14
14
|
var iso = {
|
|
15
|
-
getRepoStatus: async () => void 0,
|
|
15
|
+
getRepoStatus: async (_settings) => void 0,
|
|
16
16
|
getPastNAncestors: async () => [],
|
|
17
17
|
getEnv: (_name) => void 0,
|
|
18
18
|
getCallerLocation: () => void 0,
|
|
@@ -140,6 +140,24 @@ var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
|
|
|
140
140
|
SpanTypeAttribute2["TOOL"] = "tool";
|
|
141
141
|
return SpanTypeAttribute2;
|
|
142
142
|
})(SpanTypeAttribute || {});
|
|
143
|
+
function mergeGitMetadataSettings(s1, s2) {
|
|
144
|
+
var _a;
|
|
145
|
+
if (s1.collect === "all") {
|
|
146
|
+
return s2;
|
|
147
|
+
} else if (s2.collect === "all") {
|
|
148
|
+
return s1;
|
|
149
|
+
} else if (s1.collect === "none") {
|
|
150
|
+
return s1;
|
|
151
|
+
} else if (s2.collect === "none") {
|
|
152
|
+
return s2;
|
|
153
|
+
}
|
|
154
|
+
const fields = ((_a = s1.fields) != null ? _a : []).filter((f) => {
|
|
155
|
+
var _a2;
|
|
156
|
+
return ((_a2 = s2.fields) != null ? _a2 : []).includes(f);
|
|
157
|
+
});
|
|
158
|
+
const collect = fields.length > 0 ? "some" : "none";
|
|
159
|
+
return { collect, fields };
|
|
160
|
+
}
|
|
143
161
|
|
|
144
162
|
// src/util.ts
|
|
145
163
|
var GLOBAL_PROJECT = "Global";
|
|
@@ -194,7 +212,7 @@ var NoopSpan = class {
|
|
|
194
212
|
var NOOP_SPAN = new NoopSpan();
|
|
195
213
|
var BraintrustState = class {
|
|
196
214
|
constructor() {
|
|
197
|
-
this.
|
|
215
|
+
this.appUrl = null;
|
|
198
216
|
this.loginToken = null;
|
|
199
217
|
this.orgId = null;
|
|
200
218
|
this.orgName = null;
|
|
@@ -210,21 +228,22 @@ var BraintrustState = class {
|
|
|
210
228
|
globalThis.__inherited_braintrust_state = this;
|
|
211
229
|
}
|
|
212
230
|
resetLoginInfo() {
|
|
213
|
-
this.
|
|
231
|
+
this.appUrl = null;
|
|
214
232
|
this.loginToken = null;
|
|
215
233
|
this.orgId = null;
|
|
216
234
|
this.orgName = null;
|
|
217
235
|
this.logUrl = null;
|
|
218
236
|
this.loggedIn = false;
|
|
237
|
+
this.gitMetadataSettings = void 0;
|
|
219
238
|
this._apiConn = null;
|
|
220
239
|
this._logConn = null;
|
|
221
240
|
}
|
|
222
241
|
apiConn() {
|
|
223
242
|
if (!this._apiConn) {
|
|
224
|
-
if (!this.
|
|
225
|
-
throw new Error("Must initialize
|
|
243
|
+
if (!this.appUrl) {
|
|
244
|
+
throw new Error("Must initialize appUrl before requesting apiConn");
|
|
226
245
|
}
|
|
227
|
-
this._apiConn = new HTTPConnection(this.
|
|
246
|
+
this._apiConn = new HTTPConnection(this.appUrl);
|
|
228
247
|
}
|
|
229
248
|
return this._apiConn;
|
|
230
249
|
}
|
|
@@ -444,12 +463,12 @@ var Logger = class {
|
|
|
444
463
|
* Log a single event. The event will be batched and uploaded behind the scenes if `logOptions.asyncFlush` is true.
|
|
445
464
|
*
|
|
446
465
|
* @param event The event to log.
|
|
447
|
-
* @param event.input:
|
|
448
|
-
* @param event.output:
|
|
449
|
-
* @param event.expected:
|
|
450
|
-
* @param event.scores:
|
|
466
|
+
* @param event.input: (Optional) the arguments that uniquely define a user input (an arbitrary, JSON serializable object).
|
|
467
|
+
* @param event.output: (Optional) the output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
468
|
+
* @param event.expected: (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
469
|
+
* @param event.scores: (Optional) a dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
|
|
451
470
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
452
|
-
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end"
|
|
471
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
453
472
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
454
473
|
* :returns: The `id` of the logged event.
|
|
455
474
|
*/
|
|
@@ -503,7 +522,9 @@ var Logger = class {
|
|
|
503
522
|
};
|
|
504
523
|
}
|
|
505
524
|
/**
|
|
506
|
-
* Lower-level alternative to `traced
|
|
525
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
526
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
527
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
507
528
|
*
|
|
508
529
|
* See `traced` for full details.
|
|
509
530
|
*/
|
|
@@ -654,16 +675,17 @@ function init(project, options = {}) {
|
|
|
654
675
|
baseExperiment,
|
|
655
676
|
isPublic,
|
|
656
677
|
update,
|
|
657
|
-
|
|
678
|
+
appUrl,
|
|
658
679
|
apiKey,
|
|
659
680
|
orgName,
|
|
660
|
-
metadata
|
|
681
|
+
metadata,
|
|
682
|
+
gitMetadataSettings
|
|
661
683
|
} = options || {};
|
|
662
684
|
const lazyMetadata = (async () => {
|
|
663
685
|
await login({
|
|
664
686
|
orgName,
|
|
665
687
|
apiKey,
|
|
666
|
-
|
|
688
|
+
appUrl
|
|
667
689
|
});
|
|
668
690
|
const args = {
|
|
669
691
|
project_name: project,
|
|
@@ -678,7 +700,18 @@ function init(project, options = {}) {
|
|
|
678
700
|
if (update) {
|
|
679
701
|
args["update"] = update;
|
|
680
702
|
}
|
|
681
|
-
|
|
703
|
+
let mergedGitMetadataSettings = {
|
|
704
|
+
..._state.gitMetadataSettings || {
|
|
705
|
+
collect: "all"
|
|
706
|
+
}
|
|
707
|
+
};
|
|
708
|
+
if (gitMetadataSettings) {
|
|
709
|
+
mergedGitMetadataSettings = mergeGitMetadataSettings(
|
|
710
|
+
mergedGitMetadataSettings,
|
|
711
|
+
gitMetadataSettings
|
|
712
|
+
);
|
|
713
|
+
}
|
|
714
|
+
const repoStatus = await isomorph_default.getRepoStatus(gitMetadataSettings);
|
|
682
715
|
if (repoStatus) {
|
|
683
716
|
args["repo_info"] = repoStatus;
|
|
684
717
|
}
|
|
@@ -745,12 +778,12 @@ function withLogger(callback, options = {}) {
|
|
|
745
778
|
return callback(logger);
|
|
746
779
|
}
|
|
747
780
|
function initDataset(project, options = {}) {
|
|
748
|
-
const { dataset, description, version,
|
|
781
|
+
const { dataset, description, version, appUrl, apiKey, orgName } = options || {};
|
|
749
782
|
const lazyMetadata = (async () => {
|
|
750
783
|
await login({
|
|
751
784
|
orgName,
|
|
752
785
|
apiKey,
|
|
753
|
-
|
|
786
|
+
appUrl
|
|
754
787
|
});
|
|
755
788
|
const args = {
|
|
756
789
|
org_id: _state.orgId,
|
|
@@ -786,7 +819,7 @@ function initLogger(options = {}) {
|
|
|
786
819
|
projectName,
|
|
787
820
|
projectId,
|
|
788
821
|
asyncFlush,
|
|
789
|
-
|
|
822
|
+
appUrl,
|
|
790
823
|
apiKey,
|
|
791
824
|
orgName,
|
|
792
825
|
forceLogin
|
|
@@ -795,7 +828,7 @@ function initLogger(options = {}) {
|
|
|
795
828
|
await login({
|
|
796
829
|
orgName,
|
|
797
830
|
apiKey,
|
|
798
|
-
|
|
831
|
+
appUrl,
|
|
799
832
|
forceLogin
|
|
800
833
|
});
|
|
801
834
|
const org_id = _state.orgId;
|
|
@@ -841,7 +874,7 @@ function initLogger(options = {}) {
|
|
|
841
874
|
}
|
|
842
875
|
async function login(options = {}) {
|
|
843
876
|
const {
|
|
844
|
-
|
|
877
|
+
appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
|
|
845
878
|
apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
|
|
846
879
|
orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
|
|
847
880
|
} = options || {};
|
|
@@ -850,11 +883,11 @@ async function login(options = {}) {
|
|
|
850
883
|
return;
|
|
851
884
|
}
|
|
852
885
|
_state.resetLoginInfo();
|
|
853
|
-
_state.
|
|
886
|
+
_state.appUrl = appUrl;
|
|
854
887
|
let conn = null;
|
|
855
888
|
if (apiKey !== void 0) {
|
|
856
889
|
const resp = await checkResponse(
|
|
857
|
-
await fetch(_urljoin(_state.
|
|
890
|
+
await fetch(_urljoin(_state.appUrl, `/api/apikey/login`), {
|
|
858
891
|
method: "POST",
|
|
859
892
|
headers: {
|
|
860
893
|
"Content-Type": "application/json"
|
|
@@ -971,7 +1004,8 @@ function _check_org_info(org_info, org_name) {
|
|
|
971
1004
|
if (org_name === void 0 || org.name === org_name) {
|
|
972
1005
|
_state.orgId = org.id;
|
|
973
1006
|
_state.orgName = org.name;
|
|
974
|
-
_state.logUrl = isomorph_default.getEnv("
|
|
1007
|
+
_state.logUrl = isomorph_default.getEnv("BRAINTRUST_API_URL") ?? org.api_url;
|
|
1008
|
+
_state.gitMetadataSettings = org.git_metadata || void 0;
|
|
975
1009
|
break;
|
|
976
1010
|
}
|
|
977
1011
|
}
|
|
@@ -1040,6 +1074,9 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
|
1040
1074
|
"Exactly one of input or inputs (deprecated) must be specified. Prefer input."
|
|
1041
1075
|
);
|
|
1042
1076
|
}
|
|
1077
|
+
if (!event.output) {
|
|
1078
|
+
throw new Error("output must be specified");
|
|
1079
|
+
}
|
|
1043
1080
|
if (!event.scores) {
|
|
1044
1081
|
throw new Error("scores must be specified");
|
|
1045
1082
|
}
|
|
@@ -1087,10 +1124,10 @@ var Experiment = class {
|
|
|
1087
1124
|
* @param event The event to log.
|
|
1088
1125
|
* @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
|
|
1089
1126
|
* @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
1090
|
-
* @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
1127
|
+
* @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
1091
1128
|
* @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
1092
1129
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
1093
|
-
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end"
|
|
1130
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
1094
1131
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
1095
1132
|
* @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
1096
1133
|
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
@@ -1129,7 +1166,9 @@ var Experiment = class {
|
|
|
1129
1166
|
};
|
|
1130
1167
|
}
|
|
1131
1168
|
/**
|
|
1132
|
-
* Lower-level alternative to `traced
|
|
1169
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
1170
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
1171
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
1133
1172
|
*
|
|
1134
1173
|
* See `traced` for full details.
|
|
1135
1174
|
*/
|
|
@@ -1154,7 +1193,7 @@ var Experiment = class {
|
|
|
1154
1193
|
let { summarizeScores = true, comparisonExperimentId = void 0 } = options || {};
|
|
1155
1194
|
await this.bgLogger.flush();
|
|
1156
1195
|
const state = await this.getState();
|
|
1157
|
-
const projectUrl = `${state.
|
|
1196
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
1158
1197
|
state.orgName
|
|
1159
1198
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
1160
1199
|
const experimentUrl = `${projectUrl}/${encodeURIComponent(
|
|
@@ -1250,9 +1289,9 @@ var SpanImpl = class _SpanImpl {
|
|
|
1250
1289
|
})();
|
|
1251
1290
|
this.internalData = {
|
|
1252
1291
|
metrics: {
|
|
1253
|
-
start: args.startTime ?? getCurrentUnixTimestamp()
|
|
1254
|
-
...callerLocation
|
|
1292
|
+
start: args.startTime ?? getCurrentUnixTimestamp()
|
|
1255
1293
|
},
|
|
1294
|
+
context: { ...callerLocation },
|
|
1256
1295
|
span_attributes: { ...args.spanAttributes, name },
|
|
1257
1296
|
created: (/* @__PURE__ */ new Date()).toISOString()
|
|
1258
1297
|
};
|
|
@@ -1439,7 +1478,7 @@ var Dataset = class {
|
|
|
1439
1478
|
let { summarizeData = true } = options || {};
|
|
1440
1479
|
await this.bgLogger.flush();
|
|
1441
1480
|
const state = await this.getState();
|
|
1442
|
-
const projectUrl = `${state.
|
|
1481
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
1443
1482
|
state.orgName
|
|
1444
1483
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
1445
1484
|
const datasetUrl = `${projectUrl}/d/${encodeURIComponent(await this.name)}`;
|
package/dist/cli.js
CHANGED
|
@@ -9065,7 +9065,7 @@ var require_package = __commonJS({
|
|
|
9065
9065
|
"package.json"(exports2, module2) {
|
|
9066
9066
|
module2.exports = {
|
|
9067
9067
|
name: "braintrust",
|
|
9068
|
-
version: "0.0.
|
|
9068
|
+
version: "0.0.93",
|
|
9069
9069
|
description: "SDK for integrating Braintrust",
|
|
9070
9070
|
main: "./dist/index.js",
|
|
9071
9071
|
browser: {
|
|
@@ -9108,7 +9108,7 @@ var require_package = __commonJS({
|
|
|
9108
9108
|
typescript: "^5.3.3"
|
|
9109
9109
|
},
|
|
9110
9110
|
dependencies: {
|
|
9111
|
-
"@braintrust/core": "^0.0.
|
|
9111
|
+
"@braintrust/core": "^0.0.13",
|
|
9112
9112
|
argparse: "^2.0.1",
|
|
9113
9113
|
chalk: "^4.1.2",
|
|
9114
9114
|
"cli-progress": "^3.12.0",
|
|
@@ -10578,6 +10578,24 @@ var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
|
|
|
10578
10578
|
SpanTypeAttribute2["TOOL"] = "tool";
|
|
10579
10579
|
return SpanTypeAttribute2;
|
|
10580
10580
|
})(SpanTypeAttribute || {});
|
|
10581
|
+
function mergeGitMetadataSettings(s1, s2) {
|
|
10582
|
+
var _a2;
|
|
10583
|
+
if (s1.collect === "all") {
|
|
10584
|
+
return s2;
|
|
10585
|
+
} else if (s2.collect === "all") {
|
|
10586
|
+
return s1;
|
|
10587
|
+
} else if (s1.collect === "none") {
|
|
10588
|
+
return s1;
|
|
10589
|
+
} else if (s2.collect === "none") {
|
|
10590
|
+
return s2;
|
|
10591
|
+
}
|
|
10592
|
+
const fields = ((_a2 = s1.fields) != null ? _a2 : []).filter((f) => {
|
|
10593
|
+
var _a22;
|
|
10594
|
+
return ((_a22 = s2.fields) != null ? _a22 : []).includes(f);
|
|
10595
|
+
});
|
|
10596
|
+
const collect = fields.length > 0 ? "some" : "none";
|
|
10597
|
+
return { collect, fields };
|
|
10598
|
+
}
|
|
10581
10599
|
|
|
10582
10600
|
// src/isomorph.ts
|
|
10583
10601
|
var DefaultAsyncLocalStorage = class {
|
|
@@ -10593,7 +10611,7 @@ var DefaultAsyncLocalStorage = class {
|
|
|
10593
10611
|
}
|
|
10594
10612
|
};
|
|
10595
10613
|
var iso = {
|
|
10596
|
-
getRepoStatus: async () => void 0,
|
|
10614
|
+
getRepoStatus: async (_settings) => void 0,
|
|
10597
10615
|
getPastNAncestors: async () => [],
|
|
10598
10616
|
getEnv: (_name) => void 0,
|
|
10599
10617
|
getCallerLocation: () => void 0,
|
|
@@ -10655,7 +10673,7 @@ var NoopSpan = class {
|
|
|
10655
10673
|
var NOOP_SPAN = new NoopSpan();
|
|
10656
10674
|
var BraintrustState = class {
|
|
10657
10675
|
constructor() {
|
|
10658
|
-
this.
|
|
10676
|
+
this.appUrl = null;
|
|
10659
10677
|
this.loginToken = null;
|
|
10660
10678
|
this.orgId = null;
|
|
10661
10679
|
this.orgName = null;
|
|
@@ -10671,21 +10689,22 @@ var BraintrustState = class {
|
|
|
10671
10689
|
globalThis.__inherited_braintrust_state = this;
|
|
10672
10690
|
}
|
|
10673
10691
|
resetLoginInfo() {
|
|
10674
|
-
this.
|
|
10692
|
+
this.appUrl = null;
|
|
10675
10693
|
this.loginToken = null;
|
|
10676
10694
|
this.orgId = null;
|
|
10677
10695
|
this.orgName = null;
|
|
10678
10696
|
this.logUrl = null;
|
|
10679
10697
|
this.loggedIn = false;
|
|
10698
|
+
this.gitMetadataSettings = void 0;
|
|
10680
10699
|
this._apiConn = null;
|
|
10681
10700
|
this._logConn = null;
|
|
10682
10701
|
}
|
|
10683
10702
|
apiConn() {
|
|
10684
10703
|
if (!this._apiConn) {
|
|
10685
|
-
if (!this.
|
|
10686
|
-
throw new Error("Must initialize
|
|
10704
|
+
if (!this.appUrl) {
|
|
10705
|
+
throw new Error("Must initialize appUrl before requesting apiConn");
|
|
10687
10706
|
}
|
|
10688
|
-
this._apiConn = new HTTPConnection(this.
|
|
10707
|
+
this._apiConn = new HTTPConnection(this.appUrl);
|
|
10689
10708
|
}
|
|
10690
10709
|
return this._apiConn;
|
|
10691
10710
|
}
|
|
@@ -10981,16 +11000,17 @@ function init(project, options = {}) {
|
|
|
10981
11000
|
baseExperiment,
|
|
10982
11001
|
isPublic,
|
|
10983
11002
|
update,
|
|
10984
|
-
|
|
11003
|
+
appUrl,
|
|
10985
11004
|
apiKey,
|
|
10986
11005
|
orgName,
|
|
10987
|
-
metadata
|
|
11006
|
+
metadata,
|
|
11007
|
+
gitMetadataSettings
|
|
10988
11008
|
} = options || {};
|
|
10989
11009
|
const lazyMetadata = (async () => {
|
|
10990
11010
|
await login({
|
|
10991
11011
|
orgName,
|
|
10992
11012
|
apiKey,
|
|
10993
|
-
|
|
11013
|
+
appUrl
|
|
10994
11014
|
});
|
|
10995
11015
|
const args = {
|
|
10996
11016
|
project_name: project,
|
|
@@ -11005,9 +11025,20 @@ function init(project, options = {}) {
|
|
|
11005
11025
|
if (update) {
|
|
11006
11026
|
args["update"] = update;
|
|
11007
11027
|
}
|
|
11008
|
-
|
|
11009
|
-
|
|
11010
|
-
|
|
11028
|
+
let mergedGitMetadataSettings = {
|
|
11029
|
+
..._state.gitMetadataSettings || {
|
|
11030
|
+
collect: "all"
|
|
11031
|
+
}
|
|
11032
|
+
};
|
|
11033
|
+
if (gitMetadataSettings) {
|
|
11034
|
+
mergedGitMetadataSettings = mergeGitMetadataSettings(
|
|
11035
|
+
mergedGitMetadataSettings,
|
|
11036
|
+
gitMetadataSettings
|
|
11037
|
+
);
|
|
11038
|
+
}
|
|
11039
|
+
const repoStatus2 = await isomorph_default.getRepoStatus(gitMetadataSettings);
|
|
11040
|
+
if (repoStatus2) {
|
|
11041
|
+
args["repo_info"] = repoStatus2;
|
|
11011
11042
|
}
|
|
11012
11043
|
if (baseExperiment) {
|
|
11013
11044
|
args["base_experiment"] = baseExperiment;
|
|
@@ -11059,7 +11090,7 @@ function init(project, options = {}) {
|
|
|
11059
11090
|
}
|
|
11060
11091
|
async function login(options = {}) {
|
|
11061
11092
|
const {
|
|
11062
|
-
|
|
11093
|
+
appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
|
|
11063
11094
|
apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
|
|
11064
11095
|
orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
|
|
11065
11096
|
} = options || {};
|
|
@@ -11068,11 +11099,11 @@ async function login(options = {}) {
|
|
|
11068
11099
|
return;
|
|
11069
11100
|
}
|
|
11070
11101
|
_state.resetLoginInfo();
|
|
11071
|
-
_state.
|
|
11102
|
+
_state.appUrl = appUrl;
|
|
11072
11103
|
let conn = null;
|
|
11073
11104
|
if (apiKey !== void 0) {
|
|
11074
11105
|
const resp = await checkResponse(
|
|
11075
|
-
await fetch(_urljoin(_state.
|
|
11106
|
+
await fetch(_urljoin(_state.appUrl, `/api/apikey/login`), {
|
|
11076
11107
|
method: "POST",
|
|
11077
11108
|
headers: {
|
|
11078
11109
|
"Content-Type": "application/json"
|
|
@@ -11110,7 +11141,8 @@ function _check_org_info(org_info, org_name) {
|
|
|
11110
11141
|
if (org_name === void 0 || org.name === org_name) {
|
|
11111
11142
|
_state.orgId = org.id;
|
|
11112
11143
|
_state.orgName = org.name;
|
|
11113
|
-
_state.logUrl = isomorph_default.getEnv("
|
|
11144
|
+
_state.logUrl = isomorph_default.getEnv("BRAINTRUST_API_URL") ?? org.api_url;
|
|
11145
|
+
_state.gitMetadataSettings = org.git_metadata || void 0;
|
|
11114
11146
|
break;
|
|
11115
11147
|
}
|
|
11116
11148
|
}
|
|
@@ -11179,6 +11211,9 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
|
11179
11211
|
"Exactly one of input or inputs (deprecated) must be specified. Prefer input."
|
|
11180
11212
|
);
|
|
11181
11213
|
}
|
|
11214
|
+
if (!event.output) {
|
|
11215
|
+
throw new Error("output must be specified");
|
|
11216
|
+
}
|
|
11182
11217
|
if (!event.scores) {
|
|
11183
11218
|
throw new Error("scores must be specified");
|
|
11184
11219
|
}
|
|
@@ -11226,10 +11261,10 @@ var Experiment = class {
|
|
|
11226
11261
|
* @param event The event to log.
|
|
11227
11262
|
* @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
|
|
11228
11263
|
* @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
11229
|
-
* @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
11264
|
+
* @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
11230
11265
|
* @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
11231
11266
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
11232
|
-
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end"
|
|
11267
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
11233
11268
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
11234
11269
|
* @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
11235
11270
|
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
@@ -11268,7 +11303,9 @@ var Experiment = class {
|
|
|
11268
11303
|
};
|
|
11269
11304
|
}
|
|
11270
11305
|
/**
|
|
11271
|
-
* Lower-level alternative to `traced
|
|
11306
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
11307
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
11308
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
11272
11309
|
*
|
|
11273
11310
|
* See `traced` for full details.
|
|
11274
11311
|
*/
|
|
@@ -11293,7 +11330,7 @@ var Experiment = class {
|
|
|
11293
11330
|
let { summarizeScores = true, comparisonExperimentId = void 0 } = options || {};
|
|
11294
11331
|
await this.bgLogger.flush();
|
|
11295
11332
|
const state = await this.getState();
|
|
11296
|
-
const projectUrl = `${state.
|
|
11333
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
11297
11334
|
state.orgName
|
|
11298
11335
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
11299
11336
|
const experimentUrl = `${projectUrl}/${encodeURIComponent(
|
|
@@ -11389,9 +11426,9 @@ var SpanImpl = class _SpanImpl {
|
|
|
11389
11426
|
})();
|
|
11390
11427
|
this.internalData = {
|
|
11391
11428
|
metrics: {
|
|
11392
|
-
start: args.startTime ?? getCurrentUnixTimestamp()
|
|
11393
|
-
...callerLocation
|
|
11429
|
+
start: args.startTime ?? getCurrentUnixTimestamp()
|
|
11394
11430
|
},
|
|
11431
|
+
context: { ...callerLocation },
|
|
11395
11432
|
span_attributes: { ...args.spanAttributes, name },
|
|
11396
11433
|
created: (/* @__PURE__ */ new Date()).toISOString()
|
|
11397
11434
|
};
|
|
@@ -11578,7 +11615,7 @@ var Dataset = class {
|
|
|
11578
11615
|
let { summarizeData = true } = options || {};
|
|
11579
11616
|
await this.bgLogger.flush();
|
|
11580
11617
|
const state = await this.getState();
|
|
11581
|
-
const projectUrl = `${state.
|
|
11618
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
11582
11619
|
state.orgName
|
|
11583
11620
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
11584
11621
|
const datasetUrl = `${projectUrl}/d/${encodeURIComponent(await this.name)}`;
|
|
@@ -16017,9 +16054,7 @@ async function getBaseBranchAncestor(remote = void 0) {
|
|
|
16017
16054
|
if (git === null) {
|
|
16018
16055
|
throw new Error("Not in a git repo");
|
|
16019
16056
|
}
|
|
16020
|
-
const { remote: remoteName, branch: baseBranch } = await getBaseBranch(
|
|
16021
|
-
remote
|
|
16022
|
-
);
|
|
16057
|
+
const { remote: remoteName, branch: baseBranch } = await getBaseBranch(remote);
|
|
16023
16058
|
const isDirty = (await git.diffSummary()).files.length > 0;
|
|
16024
16059
|
const head = isDirty ? "HEAD" : "HEAD^";
|
|
16025
16060
|
try {
|
|
@@ -16068,7 +16103,21 @@ function truncateToByteLimit(s, byteLimit = 65536) {
|
|
|
16068
16103
|
const truncated = encoded.subarray(0, byteLimit);
|
|
16069
16104
|
return new TextDecoder().decode(truncated);
|
|
16070
16105
|
}
|
|
16071
|
-
async function getRepoStatus() {
|
|
16106
|
+
async function getRepoStatus(settings) {
|
|
16107
|
+
if (settings && settings.collect === "none") {
|
|
16108
|
+
return void 0;
|
|
16109
|
+
}
|
|
16110
|
+
const repo = await repoStatus();
|
|
16111
|
+
if (!repo || !settings || settings.collect === "all") {
|
|
16112
|
+
return repo;
|
|
16113
|
+
}
|
|
16114
|
+
let sanitized = {};
|
|
16115
|
+
settings.fields?.forEach((field) => {
|
|
16116
|
+
sanitized = { ...sanitized, [field]: repo[field] };
|
|
16117
|
+
});
|
|
16118
|
+
return sanitized;
|
|
16119
|
+
}
|
|
16120
|
+
async function repoStatus() {
|
|
16072
16121
|
const git = await currentRepo();
|
|
16073
16122
|
if (git === null) {
|
|
16074
16123
|
return void 0;
|
|
@@ -16511,7 +16560,7 @@ async function run(args) {
|
|
|
16511
16560
|
verbose: args.verbose,
|
|
16512
16561
|
apiKey: args.api_key,
|
|
16513
16562
|
orgName: args.org_name,
|
|
16514
|
-
|
|
16563
|
+
appUrl: args.app_url,
|
|
16515
16564
|
noSendLogs: !!args.no_send_logs,
|
|
16516
16565
|
terminateOnFailure: !!args.terminate_on_failure,
|
|
16517
16566
|
watch: !!args.watch,
|
|
@@ -16525,7 +16574,7 @@ async function run(args) {
|
|
|
16525
16574
|
await login({
|
|
16526
16575
|
apiKey: args.api_key,
|
|
16527
16576
|
orgName: args.org_name,
|
|
16528
|
-
|
|
16577
|
+
appUrl: args.app_url
|
|
16529
16578
|
});
|
|
16530
16579
|
}
|
|
16531
16580
|
if (args.watch) {
|
|
@@ -16563,8 +16612,8 @@ async function main() {
|
|
|
16563
16612
|
parser_run.add_argument("--org-name", {
|
|
16564
16613
|
help: "The name of a specific organization to connect to. This is useful if you belong to multiple."
|
|
16565
16614
|
});
|
|
16566
|
-
parser_run.add_argument("--
|
|
16567
|
-
help: "Specify a custom braintrust
|
|
16615
|
+
parser_run.add_argument("--app-url", {
|
|
16616
|
+
help: "Specify a custom braintrust app url. Defaults to https://www.braintrustdata.com. This is only necessary if you are using an experimental version of Braintrust"
|
|
16568
16617
|
});
|
|
16569
16618
|
parser_run.add_argument("--watch", {
|
|
16570
16619
|
action: "store_true",
|
package/dist/gitutil.d.ts
CHANGED
|
@@ -1,27 +1,7 @@
|
|
|
1
|
+
import { GitMetadataSettings, RepoStatus } from "@braintrust/core";
|
|
1
2
|
/**
|
|
2
3
|
* Information about the current HEAD of the repo.
|
|
3
4
|
*/
|
|
4
|
-
export interface RepoStatus {
|
|
5
|
-
commit?: string;
|
|
6
|
-
branch?: string;
|
|
7
|
-
tag?: string;
|
|
8
|
-
dirty: boolean;
|
|
9
|
-
author_name?: string;
|
|
10
|
-
author_email?: string;
|
|
11
|
-
commit_message?: string;
|
|
12
|
-
commit_time?: string;
|
|
13
|
-
git_diff?: string;
|
|
14
|
-
}
|
|
15
5
|
export declare function currentRepo(): Promise<import("simple-git").SimpleGit | null>;
|
|
16
6
|
export declare function getPastNAncestors(n?: number, remote?: string | undefined): Promise<string[]>;
|
|
17
|
-
export declare function getRepoStatus(): Promise<
|
|
18
|
-
commit: string | undefined;
|
|
19
|
-
branch: string | undefined;
|
|
20
|
-
tag: string | undefined;
|
|
21
|
-
dirty: boolean;
|
|
22
|
-
author_name: string | undefined;
|
|
23
|
-
author_email: string | undefined;
|
|
24
|
-
commit_message: string | undefined;
|
|
25
|
-
commit_time: string | undefined;
|
|
26
|
-
git_diff: string | undefined;
|
|
27
|
-
} | undefined>;
|
|
7
|
+
export declare function getRepoStatus(settings?: GitMetadataSettings): Promise<RepoStatus | undefined>;
|