braintrust 0.0.91 → 0.0.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +70 -31
- package/dist/cli.js +82 -33
- package/dist/gitutil.d.ts +2 -22
- package/dist/index.js +88 -37
- package/dist/isomorph.d.ts +2 -11
- package/dist/logger.d.ts +36 -22
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -3693,7 +3693,7 @@ var DefaultAsyncLocalStorage = class {
|
|
|
3693
3693
|
}
|
|
3694
3694
|
};
|
|
3695
3695
|
var iso = {
|
|
3696
|
-
getRepoStatus: async () => void 0,
|
|
3696
|
+
getRepoStatus: async (_settings) => void 0,
|
|
3697
3697
|
getPastNAncestors: async () => [],
|
|
3698
3698
|
getEnv: (_name) => void 0,
|
|
3699
3699
|
getCallerLocation: () => void 0,
|
|
@@ -7681,9 +7681,7 @@ async function getBaseBranchAncestor(remote = void 0) {
|
|
|
7681
7681
|
if (git === null) {
|
|
7682
7682
|
throw new Error("Not in a git repo");
|
|
7683
7683
|
}
|
|
7684
|
-
const { remote: remoteName, branch: baseBranch } = await getBaseBranch(
|
|
7685
|
-
remote
|
|
7686
|
-
);
|
|
7684
|
+
const { remote: remoteName, branch: baseBranch } = await getBaseBranch(remote);
|
|
7687
7685
|
const isDirty = (await git.diffSummary()).files.length > 0;
|
|
7688
7686
|
const head = isDirty ? "HEAD" : "HEAD^";
|
|
7689
7687
|
try {
|
|
@@ -7732,7 +7730,21 @@ function truncateToByteLimit(s, byteLimit = 65536) {
|
|
|
7732
7730
|
const truncated = encoded.subarray(0, byteLimit);
|
|
7733
7731
|
return new TextDecoder().decode(truncated);
|
|
7734
7732
|
}
|
|
7735
|
-
async function getRepoStatus() {
|
|
7733
|
+
async function getRepoStatus(settings) {
|
|
7734
|
+
if (settings && settings.collect === "none") {
|
|
7735
|
+
return void 0;
|
|
7736
|
+
}
|
|
7737
|
+
const repo = await repoStatus();
|
|
7738
|
+
if (!repo || !settings || settings.collect === "all") {
|
|
7739
|
+
return repo;
|
|
7740
|
+
}
|
|
7741
|
+
let sanitized = {};
|
|
7742
|
+
settings.fields?.forEach((field) => {
|
|
7743
|
+
sanitized = { ...sanitized, [field]: repo[field] };
|
|
7744
|
+
});
|
|
7745
|
+
return sanitized;
|
|
7746
|
+
}
|
|
7747
|
+
async function repoStatus() {
|
|
7736
7748
|
const git = await currentRepo();
|
|
7737
7749
|
if (git === null) {
|
|
7738
7750
|
return void 0;
|
|
@@ -7944,6 +7956,24 @@ var SpanTypeAttribute = /* @__PURE__ */ ((SpanTypeAttribute2) => {
|
|
|
7944
7956
|
SpanTypeAttribute2["TOOL"] = "tool";
|
|
7945
7957
|
return SpanTypeAttribute2;
|
|
7946
7958
|
})(SpanTypeAttribute || {});
|
|
7959
|
+
function mergeGitMetadataSettings(s1, s2) {
|
|
7960
|
+
var _a2;
|
|
7961
|
+
if (s1.collect === "all") {
|
|
7962
|
+
return s2;
|
|
7963
|
+
} else if (s2.collect === "all") {
|
|
7964
|
+
return s1;
|
|
7965
|
+
} else if (s1.collect === "none") {
|
|
7966
|
+
return s1;
|
|
7967
|
+
} else if (s2.collect === "none") {
|
|
7968
|
+
return s2;
|
|
7969
|
+
}
|
|
7970
|
+
const fields = ((_a2 = s1.fields) != null ? _a2 : []).filter((f) => {
|
|
7971
|
+
var _a22;
|
|
7972
|
+
return ((_a22 = s2.fields) != null ? _a22 : []).includes(f);
|
|
7973
|
+
});
|
|
7974
|
+
const collect = fields.length > 0 ? "some" : "none";
|
|
7975
|
+
return { collect, fields };
|
|
7976
|
+
}
|
|
7947
7977
|
|
|
7948
7978
|
// src/util.ts
|
|
7949
7979
|
var GLOBAL_PROJECT = "Global";
|
|
@@ -7998,7 +8028,7 @@ var NoopSpan = class {
|
|
|
7998
8028
|
var NOOP_SPAN = new NoopSpan();
|
|
7999
8029
|
var BraintrustState = class {
|
|
8000
8030
|
constructor() {
|
|
8001
|
-
this.
|
|
8031
|
+
this.appUrl = null;
|
|
8002
8032
|
this.loginToken = null;
|
|
8003
8033
|
this.orgId = null;
|
|
8004
8034
|
this.orgName = null;
|
|
@@ -8014,21 +8044,22 @@ var BraintrustState = class {
|
|
|
8014
8044
|
globalThis.__inherited_braintrust_state = this;
|
|
8015
8045
|
}
|
|
8016
8046
|
resetLoginInfo() {
|
|
8017
|
-
this.
|
|
8047
|
+
this.appUrl = null;
|
|
8018
8048
|
this.loginToken = null;
|
|
8019
8049
|
this.orgId = null;
|
|
8020
8050
|
this.orgName = null;
|
|
8021
8051
|
this.logUrl = null;
|
|
8022
8052
|
this.loggedIn = false;
|
|
8053
|
+
this.gitMetadataSettings = void 0;
|
|
8023
8054
|
this._apiConn = null;
|
|
8024
8055
|
this._logConn = null;
|
|
8025
8056
|
}
|
|
8026
8057
|
apiConn() {
|
|
8027
8058
|
if (!this._apiConn) {
|
|
8028
|
-
if (!this.
|
|
8029
|
-
throw new Error("Must initialize
|
|
8059
|
+
if (!this.appUrl) {
|
|
8060
|
+
throw new Error("Must initialize appUrl before requesting apiConn");
|
|
8030
8061
|
}
|
|
8031
|
-
this._apiConn = new HTTPConnection(this.
|
|
8062
|
+
this._apiConn = new HTTPConnection(this.appUrl);
|
|
8032
8063
|
}
|
|
8033
8064
|
return this._apiConn;
|
|
8034
8065
|
}
|
|
@@ -8248,12 +8279,12 @@ var Logger = class {
|
|
|
8248
8279
|
* Log a single event. The event will be batched and uploaded behind the scenes if `logOptions.asyncFlush` is true.
|
|
8249
8280
|
*
|
|
8250
8281
|
* @param event The event to log.
|
|
8251
|
-
* @param event.input:
|
|
8252
|
-
* @param event.output:
|
|
8253
|
-
* @param event.expected:
|
|
8254
|
-
* @param event.scores:
|
|
8282
|
+
* @param event.input: (Optional) the arguments that uniquely define a user input (an arbitrary, JSON serializable object).
|
|
8283
|
+
* @param event.output: (Optional) the output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
8284
|
+
* @param event.expected: (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
8285
|
+
* @param event.scores: (Optional) a dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
|
|
8255
8286
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
8256
|
-
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end"
|
|
8287
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
8257
8288
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
8258
8289
|
* :returns: The `id` of the logged event.
|
|
8259
8290
|
*/
|
|
@@ -8307,7 +8338,9 @@ var Logger = class {
|
|
|
8307
8338
|
};
|
|
8308
8339
|
}
|
|
8309
8340
|
/**
|
|
8310
|
-
* Lower-level alternative to `traced
|
|
8341
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
8342
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
8343
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
8311
8344
|
*
|
|
8312
8345
|
* See `traced` for full details.
|
|
8313
8346
|
*/
|
|
@@ -8458,16 +8491,17 @@ function init(project, options = {}) {
|
|
|
8458
8491
|
baseExperiment,
|
|
8459
8492
|
isPublic,
|
|
8460
8493
|
update,
|
|
8461
|
-
|
|
8494
|
+
appUrl,
|
|
8462
8495
|
apiKey,
|
|
8463
8496
|
orgName,
|
|
8464
|
-
metadata
|
|
8497
|
+
metadata,
|
|
8498
|
+
gitMetadataSettings
|
|
8465
8499
|
} = options || {};
|
|
8466
8500
|
const lazyMetadata = (async () => {
|
|
8467
8501
|
await login({
|
|
8468
8502
|
orgName,
|
|
8469
8503
|
apiKey,
|
|
8470
|
-
|
|
8504
|
+
appUrl
|
|
8471
8505
|
});
|
|
8472
8506
|
const args = {
|
|
8473
8507
|
project_name: project,
|
|
@@ -8482,9 +8516,20 @@ function init(project, options = {}) {
|
|
|
8482
8516
|
if (update) {
|
|
8483
8517
|
args["update"] = update;
|
|
8484
8518
|
}
|
|
8485
|
-
|
|
8486
|
-
|
|
8487
|
-
|
|
8519
|
+
let mergedGitMetadataSettings = {
|
|
8520
|
+
..._state.gitMetadataSettings || {
|
|
8521
|
+
collect: "all"
|
|
8522
|
+
}
|
|
8523
|
+
};
|
|
8524
|
+
if (gitMetadataSettings) {
|
|
8525
|
+
mergedGitMetadataSettings = mergeGitMetadataSettings(
|
|
8526
|
+
mergedGitMetadataSettings,
|
|
8527
|
+
gitMetadataSettings
|
|
8528
|
+
);
|
|
8529
|
+
}
|
|
8530
|
+
const repoStatus2 = await isomorph_default.getRepoStatus(gitMetadataSettings);
|
|
8531
|
+
if (repoStatus2) {
|
|
8532
|
+
args["repo_info"] = repoStatus2;
|
|
8488
8533
|
}
|
|
8489
8534
|
if (baseExperiment) {
|
|
8490
8535
|
args["base_experiment"] = baseExperiment;
|
|
@@ -8549,12 +8594,12 @@ function withLogger(callback, options = {}) {
|
|
|
8549
8594
|
return callback(logger);
|
|
8550
8595
|
}
|
|
8551
8596
|
function initDataset(project, options = {}) {
|
|
8552
|
-
const { dataset, description, version,
|
|
8597
|
+
const { dataset, description, version, appUrl, apiKey, orgName } = options || {};
|
|
8553
8598
|
const lazyMetadata = (async () => {
|
|
8554
8599
|
await login({
|
|
8555
8600
|
orgName,
|
|
8556
8601
|
apiKey,
|
|
8557
|
-
|
|
8602
|
+
appUrl
|
|
8558
8603
|
});
|
|
8559
8604
|
const args = {
|
|
8560
8605
|
org_id: _state.orgId,
|
|
@@ -8590,7 +8635,7 @@ function initLogger(options = {}) {
|
|
|
8590
8635
|
projectName,
|
|
8591
8636
|
projectId,
|
|
8592
8637
|
asyncFlush,
|
|
8593
|
-
|
|
8638
|
+
appUrl,
|
|
8594
8639
|
apiKey,
|
|
8595
8640
|
orgName,
|
|
8596
8641
|
forceLogin
|
|
@@ -8599,7 +8644,7 @@ function initLogger(options = {}) {
|
|
|
8599
8644
|
await login({
|
|
8600
8645
|
orgName,
|
|
8601
8646
|
apiKey,
|
|
8602
|
-
|
|
8647
|
+
appUrl,
|
|
8603
8648
|
forceLogin
|
|
8604
8649
|
});
|
|
8605
8650
|
const org_id = _state.orgId;
|
|
@@ -8645,7 +8690,7 @@ function initLogger(options = {}) {
|
|
|
8645
8690
|
}
|
|
8646
8691
|
async function login(options = {}) {
|
|
8647
8692
|
const {
|
|
8648
|
-
|
|
8693
|
+
appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
|
|
8649
8694
|
apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
|
|
8650
8695
|
orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
|
|
8651
8696
|
} = options || {};
|
|
@@ -8654,11 +8699,11 @@ async function login(options = {}) {
|
|
|
8654
8699
|
return;
|
|
8655
8700
|
}
|
|
8656
8701
|
_state.resetLoginInfo();
|
|
8657
|
-
_state.
|
|
8702
|
+
_state.appUrl = appUrl;
|
|
8658
8703
|
let conn = null;
|
|
8659
8704
|
if (apiKey !== void 0) {
|
|
8660
8705
|
const resp = await checkResponse(
|
|
8661
|
-
await fetch(_urljoin(_state.
|
|
8706
|
+
await fetch(_urljoin(_state.appUrl, `/api/apikey/login`), {
|
|
8662
8707
|
method: "POST",
|
|
8663
8708
|
headers: {
|
|
8664
8709
|
"Content-Type": "application/json"
|
|
@@ -8775,7 +8820,8 @@ function _check_org_info(org_info, org_name) {
|
|
|
8775
8820
|
if (org_name === void 0 || org.name === org_name) {
|
|
8776
8821
|
_state.orgId = org.id;
|
|
8777
8822
|
_state.orgName = org.name;
|
|
8778
|
-
_state.logUrl = isomorph_default.getEnv("
|
|
8823
|
+
_state.logUrl = isomorph_default.getEnv("BRAINTRUST_API_URL") ?? org.api_url;
|
|
8824
|
+
_state.gitMetadataSettings = org.git_metadata || void 0;
|
|
8779
8825
|
break;
|
|
8780
8826
|
}
|
|
8781
8827
|
}
|
|
@@ -8844,6 +8890,9 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
|
8844
8890
|
"Exactly one of input or inputs (deprecated) must be specified. Prefer input."
|
|
8845
8891
|
);
|
|
8846
8892
|
}
|
|
8893
|
+
if (!event.output) {
|
|
8894
|
+
throw new Error("output must be specified");
|
|
8895
|
+
}
|
|
8847
8896
|
if (!event.scores) {
|
|
8848
8897
|
throw new Error("scores must be specified");
|
|
8849
8898
|
}
|
|
@@ -8891,10 +8940,10 @@ var Experiment = class {
|
|
|
8891
8940
|
* @param event The event to log.
|
|
8892
8941
|
* @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
|
|
8893
8942
|
* @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
8894
|
-
* @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
8943
|
+
* @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
8895
8944
|
* @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
8896
8945
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
8897
|
-
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end"
|
|
8946
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
8898
8947
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
8899
8948
|
* @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
8900
8949
|
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
@@ -8933,7 +8982,9 @@ var Experiment = class {
|
|
|
8933
8982
|
};
|
|
8934
8983
|
}
|
|
8935
8984
|
/**
|
|
8936
|
-
* Lower-level alternative to `traced
|
|
8985
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
8986
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
8987
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
8937
8988
|
*
|
|
8938
8989
|
* See `traced` for full details.
|
|
8939
8990
|
*/
|
|
@@ -8958,7 +9009,7 @@ var Experiment = class {
|
|
|
8958
9009
|
let { summarizeScores = true, comparisonExperimentId = void 0 } = options || {};
|
|
8959
9010
|
await this.bgLogger.flush();
|
|
8960
9011
|
const state = await this.getState();
|
|
8961
|
-
const projectUrl = `${state.
|
|
9012
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
8962
9013
|
state.orgName
|
|
8963
9014
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
8964
9015
|
const experimentUrl = `${projectUrl}/${encodeURIComponent(
|
|
@@ -9054,9 +9105,9 @@ var SpanImpl = class _SpanImpl {
|
|
|
9054
9105
|
})();
|
|
9055
9106
|
this.internalData = {
|
|
9056
9107
|
metrics: {
|
|
9057
|
-
start: args.startTime ?? getCurrentUnixTimestamp()
|
|
9058
|
-
...callerLocation
|
|
9108
|
+
start: args.startTime ?? getCurrentUnixTimestamp()
|
|
9059
9109
|
},
|
|
9110
|
+
context: { ...callerLocation },
|
|
9060
9111
|
span_attributes: { ...args.spanAttributes, name },
|
|
9061
9112
|
created: (/* @__PURE__ */ new Date()).toISOString()
|
|
9062
9113
|
};
|
|
@@ -9243,7 +9294,7 @@ var Dataset = class {
|
|
|
9243
9294
|
let { summarizeData = true } = options || {};
|
|
9244
9295
|
await this.bgLogger.flush();
|
|
9245
9296
|
const state = await this.getState();
|
|
9246
|
-
const projectUrl = `${state.
|
|
9297
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
9247
9298
|
state.orgName
|
|
9248
9299
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
9249
9300
|
const datasetUrl = `${projectUrl}/d/${encodeURIComponent(await this.name)}`;
|
package/dist/isomorph.d.ts
CHANGED
|
@@ -1,13 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
commit?: string;
|
|
3
|
-
branch?: string;
|
|
4
|
-
tag?: string;
|
|
5
|
-
dirty: boolean;
|
|
6
|
-
author_name?: string;
|
|
7
|
-
author_email?: string;
|
|
8
|
-
commit_message?: string;
|
|
9
|
-
commit_time?: string;
|
|
10
|
-
}
|
|
1
|
+
import { GitMetadataSettings, RepoStatus } from "@braintrust/core";
|
|
11
2
|
export interface CallerLocation {
|
|
12
3
|
caller_functionname: string;
|
|
13
4
|
caller_filename: string;
|
|
@@ -19,7 +10,7 @@ export interface IsoAsyncLocalStorage<T> {
|
|
|
19
10
|
getStore(): T | undefined;
|
|
20
11
|
}
|
|
21
12
|
export interface Common {
|
|
22
|
-
getRepoStatus: () => Promise<RepoStatus | undefined>;
|
|
13
|
+
getRepoStatus: (settings?: GitMetadataSettings) => Promise<RepoStatus | undefined>;
|
|
23
14
|
getPastNAncestors: () => Promise<string[]>;
|
|
24
15
|
getEnv: (name: string) => string | undefined;
|
|
25
16
|
getCallerLocation: () => CallerLocation | undefined;
|
package/dist/logger.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/// <reference lib="dom" />
|
|
2
|
-
import { IS_MERGE_FIELD, PARENT_ID_FIELD, Source, AUDIT_SOURCE_FIELD, AUDIT_METADATA_FIELD } from "@braintrust/core";
|
|
2
|
+
import { IS_MERGE_FIELD, PARENT_ID_FIELD, Source, AUDIT_SOURCE_FIELD, AUDIT_METADATA_FIELD, GitMetadataSettings } from "@braintrust/core";
|
|
3
3
|
import { IsoAsyncLocalStorage } from "./isomorph";
|
|
4
4
|
export type Metadata = Record<string, unknown>;
|
|
5
5
|
export type SetCurrentArg = {
|
|
@@ -64,7 +64,9 @@ export interface Span {
|
|
|
64
64
|
*/
|
|
65
65
|
traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
|
|
66
66
|
/**
|
|
67
|
-
* Lower-level alternative to `traced
|
|
67
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
68
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
69
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
68
70
|
*
|
|
69
71
|
* See `traced` for full details.
|
|
70
72
|
*
|
|
@@ -111,12 +113,13 @@ declare class BraintrustState {
|
|
|
111
113
|
currentExperiment: Experiment | undefined;
|
|
112
114
|
currentLogger: Logger<false> | undefined;
|
|
113
115
|
currentSpan: IsoAsyncLocalStorage<Span>;
|
|
114
|
-
|
|
116
|
+
appUrl: string | null;
|
|
115
117
|
loginToken: string | null;
|
|
116
118
|
orgId: string | null;
|
|
117
119
|
orgName: string | null;
|
|
118
120
|
logUrl: string | null;
|
|
119
121
|
loggedIn: boolean;
|
|
122
|
+
gitMetadataSettings?: GitMetadataSettings;
|
|
120
123
|
private _apiConn;
|
|
121
124
|
private _logConn;
|
|
122
125
|
constructor();
|
|
@@ -176,12 +179,12 @@ export declare class Logger<IsAsyncFlush extends boolean> {
|
|
|
176
179
|
* Log a single event. The event will be batched and uploaded behind the scenes if `logOptions.asyncFlush` is true.
|
|
177
180
|
*
|
|
178
181
|
* @param event The event to log.
|
|
179
|
-
* @param event.input:
|
|
180
|
-
* @param event.output:
|
|
181
|
-
* @param event.expected:
|
|
182
|
-
* @param event.scores:
|
|
182
|
+
* @param event.input: (Optional) the arguments that uniquely define a user input (an arbitrary, JSON serializable object).
|
|
183
|
+
* @param event.output: (Optional) the output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
184
|
+
* @param event.expected: (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
185
|
+
* @param event.scores: (Optional) a dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and grouth truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
|
|
183
186
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
184
|
-
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end"
|
|
187
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
185
188
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, BrainTrust will generate one for you.
|
|
186
189
|
* :returns: The `id` of the logged event.
|
|
187
190
|
*/
|
|
@@ -194,7 +197,9 @@ export declare class Logger<IsAsyncFlush extends boolean> {
|
|
|
194
197
|
traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): PromiseUnless<IsAsyncFlush, R>;
|
|
195
198
|
private lazyParentIds;
|
|
196
199
|
/**
|
|
197
|
-
* Lower-level alternative to `traced
|
|
200
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
201
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
202
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
198
203
|
*
|
|
199
204
|
* See `traced` for full details.
|
|
200
205
|
*/
|
|
@@ -232,7 +237,7 @@ export type OtherExperimentLogFields = {
|
|
|
232
237
|
datasetRecordId: string;
|
|
233
238
|
};
|
|
234
239
|
export type ExperimentLogPartialArgs = Partial<OtherExperimentLogFields> & Partial<InputField | InputsField>;
|
|
235
|
-
export type ExperimentLogFullArgs = Partial<Omit<OtherExperimentLogFields, "scores">> & Required<Pick<OtherExperimentLogFields, "scores">> & Partial<InputField | InputsField> & Partial<IdField>;
|
|
240
|
+
export type ExperimentLogFullArgs = Partial<Omit<OtherExperimentLogFields, "output" | "scores">> & Required<Pick<OtherExperimentLogFields, "output" | "scores">> & Partial<InputField | InputsField> & Partial<IdField>;
|
|
236
241
|
export type LogFeedbackFullArgs = IdField & Partial<Omit<OtherExperimentLogFields, "output" | "metrics" | "datasetRecordId"> & {
|
|
237
242
|
comment: string;
|
|
238
243
|
source: Source;
|
|
@@ -259,6 +264,7 @@ type ExperimentEvent = Partial<InputField> & Partial<OtherExperimentLogFields> &
|
|
|
259
264
|
created: string;
|
|
260
265
|
span_parents: string[];
|
|
261
266
|
span_attributes: Record<string, unknown>;
|
|
267
|
+
context: Record<string, unknown>;
|
|
262
268
|
[PARENT_ID_FIELD]: string;
|
|
263
269
|
[AUDIT_SOURCE_FIELD]: Source;
|
|
264
270
|
[AUDIT_METADATA_FIELD]?: Record<string, unknown>;
|
|
@@ -311,10 +317,11 @@ export type InitOptions = {
|
|
|
311
317
|
update?: boolean;
|
|
312
318
|
baseExperiment?: string;
|
|
313
319
|
isPublic?: boolean;
|
|
314
|
-
|
|
320
|
+
appUrl?: string;
|
|
315
321
|
apiKey?: string;
|
|
316
322
|
orgName?: string;
|
|
317
323
|
metadata?: Metadata;
|
|
324
|
+
gitMetadataSettings?: GitMetadataSettings;
|
|
318
325
|
setCurrent?: boolean;
|
|
319
326
|
};
|
|
320
327
|
/**
|
|
@@ -330,7 +337,7 @@ export type InitOptions = {
|
|
|
330
337
|
* @param options.baseExperiment An optional experiment name to use as a base. If specified, the new experiment will be summarized and compared to this
|
|
331
338
|
* experiment. Otherwise, it will pick an experiment by finding the closest ancestor on the default (e.g. main) branch.
|
|
332
339
|
* @param options.isPublic An optional parameter to control whether the experiment is publicly visible to anybody with the link or privately visible to only members of the organization. Defaults to private.
|
|
333
|
-
* @param options.
|
|
340
|
+
* @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
|
|
334
341
|
* @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
|
|
335
342
|
* key is specified, will prompt the user to login.
|
|
336
343
|
* @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
|
|
@@ -338,6 +345,7 @@ export type InitOptions = {
|
|
|
338
345
|
* about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the
|
|
339
346
|
* `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any
|
|
340
347
|
* JSON-serializable type, but its keys must be strings.
|
|
348
|
+
* @param options.gitMetadataSettings (Optional) Settings for collecting git metadata. By default, will collect all git metadata fields allowed in org-level settings.
|
|
341
349
|
* @param setCurrent If true (the default), set the global current-experiment to the newly-created one.
|
|
342
350
|
* @returns The newly created Experiment.
|
|
343
351
|
*/
|
|
@@ -354,7 +362,7 @@ type InitDatasetOptions = {
|
|
|
354
362
|
dataset?: string;
|
|
355
363
|
description?: string;
|
|
356
364
|
version?: string;
|
|
357
|
-
|
|
365
|
+
appUrl?: string;
|
|
358
366
|
apiKey?: string;
|
|
359
367
|
orgName?: string;
|
|
360
368
|
};
|
|
@@ -365,7 +373,7 @@ type InitDatasetOptions = {
|
|
|
365
373
|
* @param options Additional options for configuring init().
|
|
366
374
|
* @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically.
|
|
367
375
|
* @param options.description An optional description of the dataset.
|
|
368
|
-
* @param options.
|
|
376
|
+
* @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
|
|
369
377
|
* @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
|
|
370
378
|
* key is specified, will prompt the user to login.
|
|
371
379
|
* @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
|
|
@@ -382,7 +390,7 @@ type AsyncFlushArg<IsAsyncFlush> = {
|
|
|
382
390
|
type InitLoggerOptions<IsAsyncFlush> = {
|
|
383
391
|
projectName?: string;
|
|
384
392
|
projectId?: string;
|
|
385
|
-
|
|
393
|
+
appUrl?: string;
|
|
386
394
|
apiKey?: string;
|
|
387
395
|
orgName?: string;
|
|
388
396
|
forceLogin?: boolean;
|
|
@@ -395,7 +403,7 @@ type InitLoggerOptions<IsAsyncFlush> = {
|
|
|
395
403
|
* @param options.projectName The name of the project to log into. If unspecified, will default to the Global project.
|
|
396
404
|
* @param options.projectId The id of the project to log into. This takes precedence over projectName if specified.
|
|
397
405
|
* @param options.asyncFlush If true, will log asynchronously in the background. Otherwise, will log synchronously. (false by default, to support serverless environments)
|
|
398
|
-
* @param options.
|
|
406
|
+
* @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
|
|
399
407
|
* @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
|
|
400
408
|
* key is specified, will prompt the user to login.
|
|
401
409
|
* @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple.
|
|
@@ -409,14 +417,14 @@ export declare function initLogger<IsAsyncFlush extends boolean = false>(options
|
|
|
409
417
|
* https://www.braintrustdata.com/app/token. This method is called automatically by `init()`.
|
|
410
418
|
*
|
|
411
419
|
* @param options Options for configuring login().
|
|
412
|
-
* @param options.
|
|
420
|
+
* @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrustdata.com.
|
|
413
421
|
* @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API
|
|
414
422
|
* key is specified, will prompt the user to login.
|
|
415
423
|
* @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple organizations.
|
|
416
424
|
* @param options.forceLogin Login again, even if you have already logged in (by default, this function will exit quickly if you have already logged in)
|
|
417
425
|
*/
|
|
418
426
|
export declare function login(options?: {
|
|
419
|
-
|
|
427
|
+
appUrl?: string;
|
|
420
428
|
apiKey?: string;
|
|
421
429
|
orgName?: string;
|
|
422
430
|
forceLogin?: boolean;
|
|
@@ -470,7 +478,11 @@ export declare function getSpanParentObject<IsAsyncFlush extends boolean>(option
|
|
|
470
478
|
*/
|
|
471
479
|
export declare function traced<IsAsyncFlush extends boolean = false, R = void>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg & AsyncFlushArg<IsAsyncFlush>): PromiseUnless<IsAsyncFlush, R>;
|
|
472
480
|
/**
|
|
473
|
-
* Lower-level alternative to `traced
|
|
481
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
482
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
483
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
484
|
+
*
|
|
485
|
+
* See `traced` for full details.
|
|
474
486
|
*/
|
|
475
487
|
export declare function startSpan<IsAsyncFlush extends boolean = false>(args?: StartSpanArgs & AsyncFlushArg<IsAsyncFlush>): Span;
|
|
476
488
|
/**
|
|
@@ -502,10 +514,10 @@ export declare class Experiment {
|
|
|
502
514
|
* @param event The event to log.
|
|
503
515
|
* @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
|
|
504
516
|
* @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
505
|
-
* @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
517
|
+
* @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
506
518
|
* @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
507
519
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
508
|
-
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end"
|
|
520
|
+
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
509
521
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
|
|
510
522
|
* @param event.dataset_record_id: (Optional) the id of the dataset record that this event is associated with. This field is required if and only if the experiment is associated with a dataset.
|
|
511
523
|
* @param event.inputs: (Deprecated) the same as `input` (will be removed in a future version).
|
|
@@ -520,7 +532,9 @@ export declare class Experiment {
|
|
|
520
532
|
traced<R>(callback: (span: Span) => R, args?: StartSpanArgs & SetCurrentArg): R;
|
|
521
533
|
private lazyParentIds;
|
|
522
534
|
/**
|
|
523
|
-
* Lower-level alternative to `traced
|
|
535
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
536
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
537
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
524
538
|
*
|
|
525
539
|
* See `traced` for full details.
|
|
526
540
|
*/
|