braintrust 0.0.92 → 0.0.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +252 -199
- package/dist/cli.js +171 -128
- package/dist/index.js +255 -200
- package/dist/logger.d.ts +36 -25
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/util.d.ts +6 -0
- package/package.json +1 -1
package/dist/browser.js
CHANGED
|
@@ -183,6 +183,21 @@ function getCurrentUnixTimestamp() {
|
|
|
183
183
|
function isEmpty(a) {
|
|
184
184
|
return a === void 0 || a === null;
|
|
185
185
|
}
|
|
186
|
+
/**
 * Wraps an async factory so its result is computed on first use and then
 * cached.
 *
 * Concurrent callers share a single in-flight computation: `callable` is
 * invoked at most once until it settles. A successful result is cached for
 * the lifetime of the instance. A rejected computation is NOT cached, so a
 * later `get()` call retries the factory.
 */
var LazyValue = class {
  /**
   * @param callable Zero-argument function returning the value (or a promise
   *   of it). It is not invoked until the first `get()` call.
   */
  constructor(callable) {
    this.value = {
      hasComputed: false
    };
    this.callable = callable;
    // Promise of the computation currently running, or null when idle.
    this.pending = null;
  }
  /**
   * Resolve the wrapped value, computing it on first use.
   *
   * @returns A promise of the cached value; it rejects with whatever
   *   `callable` throws or rejects with.
   */
  async get() {
    if (this.value.hasComputed) {
      return this.value.val;
    }
    if (this.pending === null) {
      // Start the computation synchronously and publish the promise BEFORE
      // yielding, so overlapping `get()` calls reuse it instead of invoking
      // `callable` again. The async wrapper converts a synchronous throw
      // from `callable` into a rejection.
      const inFlight = (async () => this.callable())();
      this.pending = inFlight;
      const release = () => {
        if (this.pending === inFlight) {
          this.pending = null;
        }
      };
      inFlight.then(
        (val) => {
          this.value = { hasComputed: true, val };
          release();
        },
        // On failure only drop the in-flight marker so the next call retries;
        // the rejection itself still reaches callers through `this.pending`.
        release
      );
    }
    return this.pending;
  }
};
|
|
186
201
|
|
|
187
202
|
// src/logger.ts
|
|
188
203
|
var NoopSpan = class {
|
|
@@ -212,7 +227,7 @@ var NoopSpan = class {
|
|
|
212
227
|
var NOOP_SPAN = new NoopSpan();
|
|
213
228
|
var BraintrustState = class {
|
|
214
229
|
constructor() {
|
|
215
|
-
this.
|
|
230
|
+
this.appUrl = null;
|
|
216
231
|
this.loginToken = null;
|
|
217
232
|
this.orgId = null;
|
|
218
233
|
this.orgName = null;
|
|
@@ -228,7 +243,7 @@ var BraintrustState = class {
|
|
|
228
243
|
globalThis.__inherited_braintrust_state = this;
|
|
229
244
|
}
|
|
230
245
|
resetLoginInfo() {
|
|
231
|
-
this.
|
|
246
|
+
this.appUrl = null;
|
|
232
247
|
this.loginToken = null;
|
|
233
248
|
this.orgId = null;
|
|
234
249
|
this.orgName = null;
|
|
@@ -240,10 +255,10 @@ var BraintrustState = class {
|
|
|
240
255
|
}
|
|
241
256
|
apiConn() {
|
|
242
257
|
if (!this._apiConn) {
|
|
243
|
-
if (!this.
|
|
244
|
-
throw new Error("Must initialize
|
|
258
|
+
if (!this.appUrl) {
|
|
259
|
+
throw new Error("Must initialize appUrl before requesting apiConn");
|
|
245
260
|
}
|
|
246
|
-
this._apiConn = new HTTPConnection(this.
|
|
261
|
+
this._apiConn = new HTTPConnection(this.appUrl);
|
|
247
262
|
}
|
|
248
263
|
return this._apiConn;
|
|
249
264
|
}
|
|
@@ -397,25 +412,25 @@ function logFeedbackImpl(bgLogger, parentIds, {
|
|
|
397
412
|
updateEvent = Object.fromEntries(
|
|
398
413
|
Object.entries(updateEvent).filter(([_, v]) => !isEmpty(v))
|
|
399
414
|
);
|
|
400
|
-
const trueParentIds = (async () => {
|
|
401
|
-
const { kind, ...ids } = await parentIds;
|
|
415
|
+
const trueParentIds = new LazyValue(async () => {
|
|
416
|
+
const { kind, ...ids } = await parentIds.get();
|
|
402
417
|
return ids;
|
|
403
|
-
})
|
|
418
|
+
});
|
|
404
419
|
if (Object.keys(updateEvent).length > 0) {
|
|
405
|
-
const record = (async () => {
|
|
420
|
+
const record = new LazyValue(async () => {
|
|
406
421
|
return {
|
|
407
422
|
id,
|
|
408
423
|
...updateEvent,
|
|
409
|
-
...await trueParentIds,
|
|
424
|
+
...await trueParentIds.get(),
|
|
410
425
|
[AUDIT_SOURCE_FIELD]: source,
|
|
411
426
|
[AUDIT_METADATA_FIELD]: metadata,
|
|
412
427
|
[IS_MERGE_FIELD]: true
|
|
413
428
|
};
|
|
414
|
-
})
|
|
429
|
+
});
|
|
415
430
|
bgLogger.log([record]);
|
|
416
431
|
}
|
|
417
432
|
if (!isEmpty(comment)) {
|
|
418
|
-
const record = (async () => {
|
|
433
|
+
const record = new LazyValue(async () => {
|
|
419
434
|
return {
|
|
420
435
|
id: v4_default(),
|
|
421
436
|
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -427,11 +442,11 @@ function logFeedbackImpl(bgLogger, parentIds, {
|
|
|
427
442
|
comment: {
|
|
428
443
|
text: comment
|
|
429
444
|
},
|
|
430
|
-
...await trueParentIds,
|
|
445
|
+
...await trueParentIds.get(),
|
|
431
446
|
[AUDIT_SOURCE_FIELD]: source,
|
|
432
447
|
[AUDIT_METADATA_FIELD]: metadata
|
|
433
448
|
};
|
|
434
|
-
})
|
|
449
|
+
});
|
|
435
450
|
bgLogger.log([record]);
|
|
436
451
|
}
|
|
437
452
|
}
|
|
@@ -441,32 +456,34 @@ var Logger = class {
|
|
|
441
456
|
this.kind = "logger";
|
|
442
457
|
this.lazyMetadata = lazyMetadata;
|
|
443
458
|
this.logOptions = logOptions;
|
|
444
|
-
const logConn =
|
|
459
|
+
const logConn = new LazyValue(
|
|
460
|
+
() => this.getState().then((state) => state.logConn())
|
|
461
|
+
);
|
|
445
462
|
this.bgLogger = new BackgroundLogger(logConn);
|
|
446
463
|
this.lastStartTime = getCurrentUnixTimestamp();
|
|
447
464
|
}
|
|
448
465
|
get org_id() {
|
|
449
466
|
return (async () => {
|
|
450
|
-
return (await this.lazyMetadata).org_id;
|
|
467
|
+
return (await this.lazyMetadata.get()).org_id;
|
|
451
468
|
})();
|
|
452
469
|
}
|
|
453
470
|
get project() {
|
|
454
471
|
return (async () => {
|
|
455
|
-
return (await this.lazyMetadata).project;
|
|
472
|
+
return (await this.lazyMetadata.get()).project;
|
|
456
473
|
})();
|
|
457
474
|
}
|
|
458
475
|
async getState() {
|
|
459
|
-
await this.lazyMetadata;
|
|
476
|
+
await this.lazyMetadata.get();
|
|
460
477
|
return _state;
|
|
461
478
|
}
|
|
462
479
|
/**
|
|
463
480
|
* Log a single event. The event will be batched and uploaded behind the scenes if `logOptions.asyncFlush` is true.
|
|
464
481
|
*
|
|
465
482
|
* @param event The event to log.
|
|
466
|
-
* @param event.input:
|
|
467
|
-
* @param event.output:
|
|
468
|
-
* @param event.expected:
|
|
469
|
-
* @param event.scores:
|
|
483
|
+
* @param event.input: (Optional) the arguments that uniquely define a user input (an arbitrary, JSON serializable object).
|
|
484
|
+
* @param event.output: (Optional) the output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
485
|
+
* @param event.expected: (Optional) the ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
486
|
+
* @param event.scores: (Optional) a dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare logs.
|
|
470
487
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
471
488
|
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
472
489
|
* @param event.id: (Optional) a unique identifier for the event. If you don't provide one, Braintrust will generate one for you.
|
|
@@ -522,14 +539,16 @@ var Logger = class {
|
|
|
522
539
|
};
|
|
523
540
|
}
|
|
524
541
|
/**
|
|
525
|
-
* Lower-level alternative to `traced
|
|
542
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
543
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
544
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
526
545
|
*
|
|
527
546
|
* See `traced` for full details.
|
|
528
547
|
*/
|
|
529
548
|
startSpan(args) {
|
|
530
549
|
const { name, ...argsRest } = args ?? {};
|
|
531
550
|
return new SpanImpl({
|
|
532
|
-
parentIds: this.lazyParentIds(),
|
|
551
|
+
parentIds: new LazyValue(() => this.lazyParentIds()),
|
|
533
552
|
bgLogger: this.bgLogger,
|
|
534
553
|
name: name ?? "root",
|
|
535
554
|
...argsRest
|
|
@@ -547,7 +566,11 @@ var Logger = class {
|
|
|
547
566
|
* @param event.source (Optional) the source of the feedback. Must be one of "external" (default), "app", or "api".
|
|
548
567
|
*/
|
|
549
568
|
logFeedback(event) {
|
|
550
|
-
logFeedbackImpl(
|
|
569
|
+
logFeedbackImpl(
|
|
570
|
+
this.bgLogger,
|
|
571
|
+
new LazyValue(() => this.lazyParentIds()),
|
|
572
|
+
event
|
|
573
|
+
);
|
|
551
574
|
}
|
|
552
575
|
/*
|
|
553
576
|
* Flush any pending logs to the server.
|
|
@@ -597,9 +620,20 @@ var BackgroundLogger = class {
|
|
|
597
620
|
}
|
|
598
621
|
async flush_once(batchSize = DefaultBatchSize) {
|
|
599
622
|
this.active_flush_resolved = false;
|
|
600
|
-
const
|
|
623
|
+
const itemLazyValues = this.items;
|
|
601
624
|
this.items = [];
|
|
602
|
-
const allItems =
|
|
625
|
+
const allItems = await (async () => {
|
|
626
|
+
try {
|
|
627
|
+
const itemPromises = itemLazyValues.map((x) => x.get());
|
|
628
|
+
return mergeRowBatch(await Promise.all(itemPromises)).reverse();
|
|
629
|
+
} catch (e) {
|
|
630
|
+
console.warn(
|
|
631
|
+
"Encountered error when constructing records to flush:\n",
|
|
632
|
+
e
|
|
633
|
+
);
|
|
634
|
+
return [];
|
|
635
|
+
}
|
|
636
|
+
})();
|
|
603
637
|
let postPromises = [];
|
|
604
638
|
while (true) {
|
|
605
639
|
const items = [];
|
|
@@ -624,9 +658,7 @@ var BackgroundLogger = class {
|
|
|
624
658
|
for (let i = 0; i < NumRetries; i++) {
|
|
625
659
|
const startTime = now();
|
|
626
660
|
try {
|
|
627
|
-
return (await (await this.logConn).post_json("logs", itemsS)).map(
|
|
628
|
-
(res) => res.id
|
|
629
|
-
);
|
|
661
|
+
return (await (await this.logConn.get()).post_json("logs", itemsS)).map((res) => res.id);
|
|
630
662
|
} catch (e) {
|
|
631
663
|
const retryingText = i + 1 === NumRetries ? "" : " Retrying";
|
|
632
664
|
const errMsg = (() => {
|
|
@@ -673,88 +705,92 @@ function init(project, options = {}) {
|
|
|
673
705
|
baseExperiment,
|
|
674
706
|
isPublic,
|
|
675
707
|
update,
|
|
676
|
-
|
|
708
|
+
appUrl,
|
|
677
709
|
apiKey,
|
|
678
710
|
orgName,
|
|
679
711
|
metadata,
|
|
680
712
|
gitMetadataSettings
|
|
681
713
|
} = options || {};
|
|
682
|
-
const lazyMetadata =
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
if (description) {
|
|
696
|
-
args["description"] = description;
|
|
697
|
-
}
|
|
698
|
-
if (update) {
|
|
699
|
-
args["update"] = update;
|
|
700
|
-
}
|
|
701
|
-
let mergedGitMetadataSettings = {
|
|
702
|
-
..._state.gitMetadataSettings || {
|
|
703
|
-
collect: "all"
|
|
714
|
+
const lazyMetadata = new LazyValue(
|
|
715
|
+
async () => {
|
|
716
|
+
await login({
|
|
717
|
+
orgName,
|
|
718
|
+
apiKey,
|
|
719
|
+
appUrl
|
|
720
|
+
});
|
|
721
|
+
const args = {
|
|
722
|
+
project_name: project,
|
|
723
|
+
org_id: _state.orgId
|
|
724
|
+
};
|
|
725
|
+
if (experiment) {
|
|
726
|
+
args["experiment_name"] = experiment;
|
|
704
727
|
}
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
args["repo_info"] = repoStatus;
|
|
715
|
-
}
|
|
716
|
-
if (baseExperiment) {
|
|
717
|
-
args["base_experiment"] = baseExperiment;
|
|
718
|
-
} else {
|
|
719
|
-
args["ancestor_commits"] = await isomorph_default.getPastNAncestors();
|
|
720
|
-
}
|
|
721
|
-
if (dataset !== void 0) {
|
|
722
|
-
args["dataset_id"] = dataset.id;
|
|
723
|
-
args["dataset_version"] = await dataset.version();
|
|
724
|
-
}
|
|
725
|
-
if (isPublic !== void 0) {
|
|
726
|
-
args["public"] = isPublic;
|
|
727
|
-
}
|
|
728
|
-
if (metadata) {
|
|
729
|
-
args["metadata"] = metadata;
|
|
730
|
-
}
|
|
731
|
-
let response = null;
|
|
732
|
-
while (true) {
|
|
733
|
-
try {
|
|
734
|
-
response = await _state.apiConn().post_json("api/experiment/register", args);
|
|
735
|
-
break;
|
|
736
|
-
} catch (e) {
|
|
737
|
-
if (args["base_experiment"] && `${"data" in e && e.data}`.includes("base experiment")) {
|
|
738
|
-
console.warn(`Base experiment ${args["base_experiment"]} not found.`);
|
|
739
|
-
delete args["base_experiment"];
|
|
740
|
-
} else {
|
|
741
|
-
throw e;
|
|
728
|
+
if (description) {
|
|
729
|
+
args["description"] = description;
|
|
730
|
+
}
|
|
731
|
+
if (update) {
|
|
732
|
+
args["update"] = update;
|
|
733
|
+
}
|
|
734
|
+
let mergedGitMetadataSettings = {
|
|
735
|
+
..._state.gitMetadataSettings || {
|
|
736
|
+
collect: "all"
|
|
742
737
|
}
|
|
738
|
+
};
|
|
739
|
+
if (gitMetadataSettings) {
|
|
740
|
+
mergedGitMetadataSettings = mergeGitMetadataSettings(
|
|
741
|
+
mergedGitMetadataSettings,
|
|
742
|
+
gitMetadataSettings
|
|
743
|
+
);
|
|
743
744
|
}
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
id: response.project.id,
|
|
748
|
-
name: response.project.name,
|
|
749
|
-
fullInfo: response.project
|
|
750
|
-
},
|
|
751
|
-
experiment: {
|
|
752
|
-
id: response.experiment.id,
|
|
753
|
-
name: response.experiment.name,
|
|
754
|
-
fullInfo: response.experiment
|
|
745
|
+
const repoStatus = await isomorph_default.getRepoStatus(gitMetadataSettings);
|
|
746
|
+
if (repoStatus) {
|
|
747
|
+
args["repo_info"] = repoStatus;
|
|
755
748
|
}
|
|
756
|
-
|
|
757
|
-
|
|
749
|
+
if (baseExperiment) {
|
|
750
|
+
args["base_experiment"] = baseExperiment;
|
|
751
|
+
} else {
|
|
752
|
+
args["ancestor_commits"] = await isomorph_default.getPastNAncestors();
|
|
753
|
+
}
|
|
754
|
+
if (dataset !== void 0) {
|
|
755
|
+
args["dataset_id"] = dataset.id;
|
|
756
|
+
args["dataset_version"] = await dataset.version();
|
|
757
|
+
}
|
|
758
|
+
if (isPublic !== void 0) {
|
|
759
|
+
args["public"] = isPublic;
|
|
760
|
+
}
|
|
761
|
+
if (metadata) {
|
|
762
|
+
args["metadata"] = metadata;
|
|
763
|
+
}
|
|
764
|
+
let response = null;
|
|
765
|
+
while (true) {
|
|
766
|
+
try {
|
|
767
|
+
response = await _state.apiConn().post_json("api/experiment/register", args);
|
|
768
|
+
break;
|
|
769
|
+
} catch (e) {
|
|
770
|
+
if (args["base_experiment"] && `${"data" in e && e.data}`.includes("base experiment")) {
|
|
771
|
+
console.warn(
|
|
772
|
+
`Base experiment ${args["base_experiment"]} not found.`
|
|
773
|
+
);
|
|
774
|
+
delete args["base_experiment"];
|
|
775
|
+
} else {
|
|
776
|
+
throw e;
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
return {
|
|
781
|
+
project: {
|
|
782
|
+
id: response.project.id,
|
|
783
|
+
name: response.project.name,
|
|
784
|
+
fullInfo: response.project
|
|
785
|
+
},
|
|
786
|
+
experiment: {
|
|
787
|
+
id: response.experiment.id,
|
|
788
|
+
name: response.experiment.name,
|
|
789
|
+
fullInfo: response.experiment
|
|
790
|
+
}
|
|
791
|
+
};
|
|
792
|
+
}
|
|
793
|
+
);
|
|
758
794
|
const ret = new Experiment(lazyMetadata, dataset);
|
|
759
795
|
if (options.setCurrent ?? true) {
|
|
760
796
|
_state.currentExperiment = ret;
|
|
@@ -776,33 +812,35 @@ function withLogger(callback, options = {}) {
|
|
|
776
812
|
return callback(logger);
|
|
777
813
|
}
|
|
778
814
|
function initDataset(project, options = {}) {
|
|
779
|
-
const { dataset, description, version,
|
|
780
|
-
const lazyMetadata =
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
815
|
+
const { dataset, description, version, appUrl, apiKey, orgName } = options || {};
|
|
816
|
+
const lazyMetadata = new LazyValue(
|
|
817
|
+
async () => {
|
|
818
|
+
await login({
|
|
819
|
+
orgName,
|
|
820
|
+
apiKey,
|
|
821
|
+
appUrl
|
|
822
|
+
});
|
|
823
|
+
const args = {
|
|
824
|
+
org_id: _state.orgId,
|
|
825
|
+
project_name: project,
|
|
826
|
+
dataset_name: dataset,
|
|
827
|
+
description
|
|
828
|
+
};
|
|
829
|
+
const response = await _state.apiConn().post_json("api/dataset/register", args);
|
|
830
|
+
return {
|
|
831
|
+
project: {
|
|
832
|
+
id: response.project.id,
|
|
833
|
+
name: response.project.name,
|
|
834
|
+
fullInfo: response.project
|
|
835
|
+
},
|
|
836
|
+
dataset: {
|
|
837
|
+
id: response.dataset.id,
|
|
838
|
+
name: response.dataset.name,
|
|
839
|
+
fullInfo: response.dataset
|
|
840
|
+
}
|
|
841
|
+
};
|
|
842
|
+
}
|
|
843
|
+
);
|
|
806
844
|
return new Dataset(lazyMetadata, version);
|
|
807
845
|
}
|
|
808
846
|
function withDataset(project, callback, options = {}) {
|
|
@@ -817,51 +855,53 @@ function initLogger(options = {}) {
|
|
|
817
855
|
projectName,
|
|
818
856
|
projectId,
|
|
819
857
|
asyncFlush,
|
|
820
|
-
|
|
858
|
+
appUrl,
|
|
821
859
|
apiKey,
|
|
822
860
|
orgName,
|
|
823
861
|
forceLogin
|
|
824
862
|
} = options || {};
|
|
825
|
-
const lazyMetadata =
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
const org_id = _state.orgId;
|
|
833
|
-
if (projectId === void 0) {
|
|
834
|
-
const response = await _state.apiConn().post_json("api/project/register", {
|
|
835
|
-
project_name: projectName || GLOBAL_PROJECT,
|
|
836
|
-
org_id
|
|
837
|
-
});
|
|
838
|
-
return {
|
|
839
|
-
org_id,
|
|
840
|
-
project: {
|
|
841
|
-
id: response.project.id,
|
|
842
|
-
name: response.project.name,
|
|
843
|
-
fullInfo: response.project
|
|
844
|
-
}
|
|
845
|
-
};
|
|
846
|
-
} else if (projectName === void 0) {
|
|
847
|
-
const response = await _state.apiConn().get_json("api/project", {
|
|
848
|
-
id: projectId
|
|
863
|
+
const lazyMetadata = new LazyValue(
|
|
864
|
+
async () => {
|
|
865
|
+
await login({
|
|
866
|
+
orgName,
|
|
867
|
+
apiKey,
|
|
868
|
+
appUrl,
|
|
869
|
+
forceLogin
|
|
849
870
|
});
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
project
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
871
|
+
const org_id = _state.orgId;
|
|
872
|
+
if (projectId === void 0) {
|
|
873
|
+
const response = await _state.apiConn().post_json("api/project/register", {
|
|
874
|
+
project_name: projectName || GLOBAL_PROJECT,
|
|
875
|
+
org_id
|
|
876
|
+
});
|
|
877
|
+
return {
|
|
878
|
+
org_id,
|
|
879
|
+
project: {
|
|
880
|
+
id: response.project.id,
|
|
881
|
+
name: response.project.name,
|
|
882
|
+
fullInfo: response.project
|
|
883
|
+
}
|
|
884
|
+
};
|
|
885
|
+
} else if (projectName === void 0) {
|
|
886
|
+
const response = await _state.apiConn().get_json("api/project", {
|
|
887
|
+
id: projectId
|
|
888
|
+
});
|
|
889
|
+
return {
|
|
890
|
+
org_id,
|
|
891
|
+
project: {
|
|
892
|
+
id: projectId,
|
|
893
|
+
name: response.name,
|
|
894
|
+
fullInfo: response.project
|
|
895
|
+
}
|
|
896
|
+
};
|
|
897
|
+
} else {
|
|
898
|
+
return {
|
|
899
|
+
org_id,
|
|
900
|
+
project: { id: projectId, name: projectName, fullInfo: {} }
|
|
901
|
+
};
|
|
902
|
+
}
|
|
863
903
|
}
|
|
864
|
-
|
|
904
|
+
);
|
|
865
905
|
const ret = new Logger(lazyMetadata, {
|
|
866
906
|
asyncFlush
|
|
867
907
|
});
|
|
@@ -872,7 +912,7 @@ function initLogger(options = {}) {
|
|
|
872
912
|
}
|
|
873
913
|
async function login(options = {}) {
|
|
874
914
|
const {
|
|
875
|
-
|
|
915
|
+
appUrl = isomorph_default.getEnv("BRAINTRUST_APP_URL") || "https://www.braintrustdata.com",
|
|
876
916
|
apiKey = isomorph_default.getEnv("BRAINTRUST_API_KEY"),
|
|
877
917
|
orgName = isomorph_default.getEnv("BRAINTRUST_ORG_NAME")
|
|
878
918
|
} = options || {};
|
|
@@ -881,11 +921,11 @@ async function login(options = {}) {
|
|
|
881
921
|
return;
|
|
882
922
|
}
|
|
883
923
|
_state.resetLoginInfo();
|
|
884
|
-
_state.
|
|
924
|
+
_state.appUrl = appUrl;
|
|
885
925
|
let conn = null;
|
|
886
926
|
if (apiKey !== void 0) {
|
|
887
927
|
const resp = await checkResponse(
|
|
888
|
-
await fetch(_urljoin(_state.
|
|
928
|
+
await fetch(_urljoin(_state.appUrl, `/api/apikey/login`), {
|
|
889
929
|
method: "POST",
|
|
890
930
|
headers: {
|
|
891
931
|
"Content-Type": "application/json"
|
|
@@ -1002,7 +1042,7 @@ function _check_org_info(org_info, org_name) {
|
|
|
1002
1042
|
if (org_name === void 0 || org.name === org_name) {
|
|
1003
1043
|
_state.orgId = org.id;
|
|
1004
1044
|
_state.orgName = org.name;
|
|
1005
|
-
_state.logUrl = isomorph_default.getEnv("
|
|
1045
|
+
_state.logUrl = isomorph_default.getEnv("BRAINTRUST_API_URL") ?? org.api_url;
|
|
1006
1046
|
_state.gitMetadataSettings = org.git_metadata || void 0;
|
|
1007
1047
|
break;
|
|
1008
1048
|
}
|
|
@@ -1072,6 +1112,9 @@ function validateAndSanitizeExperimentLogFullArgs(event, hasDataset) {
|
|
|
1072
1112
|
"Exactly one of input or inputs (deprecated) must be specified. Prefer input."
|
|
1073
1113
|
);
|
|
1074
1114
|
}
|
|
1115
|
+
if (!event.output) {
|
|
1116
|
+
throw new Error("output must be specified");
|
|
1117
|
+
}
|
|
1075
1118
|
if (!event.scores) {
|
|
1076
1119
|
throw new Error("scores must be specified");
|
|
1077
1120
|
}
|
|
@@ -1090,27 +1133,29 @@ var Experiment = class {
|
|
|
1090
1133
|
this.kind = "experiment";
|
|
1091
1134
|
this.lazyMetadata = lazyMetadata;
|
|
1092
1135
|
this.dataset = dataset;
|
|
1093
|
-
const logConn =
|
|
1136
|
+
const logConn = new LazyValue(
|
|
1137
|
+
() => this.getState().then((state) => state.logConn())
|
|
1138
|
+
);
|
|
1094
1139
|
this.bgLogger = new BackgroundLogger(logConn);
|
|
1095
1140
|
this.lastStartTime = getCurrentUnixTimestamp();
|
|
1096
1141
|
}
|
|
1097
1142
|
get id() {
|
|
1098
1143
|
return (async () => {
|
|
1099
|
-
return (await this.lazyMetadata).experiment.id;
|
|
1144
|
+
return (await this.lazyMetadata.get()).experiment.id;
|
|
1100
1145
|
})();
|
|
1101
1146
|
}
|
|
1102
1147
|
get name() {
|
|
1103
1148
|
return (async () => {
|
|
1104
|
-
return (await this.lazyMetadata).experiment.name;
|
|
1149
|
+
return (await this.lazyMetadata.get()).experiment.name;
|
|
1105
1150
|
})();
|
|
1106
1151
|
}
|
|
1107
1152
|
get project() {
|
|
1108
1153
|
return (async () => {
|
|
1109
|
-
return (await this.lazyMetadata).project;
|
|
1154
|
+
return (await this.lazyMetadata.get()).project;
|
|
1110
1155
|
})();
|
|
1111
1156
|
}
|
|
1112
1157
|
async getState() {
|
|
1113
|
-
await this.lazyMetadata;
|
|
1158
|
+
await this.lazyMetadata.get();
|
|
1114
1159
|
return _state;
|
|
1115
1160
|
}
|
|
1116
1161
|
/**
|
|
@@ -1119,7 +1164,7 @@ var Experiment = class {
|
|
|
1119
1164
|
* @param event The event to log.
|
|
1120
1165
|
* @param event.input: The arguments that uniquely define a test case (an arbitrary, JSON serializable object). Later on, Braintrust will use the `input` to know whether two test cases are the same between experiments, so they should not contain experiment-specific state. A simple rule of thumb is that if you run the same experiment twice, the `input` should be identical.
|
|
1121
1166
|
* @param event.output: The output of your application, including post-processing (an arbitrary, JSON serializable object), that allows you to determine whether the result is correct or not. For example, in an app that generates SQL queries, the `output` should be the _result_ of the SQL query generated by the model, not the query itself, because there may be multiple valid queries that answer a single question.
|
|
1122
|
-
* @param event.expected: The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
1167
|
+
* @param event.expected: (Optional) The ground truth value (an arbitrary, JSON serializable object) that you'd compare to `output` to determine if your `output` value is correct or not. Braintrust currently does not compare `output` to `expected` for you, since there are so many different ways to do that correctly. Instead, these values are just used to help you navigate your experiments while digging into analyses. However, we may later use these values to re-score outputs or fine-tune your models.
|
|
1123
1168
|
* @param event.scores: A dictionary of numeric values (between 0 and 1) to log. The scores should give you a variety of signals that help you determine how accurate the outputs are compared to what you expect and diagnose failures. For example, a summarization app might have one score that tells you how accurate the summary is, and another that measures the word similarity between the generated and ground truth summary. The word similarity score could help you determine whether the summarization was covering similar concepts or not. You can use these scores to help you sort, filter, and compare experiments.
|
|
1124
1169
|
* @param event.metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings.
|
|
1125
1170
|
* @param event.metrics: (Optional) a dictionary of metrics to log. The following keys are populated automatically: "start", "end".
|
|
@@ -1161,14 +1206,16 @@ var Experiment = class {
|
|
|
1161
1206
|
};
|
|
1162
1207
|
}
|
|
1163
1208
|
/**
|
|
1164
|
-
* Lower-level alternative to `traced
|
|
1209
|
+
* Lower-level alternative to `traced`. This allows you to start a span yourself, and can be useful in situations
|
|
1210
|
+
* where you cannot use callbacks. However, spans started with `startSpan` will not be marked as the "current span",
|
|
1211
|
+
* so `currentSpan()` and `traced()` will be no-ops. If you want to mark a span as current, use `traced` instead.
|
|
1165
1212
|
*
|
|
1166
1213
|
* See `traced` for full details.
|
|
1167
1214
|
*/
|
|
1168
1215
|
startSpan(args) {
|
|
1169
1216
|
const { name, ...argsRest } = args ?? {};
|
|
1170
1217
|
return new SpanImpl({
|
|
1171
|
-
parentIds: this.lazyParentIds(),
|
|
1218
|
+
parentIds: new LazyValue(() => this.lazyParentIds()),
|
|
1172
1219
|
bgLogger: this.bgLogger,
|
|
1173
1220
|
name: name ?? "root",
|
|
1174
1221
|
...argsRest
|
|
@@ -1186,7 +1233,7 @@ var Experiment = class {
|
|
|
1186
1233
|
let { summarizeScores = true, comparisonExperimentId = void 0 } = options || {};
|
|
1187
1234
|
await this.bgLogger.flush();
|
|
1188
1235
|
const state = await this.getState();
|
|
1189
|
-
const projectUrl = `${state.
|
|
1236
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
1190
1237
|
state.orgName
|
|
1191
1238
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
1192
1239
|
const experimentUrl = `${projectUrl}/${encodeURIComponent(
|
|
@@ -1242,7 +1289,11 @@ var Experiment = class {
|
|
|
1242
1289
|
* @param event.source (Optional) the source of the feedback. Must be one of "external" (default), "app", or "api".
|
|
1243
1290
|
*/
|
|
1244
1291
|
logFeedback(event) {
|
|
1245
|
-
logFeedbackImpl(
|
|
1292
|
+
logFeedbackImpl(
|
|
1293
|
+
this.bgLogger,
|
|
1294
|
+
new LazyValue(() => this.lazyParentIds()),
|
|
1295
|
+
event
|
|
1296
|
+
);
|
|
1246
1297
|
}
|
|
1247
1298
|
/**
|
|
1248
1299
|
* Flush any pending rows to the server.
|
|
@@ -1323,18 +1374,18 @@ var SpanImpl = class _SpanImpl {
|
|
|
1323
1374
|
if (sanitizedAndInternalData.metrics?.end) {
|
|
1324
1375
|
this.loggedEndTime = sanitizedAndInternalData.metrics?.end;
|
|
1325
1376
|
}
|
|
1326
|
-
const parentIds = (async () => {
|
|
1327
|
-
const { kind, ...ids } = await this.parentIds;
|
|
1377
|
+
const parentIds = new LazyValue(async () => {
|
|
1378
|
+
const { kind, ...ids } = await this.parentIds.get();
|
|
1328
1379
|
return ids;
|
|
1329
|
-
})
|
|
1330
|
-
const record = (async () => {
|
|
1380
|
+
});
|
|
1381
|
+
const record = new LazyValue(async () => {
|
|
1331
1382
|
return {
|
|
1332
1383
|
...sanitizedAndInternalData,
|
|
1333
1384
|
...this.rowIds,
|
|
1334
|
-
...await parentIds,
|
|
1385
|
+
...await parentIds.get(),
|
|
1335
1386
|
[IS_MERGE_FIELD]: this.isMerge
|
|
1336
1387
|
};
|
|
1337
|
-
})
|
|
1388
|
+
});
|
|
1338
1389
|
this.bgLogger.log([record]);
|
|
1339
1390
|
}
|
|
1340
1391
|
logFeedback(event) {
|
|
@@ -1388,26 +1439,28 @@ var Dataset = class {
|
|
|
1388
1439
|
this._fetchedData = void 0;
|
|
1389
1440
|
this.lazyMetadata = lazyMetadata;
|
|
1390
1441
|
this.pinnedVersion = pinnedVersion;
|
|
1391
|
-
const logConn =
|
|
1442
|
+
const logConn = new LazyValue(
|
|
1443
|
+
() => this.getState().then((state) => state.logConn())
|
|
1444
|
+
);
|
|
1392
1445
|
this.bgLogger = new BackgroundLogger(logConn);
|
|
1393
1446
|
}
|
|
1394
1447
|
get id() {
|
|
1395
1448
|
return (async () => {
|
|
1396
|
-
return (await this.lazyMetadata).dataset.id;
|
|
1449
|
+
return (await this.lazyMetadata.get()).dataset.id;
|
|
1397
1450
|
})();
|
|
1398
1451
|
}
|
|
1399
1452
|
get name() {
|
|
1400
1453
|
return (async () => {
|
|
1401
|
-
return (await this.lazyMetadata).dataset.name;
|
|
1454
|
+
return (await this.lazyMetadata.get()).dataset.name;
|
|
1402
1455
|
})();
|
|
1403
1456
|
}
|
|
1404
1457
|
get project() {
|
|
1405
1458
|
return (async () => {
|
|
1406
|
-
return (await this.lazyMetadata).project;
|
|
1459
|
+
return (await this.lazyMetadata.get()).project;
|
|
1407
1460
|
})();
|
|
1408
1461
|
}
|
|
1409
1462
|
async getState() {
|
|
1410
|
-
await this.lazyMetadata;
|
|
1463
|
+
await this.lazyMetadata.get();
|
|
1411
1464
|
return _state;
|
|
1412
1465
|
}
|
|
1413
1466
|
/**
|
|
@@ -1438,7 +1491,7 @@ var Dataset = class {
|
|
|
1438
1491
|
}
|
|
1439
1492
|
}
|
|
1440
1493
|
const rowId = id || v4_default();
|
|
1441
|
-
const args = (async () => ({
|
|
1494
|
+
const args = new LazyValue(async () => ({
|
|
1442
1495
|
id: rowId,
|
|
1443
1496
|
inputs: input,
|
|
1444
1497
|
output,
|
|
@@ -1446,18 +1499,18 @@ var Dataset = class {
|
|
|
1446
1499
|
dataset_id: await this.id,
|
|
1447
1500
|
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1448
1501
|
metadata
|
|
1449
|
-
}))
|
|
1502
|
+
}));
|
|
1450
1503
|
this.bgLogger.log([args]);
|
|
1451
1504
|
return rowId;
|
|
1452
1505
|
}
|
|
1453
1506
|
delete(id) {
|
|
1454
|
-
const args = (async () => ({
|
|
1507
|
+
const args = new LazyValue(async () => ({
|
|
1455
1508
|
id,
|
|
1456
1509
|
project_id: (await this.project).id,
|
|
1457
1510
|
dataset_id: await this.id,
|
|
1458
1511
|
created: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1459
1512
|
_object_delete: true
|
|
1460
|
-
}))
|
|
1513
|
+
}));
|
|
1461
1514
|
this.bgLogger.log([args]);
|
|
1462
1515
|
return id;
|
|
1463
1516
|
}
|
|
@@ -1471,7 +1524,7 @@ var Dataset = class {
|
|
|
1471
1524
|
let { summarizeData = true } = options || {};
|
|
1472
1525
|
await this.bgLogger.flush();
|
|
1473
1526
|
const state = await this.getState();
|
|
1474
|
-
const projectUrl = `${state.
|
|
1527
|
+
const projectUrl = `${state.appUrl}/app/${encodeURIComponent(
|
|
1475
1528
|
state.orgName
|
|
1476
1529
|
)}/p/${encodeURIComponent((await this.project).name)}`;
|
|
1477
1530
|
const datasetUrl = `${projectUrl}/d/${encodeURIComponent(await this.name)}`;
|