braintrust 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +737 -672
- braintrust/audit.py +2 -2
- braintrust/cli/eval.py +6 -7
- braintrust/cli/push.py +11 -11
- braintrust/context.py +12 -17
- braintrust/contrib/temporal/__init__.py +16 -27
- braintrust/contrib/temporal/test_temporal.py +8 -3
- braintrust/devserver/auth.py +8 -8
- braintrust/devserver/cache.py +3 -4
- braintrust/devserver/cors.py +8 -7
- braintrust/devserver/dataset.py +3 -5
- braintrust/devserver/eval_hooks.py +7 -6
- braintrust/devserver/schemas.py +22 -19
- braintrust/devserver/server.py +19 -12
- braintrust/devserver/test_cached_login.py +4 -4
- braintrust/framework.py +128 -140
- braintrust/framework2.py +88 -87
- braintrust/functions/invoke.py +66 -59
- braintrust/functions/stream.py +3 -2
- braintrust/generated_types.py +3 -1
- braintrust/git_fields.py +11 -11
- braintrust/gitutil.py +2 -3
- braintrust/graph_util.py +10 -10
- braintrust/id_gen.py +2 -2
- braintrust/logger.py +346 -357
- braintrust/merge_row_batch.py +10 -9
- braintrust/oai.py +21 -20
- braintrust/otel/__init__.py +49 -49
- braintrust/otel/context.py +16 -30
- braintrust/otel/test_distributed_tracing.py +14 -11
- braintrust/otel/test_otel_bt_integration.py +32 -31
- braintrust/parameters.py +8 -8
- braintrust/prompt.py +14 -14
- braintrust/prompt_cache/disk_cache.py +5 -4
- braintrust/prompt_cache/lru_cache.py +3 -2
- braintrust/prompt_cache/prompt_cache.py +13 -14
- braintrust/queue.py +4 -4
- braintrust/score.py +4 -4
- braintrust/serializable_data_class.py +4 -4
- braintrust/span_identifier_v1.py +1 -2
- braintrust/span_identifier_v2.py +3 -4
- braintrust/span_identifier_v3.py +23 -20
- braintrust/span_identifier_v4.py +34 -25
- braintrust/test_framework.py +16 -6
- braintrust/test_helpers.py +5 -5
- braintrust/test_id_gen.py +2 -3
- braintrust/test_otel.py +61 -53
- braintrust/test_queue.py +0 -1
- braintrust/test_score.py +1 -3
- braintrust/test_span_components.py +29 -44
- braintrust/util.py +9 -8
- braintrust/version.py +2 -2
- braintrust/wrappers/_anthropic_utils.py +4 -4
- braintrust/wrappers/agno/__init__.py +3 -4
- braintrust/wrappers/agno/agent.py +1 -2
- braintrust/wrappers/agno/function_call.py +1 -2
- braintrust/wrappers/agno/model.py +1 -2
- braintrust/wrappers/agno/team.py +1 -2
- braintrust/wrappers/agno/utils.py +12 -12
- braintrust/wrappers/anthropic.py +7 -8
- braintrust/wrappers/claude_agent_sdk/__init__.py +3 -4
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +29 -27
- braintrust/wrappers/dspy.py +15 -17
- braintrust/wrappers/google_genai/__init__.py +16 -16
- braintrust/wrappers/langchain.py +22 -24
- braintrust/wrappers/litellm.py +4 -3
- braintrust/wrappers/openai.py +15 -15
- braintrust/wrappers/pydantic_ai.py +21 -20
- braintrust/wrappers/test_agno.py +0 -1
- braintrust/wrappers/test_dspy.py +0 -1
- braintrust/wrappers/test_google_genai.py +2 -3
- braintrust/wrappers/test_litellm.py +0 -1
- {braintrust-0.3.15.dist-info → braintrust-0.4.0.dist-info}/METADATA +3 -2
- braintrust-0.4.0.dist-info/RECORD +120 -0
- braintrust-0.3.15.dist-info/RECORD +0 -120
- {braintrust-0.3.15.dist-info → braintrust-0.4.0.dist-info}/WHEEL +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.0.dist-info}/entry_points.txt +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.0.dist-info}/top_level.txt +0 -0
braintrust/logger.py
CHANGED
|
@@ -19,24 +19,16 @@ import traceback
|
|
|
19
19
|
import types
|
|
20
20
|
import uuid
|
|
21
21
|
from abc import ABC, abstractmethod
|
|
22
|
+
from collections.abc import Callable, Iterator, Mapping, MutableMapping, Sequence
|
|
22
23
|
from functools import partial, wraps
|
|
23
24
|
from multiprocessing import cpu_count
|
|
24
25
|
from types import TracebackType
|
|
25
26
|
from typing import (
|
|
26
27
|
Any,
|
|
27
|
-
Callable,
|
|
28
28
|
Dict,
|
|
29
29
|
Generic,
|
|
30
|
-
Iterator,
|
|
31
|
-
List,
|
|
32
30
|
Literal,
|
|
33
|
-
Mapping,
|
|
34
|
-
MutableMapping,
|
|
35
31
|
Optional,
|
|
36
|
-
Sequence,
|
|
37
|
-
Set,
|
|
38
|
-
Tuple,
|
|
39
|
-
Type,
|
|
40
32
|
TypedDict,
|
|
41
33
|
TypeVar,
|
|
42
34
|
Union,
|
|
@@ -107,7 +99,7 @@ from .util import (
|
|
|
107
99
|
REDACTION_FIELDS = ["input", "output", "expected", "metadata", "context", "scores", "metrics"]
|
|
108
100
|
from .xact_ids import prettify_xact
|
|
109
101
|
|
|
110
|
-
Metadata =
|
|
102
|
+
Metadata = dict[str, Any]
|
|
111
103
|
DATA_API_VERSION = 2
|
|
112
104
|
|
|
113
105
|
T = TypeVar("T")
|
|
@@ -161,12 +153,12 @@ class Span(Exportable, contextlib.AbstractContextManager, ABC):
|
|
|
161
153
|
@abstractmethod
|
|
162
154
|
def start_span(
|
|
163
155
|
self,
|
|
164
|
-
name:
|
|
165
|
-
type:
|
|
166
|
-
span_attributes:
|
|
167
|
-
start_time:
|
|
168
|
-
set_current:
|
|
169
|
-
parent:
|
|
156
|
+
name: str | None = None,
|
|
157
|
+
type: SpanTypeAttribute | None = None,
|
|
158
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
159
|
+
start_time: float | None = None,
|
|
160
|
+
set_current: bool | None = None,
|
|
161
|
+
parent: str | None = None,
|
|
170
162
|
**event: Any,
|
|
171
163
|
) -> "Span":
|
|
172
164
|
"""Create a new span. This is useful if you want to log more detailed trace information beyond the scope of a single log event. Data logged over several calls to `Span.log` will be merged into one logical row.
|
|
@@ -224,7 +216,7 @@ class Span(Exportable, contextlib.AbstractContextManager, ABC):
|
|
|
224
216
|
"""
|
|
225
217
|
|
|
226
218
|
@abstractmethod
|
|
227
|
-
def end(self, end_time:
|
|
219
|
+
def end(self, end_time: float | None = None) -> float:
|
|
228
220
|
"""Log an end time to the span (defaults to the current time). Returns the logged time.
|
|
229
221
|
|
|
230
222
|
Will be invoked automatically if the span is bound to a context manager.
|
|
@@ -238,15 +230,15 @@ class Span(Exportable, contextlib.AbstractContextManager, ABC):
|
|
|
238
230
|
"""Flush any pending rows to the server."""
|
|
239
231
|
|
|
240
232
|
@abstractmethod
|
|
241
|
-
def close(self, end_time:
|
|
233
|
+
def close(self, end_time: float | None = None) -> float:
|
|
242
234
|
"""Alias for `end`."""
|
|
243
235
|
|
|
244
236
|
@abstractmethod
|
|
245
237
|
def set_attributes(
|
|
246
238
|
self,
|
|
247
|
-
name:
|
|
248
|
-
type:
|
|
249
|
-
span_attributes:
|
|
239
|
+
name: str | None = None,
|
|
240
|
+
type: SpanTypeAttribute | None = None,
|
|
241
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
250
242
|
) -> None:
|
|
251
243
|
"""Set the span's name, type, or other attributes. These attributes will be attached to all log events within the span.
|
|
252
244
|
The attributes are equivalent to the arguments to start_span.
|
|
@@ -287,17 +279,17 @@ class _NoopSpan(Span):
|
|
|
287
279
|
|
|
288
280
|
def start_span(
|
|
289
281
|
self,
|
|
290
|
-
name:
|
|
291
|
-
type:
|
|
292
|
-
span_attributes:
|
|
293
|
-
start_time:
|
|
294
|
-
set_current:
|
|
295
|
-
parent:
|
|
282
|
+
name: str | None = None,
|
|
283
|
+
type: SpanTypeAttribute | None = None,
|
|
284
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
285
|
+
start_time: float | None = None,
|
|
286
|
+
set_current: bool | None = None,
|
|
287
|
+
parent: str | None = None,
|
|
296
288
|
**event: Any,
|
|
297
289
|
):
|
|
298
290
|
return self
|
|
299
291
|
|
|
300
|
-
def end(self, end_time:
|
|
292
|
+
def end(self, end_time: float | None = None) -> float:
|
|
301
293
|
return end_time or time.time()
|
|
302
294
|
|
|
303
295
|
def export(self):
|
|
@@ -312,14 +304,14 @@ class _NoopSpan(Span):
|
|
|
312
304
|
def flush(self):
|
|
313
305
|
pass
|
|
314
306
|
|
|
315
|
-
def close(self, end_time:
|
|
307
|
+
def close(self, end_time: float | None = None) -> float:
|
|
316
308
|
return self.end(end_time)
|
|
317
309
|
|
|
318
310
|
def set_attributes(
|
|
319
311
|
self,
|
|
320
|
-
name:
|
|
321
|
-
type:
|
|
322
|
-
span_attributes:
|
|
312
|
+
name: str | None = None,
|
|
313
|
+
type: SpanTypeAttribute | None = None,
|
|
314
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
323
315
|
):
|
|
324
316
|
pass
|
|
325
317
|
|
|
@@ -334,9 +326,9 @@ class _NoopSpan(Span):
|
|
|
334
326
|
|
|
335
327
|
def __exit__(
|
|
336
328
|
self,
|
|
337
|
-
exc_type:
|
|
338
|
-
exc_value:
|
|
339
|
-
traceback:
|
|
329
|
+
exc_type: type[BaseException] | None,
|
|
330
|
+
exc_value: BaseException | None,
|
|
331
|
+
traceback: TracebackType | None,
|
|
340
332
|
):
|
|
341
333
|
pass
|
|
342
334
|
|
|
@@ -348,11 +340,11 @@ NOOP_SPAN_PERMALINK = "https://www.braintrust.dev/noop-span"
|
|
|
348
340
|
class BraintrustState:
|
|
349
341
|
def __init__(self):
|
|
350
342
|
self.id = str(uuid.uuid4())
|
|
351
|
-
self.current_experiment:
|
|
352
|
-
self.current_logger: contextvars.ContextVar[
|
|
343
|
+
self.current_experiment: Experiment | None = None
|
|
344
|
+
self.current_logger: contextvars.ContextVar[Logger | None] = contextvars.ContextVar(
|
|
353
345
|
"braintrust_current_logger", default=None
|
|
354
346
|
)
|
|
355
|
-
self.current_parent: contextvars.ContextVar[
|
|
347
|
+
self.current_parent: contextvars.ContextVar[str | None] = contextvars.ContextVar(
|
|
356
348
|
"braintrust_current_parent", default=None
|
|
357
349
|
)
|
|
358
350
|
self.current_span: contextvars.ContextVar[Span] = contextvars.ContextVar(
|
|
@@ -402,20 +394,20 @@ class BraintrustState:
|
|
|
402
394
|
)
|
|
403
395
|
|
|
404
396
|
def reset_login_info(self):
|
|
405
|
-
self.app_url:
|
|
406
|
-
self.app_public_url:
|
|
407
|
-
self.login_token:
|
|
408
|
-
self.org_id:
|
|
409
|
-
self.org_name:
|
|
410
|
-
self.api_url:
|
|
411
|
-
self.proxy_url:
|
|
397
|
+
self.app_url: str | None = None
|
|
398
|
+
self.app_public_url: str | None = None
|
|
399
|
+
self.login_token: str | None = None
|
|
400
|
+
self.org_id: str | None = None
|
|
401
|
+
self.org_name: str | None = None
|
|
402
|
+
self.api_url: str | None = None
|
|
403
|
+
self.proxy_url: str | None = None
|
|
412
404
|
self.logged_in: bool = False
|
|
413
|
-
self.git_metadata_settings:
|
|
405
|
+
self.git_metadata_settings: GitMetadataSettings | None = None
|
|
414
406
|
|
|
415
|
-
self._app_conn:
|
|
416
|
-
self._api_conn:
|
|
417
|
-
self._proxy_conn:
|
|
418
|
-
self._user_info:
|
|
407
|
+
self._app_conn: HTTPConnection | None = None
|
|
408
|
+
self._api_conn: HTTPConnection | None = None
|
|
409
|
+
self._proxy_conn: HTTPConnection | None = None
|
|
410
|
+
self._user_info: Mapping[str, Any] | None = None
|
|
419
411
|
|
|
420
412
|
def reset_parent_state(self):
|
|
421
413
|
# reset possible parent state for tests
|
|
@@ -480,9 +472,9 @@ class BraintrustState:
|
|
|
480
472
|
|
|
481
473
|
def login(
|
|
482
474
|
self,
|
|
483
|
-
app_url:
|
|
484
|
-
api_key:
|
|
485
|
-
org_name:
|
|
475
|
+
app_url: str | None = None,
|
|
476
|
+
api_key: str | None = None,
|
|
477
|
+
org_name: str | None = None,
|
|
486
478
|
force_login: bool = False,
|
|
487
479
|
) -> None:
|
|
488
480
|
if not force_login and self.logged_in:
|
|
@@ -558,7 +550,7 @@ class BraintrustState:
|
|
|
558
550
|
bg_logger = self._global_bg_logger.get()
|
|
559
551
|
bg_logger.enforce_queue_size_limit(enforce)
|
|
560
552
|
|
|
561
|
-
def set_masking_function(self, masking_function:
|
|
553
|
+
def set_masking_function(self, masking_function: Callable[[Any], Any] | None) -> None:
|
|
562
554
|
"""Set the masking function on the background logger."""
|
|
563
555
|
self.global_bg_logger().set_masking_function(masking_function)
|
|
564
556
|
|
|
@@ -566,7 +558,7 @@ class BraintrustState:
|
|
|
566
558
|
_state: BraintrustState = None # type: ignore
|
|
567
559
|
|
|
568
560
|
|
|
569
|
-
_http_adapter:
|
|
561
|
+
_http_adapter: HTTPAdapter | None = None
|
|
570
562
|
|
|
571
563
|
|
|
572
564
|
def set_http_adapter(adapter: HTTPAdapter) -> None:
|
|
@@ -632,7 +624,7 @@ class RetryRequestExceptionsAdapter(HTTPAdapter):
|
|
|
632
624
|
|
|
633
625
|
|
|
634
626
|
class HTTPConnection:
|
|
635
|
-
def __init__(self, base_url: str, adapter:
|
|
627
|
+
def __init__(self, base_url: str, adapter: HTTPAdapter | None = None):
|
|
636
628
|
self.base_url = base_url
|
|
637
629
|
self.token = None
|
|
638
630
|
self.adapter = adapter
|
|
@@ -661,7 +653,7 @@ class HTTPConnection:
|
|
|
661
653
|
self.token = token
|
|
662
654
|
self._set_session_token()
|
|
663
655
|
|
|
664
|
-
def _set_adapter(self, adapter:
|
|
656
|
+
def _set_adapter(self, adapter: HTTPAdapter | None) -> None:
|
|
665
657
|
self.adapter = adapter
|
|
666
658
|
|
|
667
659
|
def _reset(self, **retry_kwargs: Any) -> None:
|
|
@@ -693,9 +685,7 @@ class HTTPConnection:
|
|
|
693
685
|
def delete(self, path: str, *args: Any, **kwargs: Any) -> requests.Response:
|
|
694
686
|
return self.session.delete(_urljoin(self.base_url, path), *args, **kwargs)
|
|
695
687
|
|
|
696
|
-
def get_json(
|
|
697
|
-
self, object_type: str, args: Optional[Mapping[str, Any]] = None, retries: int = 0
|
|
698
|
-
) -> Mapping[str, Any]:
|
|
688
|
+
def get_json(self, object_type: str, args: Mapping[str, Any] | None = None, retries: int = 0) -> Mapping[str, Any]:
|
|
699
689
|
tries = retries + 1
|
|
700
690
|
for i in range(tries):
|
|
701
691
|
resp = self.get(f"/{object_type}", params=args)
|
|
@@ -708,7 +698,7 @@ class HTTPConnection:
|
|
|
708
698
|
# Needed for type checking.
|
|
709
699
|
raise Exception("unreachable")
|
|
710
700
|
|
|
711
|
-
def post_json(self, object_type: str, args:
|
|
701
|
+
def post_json(self, object_type: str, args: Mapping[str, Any] | None = None) -> Any:
|
|
712
702
|
resp = self.post(f"/{object_type.lstrip('/')}", json=args)
|
|
713
703
|
response_raise_for_status(resp)
|
|
714
704
|
return resp.json()
|
|
@@ -792,11 +782,11 @@ def _apply_masking_to_field(masking_function: Callable[[Any], Any], data: Any, f
|
|
|
792
782
|
|
|
793
783
|
class _BackgroundLogger(ABC):
|
|
794
784
|
@abstractmethod
|
|
795
|
-
def log(self, *args: LazyValue[
|
|
785
|
+
def log(self, *args: LazyValue[dict[str, Any]]) -> None:
|
|
796
786
|
pass
|
|
797
787
|
|
|
798
788
|
@abstractmethod
|
|
799
|
-
def flush(self, batch_size:
|
|
789
|
+
def flush(self, batch_size: int | None = None):
|
|
800
790
|
pass
|
|
801
791
|
|
|
802
792
|
|
|
@@ -804,20 +794,20 @@ class _MemoryBackgroundLogger(_BackgroundLogger):
|
|
|
804
794
|
def __init__(self):
|
|
805
795
|
self.lock = threading.Lock()
|
|
806
796
|
self.logs = []
|
|
807
|
-
self.masking_function:
|
|
797
|
+
self.masking_function: Callable[[Any], Any] | None = None
|
|
808
798
|
|
|
809
799
|
def enforce_queue_size_limit(self, enforce: bool) -> None:
|
|
810
800
|
pass
|
|
811
801
|
|
|
812
|
-
def log(self, *args: LazyValue[
|
|
802
|
+
def log(self, *args: LazyValue[dict[str, Any]]) -> None:
|
|
813
803
|
with self.lock:
|
|
814
804
|
self.logs.extend(args)
|
|
815
805
|
|
|
816
|
-
def set_masking_function(self, masking_function:
|
|
806
|
+
def set_masking_function(self, masking_function: Callable[[Any], Any] | None) -> None:
|
|
817
807
|
"""Set the masking function for the memory logger."""
|
|
818
808
|
self.masking_function = masking_function
|
|
819
809
|
|
|
820
|
-
def flush(self, batch_size:
|
|
810
|
+
def flush(self, batch_size: int | None = None):
|
|
821
811
|
pass
|
|
822
812
|
|
|
823
813
|
def pop(self):
|
|
@@ -871,7 +861,7 @@ BACKGROUND_LOGGER_BASE_SLEEP_TIME_S = 1.0
|
|
|
871
861
|
class _HTTPBackgroundLogger:
|
|
872
862
|
def __init__(self, api_conn: LazyValue[HTTPConnection]):
|
|
873
863
|
self.api_conn = api_conn
|
|
874
|
-
self.masking_function:
|
|
864
|
+
self.masking_function: Callable[[Any], Any] | None = None
|
|
875
865
|
self.outfile = sys.stderr
|
|
876
866
|
self.flush_lock = threading.RLock()
|
|
877
867
|
|
|
@@ -934,7 +924,7 @@ class _HTTPBackgroundLogger:
|
|
|
934
924
|
"""
|
|
935
925
|
self.queue.enforce_queue_size_limit(enforce)
|
|
936
926
|
|
|
937
|
-
def log(self, *args: LazyValue[
|
|
927
|
+
def log(self, *args: LazyValue[dict[str, Any]]) -> None:
|
|
938
928
|
self._start()
|
|
939
929
|
dropped_items = []
|
|
940
930
|
for event in args:
|
|
@@ -981,7 +971,7 @@ class _HTTPBackgroundLogger:
|
|
|
981
971
|
else:
|
|
982
972
|
raise
|
|
983
973
|
|
|
984
|
-
def flush(self, batch_size:
|
|
974
|
+
def flush(self, batch_size: int | None = None):
|
|
985
975
|
if batch_size is None:
|
|
986
976
|
batch_size = self.default_batch_size
|
|
987
977
|
|
|
@@ -1020,7 +1010,7 @@ class _HTTPBackgroundLogger:
|
|
|
1020
1010
|
f"Encountered the following errors while logging:", post_promise_exceptions
|
|
1021
1011
|
)
|
|
1022
1012
|
|
|
1023
|
-
attachment_errors:
|
|
1013
|
+
attachment_errors: list[Exception] = []
|
|
1024
1014
|
for attachment in attachments:
|
|
1025
1015
|
try:
|
|
1026
1016
|
result = attachment.upload()
|
|
@@ -1038,8 +1028,8 @@ class _HTTPBackgroundLogger:
|
|
|
1038
1028
|
)
|
|
1039
1029
|
|
|
1040
1030
|
def _unwrap_lazy_values(
|
|
1041
|
-
self, wrapped_items: Sequence[LazyValue[
|
|
1042
|
-
) ->
|
|
1031
|
+
self, wrapped_items: Sequence[LazyValue[dict[str, Any]]]
|
|
1032
|
+
) -> tuple[list[list[dict[str, Any]]], list["BaseAttachment"]]:
|
|
1043
1033
|
for i in range(self.num_tries):
|
|
1044
1034
|
try:
|
|
1045
1035
|
unwrapped_items = [item.get() for item in wrapped_items]
|
|
@@ -1069,7 +1059,7 @@ class _HTTPBackgroundLogger:
|
|
|
1069
1059
|
|
|
1070
1060
|
batched_items[batch_idx][item_idx] = masked_item
|
|
1071
1061
|
|
|
1072
|
-
attachments:
|
|
1062
|
+
attachments: list["BaseAttachment"] = []
|
|
1073
1063
|
for batch in batched_items:
|
|
1074
1064
|
for item in batch:
|
|
1075
1065
|
_extract_attachments(item, attachments)
|
|
@@ -1179,7 +1169,7 @@ class _HTTPBackgroundLogger:
|
|
|
1179
1169
|
def internal_replace_api_conn(self, api_conn: HTTPConnection):
|
|
1180
1170
|
self.api_conn = LazyValue(lambda: api_conn, use_mutex=False)
|
|
1181
1171
|
|
|
1182
|
-
def set_masking_function(self, masking_function:
|
|
1172
|
+
def set_masking_function(self, masking_function: Callable[[Any], Any] | None):
|
|
1183
1173
|
"""Set or update the masking function."""
|
|
1184
1174
|
self.masking_function = masking_function
|
|
1185
1175
|
|
|
@@ -1221,7 +1211,7 @@ def _internal_with_memory_background_logger():
|
|
|
1221
1211
|
class ObjectMetadata:
|
|
1222
1212
|
id: str
|
|
1223
1213
|
name: str
|
|
1224
|
-
full_info:
|
|
1214
|
+
full_info: dict[str, Any]
|
|
1225
1215
|
|
|
1226
1216
|
|
|
1227
1217
|
@dataclasses.dataclass
|
|
@@ -1250,69 +1240,69 @@ class OrgProjectMetadata:
|
|
|
1250
1240
|
# this.
|
|
1251
1241
|
@overload
|
|
1252
1242
|
def init(
|
|
1253
|
-
project:
|
|
1254
|
-
experiment:
|
|
1255
|
-
description:
|
|
1243
|
+
project: str | None = ...,
|
|
1244
|
+
experiment: str | None = ...,
|
|
1245
|
+
description: str | None = ...,
|
|
1256
1246
|
dataset: Optional["Dataset"] = ...,
|
|
1257
1247
|
open: Literal[False] = ...,
|
|
1258
|
-
base_experiment:
|
|
1248
|
+
base_experiment: str | None = ...,
|
|
1259
1249
|
is_public: bool = ...,
|
|
1260
|
-
app_url:
|
|
1261
|
-
api_key:
|
|
1262
|
-
org_name:
|
|
1263
|
-
metadata:
|
|
1264
|
-
git_metadata_settings:
|
|
1250
|
+
app_url: str | None = ...,
|
|
1251
|
+
api_key: str | None = ...,
|
|
1252
|
+
org_name: str | None = ...,
|
|
1253
|
+
metadata: Metadata | None = ...,
|
|
1254
|
+
git_metadata_settings: GitMetadataSettings | None = ...,
|
|
1265
1255
|
set_current: bool = ...,
|
|
1266
|
-
update:
|
|
1267
|
-
project_id:
|
|
1268
|
-
base_experiment_id:
|
|
1269
|
-
repo_info:
|
|
1270
|
-
state:
|
|
1256
|
+
update: bool | None = ...,
|
|
1257
|
+
project_id: str | None = ...,
|
|
1258
|
+
base_experiment_id: str | None = ...,
|
|
1259
|
+
repo_info: RepoInfo | None = ...,
|
|
1260
|
+
state: BraintrustState | None = ...,
|
|
1271
1261
|
) -> "Experiment": ...
|
|
1272
1262
|
|
|
1273
1263
|
|
|
1274
1264
|
@overload
|
|
1275
1265
|
def init(
|
|
1276
|
-
project:
|
|
1277
|
-
experiment:
|
|
1278
|
-
description:
|
|
1266
|
+
project: str | None = ...,
|
|
1267
|
+
experiment: str | None = ...,
|
|
1268
|
+
description: str | None = ...,
|
|
1279
1269
|
dataset: Optional["Dataset"] = ...,
|
|
1280
1270
|
open: Literal[True] = ...,
|
|
1281
|
-
base_experiment:
|
|
1271
|
+
base_experiment: str | None = ...,
|
|
1282
1272
|
is_public: bool = ...,
|
|
1283
|
-
app_url:
|
|
1284
|
-
api_key:
|
|
1285
|
-
org_name:
|
|
1286
|
-
metadata:
|
|
1287
|
-
git_metadata_settings:
|
|
1273
|
+
app_url: str | None = ...,
|
|
1274
|
+
api_key: str | None = ...,
|
|
1275
|
+
org_name: str | None = ...,
|
|
1276
|
+
metadata: Metadata | None = ...,
|
|
1277
|
+
git_metadata_settings: GitMetadataSettings | None = ...,
|
|
1288
1278
|
set_current: bool = ...,
|
|
1289
|
-
update:
|
|
1290
|
-
project_id:
|
|
1291
|
-
base_experiment_id:
|
|
1292
|
-
repo_info:
|
|
1293
|
-
state:
|
|
1279
|
+
update: bool | None = ...,
|
|
1280
|
+
project_id: str | None = ...,
|
|
1281
|
+
base_experiment_id: str | None = ...,
|
|
1282
|
+
repo_info: RepoInfo | None = ...,
|
|
1283
|
+
state: BraintrustState | None = ...,
|
|
1294
1284
|
) -> "ReadonlyExperiment": ...
|
|
1295
1285
|
|
|
1296
1286
|
|
|
1297
1287
|
def init(
|
|
1298
|
-
project:
|
|
1299
|
-
experiment:
|
|
1300
|
-
description:
|
|
1288
|
+
project: str | None = None,
|
|
1289
|
+
experiment: str | None = None,
|
|
1290
|
+
description: str | None = None,
|
|
1301
1291
|
dataset: Optional["Dataset"] = None,
|
|
1302
1292
|
open: bool = False,
|
|
1303
|
-
base_experiment:
|
|
1293
|
+
base_experiment: str | None = None,
|
|
1304
1294
|
is_public: bool = False,
|
|
1305
|
-
app_url:
|
|
1306
|
-
api_key:
|
|
1307
|
-
org_name:
|
|
1308
|
-
metadata:
|
|
1309
|
-
git_metadata_settings:
|
|
1295
|
+
app_url: str | None = None,
|
|
1296
|
+
api_key: str | None = None,
|
|
1297
|
+
org_name: str | None = None,
|
|
1298
|
+
metadata: Metadata | None = None,
|
|
1299
|
+
git_metadata_settings: GitMetadataSettings | None = None,
|
|
1310
1300
|
set_current: bool = True,
|
|
1311
|
-
update:
|
|
1312
|
-
project_id:
|
|
1313
|
-
base_experiment_id:
|
|
1314
|
-
repo_info:
|
|
1315
|
-
state:
|
|
1301
|
+
update: bool | None = None,
|
|
1302
|
+
project_id: str | None = None,
|
|
1303
|
+
base_experiment_id: str | None = None,
|
|
1304
|
+
repo_info: RepoInfo | None = None,
|
|
1305
|
+
state: BraintrustState | None = None,
|
|
1316
1306
|
) -> Union["Experiment", "ReadonlyExperiment"]:
|
|
1317
1307
|
"""
|
|
1318
1308
|
Log in, and then initialize a new experiment in a specified project. If the project does not exist, it will be created.
|
|
@@ -1460,18 +1450,18 @@ def init_experiment(*args, **kwargs) -> Union["Experiment", "ReadonlyExperiment"
|
|
|
1460
1450
|
|
|
1461
1451
|
|
|
1462
1452
|
def init_dataset(
|
|
1463
|
-
project:
|
|
1464
|
-
name:
|
|
1465
|
-
description:
|
|
1466
|
-
version:
|
|
1467
|
-
app_url:
|
|
1468
|
-
api_key:
|
|
1469
|
-
org_name:
|
|
1470
|
-
project_id:
|
|
1471
|
-
metadata:
|
|
1453
|
+
project: str | None = None,
|
|
1454
|
+
name: str | None = None,
|
|
1455
|
+
description: str | None = None,
|
|
1456
|
+
version: str | int | None = None,
|
|
1457
|
+
app_url: str | None = None,
|
|
1458
|
+
api_key: str | None = None,
|
|
1459
|
+
org_name: str | None = None,
|
|
1460
|
+
project_id: str | None = None,
|
|
1461
|
+
metadata: Metadata | None = None,
|
|
1472
1462
|
use_output: bool = DEFAULT_IS_LEGACY_DATASET,
|
|
1473
|
-
_internal_btql:
|
|
1474
|
-
state:
|
|
1463
|
+
_internal_btql: dict[str, Any] | None = None,
|
|
1464
|
+
state: BraintrustState | None = None,
|
|
1475
1465
|
) -> "Dataset":
|
|
1476
1466
|
"""
|
|
1477
1467
|
Create a new dataset in a specified project. If the project does not exist, it will be created.
|
|
@@ -1519,7 +1509,7 @@ def init_dataset(
|
|
|
1519
1509
|
)
|
|
1520
1510
|
|
|
1521
1511
|
|
|
1522
|
-
def _compute_logger_metadata(project_name:
|
|
1512
|
+
def _compute_logger_metadata(project_name: str | None = None, project_id: str | None = None):
|
|
1523
1513
|
login()
|
|
1524
1514
|
org_id = _state.org_id
|
|
1525
1515
|
if project_id is None:
|
|
@@ -1547,15 +1537,15 @@ def _compute_logger_metadata(project_name: Optional[str] = None, project_id: Opt
|
|
|
1547
1537
|
|
|
1548
1538
|
|
|
1549
1539
|
def init_logger(
|
|
1550
|
-
project:
|
|
1551
|
-
project_id:
|
|
1540
|
+
project: str | None = None,
|
|
1541
|
+
project_id: str | None = None,
|
|
1552
1542
|
async_flush: bool = True,
|
|
1553
|
-
app_url:
|
|
1554
|
-
api_key:
|
|
1555
|
-
org_name:
|
|
1543
|
+
app_url: str | None = None,
|
|
1544
|
+
api_key: str | None = None,
|
|
1545
|
+
org_name: str | None = None,
|
|
1556
1546
|
force_login: bool = False,
|
|
1557
1547
|
set_current: bool = True,
|
|
1558
|
-
state:
|
|
1548
|
+
state: BraintrustState | None = None,
|
|
1559
1549
|
) -> "Logger":
|
|
1560
1550
|
"""
|
|
1561
1551
|
Create a new logger in a specified project. If the project does not exist, it will be created.
|
|
@@ -1604,17 +1594,17 @@ def init_logger(
|
|
|
1604
1594
|
|
|
1605
1595
|
|
|
1606
1596
|
def load_prompt(
|
|
1607
|
-
project:
|
|
1608
|
-
slug:
|
|
1609
|
-
version:
|
|
1610
|
-
project_id:
|
|
1611
|
-
id:
|
|
1612
|
-
defaults:
|
|
1597
|
+
project: str | None = None,
|
|
1598
|
+
slug: str | None = None,
|
|
1599
|
+
version: str | int | None = None,
|
|
1600
|
+
project_id: str | None = None,
|
|
1601
|
+
id: str | None = None,
|
|
1602
|
+
defaults: Mapping[str, Any] | None = None,
|
|
1613
1603
|
no_trace: bool = False,
|
|
1614
|
-
environment:
|
|
1615
|
-
app_url:
|
|
1616
|
-
api_key:
|
|
1617
|
-
org_name:
|
|
1604
|
+
environment: str | None = None,
|
|
1605
|
+
app_url: str | None = None,
|
|
1606
|
+
api_key: str | None = None,
|
|
1607
|
+
org_name: str | None = None,
|
|
1618
1608
|
) -> "Prompt":
|
|
1619
1609
|
"""
|
|
1620
1610
|
Loads a prompt from the specified project.
|
|
@@ -1737,9 +1727,9 @@ login_lock = threading.RLock()
|
|
|
1737
1727
|
|
|
1738
1728
|
|
|
1739
1729
|
def login(
|
|
1740
|
-
app_url:
|
|
1741
|
-
api_key:
|
|
1742
|
-
org_name:
|
|
1730
|
+
app_url: str | None = None,
|
|
1731
|
+
api_key: str | None = None,
|
|
1732
|
+
org_name: str | None = None,
|
|
1743
1733
|
force_login: bool = False,
|
|
1744
1734
|
) -> None:
|
|
1745
1735
|
"""
|
|
@@ -1763,9 +1753,9 @@ def login(
|
|
|
1763
1753
|
|
|
1764
1754
|
|
|
1765
1755
|
def login_to_state(
|
|
1766
|
-
app_url:
|
|
1767
|
-
api_key:
|
|
1768
|
-
org_name:
|
|
1756
|
+
app_url: str | None = None,
|
|
1757
|
+
api_key: str | None = None,
|
|
1758
|
+
org_name: str | None = None,
|
|
1769
1759
|
) -> BraintrustState:
|
|
1770
1760
|
app_url = _get_app_url(app_url)
|
|
1771
1761
|
|
|
@@ -1845,7 +1835,7 @@ def login_to_state(
|
|
|
1845
1835
|
return state
|
|
1846
1836
|
|
|
1847
1837
|
|
|
1848
|
-
def set_masking_function(masking_function:
|
|
1838
|
+
def set_masking_function(masking_function: Callable[[Any], Any] | None) -> None:
|
|
1849
1839
|
"""
|
|
1850
1840
|
Set a global masking function that will be applied to all logged data before sending to Braintrust.
|
|
1851
1841
|
The masking function will be applied after records are merged but before they are sent to the backend.
|
|
@@ -1872,7 +1862,7 @@ def log(**event: Any) -> str:
|
|
|
1872
1862
|
return e.log(**event)
|
|
1873
1863
|
|
|
1874
1864
|
|
|
1875
|
-
def summarize(summarize_scores: bool = True, comparison_experiment_id:
|
|
1865
|
+
def summarize(summarize_scores: bool = True, comparison_experiment_id: str | None = None) -> "ExperimentSummary":
|
|
1876
1866
|
"""
|
|
1877
1867
|
Summarize the current experiment, including the scores (compared to the closest reference experiment) and metadata.
|
|
1878
1868
|
|
|
@@ -1918,7 +1908,7 @@ def current_span() -> Span:
|
|
|
1918
1908
|
|
|
1919
1909
|
|
|
1920
1910
|
@contextlib.contextmanager
|
|
1921
|
-
def parent_context(parent:
|
|
1911
|
+
def parent_context(parent: str | None, state: BraintrustState | None = None):
|
|
1922
1912
|
"""
|
|
1923
1913
|
Context manager to temporarily set the parent context for spans.
|
|
1924
1914
|
|
|
@@ -1940,7 +1930,7 @@ def parent_context(parent: Optional[str], state: Optional[BraintrustState] = Non
|
|
|
1940
1930
|
|
|
1941
1931
|
|
|
1942
1932
|
def get_span_parent_object(
|
|
1943
|
-
parent:
|
|
1933
|
+
parent: str | None = None, state: BraintrustState | None = None
|
|
1944
1934
|
) -> Union[SpanComponentsV4, "Logger", "Experiment", Span]:
|
|
1945
1935
|
"""Mainly for internal use. Return the parent object for starting a span in a global context.
|
|
1946
1936
|
Applies precedence: current span > propagated parent string > experiment > logger."""
|
|
@@ -2155,14 +2145,14 @@ def traced(*span_args: Any, **span_kwargs: Any) -> Callable[[F], F]:
|
|
|
2155
2145
|
|
|
2156
2146
|
|
|
2157
2147
|
def start_span(
|
|
2158
|
-
name:
|
|
2159
|
-
type:
|
|
2160
|
-
span_attributes:
|
|
2161
|
-
start_time:
|
|
2162
|
-
set_current:
|
|
2163
|
-
parent:
|
|
2164
|
-
propagated_event:
|
|
2165
|
-
state:
|
|
2148
|
+
name: str | None = None,
|
|
2149
|
+
type: SpanTypeAttribute | None = None,
|
|
2150
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
2151
|
+
start_time: float | None = None,
|
|
2152
|
+
set_current: bool | None = None,
|
|
2153
|
+
parent: str | None = None,
|
|
2154
|
+
propagated_event: dict[str, Any] | None = None,
|
|
2155
|
+
state: BraintrustState | None = None,
|
|
2166
2156
|
**event: Any,
|
|
2167
2157
|
) -> Span:
|
|
2168
2158
|
"""Lower-level alternative to `@traced` for starting a span at the toplevel. It creates a span under the first active object (using the same precedence order as `@traced`), or if `parent` is specified, under the specified parent row, or returns a no-op span object.
|
|
@@ -2265,7 +2255,7 @@ def validate_tags(tags: Sequence[str]) -> None:
|
|
|
2265
2255
|
seen.add(tag)
|
|
2266
2256
|
|
|
2267
2257
|
|
|
2268
|
-
def _extract_attachments(event:
|
|
2258
|
+
def _extract_attachments(event: dict[str, Any], attachments: list["BaseAttachment"]) -> None:
|
|
2269
2259
|
"""
|
|
2270
2260
|
Helper function for uploading attachments. Recursively extracts `Attachment`
|
|
2271
2261
|
and `ExternalAttachment` values and replaces them with their associated
|
|
@@ -2282,13 +2272,13 @@ def _extract_attachments(event: Dict[str, Any], attachments: List["BaseAttachmen
|
|
|
2282
2272
|
return v.reference # Attachment cannot be nested.
|
|
2283
2273
|
|
|
2284
2274
|
# Recursive case: object.
|
|
2285
|
-
if isinstance(v,
|
|
2275
|
+
if isinstance(v, dict):
|
|
2286
2276
|
for k, v2 in v.items():
|
|
2287
2277
|
v[k] = _helper(v2)
|
|
2288
2278
|
return v
|
|
2289
2279
|
|
|
2290
2280
|
# Recursive case: array.
|
|
2291
|
-
if isinstance(v,
|
|
2281
|
+
if isinstance(v, list):
|
|
2292
2282
|
for i in range(len(v)):
|
|
2293
2283
|
v[i] = _helper(v[i])
|
|
2294
2284
|
return v
|
|
@@ -2308,7 +2298,7 @@ def _enrich_attachments(event: TMutableMapping) -> TMutableMapping:
|
|
|
2308
2298
|
"""
|
|
2309
2299
|
|
|
2310
2300
|
def _helper(v: Any) -> Any:
|
|
2311
|
-
if isinstance(v,
|
|
2301
|
+
if isinstance(v, dict):
|
|
2312
2302
|
# Base case: AttachmentReference.
|
|
2313
2303
|
if v.get("type") == "braintrust_attachment" or v.get("type") == "external_attachment":
|
|
2314
2304
|
return ReadonlyAttachment(cast(AttachmentReference, v))
|
|
@@ -2319,7 +2309,7 @@ def _enrich_attachments(event: TMutableMapping) -> TMutableMapping:
|
|
|
2319
2309
|
return v
|
|
2320
2310
|
|
|
2321
2311
|
# Recursive case: array.
|
|
2322
|
-
if isinstance(v,
|
|
2312
|
+
if isinstance(v, list):
|
|
2323
2313
|
for i in range(len(v)):
|
|
2324
2314
|
v[i] = _helper(v[i])
|
|
2325
2315
|
return v
|
|
@@ -2333,7 +2323,7 @@ def _enrich_attachments(event: TMutableMapping) -> TMutableMapping:
|
|
|
2333
2323
|
return event
|
|
2334
2324
|
|
|
2335
2325
|
|
|
2336
|
-
def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any]) ->
|
|
2326
|
+
def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any]) -> dict[str, Any]:
|
|
2337
2327
|
# Make sure only certain keys are specified.
|
|
2338
2328
|
forbidden_keys = set(event.keys()) - {
|
|
2339
2329
|
"input",
|
|
@@ -2436,7 +2426,7 @@ def _validate_and_sanitize_experiment_log_full_args(event: Mapping[str, Any], ha
|
|
|
2436
2426
|
return event
|
|
2437
2427
|
|
|
2438
2428
|
|
|
2439
|
-
def _deep_copy_event(event: Mapping[str, Any]) ->
|
|
2429
|
+
def _deep_copy_event(event: Mapping[str, Any]) -> dict[str, Any]:
|
|
2440
2430
|
"""
|
|
2441
2431
|
Creates a deep copy of the given event. Replaces references to user objects
|
|
2442
2432
|
with placeholder strings to ensure serializability, except for `Attachment`
|
|
@@ -2460,7 +2450,7 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
|
|
|
2460
2450
|
|
|
2461
2451
|
# Check for circular references in mutable containers
|
|
2462
2452
|
# Use id() to track object identity
|
|
2463
|
-
if isinstance(v, (Mapping,
|
|
2453
|
+
if isinstance(v, (Mapping, list, tuple, set)):
|
|
2464
2454
|
obj_id = id(v)
|
|
2465
2455
|
if obj_id in visited:
|
|
2466
2456
|
return "<circular reference>"
|
|
@@ -2481,7 +2471,7 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
|
|
|
2481
2471
|
key_str = f"<non-stringifiable-key: {type(k).__name__}>"
|
|
2482
2472
|
result[key_str] = _deep_copy_object(v[k], depth + 1)
|
|
2483
2473
|
return result
|
|
2484
|
-
elif isinstance(v, (
|
|
2474
|
+
elif isinstance(v, (list, tuple, set)):
|
|
2485
2475
|
return [_deep_copy_object(x, depth + 1) for x in v]
|
|
2486
2476
|
finally:
|
|
2487
2477
|
# Remove from visited set after processing to allow the same object
|
|
@@ -2547,9 +2537,9 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2547
2537
|
def __init__(
|
|
2548
2538
|
self,
|
|
2549
2539
|
object_type: str,
|
|
2550
|
-
pinned_version:
|
|
2551
|
-
mutate_record:
|
|
2552
|
-
_internal_btql:
|
|
2540
|
+
pinned_version: None | int | str = None,
|
|
2541
|
+
mutate_record: Callable[[TMapping], TMapping] | None = None,
|
|
2542
|
+
_internal_btql: dict[str, Any] | None = None,
|
|
2553
2543
|
):
|
|
2554
2544
|
self.object_type = object_type
|
|
2555
2545
|
|
|
@@ -2563,10 +2553,10 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2563
2553
|
self._pinned_version = str(pinned_version) if pinned_version is not None else None
|
|
2564
2554
|
self._mutate_record = mutate_record
|
|
2565
2555
|
|
|
2566
|
-
self._fetched_data:
|
|
2556
|
+
self._fetched_data: list[TMapping] | None = None
|
|
2567
2557
|
self._internal_btql = _internal_btql
|
|
2568
2558
|
|
|
2569
|
-
def fetch(self, batch_size:
|
|
2559
|
+
def fetch(self, batch_size: int | None = None) -> Iterator[TMapping]:
|
|
2570
2560
|
"""
|
|
2571
2561
|
Fetch all records.
|
|
2572
2562
|
|
|
@@ -2601,7 +2591,7 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2601
2591
|
@abstractmethod
|
|
2602
2592
|
def id(self) -> str: ...
|
|
2603
2593
|
|
|
2604
|
-
def _refetch(self, batch_size:
|
|
2594
|
+
def _refetch(self, batch_size: int | None = None) -> list[TMapping]:
|
|
2605
2595
|
state = self._get_state()
|
|
2606
2596
|
limit = batch_size if batch_size is not None else DEFAULT_FETCH_BATCH_SIZE
|
|
2607
2597
|
if self._fetched_data is None:
|
|
@@ -2642,7 +2632,7 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2642
2632
|
)
|
|
2643
2633
|
response_raise_for_status(resp)
|
|
2644
2634
|
resp_json = resp.json()
|
|
2645
|
-
data = (data or []) + cast(
|
|
2635
|
+
data = (data or []) + cast(list[TMapping], resp_json["data"])
|
|
2646
2636
|
if not resp_json.get("cursor", None):
|
|
2647
2637
|
break
|
|
2648
2638
|
cursor = resp_json.get("cursor", None)
|
|
@@ -2699,7 +2689,7 @@ class Attachment(BaseAttachment):
|
|
|
2699
2689
|
def __init__(
|
|
2700
2690
|
self,
|
|
2701
2691
|
*,
|
|
2702
|
-
data:
|
|
2692
|
+
data: str | bytes | bytearray,
|
|
2703
2693
|
filename: str,
|
|
2704
2694
|
content_type: str,
|
|
2705
2695
|
):
|
|
@@ -2770,7 +2760,7 @@ class Attachment(BaseAttachment):
|
|
|
2770
2760
|
try:
|
|
2771
2761
|
data = self._data.get()
|
|
2772
2762
|
except Exception as e:
|
|
2773
|
-
raise
|
|
2763
|
+
raise OSError(f"Failed to read file: {e}") from e
|
|
2774
2764
|
|
|
2775
2765
|
signed_url = metadata.get("signedUrl")
|
|
2776
2766
|
headers = metadata.get("headers")
|
|
@@ -2823,7 +2813,7 @@ class Attachment(BaseAttachment):
|
|
|
2823
2813
|
|
|
2824
2814
|
return LazyValue(error_wrapper, use_mutex=True)
|
|
2825
2815
|
|
|
2826
|
-
def _init_data(self, data:
|
|
2816
|
+
def _init_data(self, data: str | bytes | bytearray) -> LazyValue[bytes]:
|
|
2827
2817
|
if isinstance(data, str):
|
|
2828
2818
|
self._ensure_file_readable(data)
|
|
2829
2819
|
|
|
@@ -3041,11 +3031,11 @@ def _log_feedback_impl(
|
|
|
3041
3031
|
parent_object_type: SpanObjectTypeV3,
|
|
3042
3032
|
parent_object_id: LazyValue[str],
|
|
3043
3033
|
id: str,
|
|
3044
|
-
scores:
|
|
3045
|
-
expected:
|
|
3046
|
-
tags:
|
|
3047
|
-
comment:
|
|
3048
|
-
metadata:
|
|
3034
|
+
scores: Mapping[str, int | float] | None = None,
|
|
3035
|
+
expected: Any | None = None,
|
|
3036
|
+
tags: Sequence[str] | None = None,
|
|
3037
|
+
comment: str | None = None,
|
|
3038
|
+
metadata: Mapping[str, Any] | None = None,
|
|
3049
3039
|
source: Literal["external", "app", "api", None] = None,
|
|
3050
3040
|
):
|
|
3051
3041
|
if source is None:
|
|
@@ -3185,13 +3175,13 @@ class SpanIds:
|
|
|
3185
3175
|
|
|
3186
3176
|
span_id: str
|
|
3187
3177
|
root_span_id: str
|
|
3188
|
-
span_parents:
|
|
3178
|
+
span_parents: list[str] | None
|
|
3189
3179
|
|
|
3190
3180
|
|
|
3191
3181
|
def _resolve_span_ids(
|
|
3192
|
-
span_id:
|
|
3193
|
-
root_span_id:
|
|
3194
|
-
parent_span_ids:
|
|
3182
|
+
span_id: str | None,
|
|
3183
|
+
root_span_id: str | None,
|
|
3184
|
+
parent_span_ids: ParentSpanIds | None,
|
|
3195
3185
|
lookup_span_parent: bool,
|
|
3196
3186
|
id_generator: "id_gen.IDGenerator",
|
|
3197
3187
|
context_manager: "context.ContextManager",
|
|
@@ -3265,7 +3255,7 @@ def span_components_to_object_id(components: SpanComponentsV4) -> str:
|
|
|
3265
3255
|
return _span_components_to_object_id_lambda(components)()
|
|
3266
3256
|
|
|
3267
3257
|
|
|
3268
|
-
def permalink(slug: str, org_name:
|
|
3258
|
+
def permalink(slug: str, org_name: str | None = None, app_url: str | None = None) -> str:
|
|
3269
3259
|
"""
|
|
3270
3260
|
Format a permalink to the Braintrust application for viewing the span represented by the provided `slug`.
|
|
3271
3261
|
|
|
@@ -3314,13 +3304,13 @@ def permalink(slug: str, org_name: Optional[str] = None, app_url: Optional[str]
|
|
|
3314
3304
|
|
|
3315
3305
|
|
|
3316
3306
|
def _start_span_parent_args(
|
|
3317
|
-
parent:
|
|
3307
|
+
parent: str | None,
|
|
3318
3308
|
parent_object_type: SpanObjectTypeV3,
|
|
3319
3309
|
parent_object_id: LazyValue[str],
|
|
3320
|
-
parent_compute_object_metadata_args:
|
|
3321
|
-
parent_span_ids:
|
|
3322
|
-
propagated_event:
|
|
3323
|
-
) ->
|
|
3310
|
+
parent_compute_object_metadata_args: dict[str, Any] | None,
|
|
3311
|
+
parent_span_ids: ParentSpanIds | None,
|
|
3312
|
+
propagated_event: dict[str, Any] | None,
|
|
3313
|
+
) -> dict[str, Any]:
|
|
3324
3314
|
if parent:
|
|
3325
3315
|
assert parent_span_ids is None, "Cannot specify both parent and parent_span_ids"
|
|
3326
3316
|
parent_components = SpanComponentsV4.from_str(parent)
|
|
@@ -3374,9 +3364,9 @@ class _ExperimentDatasetEvent(TypedDict):
|
|
|
3374
3364
|
|
|
3375
3365
|
id: str
|
|
3376
3366
|
_xact_id: str
|
|
3377
|
-
input:
|
|
3378
|
-
expected:
|
|
3379
|
-
tags:
|
|
3367
|
+
input: Any | None
|
|
3368
|
+
expected: Any | None
|
|
3369
|
+
tags: Sequence[str] | None
|
|
3380
3370
|
|
|
3381
3371
|
|
|
3382
3372
|
class ExperimentDatasetIterator:
|
|
@@ -3422,7 +3412,7 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3422
3412
|
self,
|
|
3423
3413
|
lazy_metadata: LazyValue[ProjectExperimentMetadata],
|
|
3424
3414
|
dataset: Optional["Dataset"] = None,
|
|
3425
|
-
state:
|
|
3415
|
+
state: BraintrustState | None = None,
|
|
3426
3416
|
):
|
|
3427
3417
|
self._lazy_metadata = lazy_metadata
|
|
3428
3418
|
self.dataset = dataset
|
|
@@ -3473,16 +3463,16 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3473
3463
|
|
|
3474
3464
|
def log(
|
|
3475
3465
|
self,
|
|
3476
|
-
input:
|
|
3477
|
-
output:
|
|
3478
|
-
expected:
|
|
3479
|
-
error:
|
|
3480
|
-
tags:
|
|
3481
|
-
scores:
|
|
3482
|
-
metadata:
|
|
3483
|
-
metrics:
|
|
3484
|
-
id:
|
|
3485
|
-
dataset_record_id:
|
|
3466
|
+
input: Any | None = None,
|
|
3467
|
+
output: Any | None = None,
|
|
3468
|
+
expected: Any | None = None,
|
|
3469
|
+
error: str | None = None,
|
|
3470
|
+
tags: Sequence[str] | None = None,
|
|
3471
|
+
scores: Mapping[str, int | float] | None = None,
|
|
3472
|
+
metadata: Mapping[str, Any] | None = None,
|
|
3473
|
+
metrics: Mapping[str, int | float] | None = None,
|
|
3474
|
+
id: str | None = None,
|
|
3475
|
+
dataset_record_id: str | None = None,
|
|
3486
3476
|
allow_concurrent_with_spans: bool = False,
|
|
3487
3477
|
) -> str:
|
|
3488
3478
|
"""
|
|
@@ -3527,11 +3517,11 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3527
3517
|
def log_feedback(
|
|
3528
3518
|
self,
|
|
3529
3519
|
id: str,
|
|
3530
|
-
scores:
|
|
3531
|
-
expected:
|
|
3532
|
-
tags:
|
|
3533
|
-
comment:
|
|
3534
|
-
metadata:
|
|
3520
|
+
scores: Mapping[str, int | float] | None = None,
|
|
3521
|
+
expected: Any | None = None,
|
|
3522
|
+
tags: Sequence[str] | None = None,
|
|
3523
|
+
comment: str | None = None,
|
|
3524
|
+
metadata: Mapping[str, Any] | None = None,
|
|
3535
3525
|
source: Literal["external", "app", "api", None] = None,
|
|
3536
3526
|
) -> None:
|
|
3537
3527
|
"""
|
|
@@ -3559,13 +3549,13 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3559
3549
|
|
|
3560
3550
|
def start_span(
|
|
3561
3551
|
self,
|
|
3562
|
-
name:
|
|
3563
|
-
type:
|
|
3564
|
-
span_attributes:
|
|
3565
|
-
start_time:
|
|
3566
|
-
set_current:
|
|
3567
|
-
parent:
|
|
3568
|
-
propagated_event:
|
|
3552
|
+
name: str | None = None,
|
|
3553
|
+
type: SpanTypeAttribute | None = None,
|
|
3554
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
3555
|
+
start_time: float | None = None,
|
|
3556
|
+
set_current: bool | None = None,
|
|
3557
|
+
parent: str | None = None,
|
|
3558
|
+
propagated_event: dict[str, Any] | None = None,
|
|
3569
3559
|
**event: Any,
|
|
3570
3560
|
) -> Span:
|
|
3571
3561
|
"""Create a new toplevel span underneath the experiment. The name defaults to "root" and the span type to "eval".
|
|
@@ -3599,7 +3589,7 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3599
3589
|
**event,
|
|
3600
3590
|
)
|
|
3601
3591
|
|
|
3602
|
-
def fetch_base_experiment(self) ->
|
|
3592
|
+
def fetch_base_experiment(self) -> ExperimentIdentifier | None:
|
|
3603
3593
|
state = self._get_state()
|
|
3604
3594
|
conn = state.app_conn()
|
|
3605
3595
|
|
|
@@ -3616,7 +3606,7 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3616
3606
|
return None
|
|
3617
3607
|
|
|
3618
3608
|
def summarize(
|
|
3619
|
-
self, summarize_scores: bool = True, comparison_experiment_id:
|
|
3609
|
+
self, summarize_scores: bool = True, comparison_experiment_id: str | None = None
|
|
3620
3610
|
) -> "ExperimentSummary":
|
|
3621
3611
|
"""
|
|
3622
3612
|
Summarize the experiment, including the scores (compared to the closest reference experiment) and metadata.
|
|
@@ -3703,13 +3693,13 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3703
3693
|
|
|
3704
3694
|
def _start_span_impl(
|
|
3705
3695
|
self,
|
|
3706
|
-
name:
|
|
3707
|
-
type:
|
|
3708
|
-
span_attributes:
|
|
3709
|
-
start_time:
|
|
3710
|
-
set_current:
|
|
3711
|
-
parent:
|
|
3712
|
-
propagated_event:
|
|
3696
|
+
name: str | None = None,
|
|
3697
|
+
type: SpanTypeAttribute | None = None,
|
|
3698
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
3699
|
+
start_time: float | None = None,
|
|
3700
|
+
set_current: bool | None = None,
|
|
3701
|
+
parent: str | None = None,
|
|
3702
|
+
propagated_event: dict[str, Any] | None = None,
|
|
3713
3703
|
lookup_span_parent: bool = True,
|
|
3714
3704
|
**event: Any,
|
|
3715
3705
|
) -> Span:
|
|
@@ -3739,9 +3729,9 @@ class Experiment(ObjectFetcher[ExperimentEvent], Exportable):
|
|
|
3739
3729
|
|
|
3740
3730
|
def __exit__(
|
|
3741
3731
|
self,
|
|
3742
|
-
exc_type:
|
|
3743
|
-
exc_value:
|
|
3744
|
-
traceback:
|
|
3732
|
+
exc_type: type[BaseException] | None,
|
|
3733
|
+
exc_value: BaseException | None,
|
|
3734
|
+
traceback: TracebackType | None,
|
|
3745
3735
|
) -> None:
|
|
3746
3736
|
del exc_type, exc_value, traceback
|
|
3747
3737
|
|
|
@@ -3754,7 +3744,7 @@ class ReadonlyExperiment(ObjectFetcher[ExperimentEvent]):
|
|
|
3754
3744
|
def __init__(
|
|
3755
3745
|
self,
|
|
3756
3746
|
lazy_metadata: LazyValue[ProjectExperimentMetadata],
|
|
3757
|
-
state:
|
|
3747
|
+
state: BraintrustState | None = None,
|
|
3758
3748
|
):
|
|
3759
3749
|
self._lazy_metadata = lazy_metadata
|
|
3760
3750
|
self.state = state or _state
|
|
@@ -3779,7 +3769,7 @@ class ReadonlyExperiment(ObjectFetcher[ExperimentEvent]):
|
|
|
3779
3769
|
self._lazy_metadata.get()
|
|
3780
3770
|
return self.state
|
|
3781
3771
|
|
|
3782
|
-
def as_dataset(self, batch_size:
|
|
3772
|
+
def as_dataset(self, batch_size: int | None = None) -> Iterator[_ExperimentDatasetEvent]:
|
|
3783
3773
|
"""
|
|
3784
3774
|
Return the experiment's data as a dataset iterator.
|
|
3785
3775
|
|
|
@@ -3805,19 +3795,19 @@ class SpanImpl(Span):
|
|
|
3805
3795
|
self,
|
|
3806
3796
|
parent_object_type: SpanObjectTypeV3,
|
|
3807
3797
|
parent_object_id: LazyValue[str],
|
|
3808
|
-
parent_compute_object_metadata_args:
|
|
3809
|
-
parent_span_ids:
|
|
3810
|
-
name:
|
|
3811
|
-
type:
|
|
3812
|
-
default_root_type:
|
|
3813
|
-
span_attributes:
|
|
3814
|
-
start_time:
|
|
3815
|
-
set_current:
|
|
3816
|
-
event:
|
|
3817
|
-
propagated_event:
|
|
3818
|
-
span_id:
|
|
3819
|
-
root_span_id:
|
|
3820
|
-
state:
|
|
3798
|
+
parent_compute_object_metadata_args: dict[str, Any] | None,
|
|
3799
|
+
parent_span_ids: ParentSpanIds | None,
|
|
3800
|
+
name: str | None = None,
|
|
3801
|
+
type: SpanTypeAttribute | None = None,
|
|
3802
|
+
default_root_type: SpanTypeAttribute | None = None,
|
|
3803
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
3804
|
+
start_time: float | None = None,
|
|
3805
|
+
set_current: bool | None = None,
|
|
3806
|
+
event: dict[str, Any] | None = None,
|
|
3807
|
+
propagated_event: dict[str, Any] | None = None,
|
|
3808
|
+
span_id: str | None = None,
|
|
3809
|
+
root_span_id: str | None = None,
|
|
3810
|
+
state: BraintrustState | None = None,
|
|
3821
3811
|
lookup_span_parent: bool = True,
|
|
3822
3812
|
):
|
|
3823
3813
|
if span_attributes is None:
|
|
@@ -3830,11 +3820,11 @@ class SpanImpl(Span):
|
|
|
3830
3820
|
self.state = state or _state
|
|
3831
3821
|
|
|
3832
3822
|
self.can_set_current = cast(bool, coalesce(set_current, True))
|
|
3833
|
-
self._logged_end_time:
|
|
3823
|
+
self._logged_end_time: float | None = None
|
|
3834
3824
|
|
|
3835
3825
|
# Context token for proper cleanup - used by both OTEL and Braintrust context managers
|
|
3836
3826
|
# This is set by the context manager when the span becomes active
|
|
3837
|
-
self._context_token:
|
|
3827
|
+
self._context_token: Any | None = None
|
|
3838
3828
|
|
|
3839
3829
|
self.parent_object_type = parent_object_type
|
|
3840
3830
|
self.parent_object_id = parent_object_id
|
|
@@ -3867,7 +3857,7 @@ class SpanImpl(Span):
|
|
|
3867
3857
|
_EXEC_COUNTER += 1
|
|
3868
3858
|
exec_counter = _EXEC_COUNTER
|
|
3869
3859
|
|
|
3870
|
-
internal_data:
|
|
3860
|
+
internal_data: dict[str, Any] = dict(
|
|
3871
3861
|
metrics=dict(
|
|
3872
3862
|
start=start_time or time.time(),
|
|
3873
3863
|
),
|
|
@@ -3909,9 +3899,9 @@ class SpanImpl(Span):
|
|
|
3909
3899
|
|
|
3910
3900
|
def set_attributes(
|
|
3911
3901
|
self,
|
|
3912
|
-
name:
|
|
3913
|
-
type:
|
|
3914
|
-
span_attributes:
|
|
3902
|
+
name: str | None = None,
|
|
3903
|
+
type: SpanTypeAttribute | None = None,
|
|
3904
|
+
span_attributes: Mapping[str, Any] | None = None,
|
|
3915
3905
|
) -> None:
|
|
3916
3906
|
self.log_internal(
|
|
3917
3907
|
internal_data={
|
|
@@ -3929,9 +3919,7 @@ class SpanImpl(Span):
|
|
|
3929
3919
|
def log(self, **event: Any) -> None:
|
|
3930
3920
|
return self.log_internal(event=event, internal_data=None)
|
|
3931
3921
|
|
|
3932
|
-
def log_internal(
|
|
3933
|
-
self, event: Optional[Dict[str, Any]] = None, internal_data: Optional[Dict[str, Any]] = None
|
|
3934
|
-
) -> None:
|
|
3922
|
+
def log_internal(self, event: dict[str, Any] | None = None, internal_data: dict[str, Any] | None = None) -> None:
|
|
3935
3923
|
serializable_partial_record, lazy_partial_record = split_logging_data(event, internal_data)
|
|
3936
3924
|
|
|
3937
3925
|
# We both check for serializability and round-trip `partial_record`
|
|
@@ -3939,7 +3927,7 @@ class SpanImpl(Span):
|
|
|
3939
3927
|
# cutting out any reference to user objects when the object is logged
|
|
3940
3928
|
# asynchronously, so that in case the objects are modified, the logging
|
|
3941
3929
|
# is unaffected.
|
|
3942
|
-
partial_record:
|
|
3930
|
+
partial_record: dict[str, Any] = dict(
|
|
3943
3931
|
id=self.id,
|
|
3944
3932
|
span_id=self.span_id,
|
|
3945
3933
|
root_span_id=self.root_span_id,
|
|
@@ -3956,7 +3944,7 @@ class SpanImpl(Span):
|
|
|
3956
3944
|
if len(serializable_partial_record.get("tags", [])) > 0 and self.span_parents:
|
|
3957
3945
|
raise Exception("Tags can only be logged to the root span")
|
|
3958
3946
|
|
|
3959
|
-
def compute_record() ->
|
|
3947
|
+
def compute_record() -> dict[str, Any]:
|
|
3960
3948
|
exporter = _get_exporter()
|
|
3961
3949
|
return dict(
|
|
3962
3950
|
**serializable_partial_record,
|
|
@@ -3979,13 +3967,13 @@ class SpanImpl(Span):
|
|
|
3979
3967
|
|
|
3980
3968
|
def start_span(
|
|
3981
3969
|
self,
|
|
3982
|
-
name:
|
|
3983
|
-
type:
|
|
3984
|
-
span_attributes:
|
|
3985
|
-
start_time:
|
|
3986
|
-
set_current:
|
|
3987
|
-
parent:
|
|
3988
|
-
propagated_event:
|
|
3970
|
+
name: str | None = None,
|
|
3971
|
+
type: SpanTypeAttribute | None = None,
|
|
3972
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
3973
|
+
start_time: float | None = None,
|
|
3974
|
+
set_current: bool | None = None,
|
|
3975
|
+
parent: str | None = None,
|
|
3976
|
+
propagated_event: dict[str, Any] | None = None,
|
|
3989
3977
|
**event: Any,
|
|
3990
3978
|
) -> Span:
|
|
3991
3979
|
if parent:
|
|
@@ -4017,7 +4005,7 @@ class SpanImpl(Span):
|
|
|
4017
4005
|
state=self.state,
|
|
4018
4006
|
)
|
|
4019
4007
|
|
|
4020
|
-
def end(self, end_time:
|
|
4008
|
+
def end(self, end_time: float | None = None) -> float:
|
|
4021
4009
|
internal_data = {}
|
|
4022
4010
|
if not self._logged_end_time:
|
|
4023
4011
|
end_time = end_time or time.time()
|
|
@@ -4162,13 +4150,13 @@ class SpanImpl(Span):
|
|
|
4162
4150
|
|
|
4163
4151
|
|
|
4164
4152
|
def log_exc_info_to_span(
|
|
4165
|
-
span: Span, exc_type:
|
|
4153
|
+
span: Span, exc_type: type[BaseException], exc_value: BaseException, tb: TracebackType | None
|
|
4166
4154
|
) -> None:
|
|
4167
4155
|
error = stringify_exception(exc_type, exc_value, tb)
|
|
4168
4156
|
span.log(error=error)
|
|
4169
4157
|
|
|
4170
4158
|
|
|
4171
|
-
def stringify_exception(exc_type:
|
|
4159
|
+
def stringify_exception(exc_type: type[BaseException], exc_value: BaseException, tb: TracebackType | None) -> str:
|
|
4172
4160
|
return "".join(
|
|
4173
4161
|
traceback.format_exception_only(exc_type, exc_value)
|
|
4174
4162
|
+ ["\nTraceback (most recent call last):\n"]
|
|
@@ -4183,8 +4171,8 @@ def _strip_nones(d: T, deep: bool) -> T:
|
|
|
4183
4171
|
|
|
4184
4172
|
|
|
4185
4173
|
def split_logging_data(
|
|
4186
|
-
event:
|
|
4187
|
-
) ->
|
|
4174
|
+
event: dict[str, Any] | None, internal_data: dict[str, Any] | None
|
|
4175
|
+
) -> tuple[dict[str, Any], dict[str, Any]]:
|
|
4188
4176
|
# There should be no overlap between the dictionaries being merged,
|
|
4189
4177
|
# except for `sanitized` and `internal_data`, where the former overrides
|
|
4190
4178
|
# the latter.
|
|
@@ -4192,8 +4180,8 @@ def split_logging_data(
|
|
|
4192
4180
|
sanitized_and_internal_data = _strip_nones(internal_data or {}, deep=True)
|
|
4193
4181
|
merge_dicts(sanitized_and_internal_data, _strip_nones(sanitized, deep=False))
|
|
4194
4182
|
|
|
4195
|
-
serializable_partial_record:
|
|
4196
|
-
lazy_partial_record:
|
|
4183
|
+
serializable_partial_record: dict[str, Any] = {}
|
|
4184
|
+
lazy_partial_record: dict[str, Any] = {}
|
|
4197
4185
|
for k, v in sanitized_and_internal_data.items():
|
|
4198
4186
|
if isinstance(v, BraintrustStream):
|
|
4199
4187
|
# Python has weird semantics with loop variables and lambda functions, so we
|
|
@@ -4220,10 +4208,10 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4220
4208
|
def __init__(
|
|
4221
4209
|
self,
|
|
4222
4210
|
lazy_metadata: LazyValue[ProjectDatasetMetadata],
|
|
4223
|
-
version:
|
|
4211
|
+
version: None | int | str = None,
|
|
4224
4212
|
legacy: bool = DEFAULT_IS_LEGACY_DATASET,
|
|
4225
|
-
_internal_btql:
|
|
4226
|
-
state:
|
|
4213
|
+
_internal_btql: dict[str, Any] | None = None,
|
|
4214
|
+
state: BraintrustState | None = None,
|
|
4227
4215
|
):
|
|
4228
4216
|
if legacy:
|
|
4229
4217
|
eprint(
|
|
@@ -4231,7 +4219,7 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4231
4219
|
)
|
|
4232
4220
|
|
|
4233
4221
|
def mutate_record(r: DatasetEvent) -> DatasetEvent:
|
|
4234
|
-
_enrich_attachments(cast(
|
|
4222
|
+
_enrich_attachments(cast(dict[str, Any], r))
|
|
4235
4223
|
return ensure_dataset_record(r, legacy)
|
|
4236
4224
|
|
|
4237
4225
|
self._lazy_metadata = lazy_metadata
|
|
@@ -4278,10 +4266,10 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4278
4266
|
|
|
4279
4267
|
def _validate_event(
|
|
4280
4268
|
self,
|
|
4281
|
-
metadata:
|
|
4282
|
-
expected:
|
|
4283
|
-
output:
|
|
4284
|
-
tags:
|
|
4269
|
+
metadata: dict[str, Any] | None = None,
|
|
4270
|
+
expected: Any | None = None,
|
|
4271
|
+
output: Any | None = None,
|
|
4272
|
+
tags: Sequence[str] | None = None,
|
|
4285
4273
|
):
|
|
4286
4274
|
if metadata is not None:
|
|
4287
4275
|
if not isinstance(metadata, dict):
|
|
@@ -4298,7 +4286,7 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4298
4286
|
|
|
4299
4287
|
def _create_args(
|
|
4300
4288
|
self, id, input=None, expected=None, metadata=None, tags=None, output=None, is_merge=False
|
|
4301
|
-
) -> LazyValue[
|
|
4289
|
+
) -> LazyValue[dict[str, Any]]:
|
|
4302
4290
|
expected_value = expected if expected is not None else output
|
|
4303
4291
|
|
|
4304
4292
|
args = _populate_args(
|
|
@@ -4319,7 +4307,7 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4319
4307
|
_check_json_serializable(args)
|
|
4320
4308
|
args = _deep_copy_event(args)
|
|
4321
4309
|
|
|
4322
|
-
def compute_args() ->
|
|
4310
|
+
def compute_args() -> dict[str, Any]:
|
|
4323
4311
|
return dict(
|
|
4324
4312
|
**args,
|
|
4325
4313
|
dataset_id=self.id,
|
|
@@ -4329,12 +4317,12 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4329
4317
|
|
|
4330
4318
|
def insert(
|
|
4331
4319
|
self,
|
|
4332
|
-
input:
|
|
4333
|
-
expected:
|
|
4334
|
-
tags:
|
|
4335
|
-
metadata:
|
|
4336
|
-
id:
|
|
4337
|
-
output:
|
|
4320
|
+
input: Any | None = None,
|
|
4321
|
+
expected: Any | None = None,
|
|
4322
|
+
tags: Sequence[str] | None = None,
|
|
4323
|
+
metadata: dict[str, Any] | None = None,
|
|
4324
|
+
id: str | None = None,
|
|
4325
|
+
output: Any | None = None,
|
|
4338
4326
|
) -> str:
|
|
4339
4327
|
"""
|
|
4340
4328
|
Insert a single record to the dataset. The record will be batched and uploaded behind the scenes. If you pass in an `id`,
|
|
@@ -4373,10 +4361,10 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4373
4361
|
def update(
|
|
4374
4362
|
self,
|
|
4375
4363
|
id: str,
|
|
4376
|
-
input:
|
|
4377
|
-
expected:
|
|
4378
|
-
tags:
|
|
4379
|
-
metadata:
|
|
4364
|
+
input: Any | None = None,
|
|
4365
|
+
expected: Any | None = None,
|
|
4366
|
+
tags: Sequence[str] | None = None,
|
|
4367
|
+
metadata: dict[str, Any] | None = None,
|
|
4380
4368
|
) -> str:
|
|
4381
4369
|
"""
|
|
4382
4370
|
Update fields of a single record in the dataset. The updated fields will be batched and uploaded behind the scenes.
|
|
@@ -4488,7 +4476,7 @@ class Dataset(ObjectFetcher[DatasetEvent]):
|
|
|
4488
4476
|
def render_message(render: Callable[[str], str], message: PromptMessage):
|
|
4489
4477
|
base = {k: v for (k, v) in message.as_dict().items() if v is not None}
|
|
4490
4478
|
# TODO: shouldn't load_prompt guarantee content is a PromptMessage?
|
|
4491
|
-
content = cast(Union[str,
|
|
4479
|
+
content = cast(Union[str, list[Union[TextPart, ImagePart]], dict[str, Any]], message.content)
|
|
4492
4480
|
if content is not None:
|
|
4493
4481
|
if isinstance(content, str):
|
|
4494
4482
|
base["content"] = render(content)
|
|
@@ -4552,7 +4540,7 @@ def render_message(render: Callable[[str], str], message: PromptMessage):
|
|
|
4552
4540
|
|
|
4553
4541
|
|
|
4554
4542
|
def _create_custom_render():
|
|
4555
|
-
def _get_key(key: str, scopes:
|
|
4543
|
+
def _get_key(key: str, scopes: list[dict[str, Any]], warn: bool) -> Any:
|
|
4556
4544
|
thing = chevron.renderer._get_key(key, scopes, warn) # type: ignore
|
|
4557
4545
|
if isinstance(thing, str):
|
|
4558
4546
|
return thing
|
|
@@ -4592,7 +4580,7 @@ def render_templated_object(obj: Any, args: Any) -> Any:
|
|
|
4592
4580
|
return obj
|
|
4593
4581
|
|
|
4594
4582
|
|
|
4595
|
-
def render_prompt_params(params:
|
|
4583
|
+
def render_prompt_params(params: dict[str, Any], args: Any) -> dict[str, Any]:
|
|
4596
4584
|
if not params:
|
|
4597
4585
|
return params
|
|
4598
4586
|
|
|
@@ -4617,7 +4605,7 @@ def render_prompt_params(params: Dict[str, Any], args: Any) -> Dict[str, Any]:
|
|
|
4617
4605
|
return {**params, "response_format": {**response_format, "json_schema": {**json_schema, "schema": parsed_schema}}}
|
|
4618
4606
|
|
|
4619
4607
|
|
|
4620
|
-
def render_mustache(template: str, data: Any, *, strict: bool = False, renderer:
|
|
4608
|
+
def render_mustache(template: str, data: Any, *, strict: bool = False, renderer: Callable[..., Any] | None = None):
|
|
4621
4609
|
if renderer is None:
|
|
4622
4610
|
renderer = chevron.render
|
|
4623
4611
|
|
|
@@ -4694,7 +4682,7 @@ class Prompt:
|
|
|
4694
4682
|
return self._lazy_metadata.get().slug
|
|
4695
4683
|
|
|
4696
4684
|
@property
|
|
4697
|
-
def prompt(self) ->
|
|
4685
|
+
def prompt(self) -> PromptBlockData | None:
|
|
4698
4686
|
return self._lazy_metadata.get().prompt_data.prompt
|
|
4699
4687
|
|
|
4700
4688
|
@property
|
|
@@ -4791,7 +4779,7 @@ class Prompt:
|
|
|
4791
4779
|
|
|
4792
4780
|
|
|
4793
4781
|
class Project:
|
|
4794
|
-
def __init__(self, name:
|
|
4782
|
+
def __init__(self, name: str | None = None, id: str | None = None):
|
|
4795
4783
|
self._name = name
|
|
4796
4784
|
self._id = id
|
|
4797
4785
|
self.init_lock = threading.RLock()
|
|
@@ -4831,9 +4819,9 @@ class Logger(Exportable):
|
|
|
4831
4819
|
self,
|
|
4832
4820
|
lazy_metadata: LazyValue[OrgProjectMetadata],
|
|
4833
4821
|
async_flush: bool = True,
|
|
4834
|
-
compute_metadata_args:
|
|
4835
|
-
link_args:
|
|
4836
|
-
state:
|
|
4822
|
+
compute_metadata_args: dict | None = None,
|
|
4823
|
+
link_args: dict | None = None,
|
|
4824
|
+
state: BraintrustState | None = None,
|
|
4837
4825
|
):
|
|
4838
4826
|
self._lazy_metadata = lazy_metadata
|
|
4839
4827
|
self.async_flush = async_flush
|
|
@@ -4873,15 +4861,15 @@ class Logger(Exportable):
|
|
|
4873
4861
|
|
|
4874
4862
|
def log(
|
|
4875
4863
|
self,
|
|
4876
|
-
input:
|
|
4877
|
-
output:
|
|
4878
|
-
expected:
|
|
4879
|
-
error:
|
|
4880
|
-
tags:
|
|
4881
|
-
scores:
|
|
4882
|
-
metadata:
|
|
4883
|
-
metrics:
|
|
4884
|
-
id:
|
|
4864
|
+
input: Any | None = None,
|
|
4865
|
+
output: Any | None = None,
|
|
4866
|
+
expected: Any | None = None,
|
|
4867
|
+
error: str | None = None,
|
|
4868
|
+
tags: Sequence[str] | None = None,
|
|
4869
|
+
scores: Mapping[str, int | float] | None = None,
|
|
4870
|
+
metadata: Mapping[str, Any] | None = None,
|
|
4871
|
+
metrics: Mapping[str, int | float] | None = None,
|
|
4872
|
+
id: str | None = None,
|
|
4885
4873
|
allow_concurrent_with_spans: bool = False,
|
|
4886
4874
|
) -> str:
|
|
4887
4875
|
"""
|
|
@@ -4926,11 +4914,11 @@ class Logger(Exportable):
|
|
|
4926
4914
|
def log_feedback(
|
|
4927
4915
|
self,
|
|
4928
4916
|
id: str,
|
|
4929
|
-
scores:
|
|
4930
|
-
expected:
|
|
4931
|
-
tags:
|
|
4932
|
-
comment:
|
|
4933
|
-
metadata:
|
|
4917
|
+
scores: Mapping[str, int | float] | None = None,
|
|
4918
|
+
expected: Any | None = None,
|
|
4919
|
+
tags: Sequence[str] | None = None,
|
|
4920
|
+
comment: str | None = None,
|
|
4921
|
+
metadata: Mapping[str, Any] | None = None,
|
|
4934
4922
|
source: Literal["external", "app", "api", None] = None,
|
|
4935
4923
|
) -> None:
|
|
4936
4924
|
"""
|
|
@@ -4958,15 +4946,15 @@ class Logger(Exportable):
|
|
|
4958
4946
|
|
|
4959
4947
|
def start_span(
|
|
4960
4948
|
self,
|
|
4961
|
-
name:
|
|
4962
|
-
type:
|
|
4963
|
-
span_attributes:
|
|
4964
|
-
start_time:
|
|
4965
|
-
set_current:
|
|
4966
|
-
parent:
|
|
4967
|
-
propagated_event:
|
|
4968
|
-
span_id:
|
|
4969
|
-
root_span_id:
|
|
4949
|
+
name: str | None = None,
|
|
4950
|
+
type: SpanTypeAttribute | None = None,
|
|
4951
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
4952
|
+
start_time: float | None = None,
|
|
4953
|
+
set_current: bool | None = None,
|
|
4954
|
+
parent: str | None = None,
|
|
4955
|
+
propagated_event: dict[str, Any] | None = None,
|
|
4956
|
+
span_id: str | None = None,
|
|
4957
|
+
root_span_id: str | None = None,
|
|
4970
4958
|
**event: Any,
|
|
4971
4959
|
) -> Span:
|
|
4972
4960
|
"""Create a new toplevel span underneath the logger. The name defaults to "root" and the span type to "task".
|
|
@@ -5004,15 +4992,15 @@ class Logger(Exportable):
|
|
|
5004
4992
|
|
|
5005
4993
|
def _start_span_impl(
|
|
5006
4994
|
self,
|
|
5007
|
-
name:
|
|
5008
|
-
type:
|
|
5009
|
-
span_attributes:
|
|
5010
|
-
start_time:
|
|
5011
|
-
set_current:
|
|
5012
|
-
parent:
|
|
5013
|
-
propagated_event:
|
|
5014
|
-
span_id:
|
|
5015
|
-
root_span_id:
|
|
4995
|
+
name: str | None = None,
|
|
4996
|
+
type: SpanTypeAttribute | None = None,
|
|
4997
|
+
span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
|
|
4998
|
+
start_time: float | None = None,
|
|
4999
|
+
set_current: bool | None = None,
|
|
5000
|
+
parent: str | None = None,
|
|
5001
|
+
propagated_event: dict[str, Any] | None = None,
|
|
5002
|
+
span_id: str | None = None,
|
|
5003
|
+
root_span_id: str | None = None,
|
|
5016
5004
|
lookup_span_parent: bool = True,
|
|
5017
5005
|
**event: Any,
|
|
5018
5006
|
) -> Span:
|
|
@@ -5062,7 +5050,7 @@ class Logger(Exportable):
|
|
|
5062
5050
|
def __enter__(self) -> "Logger":
|
|
5063
5051
|
return self
|
|
5064
5052
|
|
|
5065
|
-
def _get_link_base_url(self) ->
|
|
5053
|
+
def _get_link_base_url(self) -> str | None:
|
|
5066
5054
|
"""Return the base of link urls (e.g. https://braintrust.dev/app/my-org-name/) if we have the info
|
|
5067
5055
|
otherwise return None.
|
|
5068
5056
|
"""
|
|
@@ -5098,11 +5086,11 @@ class ScoreSummary(SerializableDataClass):
|
|
|
5098
5086
|
score: float
|
|
5099
5087
|
"""Average score across all examples."""
|
|
5100
5088
|
|
|
5101
|
-
improvements:
|
|
5089
|
+
improvements: int | None
|
|
5102
5090
|
"""Number of improvements in the score."""
|
|
5103
|
-
regressions:
|
|
5091
|
+
regressions: int | None
|
|
5104
5092
|
"""Number of regressions in the score."""
|
|
5105
|
-
diff:
|
|
5093
|
+
diff: float | None = None
|
|
5106
5094
|
"""Difference in score between the current and reference experiment."""
|
|
5107
5095
|
|
|
5108
5096
|
def __str__(self):
|
|
@@ -5133,15 +5121,15 @@ class MetricSummary(SerializableDataClass):
|
|
|
5133
5121
|
# Used to help with formatting
|
|
5134
5122
|
_longest_metric_name: int
|
|
5135
5123
|
|
|
5136
|
-
metric:
|
|
5124
|
+
metric: float | int
|
|
5137
5125
|
"""Average metric across all examples."""
|
|
5138
5126
|
unit: str
|
|
5139
5127
|
"""Unit label for the metric."""
|
|
5140
|
-
improvements:
|
|
5128
|
+
improvements: int | None
|
|
5141
5129
|
"""Number of improvements in the metric."""
|
|
5142
|
-
regressions:
|
|
5130
|
+
regressions: int | None
|
|
5143
5131
|
"""Number of regressions in the metric."""
|
|
5144
|
-
diff:
|
|
5132
|
+
diff: float | None = None
|
|
5145
5133
|
"""Difference in metric between the current and reference experiment."""
|
|
5146
5134
|
|
|
5147
5135
|
def __str__(self):
|
|
@@ -5167,21 +5155,21 @@ class ExperimentSummary(SerializableDataClass):
|
|
|
5167
5155
|
|
|
5168
5156
|
project_name: str
|
|
5169
5157
|
"""Name of the project that the experiment belongs to."""
|
|
5170
|
-
project_id:
|
|
5158
|
+
project_id: str | None
|
|
5171
5159
|
"""ID of the project. May be `None` if the eval was run locally."""
|
|
5172
|
-
experiment_id:
|
|
5160
|
+
experiment_id: str | None
|
|
5173
5161
|
"""ID of the experiment. May be `None` if the eval was run locally."""
|
|
5174
5162
|
experiment_name: str
|
|
5175
5163
|
"""Name of the experiment."""
|
|
5176
|
-
project_url:
|
|
5164
|
+
project_url: str | None
|
|
5177
5165
|
"""URL to the project's page in the Braintrust app."""
|
|
5178
|
-
experiment_url:
|
|
5166
|
+
experiment_url: str | None
|
|
5179
5167
|
"""URL to the experiment's page in the Braintrust app."""
|
|
5180
|
-
comparison_experiment_name:
|
|
5168
|
+
comparison_experiment_name: str | None
|
|
5181
5169
|
"""The experiment scores are baselined against."""
|
|
5182
|
-
scores:
|
|
5170
|
+
scores: dict[str, ScoreSummary]
|
|
5183
5171
|
"""Summary of the experiment's scores."""
|
|
5184
|
-
metrics:
|
|
5172
|
+
metrics: dict[str, MetricSummary]
|
|
5185
5173
|
"""Summary of the experiment's metrics."""
|
|
5186
5174
|
|
|
5187
5175
|
def __str__(self):
|
|
@@ -5230,7 +5218,7 @@ class DatasetSummary(SerializableDataClass):
|
|
|
5230
5218
|
"""URL to the project's page in the Braintrust app."""
|
|
5231
5219
|
dataset_url: str
|
|
5232
5220
|
"""URL to the experiment's page in the Braintrust app."""
|
|
5233
|
-
data_summary:
|
|
5221
|
+
data_summary: DataSummary | None
|
|
5234
5222
|
"""Summary of the dataset's data."""
|
|
5235
5223
|
|
|
5236
5224
|
def __str__(self):
|
|
@@ -5245,7 +5233,8 @@ class DatasetSummary(SerializableDataClass):
|
|
|
5245
5233
|
|
|
5246
5234
|
|
|
5247
5235
|
class TracedThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
|
|
5248
|
-
# Returns Any because Future
|
|
5236
|
+
# Returns Any because Future[T] generic typing was stabilized in Python 3.9,
|
|
5237
|
+
# but we maintain compatibility with older type checkers.
|
|
5249
5238
|
def submit(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
|
|
5250
5239
|
# Capture all current context variables
|
|
5251
5240
|
context = contextvars.copy_context()
|
|
@@ -5257,7 +5246,7 @@ class TracedThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
|
|
|
5257
5246
|
return super().submit(wrapped_fn, *args, **kwargs)
|
|
5258
5247
|
|
|
5259
5248
|
|
|
5260
|
-
def get_prompt_versions(project_id: str, prompt_id: str) ->
|
|
5249
|
+
def get_prompt_versions(project_id: str, prompt_id: str) -> list[str]:
|
|
5261
5250
|
"""
|
|
5262
5251
|
Get the versions for a specific prompt.
|
|
5263
5252
|
|
|
@@ -5317,13 +5306,13 @@ def get_prompt_versions(project_id: str, prompt_id: str) -> List[str]:
|
|
|
5317
5306
|
]
|
|
5318
5307
|
|
|
5319
5308
|
|
|
5320
|
-
def _get_app_url(app_url:
|
|
5309
|
+
def _get_app_url(app_url: str | None = None) -> str:
|
|
5321
5310
|
if app_url:
|
|
5322
5311
|
return app_url
|
|
5323
5312
|
return os.getenv("BRAINTRUST_APP_URL", DEFAULT_APP_URL)
|
|
5324
5313
|
|
|
5325
5314
|
|
|
5326
|
-
def _get_org_name(org_name:
|
|
5315
|
+
def _get_org_name(org_name: str | None = None) -> str | None:
|
|
5327
5316
|
if org_name:
|
|
5328
5317
|
return org_name
|
|
5329
5318
|
return os.getenv("BRAINTRUST_ORG_NAME")
|