braintrust 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +737 -672
- braintrust/audit.py +2 -2
- braintrust/bt_json.py +178 -19
- braintrust/cli/eval.py +6 -7
- braintrust/cli/push.py +11 -11
- braintrust/context.py +12 -17
- braintrust/contrib/temporal/__init__.py +16 -27
- braintrust/contrib/temporal/test_temporal.py +8 -3
- braintrust/devserver/auth.py +8 -8
- braintrust/devserver/cache.py +3 -4
- braintrust/devserver/cors.py +8 -7
- braintrust/devserver/dataset.py +3 -5
- braintrust/devserver/eval_hooks.py +7 -6
- braintrust/devserver/schemas.py +22 -19
- braintrust/devserver/server.py +19 -12
- braintrust/devserver/test_cached_login.py +4 -4
- braintrust/framework.py +139 -142
- braintrust/framework2.py +88 -87
- braintrust/functions/invoke.py +66 -59
- braintrust/functions/stream.py +3 -2
- braintrust/generated_types.py +3 -1
- braintrust/git_fields.py +11 -11
- braintrust/gitutil.py +2 -3
- braintrust/graph_util.py +10 -10
- braintrust/id_gen.py +2 -2
- braintrust/logger.py +373 -471
- braintrust/merge_row_batch.py +10 -9
- braintrust/oai.py +21 -20
- braintrust/otel/__init__.py +49 -49
- braintrust/otel/context.py +16 -30
- braintrust/otel/test_distributed_tracing.py +14 -11
- braintrust/otel/test_otel_bt_integration.py +32 -31
- braintrust/parameters.py +8 -8
- braintrust/prompt.py +14 -14
- braintrust/prompt_cache/disk_cache.py +5 -4
- braintrust/prompt_cache/lru_cache.py +3 -2
- braintrust/prompt_cache/prompt_cache.py +13 -14
- braintrust/queue.py +4 -4
- braintrust/score.py +4 -4
- braintrust/serializable_data_class.py +4 -4
- braintrust/span_identifier_v1.py +1 -2
- braintrust/span_identifier_v2.py +3 -4
- braintrust/span_identifier_v3.py +23 -20
- braintrust/span_identifier_v4.py +34 -25
- braintrust/test_bt_json.py +644 -0
- braintrust/test_framework.py +72 -6
- braintrust/test_helpers.py +5 -5
- braintrust/test_id_gen.py +2 -3
- braintrust/test_logger.py +211 -107
- braintrust/test_otel.py +61 -53
- braintrust/test_queue.py +0 -1
- braintrust/test_score.py +1 -3
- braintrust/test_span_components.py +29 -44
- braintrust/util.py +9 -8
- braintrust/version.py +2 -2
- braintrust/wrappers/_anthropic_utils.py +4 -4
- braintrust/wrappers/agno/__init__.py +3 -4
- braintrust/wrappers/agno/agent.py +1 -2
- braintrust/wrappers/agno/function_call.py +1 -2
- braintrust/wrappers/agno/model.py +1 -2
- braintrust/wrappers/agno/team.py +1 -2
- braintrust/wrappers/agno/utils.py +12 -12
- braintrust/wrappers/anthropic.py +7 -8
- braintrust/wrappers/claude_agent_sdk/__init__.py +3 -4
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +29 -27
- braintrust/wrappers/dspy.py +15 -17
- braintrust/wrappers/google_genai/__init__.py +17 -30
- braintrust/wrappers/langchain.py +22 -24
- braintrust/wrappers/litellm.py +4 -3
- braintrust/wrappers/openai.py +15 -15
- braintrust/wrappers/pydantic_ai.py +225 -110
- braintrust/wrappers/test_agno.py +0 -1
- braintrust/wrappers/test_dspy.py +0 -1
- braintrust/wrappers/test_google_genai.py +64 -4
- braintrust/wrappers/test_litellm.py +0 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +819 -22
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/METADATA +3 -2
- braintrust-0.4.1.dist-info/RECORD +121 -0
- braintrust-0.3.15.dist-info/RECORD +0 -120
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/WHEEL +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/entry_points.txt +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/top_level.txt +0 -0
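Note: most of the churn in the per-file diffs below is a mechanical typing migration: typing.Dict/List/Optional/Tuple are replaced by builtin generics (PEP 585) and X | None unions (PEP 604). A minimal sketch of the pattern, not taken verbatim from the diff (the new syntax assumes Python 3.9+ for builtin generics and 3.10+ for | in annotations, or from __future__ import annotations):

    # Before: typing-module generics
    from typing import Any, Dict, List, Optional

    def omit(obj: Dict[str, Any], keys: List[str]) -> Dict[str, Any]:
        return {k: v for k, v in obj.items() if k not in keys}

    def first(xs: List[int]) -> Optional[int]:
        return xs[0] if xs else None

    # After: builtin generics and PEP 604 unions
    from typing import Any

    def omit(obj: dict[str, Any], keys: list[str]) -> dict[str, Any]:
        return {k: v for k, v in obj.items() if k not in keys}

    def first(xs: list[int]) -> int | None:
        return xs[0] if xs else None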
braintrust/wrappers/agno/utils.py
CHANGED
@@ -1,8 +1,8 @@
 import time
-from typing import Any, Dict, List, Optional
+from typing import Any


-def omit(obj: Dict[str, Any], keys: List[str]):
+def omit(obj: dict[str, Any], keys: list[str]):
     return {k: v for k, v in obj.items() if k not in keys}


@@ -14,11 +14,11 @@ def mark_patched(obj: Any):
     setattr(obj, "_braintrust_patched", True)


-def clean(obj: Dict[str, Any]) -> Dict[str, Any]:
+def clean(obj: dict[str, Any]) -> dict[str, Any]:
     return {k: v for k, v in obj.items() if v is not None}


-def get_args_kwargs(args: List[str], kwargs: Dict[str, Any], keys: List[str]):
+def get_args_kwargs(args: list[str], kwargs: dict[str, Any], keys: list[str]):
     return {k: args[i] if args else kwargs.get(k) for i, k in enumerate(keys)}, omit(kwargs, keys)


@@ -71,7 +71,7 @@ AGNO_METRICS_MAP = {
 }


-def extract_metadata(instance: Any, component: str) -> Dict[str, Any]:
+def extract_metadata(instance: Any, component: str) -> dict[str, Any]:
     """Extract metadata from any component (model, agent, team)."""
     metadata = {"component": component}

@@ -100,7 +100,7 @@ def extract_metadata(instance: Any, component: str) -> Dict[str, Any]:
     return metadata


-def parse_metrics_from_agno(usage: Any) -> Dict[str, Any]:
+def parse_metrics_from_agno(usage: Any) -> dict[str, Any]:
     """Parse metrics from Agno usage object, following OpenAI wrapper pattern."""
     metrics = {}

@@ -121,7 +121,7 @@ def parse_metrics_from_agno(usage: Any) -> Dict[str, Any]:
     return metrics


-def extract_metrics(result: Any, messages: Optional[list] = None) -> Dict[str, Any]:
+def extract_metrics(result: Any, messages: list | None = None) -> dict[str, Any]:
     """
     Unified metrics extraction for all components.

@@ -163,7 +163,7 @@ def extract_metrics(result: Any, messages: Optional[list] = None) -> Dict[str, Any]:
     return {}


-def extract_streaming_metrics(aggregated: Dict[str, Any], start_time: float) -> Optional[Dict[str, Any]]:
+def extract_streaming_metrics(aggregated: dict[str, Any], start_time: float) -> dict[str, Any] | None:
     """Extract metrics from aggregated streaming response."""
     metrics = {}

@@ -187,7 +187,7 @@ def extract_streaming_metrics(aggregated: Dict[str, Any], start_time: float) -> Optional[Dict[str, Any]]:
     return metrics if metrics else None


-def _aggregate_metrics(target: Dict[str, Any], source: Dict[str, Any]) -> None:
+def _aggregate_metrics(target: dict[str, Any], source: dict[str, Any]) -> None:
     """Aggregate metrics from source into target dict."""
     for key, value in source.items():
         if _is_numeric(value):
@@ -205,7 +205,7 @@ def _aggregate_metrics(target: Dict[str, Any], source: Dict[str, Any]) -> None:
             target[key] = value


-def _aggregate_model_chunks(chunks: List[Any]) -> Dict[str, Any]:
+def _aggregate_model_chunks(chunks: list[Any]) -> dict[str, Any]:
     """Aggregate ModelResponse chunks from invoke_stream into a complete response."""
     aggregated = {
         "content": "",
@@ -263,7 +263,7 @@ def _aggregate_model_chunks(chunks: List[Any]) -> Dict[str, Any]:
     return aggregated


-def _aggregate_response_stream_chunks(chunks: List[Any]) -> Dict[str, Any]:
+def _aggregate_response_stream_chunks(chunks: list[Any]) -> dict[str, Any]:
     """
     Aggregate chunks from response_stream which can be ModelResponse, RunOutputEvent, or TeamRunOutputEvent.

@@ -344,7 +344,7 @@ def _aggregate_response_stream_chunks(chunks: List[Any]) -> Dict[str, Any]:
     return aggregated


-def _aggregate_agent_chunks(chunks: List[Any]) -> Dict[str, Any]:
+def _aggregate_agent_chunks(chunks: list[Any]) -> dict[str, Any]:
     """Aggregate BaseAgentRunEvent/BaseTeamRunEvent chunks into a complete response."""
     aggregated = {
         "content": "",
braintrust/wrappers/anthropic.py
CHANGED
@@ -2,7 +2,6 @@ import logging
 import time
 import warnings
 from contextlib import contextmanager
-from typing import Optional

 from braintrust.logger import NOOP_SPAN, log_exc_info_to_span, start_span
 from braintrust.wrappers._anthropic_utils import Wrapper, extract_anthropic_usage, finalize_anthropic_tokens
@@ -10,7 +9,6 @@ from braintrust.wrappers._anthropic_utils import Wrapper, extract_anthropic_usage, finalize_anthropic_tokens
 log = logging.getLogger(__name__)


-
 # This tracer depends on an internal anthropic method used to merge
 # streamed messages together. It's a bit tricky so I'm opting to use it
 # here. If it goes away, this polyfill will make it a no-op and the only
@@ -242,7 +240,7 @@ class TracedMessageStream(Wrapper):
         self.__metrics = {}
         self.__snapshot = None
         self.__request_start_time = request_start_time
-        self.__time_to_first_token: Optional[float] = None
+        self.__time_to_first_token: float | None = None

     def _get_final_traced_message(self):
         return self.__snapshot
@@ -314,7 +312,7 @@ def _start_span(name, kwargs):
         return NOOP_SPAN


-def _log_message_to_span(message, span, time_to_first_token: Optional[float] = None):
+def _log_message_to_span(message, span, time_to_first_token: float | None = None):
     """Log telemetry from the given anthropic.Message to the given span."""
     with _catch_exceptions():
         usage = getattr(message, "usage", {})
@@ -326,13 +324,14 @@ def _log_message_to_span(message, span, time_to_first_token: Optional[float] = None):

         # Create output dict with only truthy values for role and content
         output = {
-            k: v
-            for k, v in {
-                "role": getattr(message, "role", None), "content": getattr(message, "content", None)
-            }.items() if v
+            k: v
+            for k, v in {"role": getattr(message, "role", None), "content": getattr(message, "content", None)}.items()
+            if v
         } or None

         span.log(output=output, metrics=metrics)
+
+
 @contextmanager
 def _catch_exceptions():
     try:
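Note: the _log_message_to_span hunk above only reflows the truthy-filter comprehension; the idiom itself (keep only truthy fields, collapse an empty dict to None) works like this standalone sketch:

    message_fields = {"role": "assistant", "content": ""}
    output = {k: v for k, v in message_fields.items() if v} or None
    # "content" is falsy and is dropped, leaving {"role": "assistant"};
    # if every value were falsy, `{} or None` would yield None.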
braintrust/wrappers/claude_agent_sdk/__init__.py
CHANGED
@@ -16,7 +16,6 @@ Usage (imports can be before or after setup):
 """

 import logging
-from typing import Optional

 from braintrust.logger import NOOP_SPAN, current_span, init_logger

@@ -28,9 +27,9 @@ __all__ = ["setup_claude_agent_sdk"]


 def setup_claude_agent_sdk(
-    api_key: Optional[str] = None,
-    project_id: Optional[str] = None,
-    project: Optional[str] = None,
+    api_key: str | None = None,
+    project_id: str | None = None,
+    project: str | None = None,
 ) -> bool:
     """
     Setup Braintrust integration with Claude Agent SDK. Will automatically patch the SDK for automatic tracing.
braintrust/wrappers/claude_agent_sdk/_wrapper.py
CHANGED
@@ -2,7 +2,8 @@ import dataclasses
 import logging
 import threading
 import time
-from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Tuple
+from collections.abc import AsyncGenerator, Callable
+from typing import Any

 from braintrust.logger import start_span
 from braintrust.span_types import SpanTypeAttribute
@@ -108,12 +109,12 @@ def _wrap_tool_handler(handler: Any, tool_name: Any) -> Callable[..., Any]:
     so we try the context variable first, then fall back to current_span export.
     """
     # Check if already wrapped to prevent double-wrapping
-    if hasattr(handler, '_braintrust_wrapped'):
+    if hasattr(handler, "_braintrust_wrapped"):
        return handler

    async def wrapped_handler(args: Any) -> Any:
        # Get parent span export from thread-local storage
-        parent_export = getattr(_thread_local, 'parent_span_export', None)
+        parent_export = getattr(_thread_local, "parent_span_export", None)

        with start_span(
            name=str(tool_name),
@@ -144,11 +145,14 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
         We end the previous span when the next AssistantMessage arrives, using the marked
         start time to ensure sequential timing (no overlapping LLM spans).
         """
-        def __init__(self, query_start_time: Optional[float] = None):
-            self.current_span: Optional[Any] = None
-            self.next_start_time: Optional[float] = query_start_time

-        def start_llm_span(self, message: Any, prompt: Any, conversation_history: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+        def __init__(self, query_start_time: float | None = None):
+            self.current_span: Any | None = None
+            self.next_start_time: float | None = query_start_time
+
+        def start_llm_span(
+            self, message: Any, prompt: Any, conversation_history: list[dict[str, Any]]
+        ) -> dict[str, Any] | None:
             """Start a new LLM span, ending the previous one if it exists."""
             # Use the marked start time, or current time as fallback
             start_time = self.next_start_time if self.next_start_time is not None else time.time()
@@ -158,8 +162,7 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
                 self.current_span.end(end_time=start_time)

             final_content, span = _create_llm_span_for_messages(
-                [message], prompt, conversation_history,
-                start_time=start_time
+                [message], prompt, conversation_history, start_time=start_time
             )
             self.current_span = span
             self.next_start_time = None  # Reset for next span
@@ -169,7 +172,7 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
             """Mark when the next LLM call will start (after tool results)."""
             self.next_start_time = time.time()

-        def log_usage(self, usage_metrics: Dict[str, float]) -> None:
+        def log_usage(self, usage_metrics: dict[str, float]) -> None:
             """Log usage metrics to the current LLM span."""
             if self.current_span and usage_metrics:
                 self.current_span.log(metrics=usage_metrics)
@@ -186,8 +189,8 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
             client = original_client_class(*args, **kwargs)
             super().__init__(client)
             self.__client = client
-            self.__last_prompt: Optional[str] = None
-            self.__query_start_time: Optional[float] = None
+            self.__last_prompt: str | None = None
+            self.__query_start_time: float | None = None

         async def query(self, *args: Any, **kwargs: Any) -> Any:
             """Wrap query to capture the prompt and start time for tracing."""
@@ -220,7 +223,7 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
             # Store the parent span export in thread-local storage for tool handlers
             _thread_local.parent_span_export = span.export()

-            final_results: List[Dict[str, Any]] = []
+            final_results: list[dict[str, Any]] = []
             llm_tracker = LLMSpanTracker(query_start_time=self.__query_start_time)

             try:
@@ -243,10 +246,12 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
                         llm_tracker.log_usage(usage_metrics)

                         result_metadata = {
-                            k: v for k, v in {
+                            k: v
+                            for k, v in {
                                 "num_turns": getattr(message, "num_turns", None),
                                 "session_id": getattr(message, "session_id", None),
-                            }.items() if v is not None
+                            }.items()
+                            if v is not None
                         }
                         if result_metadata:
                             span.log(metadata=result_metadata)
@@ -257,8 +262,8 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
                 log.warning("Error in tracing code", exc_info=e)
             finally:
                 llm_tracker.cleanup()
-                if hasattr(_thread_local, 'parent_span_export'):
-                    delattr(_thread_local, 'parent_span_export')
+                if hasattr(_thread_local, "parent_span_export"):
+                    delattr(_thread_local, "parent_span_export")

         async def __aenter__(self) -> "WrappedClaudeSDKClient":
             await self.__client.__aenter__()
@@ -271,11 +276,11 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:


 def _create_llm_span_for_messages(
-    messages: List[Any],  # List of AssistantMessage objects
+    messages: list[Any],  # List of AssistantMessage objects
     prompt: Any,
-    conversation_history: List[Dict[str, Any]],
-    start_time: Optional[float] = None,
-) -> Tuple[Optional[Dict[str, Any]], Optional[Any]]:
+    conversation_history: list[dict[str, Any]],
+    start_time: float | None = None,
+) -> tuple[dict[str, Any] | None, Any | None]:
     """Creates an LLM span for a group of AssistantMessage objects.

     Returns a tuple of (final_content, span):
@@ -295,13 +300,12 @@ def _create_llm_span_for_messages(
     model = getattr(last_message, "model", None)
     input_messages = _build_llm_input(prompt, conversation_history)

-    outputs: List[Dict[str, Any]] = []
+    outputs: list[dict[str, Any]] = []
     for msg in messages:
         if hasattr(msg, "content"):
             content = _serialize_content_blocks(msg.content)
             outputs.append({"content": content, "role": "assistant"})

-
     llm_span = start_span(
         name="anthropic.messages.create",
         span_attributes={"type": SpanTypeAttribute.LLM},
@@ -355,7 +359,7 @@ def _serialize_content_blocks(content: Any) -> Any:
     return content


-def _extract_usage_from_result_message(result_message: Any) -> Dict[str, float]:
+def _extract_usage_from_result_message(result_message: Any) -> dict[str, float]:
     """Extracts and normalizes usage metrics from a ResultMessage.

     Uses shared Anthropic utilities for consistent metric extraction.
@@ -374,9 +378,7 @@ def _extract_usage_from_result_message(result_message: Any) -> Dict[str, float]:
     return metrics


-def _build_llm_input(
-    prompt: Any, conversation_history: List[Dict[str, Any]]
-) -> Optional[List[Dict[str, Any]]]:
+def _build_llm_input(prompt: Any, conversation_history: list[dict[str, Any]]) -> list[dict[str, Any]] | None:
     """Builds the input array for an LLM span from the initial prompt and conversation history.

     Formats input to match Anthropic messages API format for proper UI rendering.
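Note: the _wrapper.py changes keep the existing pattern of handing the parent span to tool handlers through thread-local storage (set before the query loop, cleaned up in finally). A self-contained sketch of that handoff, using a hypothetical handler in place of the SDK callbacks:

    import threading

    _thread_local = threading.local()

    def handle_tool_call() -> None:
        # Tool handlers read the parent export if one was stashed.
        parent = getattr(_thread_local, "parent_span_export", None)
        print(f"tool handler sees parent export: {parent!r}")

    def run_query(span_export: str) -> None:
        # Stash the parent span export where nested handlers can find it.
        _thread_local.parent_span_export = span_export
        try:
            handle_tool_call()
        finally:
            # Mirror the wrapper's cleanup: never leak state across queries.
            if hasattr(_thread_local, "parent_span_export"):
                delattr(_thread_local, "parent_span_export")

    run_query("example-export-token")  # hypothetical export string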
braintrust/wrappers/dspy.py
CHANGED
@@ -47,7 +47,7 @@ Advanced Usage with LiteLLM Patching:
     ```
 """

-from typing import Any, Dict, Optional
+from typing import Any

 from braintrust.logger import current_span, start_span
 from braintrust.span_types import SpanTypeAttribute
@@ -58,9 +58,7 @@ from braintrust.span_types import SpanTypeAttribute
 try:
     from dspy.utils.callback import BaseCallback
 except ImportError:
-    raise ImportError(
-        "DSPy is not installed. Please install it with: pip install dspy"
-    )
+    raise ImportError("DSPy is not installed. Please install it with: pip install dspy")


 class BraintrustDSpyCallback(BaseCallback):
@@ -130,13 +128,13 @@ class BraintrustDSpyCallback(BaseCallback):
         """Initialize the Braintrust DSPy callback handler."""
         super().__init__()
         # Map call_id to span objects for proper nesting
-        self._spans: Dict[str, Any] = {}
+        self._spans: dict[str, Any] = {}

     def on_lm_start(
         self,
         call_id: str,
         instance: Any,
-        inputs: Dict[str, Any],
+        inputs: dict[str, Any],
     ):
         """Log the start of a language model call.

@@ -174,8 +172,8 @@ class BraintrustDSpyCallback(BaseCallback):
     def on_lm_end(
         self,
         call_id: str,
-        outputs: Optional[Dict[str, Any]],
-        exception: Optional[Exception] = None,
+        outputs: dict[str, Any] | None,
+        exception: Exception | None = None,
     ):
         """Log the end of a language model call.

@@ -205,7 +203,7 @@ class BraintrustDSpyCallback(BaseCallback):
         self,
         call_id: str,
         instance: Any,
-        inputs: Dict[str, Any],
+        inputs: dict[str, Any],
     ):
         """Log the start of a DSPy module execution.

@@ -236,8 +234,8 @@ class BraintrustDSpyCallback(BaseCallback):
     def on_module_end(
         self,
         call_id: str,
-        outputs: Optional[Any],
-        exception: Optional[Exception] = None,
+        outputs: Any | None,
+        exception: Exception | None = None,
     ):
         """Log the end of a DSPy module execution.

@@ -274,7 +272,7 @@ class BraintrustDSpyCallback(BaseCallback):
         self,
         call_id: str,
         instance: Any,
-        inputs: Dict[str, Any],
+        inputs: dict[str, Any],
     ):
         """Log the start of a tool invocation.

@@ -309,8 +307,8 @@ class BraintrustDSpyCallback(BaseCallback):
     def on_tool_end(
         self,
         call_id: str,
-        outputs: Optional[Dict[str, Any]],
-        exception: Optional[Exception] = None,
+        outputs: dict[str, Any] | None,
+        exception: Exception | None = None,
     ):
         """Log the end of a tool invocation.

@@ -340,7 +338,7 @@ class BraintrustDSpyCallback(BaseCallback):
         self,
         call_id: str,
         instance: Any,
-        inputs: Dict[str, Any],
+        inputs: dict[str, Any],
     ):
         """Log the start of an evaluation run.

@@ -374,8 +372,8 @@ class BraintrustDSpyCallback(BaseCallback):
     def on_evaluate_end(
         self,
         call_id: str,
-        outputs: Optional[Any],
-        exception: Optional[Exception] = None,
+        outputs: Any | None,
+        exception: Exception | None = None,
     ):
         """Log the end of an evaluation run.

braintrust/wrappers/google_genai/__init__.py
CHANGED
@@ -1,19 +1,20 @@
 import logging
 import time
-from typing import Any, Dict, Iterable, List, Optional, Tuple
-
-from wrapt import wrap_function_wrapper
+from collections.abc import Iterable
+from typing import Any

+from braintrust.bt_json import bt_safe_deep_copy
 from braintrust.logger import NOOP_SPAN, Attachment, current_span, init_logger, start_span
 from braintrust.span_types import SpanTypeAttribute
+from wrapt import wrap_function_wrapper

 logger = logging.getLogger(__name__)


 def setup_genai(
-    api_key: Optional[str] = None,
-    project_id: Optional[str] = None,
-    project_name: Optional[str] = None,
+    api_key: str | None = None,
+    project_id: str | None = None,
+    project_name: str | None = None,
 ):
     span = current_span()
     if span == NOOP_SPAN:
@@ -148,8 +149,8 @@ def wrap_async_models(AsyncModels: Any):
     return AsyncModels


-def _serialize_input(api_client: Any, input: Dict[str, Any]):
-    config = _try_dict(input.get("config"))
+def _serialize_input(api_client: Any, input: dict[str, Any]):
+    config = bt_safe_deep_copy(input.get("config"))

     if config is not None:
         tools = _serialize_tools(api_client, input)
@@ -223,7 +224,7 @@ def _serialize_content_item(item: Any) -> Any:
         return item


-def _serialize_tools(api_client: Any, input: Optional[Any]):
+def _serialize_tools(api_client: Any, input: Any | None):
     try:
         from google.genai.models import (
             _GenerateContentParameters_to_mldev,  # pyright: ignore [reportPrivateUsage]
@@ -242,7 +243,7 @@ def _serialize_tools(api_client: Any, input: Optional[Any]):
         return None


-def omit(obj: Dict[str, Any], keys: Iterable[str]):
+def omit(obj: dict[str, Any], keys: Iterable[str]):
     return {k: v for k, v in obj.items() if k not in keys}


@@ -254,11 +255,11 @@ def mark_patched(obj: Any):
     return setattr(obj, "_braintrust_patched", True)


-def get_args_kwargs(args: List[str], kwargs: Dict[str, Any], keys: Iterable[str]):
+def get_args_kwargs(args: list[str], kwargs: dict[str, Any], keys: Iterable[str]):
     return {k: args[i] if args else kwargs.get(k) for i, k in enumerate(keys)}, omit(kwargs, keys)


-def _extract_generate_content_metrics(response: Any, start: float) -> Dict[str, Any]:
+def _extract_generate_content_metrics(response: Any, start: float) -> dict[str, Any]:
     """Extract metrics from a non-streaming generate_content response."""
     end_time = time.time()
     metrics = dict(
@@ -297,8 +298,8 @@ def _extract_generate_content_metrics(response: Any, start: float) -> Dict[str, Any]:


 def _aggregate_generate_content_chunks(
-    chunks: List[Any], start: float, first_token_time: Optional[float] = None
-) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    chunks: list[Any], start: float, first_token_time: float | None = None
+) -> tuple[dict[str, Any], dict[str, Any]]:
     """Aggregate streaming chunks into a single response with metrics."""
     end_time = time.time()
     metrics = dict(
@@ -410,11 +411,11 @@ def _aggregate_generate_content_chunks(
     return aggregated, clean_metrics


-def clean(obj: Dict[str, Any]) -> Dict[str, Any]:
+def clean(obj: dict[str, Any]) -> dict[str, Any]:
     return {k: v for k, v in obj.items() if v is not None}


-def get_path(obj: Dict[str, Any], path: str, default: Any = None) -> Optional[Any]:
+def get_path(obj: dict[str, Any], path: str, default: Any = None) -> Any | None:
     keys = path.split(".")
     current = obj

@@ -424,17 +425,3 @@ def get_path(obj: Dict[str, Any], path: str, default: Any = None) -> Optional[Any]:
             current = current[key]

     return current
-
-
-def _try_dict(obj: Any) -> Optional[Dict[str, Any]]:
-    try:
-        return obj.model_dump()
-    except AttributeError:
-        pass
-
-    try:
-        return obj.dump()
-    except AttributeError:
-        pass
-
-    return obj
braintrust/wrappers/langchain.py
CHANGED
@@ -1,6 +1,6 @@
 import contextvars
 import logging
-from typing import Any, Dict, List, Optional
+from typing import Any
 from uuid import UUID

 import braintrust
@@ -30,7 +30,7 @@ class BraintrustTracer(BaseCallbackHandler):
         self.logger = logger
         self.spans = {}

-    def _start_span(self, parent_run_id, run_id, name: Optional[str], **kwargs: Any) -> Any:
+    def _start_span(self, parent_run_id, run_id, name: str | None, **kwargs: Any) -> Any:
         assert run_id not in self.spans, f"Span already exists for run_id {run_id} (this is likely a bug)"

         current_parent = langchain_parent.get()
@@ -60,29 +60,29 @@ class BraintrustTracer(BaseCallbackHandler):

     def on_chain_start(
         self,
-        serialized: Dict[str, Any],
-        inputs: Dict[str, Any],
+        serialized: dict[str, Any],
+        inputs: dict[str, Any],
         *,
         run_id: UUID,
-        parent_run_id: Optional[UUID] = None,
-        tags: Optional[List[str]] = None,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
         **kwargs: Any,
     ) -> Any:
         self._start_span(parent_run_id, run_id, "Chain", input=inputs, metadata={"tags": tags})

     def on_chain_end(
-        self, outputs: Dict[str, Any], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
+        self, outputs: dict[str, Any], *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
     ) -> Any:
         self._end_span(run_id, output=outputs)

     def on_llm_start(
         self,
-        serialized: Dict[str, Any],
-        prompts: List[str],
+        serialized: dict[str, Any],
+        prompts: list[str],
         *,
         run_id: UUID,
-        parent_run_id: Optional[UUID] = None,
-        tags: Optional[List[str]] = None,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
         **kwargs: Any,
     ) -> Any:
         self._start_span(
@@ -95,12 +95,12 @@ class BraintrustTracer(BaseCallbackHandler):

     def on_chat_model_start(
         self,
-        serialized: Dict[str, Any],
-        messages: List[List[BaseMessage]],
+        serialized: dict[str, Any],
+        messages: list[list[BaseMessage]],
         *,
         run_id: UUID,
-        parent_run_id: Optional[UUID] = None,
-        tags: Optional[List[str]] = None,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
         **kwargs: Any,
     ) -> Any:
         self._start_span(
@@ -112,7 +112,7 @@ class BraintrustTracer(BaseCallbackHandler):
         )

     def on_llm_end(
-        self, response: LLMResult, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
+        self, response: LLMResult, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
     ) -> Any:
         metrics = {}
         token_usage = response.llm_output.get("token_usage", {})
@@ -127,25 +127,23 @@ class BraintrustTracer(BaseCallbackHandler):

     def on_tool_start(
         self,
-        serialized: Dict[str, Any],
+        serialized: dict[str, Any],
         input_str: str,
         *,
         run_id: UUID,
-        parent_run_id: Optional[UUID] = None,
-        tags: Optional[List[str]] = None,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
         **kwargs: Any,
     ) -> Any:
         _logger.warning("Starting tool, but it will not be traced in braintrust (unsupported)")

-    def on_tool_end(self, output: str, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any) -> Any:
+    def on_tool_end(self, output: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any) -> Any:
         pass

-    def on_retriever_start(
-        self, query: str, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
-    ) -> Any:
+    def on_retriever_start(self, query: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any) -> Any:
         _logger.warning("Starting retriever, but it will not be traced in braintrust (unsupported)")

     def on_retriever_end(
-        self, response: List[Document], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
+        self, response: list[Document], *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
     ) -> Any:
         pass
braintrust/wrappers/litellm.py
CHANGED
@@ -1,9 +1,9 @@
 from __future__ import annotations

 import time
-from collections.abc import AsyncGenerator, Generator
+from collections.abc import AsyncGenerator, Callable, Generator
 from types import TracebackType
-from typing import Any, Callable
+from typing import Any

 from braintrust.logger import Span, start_span
 from braintrust.span_types import SpanTypeAttribute
@@ -655,7 +655,8 @@ def patch_litellm():
     """
     try:
         import litellm
-        if not hasattr(litellm, '_braintrust_wrapped'):
+
+        if not hasattr(litellm, "_braintrust_wrapped"):
             wrapped = wrap_litellm(litellm)
             litellm.completion = wrapped.completion
             litellm.acompletion = wrapped.acompletion