braintrust 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/__init__.py +3 -0
- braintrust/_generated_types.py +106 -6
- braintrust/auto.py +179 -0
- braintrust/conftest.py +23 -4
- braintrust/framework.py +113 -3
- braintrust/functions/invoke.py +3 -1
- braintrust/functions/test_invoke.py +61 -0
- braintrust/generated_types.py +7 -1
- braintrust/logger.py +127 -45
- braintrust/oai.py +51 -0
- braintrust/span_cache.py +337 -0
- braintrust/span_identifier_v3.py +21 -0
- braintrust/test_bt_json.py +0 -5
- braintrust/test_framework.py +37 -0
- braintrust/test_http.py +444 -0
- braintrust/test_logger.py +295 -5
- braintrust/test_span_cache.py +344 -0
- braintrust/test_trace.py +267 -0
- braintrust/test_util.py +58 -1
- braintrust/trace.py +385 -0
- braintrust/util.py +20 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/agno/__init__.py +2 -3
- braintrust/wrappers/anthropic.py +64 -0
- braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +115 -0
- braintrust/wrappers/dspy.py +52 -1
- braintrust/wrappers/google_genai/__init__.py +9 -6
- braintrust/wrappers/litellm.py +6 -43
- braintrust/wrappers/pydantic_ai.py +2 -3
- braintrust/wrappers/test_agno.py +9 -0
- braintrust/wrappers/test_anthropic.py +156 -0
- braintrust/wrappers/test_dspy.py +117 -0
- braintrust/wrappers/test_google_genai.py +9 -0
- braintrust/wrappers/test_litellm.py +57 -55
- braintrust/wrappers/test_openai.py +253 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
- braintrust/wrappers/test_utils.py +79 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/RECORD +44 -37
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
braintrust/trace.py
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Trace objects for accessing spans in evaluations.
|
|
3
|
+
|
|
4
|
+
This module provides the LocalTrace class which allows scorers to access
|
|
5
|
+
spans from the current evaluation task without making server round-trips.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
from typing import Any, Awaitable, Callable, Optional, Protocol
|
|
10
|
+
|
|
11
|
+
from braintrust.logger import BraintrustState, ObjectFetcher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SpanData:
    """Span data returned by get_spans().

    Holds the well-known span columns as attributes; any extra keyword
    arguments become plain attributes so arbitrary row columns round-trip
    through from_dict()/to_dict().
    """

    def __init__(
        self,
        input: Optional[Any] = None,
        output: Optional[Any] = None,
        metadata: Optional[dict[str, Any]] = None,
        span_id: Optional[str] = None,
        span_parents: Optional[list[str]] = None,
        span_attributes: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ):
        self.input = input
        self.output = output
        self.metadata = metadata
        self.span_id = span_id
        self.span_parents = span_parents
        self.span_attributes = span_attributes
        # Any additional fields are attached dynamically.
        for extra_name, extra_value in kwargs.items():
            setattr(self, extra_name, extra_value)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SpanData":
        """Create SpanData from a dictionary."""
        return cls(**data)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary, omitting None-valued fields."""
        return {name: value for name, value in self.__dict__.items() if value is not None}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SpanFetcher(ObjectFetcher[dict[str, Any]]):
    """
    Fetcher for spans by root_span_id, using the ObjectFetcher pattern.
    Handles pagination automatically via cursor-based iteration.
    """

    def __init__(
        self,
        object_type: str,  # Literal["experiment", "project_logs", "playground_logs"]
        object_id: str,
        root_span_id: str,
        state: BraintrustState,
        span_type_filter: Optional[list[str]] = None,
    ):
        # The BTQL filter pins down root_span_id and, optionally, span type.
        super().__init__(
            object_type=object_type,
            _internal_btql={"filter": self._build_filter(root_span_id, span_type_filter)},
        )
        self._object_id = object_id
        self._state = state

    @staticmethod
    def _build_filter(root_span_id: str, span_type_filter: Optional[list[str]] = None) -> dict[str, Any]:
        """Build BTQL filter expression."""
        # Base predicate: root_span_id = '<value>'.
        match_root = {
            "op": "eq",
            "left": {"op": "ident", "name": ["root_span_id"]},
            "right": {"op": "literal", "value": root_span_id},
        }
        # Exclude scorer spans: span_attributes.purpose is null OR != 'scorer'.
        exclude_scorer = {
            "op": "or",
            "children": [
                {
                    "op": "isnull",
                    "expr": {"op": "ident", "name": ["span_attributes", "purpose"]},
                },
                {
                    "op": "ne",
                    "left": {"op": "ident", "name": ["span_attributes", "purpose"]},
                    "right": {"op": "literal", "value": "scorer"},
                },
            ],
        }
        children = [match_root, exclude_scorer]

        # Narrow to the requested span types, when any were given.
        if span_type_filter:
            children.append(
                {
                    "op": "in",
                    "left": {"op": "ident", "name": ["span_attributes", "type"]},
                    "right": {"op": "literal", "value": span_type_filter},
                }
            )

        return {"op": "and", "children": children}

    @property
    def id(self) -> str:
        return self._object_id

    def _get_state(self) -> BraintrustState:
        return self._state
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
SpanFetchFn = Callable[[Optional[list[str]]], Awaitable[list[SpanData]]]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class CachedSpanFetcher:
    """
    Cached span fetcher that handles fetching and caching spans by type.

    Caching strategy:
    - Cache spans keyed by span type (dict[spanType, list[SpanData]])
    - Track whether every span has been fetched (_all_fetched flag)
    - When filtering by spanType, only fetch types not already cached
    """

    def __init__(
        self,
        object_type: Optional[str] = None,  # Literal["experiment", "project_logs", "playground_logs"]
        object_id: Optional[str] = None,
        root_span_id: Optional[str] = None,
        get_state: Optional[Callable[[], Awaitable[BraintrustState]]] = None,
        fetch_fn: Optional[SpanFetchFn] = None,
    ):
        self._span_cache: dict[str, list[SpanData]] = {}
        self._all_fetched = False

        if fetch_fn is not None:
            # Direct fetch-function injection (used by tests).
            self._fetch_fn = fetch_fn
            return

        # Standard path: build a fetch function around SpanFetcher.
        if object_type is None or object_id is None or root_span_id is None or get_state is None:
            raise ValueError("Must provide either fetch_fn or all of object_type, object_id, root_span_id, get_state")

        async def _fetch_fn(span_type: Optional[list[str]]) -> list[SpanData]:
            state = await get_state()
            fetcher = SpanFetcher(
                object_type=object_type,
                object_id=object_id,
                root_span_id=root_span_id,
                state=state,
                span_type_filter=span_type,
            )
            rows = list(fetcher.fetch())

            def _is_scorer(row: dict[str, Any]) -> bool:
                # A scorer span carries span_attributes.purpose == 'scorer'.
                attrs = row.get("span_attributes")
                return isinstance(attrs, dict) and row.get("span_attributes", {}).get("purpose") == "scorer"

            return [
                SpanData(
                    input=row.get("input"),
                    output=row.get("output"),
                    metadata=row.get("metadata"),
                    span_id=row.get("span_id"),
                    span_parents=row.get("span_parents"),
                    span_attributes=row.get("span_attributes"),
                    id=row.get("id"),
                    _xact_id=row.get("_xact_id"),
                    _pagination_key=row.get("_pagination_key"),
                    root_span_id=row.get("root_span_id"),
                )
                for row in rows
                if not _is_scorer(row)
            ]

        self._fetch_fn = _fetch_fn

    async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]:
        """
        Get spans, using cache when possible.

        Args:
            span_type: Optional list of span types to filter by

        Returns:
            List of matching spans
        """
        # Everything already fetched: serve straight from memory.
        if self._all_fetched:
            return self._get_from_cache(span_type)

        # No type filter: a single unfiltered fetch fills the whole cache.
        if not span_type:
            await self._fetch_spans(None)
            self._all_fetched = True
            return self._get_from_cache(None)

        # Only fetch the types the cache does not hold yet.
        missing = [t for t in span_type if t not in self._span_cache]
        if missing:
            await self._fetch_spans(missing)
        return self._get_from_cache(span_type)

    async def _fetch_spans(self, span_type: Optional[list[str]]) -> None:
        """Fetch spans from the server and file them into the cache by type."""
        for span in await self._fetch_fn(span_type):
            type_key = (span.span_attributes or {}).get("type", "")
            self._span_cache.setdefault(type_key, []).append(span)

    def _get_from_cache(self, span_type: Optional[list[str]]) -> list[SpanData]:
        """Get spans from cache, optionally filtering by type."""
        if not span_type:
            # No filter: flatten every cached bucket.
            everything: list[SpanData] = []
            for bucket in self._span_cache.values():
                everything.extend(bucket)
            return everything

        # Only the requested types, in the order they were asked for.
        return [span for t in span_type if t in self._span_cache for span in self._span_cache[t]]
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class Trace(Protocol):
    """
    Interface for trace objects that can be used by scorers.
    Both the SDK's LocalTrace class and the API wrapper's WrapperTrace implement this.

    NOTE: this is a structural (typing.Protocol) interface — implementers do
    not need to subclass Trace, only to provide matching methods.
    """

    def get_configuration(self) -> dict[str, str]:
        """Get the trace configuration (object_type, object_id, root_span_id)."""
        ...

    async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]:
        """
        Fetch all spans for this root span.

        Args:
            span_type: Optional list of span types to filter by

        Returns:
            List of matching spans
        """
        ...
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class LocalTrace(dict):
    """
    SDK implementation of Trace that uses local span cache and falls back to BTQL.
    Carries identifying information about the evaluation so scorers can perform
    richer logging or side effects.

    Inherits from dict so that it serializes to {"trace_ref": {...}} when passed
    to json.dumps(). This allows LocalTrace to be transparently serialized when
    passed through invoke() or other JSON-serializing code paths.
    """

    def __init__(
        self,
        object_type: str,  # Literal["experiment", "project_logs", "playground_logs"]
        object_id: str,
        root_span_id: str,
        ensure_spans_flushed: Optional[Callable[[], Awaitable[None]]],
        state: BraintrustState,
    ):
        # Initialize dict with trace_ref for JSON serialization.
        super().__init__({
            "trace_ref": {
                "object_type": object_type,
                "object_id": object_id,
                "root_span_id": root_span_id,
            }
        })

        self._object_type = object_type
        self._object_id = object_id
        self._root_span_id = root_span_id
        self._ensure_spans_flushed = ensure_spans_flushed
        self._state = state
        self._spans_flushed = False
        self._spans_flush_promise: Optional[asyncio.Task[None]] = None

        async def get_state() -> BraintrustState:
            await self._ensure_spans_ready()
            # Ensure state is logged in. login() may block on network I/O, so
            # run it in the default executor rather than on the event loop
            # thread. get_running_loop() is the non-deprecated replacement for
            # get_event_loop() inside a coroutine.
            await asyncio.get_running_loop().run_in_executor(None, lambda: state.login())
            return state

        self._cached_fetcher = CachedSpanFetcher(
            object_type=object_type,
            object_id=object_id,
            root_span_id=root_span_id,
            get_state=get_state,
        )

    def get_configuration(self) -> dict[str, str]:
        """Get the trace configuration."""
        return {
            "object_type": self._object_type,
            "object_id": self._object_id,
            "root_span_id": self._root_span_id,
        }

    async def get_spans(self, span_type: Optional[list[str]] = None) -> list[SpanData]:
        """
        Fetch all rows for this root span from its parent object (experiment or project logs).
        First checks the local span cache for recently logged spans, then falls
        back to CachedSpanFetcher which handles BTQL fetching and caching.

        Args:
            span_type: Optional list of span types to filter by

        Returns:
            List of matching spans
        """
        # Try the local span cache first (recently logged spans not yet flushed).
        cached_spans = self._state.span_cache.get_by_root_span_id(self._root_span_id)
        if cached_spans:
            # Exclude scorer spans (span_attributes.purpose == 'scorer').
            spans = [span for span in cached_spans if (span.span_attributes or {}).get("purpose") != "scorer"]

            # Narrow to the requested span types, when any were given.
            if span_type:
                spans = [span for span in spans if (span.span_attributes or {}).get("type", "") in span_type]

            # Convert to SpanData.
            return [
                SpanData(
                    input=span.input,
                    output=span.output,
                    metadata=span.metadata,
                    span_id=span.span_id,
                    span_parents=span.span_parents,
                    span_attributes=span.span_attributes,
                )
                for span in spans
            ]

        # Fall back to CachedSpanFetcher for BTQL fetching with caching.
        return await self._cached_fetcher.get_spans(span_type)

    async def _ensure_spans_ready(self) -> None:
        """Ensure spans are flushed before fetching.

        Concurrent callers share a single flush task; on failure the task
        handle is cleared so a later caller retries the flush.
        """
        if self._spans_flushed or not self._ensure_spans_flushed:
            return

        if self._spans_flush_promise is None:

            async def flush_and_mark():
                try:
                    await self._ensure_spans_flushed()
                    self._spans_flushed = True
                except Exception:
                    # Reset so the next caller retries instead of awaiting a
                    # permanently failed task; bare `raise` keeps the original
                    # traceback intact.
                    self._spans_flush_promise = None
                    raise

            self._spans_flush_promise = asyncio.create_task(flush_and_mark())

        await self._spans_flush_promise
|
braintrust/util.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import inspect
|
|
2
2
|
import json
|
|
3
|
+
import math
|
|
4
|
+
import os
|
|
3
5
|
import sys
|
|
4
6
|
import threading
|
|
5
7
|
import urllib.parse
|
|
@@ -9,6 +11,24 @@ from typing import Any, Generic, Literal, TypedDict, TypeVar, Union
|
|
|
9
11
|
|
|
10
12
|
from requests import HTTPError, Response
|
|
11
13
|
|
|
14
|
+
|
|
15
|
+
def parse_env_var_float(name: str, default: float) -> float:
    """Parse a float from an environment variable, returning default if invalid.

    Returns the default value if the env var is missing, empty, not a valid
    float, NaN, or infinity.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        parsed = float(raw)
    except (ValueError, TypeError):
        return default
    # Reject NaN and +/-infinity; only finite values are usable.
    return parsed if math.isfinite(parsed) else default
|
|
31
|
+
|
|
12
32
|
GLOBAL_PROJECT = "Global"
|
|
13
33
|
BT_IS_ASYNC_ATTRIBUTE = "_BT_IS_ASYNC"
|
|
14
34
|
|
braintrust/wrappers/agno/__init__.py
CHANGED
|
@@ -62,7 +62,6 @@ def setup_agno(
|
|
|
62
62
|
models.base.Model = wrap_model(models.base.Model) # pyright: ignore[reportUnknownMemberType]
|
|
63
63
|
tools.function.FunctionCall = wrap_function_call(tools.function.FunctionCall) # pyright: ignore[reportUnknownMemberType]
|
|
64
64
|
return True
|
|
65
|
-
except ImportError
|
|
66
|
-
|
|
67
|
-
logger.error("Agno is not installed. Please install it with: pip install agno")
|
|
65
|
+
except ImportError:
|
|
66
|
+
# Not installed - this is expected when using auto_instrument()
|
|
68
67
|
return False
|
braintrust/wrappers/anthropic.py
CHANGED
|
@@ -5,6 +5,7 @@ from contextlib import contextmanager
|
|
|
5
5
|
|
|
6
6
|
from braintrust.logger import NOOP_SPAN, log_exc_info_to_span, start_span
|
|
7
7
|
from braintrust.wrappers._anthropic_utils import Wrapper, extract_anthropic_usage, finalize_anthropic_tokens
|
|
8
|
+
from wrapt import wrap_function_wrapper
|
|
8
9
|
|
|
9
10
|
log = logging.getLogger(__name__)
|
|
10
11
|
|
|
@@ -358,3 +359,66 @@ def wrap_anthropic(client):
|
|
|
358
359
|
|
|
359
360
|
def wrap_anthropic_client(client):
|
|
360
361
|
return wrap_anthropic(client)
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _apply_anthropic_wrapper(client):
    """Apply tracing wrapper to an Anthropic client instance in-place."""
    traced = wrap_anthropic(client)
    client.messages = traced.messages
    # The beta namespace is not present in every SDK version.
    if hasattr(traced, "beta"):
        client.beta = traced.beta
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _apply_async_anthropic_wrapper(client):
    """Apply tracing wrapper to an AsyncAnthropic client instance in-place."""
    traced = wrap_anthropic(client)
    client.messages = traced.messages
    # The beta namespace is not present in every SDK version.
    if hasattr(traced, "beta"):
        client.beta = traced.beta
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _anthropic_init_wrapper(wrapped, instance, args, kwargs):
    """Wrapper for Anthropic.__init__ that applies tracing after initialization."""
    # Run the real constructor first, then patch the fully-built client in-place.
    wrapped(*args, **kwargs)
    _apply_anthropic_wrapper(instance)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _async_anthropic_init_wrapper(wrapped, instance, args, kwargs):
    """Wrapper for AsyncAnthropic.__init__ that applies tracing after initialization."""
    # Run the real constructor first, then patch the fully-built client in-place.
    wrapped(*args, **kwargs)
    _apply_async_anthropic_wrapper(instance)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def patch_anthropic() -> bool:
    """
    Patch Anthropic to add Braintrust tracing globally.

    After calling this, all new Anthropic() and AsyncAnthropic() clients
    will automatically have tracing enabled.

    Returns:
        True if Anthropic was patched (or already patched), False if Anthropic is not installed.

    Example:
        ```python
        import braintrust
        braintrust.patch_anthropic()

        import anthropic
        client = anthropic.Anthropic()
        # All calls are now traced!
        ```
    """
    try:
        import anthropic

        # Idempotent: calling this twice is a no-op.
        if getattr(anthropic, "__braintrust_wrapped__", False):
            return True

        for target, handler in (
            ("Anthropic.__init__", _anthropic_init_wrapper),
            ("AsyncAnthropic.__init__", _async_anthropic_init_wrapper),
        ):
            wrap_function_wrapper("anthropic", target, handler)
        anthropic.__braintrust_wrapped__ = True
        return True

    except ImportError:
        return False
|
|
@@ -105,7 +105,6 @@ def setup_claude_agent_sdk(
|
|
|
105
105
|
setattr(module, "tool", wrapped_tool_fn)
|
|
106
106
|
|
|
107
107
|
return True
|
|
108
|
-
except ImportError
|
|
109
|
-
|
|
110
|
-
logger.error("claude-agent-sdk is not installed. Please install it with: pip install claude-agent-sdk")
|
|
108
|
+
except ImportError:
|
|
109
|
+
# Not installed - this is expected when using auto_instrument()
|
|
111
110
|
return False
|
|
@@ -2,7 +2,7 @@ import dataclasses
|
|
|
2
2
|
import logging
|
|
3
3
|
import threading
|
|
4
4
|
import time
|
|
5
|
-
from collections.abc import AsyncGenerator, Callable
|
|
5
|
+
from collections.abc import AsyncGenerator, AsyncIterable, Callable
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from braintrust.logger import start_span
|
|
@@ -191,17 +191,38 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
|
|
|
191
191
|
self.__client = client
|
|
192
192
|
self.__last_prompt: str | None = None
|
|
193
193
|
self.__query_start_time: float | None = None
|
|
194
|
+
self.__captured_messages: list[dict[str, Any]] | None = None
|
|
194
195
|
|
|
195
196
|
async def query(self, *args: Any, **kwargs: Any) -> Any:
|
|
196
197
|
"""Wrap query to capture the prompt and start time for tracing."""
|
|
197
198
|
# Capture the time when query is called (when LLM call starts)
|
|
198
199
|
self.__query_start_time = time.time()
|
|
200
|
+
self.__captured_messages = None
|
|
199
201
|
|
|
200
202
|
# Capture the prompt for use in receive_response
|
|
201
|
-
if args
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
203
|
+
prompt = args[0] if args else kwargs.get("prompt")
|
|
204
|
+
|
|
205
|
+
if prompt is not None:
|
|
206
|
+
if isinstance(prompt, str):
|
|
207
|
+
self.__last_prompt = prompt
|
|
208
|
+
elif isinstance(prompt, AsyncIterable):
|
|
209
|
+
# AsyncIterable[dict] - wrap it to capture messages as they're yielded
|
|
210
|
+
captured: list[dict[str, Any]] = []
|
|
211
|
+
self.__captured_messages = captured
|
|
212
|
+
self.__last_prompt = None # Will be set after messages are captured
|
|
213
|
+
|
|
214
|
+
async def capturing_wrapper() -> AsyncGenerator[dict[str, Any], None]:
|
|
215
|
+
async for msg in prompt:
|
|
216
|
+
captured.append(msg)
|
|
217
|
+
yield msg
|
|
218
|
+
|
|
219
|
+
# Replace the prompt with our capturing wrapper
|
|
220
|
+
if args:
|
|
221
|
+
args = (capturing_wrapper(),) + args[1:]
|
|
222
|
+
else:
|
|
223
|
+
kwargs["prompt"] = capturing_wrapper()
|
|
224
|
+
else:
|
|
225
|
+
self.__last_prompt = str(prompt)
|
|
205
226
|
|
|
206
227
|
return await self.__client.query(*args, **kwargs)
|
|
207
228
|
|
|
@@ -215,11 +236,16 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
|
|
|
215
236
|
"""
|
|
216
237
|
generator = self.__client.receive_response()
|
|
217
238
|
|
|
239
|
+
# Determine the initial input - may be updated later if using async generator
|
|
240
|
+
initial_input = self.__last_prompt if self.__last_prompt else None
|
|
241
|
+
|
|
218
242
|
with start_span(
|
|
219
243
|
name="Claude Agent",
|
|
220
244
|
span_attributes={"type": SpanTypeAttribute.TASK},
|
|
221
|
-
input=
|
|
245
|
+
input=initial_input,
|
|
222
246
|
) as span:
|
|
247
|
+
# If we're capturing async messages, we'll update input after they're consumed
|
|
248
|
+
input_needs_update = self.__captured_messages is not None
|
|
223
249
|
# Store the parent span export in thread-local storage for tool handlers
|
|
224
250
|
_thread_local.parent_span_export = span.export()
|
|
225
251
|
|
|
@@ -228,6 +254,13 @@ def _create_client_wrapper_class(original_client_class: Any) -> Any:
|
|
|
228
254
|
|
|
229
255
|
try:
|
|
230
256
|
async for message in generator:
|
|
257
|
+
# Update input from captured async messages (once, after they're consumed)
|
|
258
|
+
if input_needs_update and self.__captured_messages:
|
|
259
|
+
captured_input = _format_captured_messages(self.__captured_messages)
|
|
260
|
+
if captured_input:
|
|
261
|
+
span.log(input=captured_input)
|
|
262
|
+
input_needs_update = False
|
|
263
|
+
|
|
231
264
|
message_type = type(message).__name__
|
|
232
265
|
|
|
233
266
|
if message_type == "AssistantMessage":
|
|
@@ -390,3 +423,12 @@ def _build_llm_input(prompt: Any, conversation_history: list[dict[str, Any]]) ->
|
|
|
390
423
|
return [{"content": prompt, "role": "user"}] + conversation_history
|
|
391
424
|
|
|
392
425
|
return conversation_history if conversation_history else None
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def _format_captured_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
429
|
+
"""Formats captured async generator messages into structured input.
|
|
430
|
+
|
|
431
|
+
Returns the messages as-is to preserve structure for tracing.
|
|
432
|
+
Empty list returns empty list.
|
|
433
|
+
"""
|
|
434
|
+
return messages if messages else []
|