PyPI - braintrust - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

braintrust/_generated_types.py +224 -122
braintrust/cli/install/api.py +1 -1
braintrust/conftest.py +24 -0
braintrust/db_fields.py +1 -0
braintrust/devserver/test_server_integration.py +0 -11
braintrust/framework.py +2 -2
braintrust/functions/invoke.py +1 -8
braintrust/generated_types.py +7 -7
braintrust/logger.py +30 -38
braintrust/otel/__init__.py +24 -15
braintrust/prompt_cache/test_disk_cache.py +3 -3
braintrust/span_types.py +3 -0
braintrust/test_bt_json.py +23 -19
braintrust/test_framework.py +25 -0
braintrust/test_logger.py +34 -0
braintrust/test_otel.py +118 -26
braintrust/test_util.py +51 -1
braintrust/util.py +24 -3
braintrust/version.py +2 -2
braintrust/wrappers/langsmith_wrapper.py +517 -0
braintrust/wrappers/litellm.py +43 -0
braintrust/wrappers/test_agno.py +0 -12
braintrust/wrappers/test_anthropic.py +1 -11
braintrust/wrappers/test_dspy.py +0 -11
braintrust/wrappers/test_google_genai.py +6 -1
braintrust/wrappers/test_langsmith_wrapper.py +338 -0
braintrust/wrappers/test_litellm.py +73 -10
braintrust/wrappers/test_oai_attachments.py +0 -10
braintrust/wrappers/test_openai.py +3 -12
braintrust/wrappers/test_openrouter.py +0 -9
braintrust/wrappers/test_pydantic_ai_integration.py +0 -11
braintrust/wrappers/test_pydantic_ai_wrap_openai.py +2 -0
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/METADATA +1 -1
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/RECORD +37 -35
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/WHEEL +0 -0
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/entry_points.txt +0 -0
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/top_level.txt +0 -0

braintrust/functions/invoke.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import Any, Literal, TypedDict, TypeVar, overload
 from sseclient import SSEClient
-from .._generated_types import FunctionTypeEnum, InvokeContext
+from .._generated_types import FunctionTypeEnum
 from ..logger import Exportable, get_span_parent_object, login, proxy_conn
 from ..util import response_raise_for_status
 from .constants import INVOKE_API_VERSION
@@ -43,7 +43,6 @@ def invoke(
     # arguments to the function
     input: Any = None,
     messages: list[Any] | None = None,
-    context: InvokeContext | None = None,
     metadata: dict[str, Any] | None = None,
     tags: list[str] | None = None,
     parent: Exportable | str | None = None,
@@ -72,7 +71,6 @@ def invoke(
     # arguments to the function
     input: Any = None,
     messages: list[Any] | None = None,
-    context: InvokeContext | None = None,
     metadata: dict[str, Any] | None = None,
     tags: list[str] | None = None,
     parent: Exportable | str | None = None,
@@ -100,7 +98,6 @@ def invoke(
     # arguments to the function
     input: Any = None,
     messages: list[Any] | None = None,
-    context: InvokeContext | None = None,
     metadata: dict[str, Any] | None = None,
     tags: list[str] | None = None,
     parent: Exportable | str | None = None,
@@ -119,8 +116,6 @@ def invoke(
     Args:
         input: The input to the function. This will be logged as the `input` field in the span.
         messages: Additional OpenAI-style messages to add to the prompt (only works for llm functions).
-        context: Context for functions that operate on spans/traces (e.g., facets). Should contain
-            `object_type`, `object_id`, and `scope` fields.
         metadata: Additional metadata to add to the span. This will be logged as the `metadata` field in the span.
             It will also be available as the {{metadata}} field in the prompt and as the `metadata` argument
             to the function.
@@ -195,8 +190,6 @@ def invoke(
     )
     if messages is not None:
         request["messages"] = messages
-    if context is not None:
-        request["context"] = context
     if mode is not None:
         request["mode"] = mode
     if strict is not None:

braintrust/generated_types.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Auto-generated file (internal git SHA 547fa17c0937e0e25fdf9214487be6f31c91a37a) -- do not modify"""
+"""Auto-generated file (internal git SHA 87ac73f4945a47eff2d4e42775ba4dbc58854c73) -- do not modify"""
 from ._generated_types import (
     Acl,
@@ -10,6 +10,7 @@ from ._generated_types import (
     AsyncScoringState,
     AttachmentReference,
     AttachmentStatus,
+    BatchedFacetData,
     BraintrustAttachmentReference,
     BraintrustModelParams,
     CallEvent,
@@ -47,15 +48,13 @@ from ._generated_types import (
     GraphEdge,
     GraphNode,
     Group,
+    GroupScope,
     IfExists,
-    InvokeContext,
     InvokeFunction,
     InvokeParent,
-    InvokeScope,
     MCPServer,
     MessageRole,
     ModelParams,
-    NullableFunctionTypeEnum,
     NullableSavedFunctionId,
     ObjectReference,
     ObjectReferenceNullish,
@@ -99,6 +98,7 @@ from ._generated_types import (
     StreamingMode,
     ToolFunctionDefinition,
     TraceScope,
+    TriggeredFunctionState,
     UploadStatus,
     User,
     View,
@@ -117,6 +117,7 @@ __all__ = [
     "AsyncScoringState",
     "AttachmentReference",
     "AttachmentStatus",
+    "BatchedFacetData",
     "BraintrustAttachmentReference",
     "BraintrustModelParams",
     "CallEvent",
@@ -154,15 +155,13 @@ __all__ = [
     "GraphEdge",
     "GraphNode",
     "Group",
+    "GroupScope",
     "IfExists",
-    "InvokeContext",
     "InvokeFunction",
     "InvokeParent",
-    "InvokeScope",
     "MCPServer",
     "MessageRole",
     "ModelParams",
-    "NullableFunctionTypeEnum",
     "NullableSavedFunctionId",
     "ObjectReference",
     "ObjectReferenceNullish",
@@ -206,6 +205,7 @@ __all__ = [
     "StreamingMode",
     "ToolFunctionDefinition",
     "TraceScope",
+    "TriggeredFunctionState",
     "UploadStatus",
     "User",
     "View",

braintrust/logger.py CHANGED Viewed

@@ -454,24 +454,22 @@ class BraintrustState:
     def copy_state(self, other: "BraintrustState"):
         """Copy login information from another BraintrustState instance."""
-        self.__dict__.update(
-            {
-                k: v
-                for (k, v) in other.__dict__.items()
-                if k
-                not in (
-                    "current_experiment",
-                    "current_logger",
-                    "current_parent",
-                    "current_span",
-                    "_global_bg_logger",
-                    "_override_bg_logger",
-                    "_context_manager",
-                    "_last_otel_setting",
-                    "_context_manager_lock",
-                )
-            }
-        )
+        self.__dict__.update({
+            k: v
+            for (k, v) in other.__dict__.items()
+            if k
+            not in (
+                "current_experiment",
+                "current_logger",
+                "current_parent",
+                "current_span",
+                "_global_bg_logger",
+                "_override_bg_logger",
+                "_context_manager",
+                "_last_otel_setting",
+                "_context_manager_lock",
+            )
+        })
     def login(
         self,
@@ -2344,6 +2342,7 @@ def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any])
         SKIP_ASYNC_SCORING_FIELD,
         "span_id",
         "root_span_id",
+        "_bt_internal_override_pagination_key",
     }
     if forbidden_keys:
         raise ValueError(f"The following keys are not permitted: {forbidden_keys}")
@@ -3856,9 +3855,6 @@ class SpanImpl(Span):
         if serializable_partial_record.get("metrics", {}).get("end") is not None:
             self._logged_end_time = serializable_partial_record["metrics"]["end"]
-        if len(serializable_partial_record.get("tags", [])) > 0 and self.span_parents:
-            raise Exception("Tags can only be logged to the root span")
         def compute_record() -> dict[str, Any]:
             exporter = _get_exporter()
             return dict(
@@ -4406,24 +4402,20 @@ def render_message(render: Callable[[str], str], message: PromptMessage):
                 if c["type"] == "text":
                     rendered_content.append({**c, "text": render(c["text"])})
                 elif c["type"] == "image_url":
-                    rendered_content.append(
-                        {
-                            **c,
-                            "image_url": {**c["image_url"], "url": render(c["image_url"]["url"])},
-                        }
-                    )
+                    rendered_content.append({
+                        **c,
+                        "image_url": {**c["image_url"], "url": render(c["image_url"]["url"])},
+                    })
                 elif c["type"] == "file":
-                    rendered_content.append(
-                        {
-                            **c,
-                            "file": {
-                                **c["file"],
-                                "file_data": render(c["file"]["file_data"]),
-                                **({} if "file_id" not in c["file"] else {"file_id": render(c["file"]["file_id"])}),
-                                **({} if "filename" not in c["file"] else {"filename": render(c["file"]["filename"])}),
-                            },
-                        }
-                    )
+                    rendered_content.append({
+                        **c,
+                        "file": {
+                            **c["file"],
+                            "file_data": render(c["file"]["file_data"]),
+                            **({} if "file_id" not in c["file"] else {"file_id": render(c["file"]["file_id"])}),
+                            **({} if "filename" not in c["file"] else {"filename": render(c["file"]["filename"])}),
+                        },
+                    })
                 else:
                     raise ValueError(f"Unknown content type: {c['type']}")

braintrust/otel/__init__.py CHANGED Viewed

@@ -90,18 +90,13 @@ class AISpanProcessor:
     def _should_keep_filtered_span(self, span):
         """
         Keep spans if:
-        1. It's a root span (no parent)
-        2. Custom filter returns True/False (if provided)
-        3. Span name starts with 'gen_ai.', 'braintrust.', 'llm.', 'ai.', or 'traceloop.'
-        4. Any attribute name starts with those prefixes
+        1. Custom filter returns True/False (if provided)
+        2. Span name starts with 'gen_ai.', 'braintrust.', 'llm.', 'ai.', or 'traceloop.'
+        3. Any attribute name starts with those prefixes
         """
         if not span:
             return False
-        # Braintrust requires root spans, so always keep them
-        if span.parent is None:
-            return True
         # Apply custom filter if provided
         if self._custom_filter:
             custom_result = self._custom_filter(span)
@@ -384,6 +379,9 @@ def _get_braintrust_parent(object_type, object_id: str | None = None, compute_ar
     return None
+def is_root_span(span) -> bool:
+    """Returns True if the span is a root span (no parent span)."""
+    return getattr(span, "parent", None) is None
 def context_from_span_export(export_str: str):
     """
@@ -522,15 +520,17 @@ def add_span_parent_to_baggage(span, ctx=None):
     return add_parent_to_baggage(parent_value, ctx=ctx)
-def parent_from_headers(headers: dict[str, str]) -> str | None:
+def parent_from_headers(headers: dict[str, str], propagator=None) -> str | None:
     """
-    Extract a Braintrust-compatible parent string from W3C Trace Context headers.
+    Extract a Braintrust-compatible parent string from trace context headers.
-    This converts OTEL trace context headers (traceparent/baggage) into a format
-    that can be passed as the 'parent' parameter to Braintrust's start_span() method.
+    This converts OTEL trace context headers into a format that can be passed
+    as the 'parent' parameter to Braintrust's start_span() method.
     Args:
-        headers: Dictionary with 'traceparent' and optionally 'baggage' keys
+        headers: Dictionary with trace context headers (e.g., 'traceparent'/'baggage' for W3C)
+        propagator: Optional custom TextMapPropagator. If not provided, uses the
+                   globally registered propagator (W3C TraceContext by default).
     Returns:
         Braintrust V4 export string that can be used as parent parameter,
@@ -545,6 +545,12 @@ def parent_from_headers(headers: dict[str, str]) -> str | None:
         >>> parent = parent_from_headers(headers)
         >>> with project.start_span(name="service_c", parent=parent) as span:
         >>>     span.log(input="BT span as child of OTEL parent")
+        >>> # Using a custom propagator (e.g., B3 format)
+        >>> from opentelemetry.propagators.b3 import B3MultiFormat
+        >>> propagator = B3MultiFormat()
+        >>> headers = {'X-B3-TraceId': '...', 'X-B3-SpanId': '...', 'baggage': '...'}
+        >>> parent = parent_from_headers(headers, propagator=propagator)
     """
     if not OTEL_AVAILABLE:
         raise ImportError(INSTALL_ERR_MSG)
@@ -553,8 +559,11 @@ def parent_from_headers(headers: dict[str, str]) -> str | None:
     from opentelemetry import baggage, trace
     from opentelemetry.propagate import extract
-    # Extract context from headers using W3C Trace Context propagator
-    ctx = extract(headers)
+    # Extract context from headers using provided propagator or global propagator
+    if propagator is not None:
+        ctx = propagator.extract(headers)
+    else:
+        ctx = extract(headers)
     # Get span from context
     span = trace.get_current_span(ctx)

braintrust/prompt_cache/test_disk_cache.py CHANGED Viewed

@@ -39,7 +39,7 @@ class TestDiskCache(unittest.TestCase):
             "a\nb",
         ]
         for k in weird_keys:
-            time.sleep(0.05)  # make sure the mtimes are different
+            time.sleep(0.01)  # make sure the mtimes are different
             self.cache.set(k, data)
             result = self.cache.get(k)
             assert data == result
@@ -61,7 +61,7 @@ class TestDiskCache(unittest.TestCase):
         # Fill cache beyond max size (3).
         for i in range(3):
             self.cache.set(f"key{i}", {"value": i})
-            time.sleep(0.1)  # wait to ensure different mtimes
+            time.sleep(0.01)  # wait to ensure different mtimes
         # Add one more to trigger eviction.
         self.cache.set("key3", {"value": 3})
@@ -75,7 +75,7 @@ class TestDiskCache(unittest.TestCase):
         # Fill cache beyond max size (3).
         for i in range(3):
             self.cache.set(f"key{i}", {"value": i})
-            time.sleep(0.1)  # wait to ensure different mtimes
+            time.sleep(0.01)  # wait to ensure different mtimes
         # Add one more to trigger eviction.
         self.cache.set("key3", {"value": 3})

braintrust/span_types.py CHANGED Viewed

@@ -13,6 +13,9 @@ class SpanTypeAttribute(str, Enum):
     EVAL = "eval"
     TASK = "task"
     TOOL = "tool"
+    AUTOMATION = "automation"
+    FACET = "facet"
+    PREPROCESSOR = "preprocessor"
 class SpanPurpose(str, Enum):

braintrust/test_bt_json.py CHANGED Viewed

@@ -5,6 +5,7 @@ import json
 from typing import Any
 from unittest import TestCase
+import pytest
 from braintrust.bt_json import bt_dumps, bt_safe_deep_copy
 from braintrust.logger import Attachment, ExternalAttachment
@@ -281,30 +282,33 @@ class TestBTJson(TestCase):
         self.assertTrue("(1, 2)" in result or "1, 2" in result)
         self.assertIn("None", result)
-    def test_to_bt_safe_special_objects(self):
-        """Test _to_bt_safe handling of Span, Experiment, Dataset, Logger objects."""
-        from braintrust import init, init_dataset, init_logger
+@pytest.mark.vcr
+def test_to_bt_safe_special_objects():
+    """Test _to_bt_safe handling of Span, Experiment, Dataset, Logger objects."""
+    from braintrust import init, init_dataset, init_logger
-        # Create actual objects
-        exp = init(project="test", experiment="test")
-        dataset = init_dataset(project="test", name="test")
-        logger = init_logger(project="test")
-        span = exp.start_span()
+    # Create actual objects
+    exp = init(project="test", experiment="test")
+    dataset = init_dataset(project="test", name="test")
+    logger = init_logger(project="test")
+    span = exp.start_span()
-        # Import _to_bt_safe
-        from braintrust.bt_json import _to_bt_safe
+    # Import _to_bt_safe
+    from braintrust.bt_json import _to_bt_safe
+    # Test each special object
+    assert _to_bt_safe(span) == "<span>"
+    assert _to_bt_safe(exp) == "<experiment>"
+    assert _to_bt_safe(dataset) == "<dataset>"
+    assert _to_bt_safe(logger) == "<logger>"
-        # Test each special object
-        self.assertEqual(_to_bt_safe(span), "<span>")
-        self.assertEqual(_to_bt_safe(exp), "<experiment>")
-        self.assertEqual(_to_bt_safe(dataset), "<dataset>")
-        self.assertEqual(_to_bt_safe(logger), "<logger>")
+    # Clean up
+    exp.flush()
+    dataset.flush()
+    logger.flush()
-        # Clean up
-        exp.flush()
-        dataset.flush()
-        logger.flush()
+class TestBTJsonAttachments(TestCase):
     def test_to_bt_safe_attachments(self):
         """Test _to_bt_safe preserves BaseAttachment and converts ReadonlyAttachment to reference."""
         from braintrust.bt_json import _to_bt_safe

braintrust/test_framework.py CHANGED Viewed

@@ -343,6 +343,31 @@ async def test_eval_no_send_logs_true(with_memory_logger, simple_scorer):
     assert len(logs) == 0
+@pytest.mark.asyncio
+async def test_eval_no_send_logs_with_none_score(with_memory_logger):
+    """Test that scorers returning None don't crash local mode."""
+    def sometimes_none_scorer(input, output, expected):
+        # Return None for first input, score for second
+        if input == "hello":
+            return {"name": "conditional", "score": None}
+        return {"name": "conditional", "score": 1.0}
+    result = await Eval(
+        "test-none-score",
+        data=[
+            {"input": "hello", "expected": "hello world"},
+            {"input": "test", "expected": "test world"},
+        ],
+        task=lambda input_val: input_val + " world",
+        scores=[sometimes_none_scorer],
+        no_send_logs=True,
+    )
+    # Should not crash and should calculate average from non-None scores only
+    assert result.summary.scores["conditional"].score == 1.0  # Only the second score counts
 @pytest.mark.asyncio
 async def test_hooks_tags_append(with_memory_logger, with_simulate_login, simple_scorer):
     """Test that hooks.tags can be appended to and logged."""

braintrust/test_logger.py CHANGED Viewed

@@ -849,6 +849,40 @@ def test_span_link_with_unresolved_experiment(with_simulate_login, with_memory_l
     assert link == "https://www.braintrust.dev/error-generating-link?msg=resolve-experiment-id"
+def test_experiment_span_link_uses_env_vars_when_logged_out(with_memory_logger):
+    """Verify EXPERIMENT spans use BRAINTRUST_ORG_NAME env var when not logged in."""
+    simulate_logout()
+    assert_logged_out()
+    keys = ["BRAINTRUST_APP_URL", "BRAINTRUST_ORG_NAME"]
+    originals = {k: os.environ.get(k) for k in keys}
+    try:
+        os.environ["BRAINTRUST_APP_URL"] = "https://test-app.example.com"
+        os.environ["BRAINTRUST_ORG_NAME"] = "env-org-name"
+        experiment = braintrust.init(
+            project="test-project",
+            experiment="test-experiment",
+        )
+        # Create span with resolved experiment ID
+        span = experiment.start_span(name="test-span")
+        span.parent_object_id = LazyValue(lambda: "test-exp-id", use_mutex=False)
+        span.end()
+        link = span.link()
+        # Should use env var org name and app url
+        assert "env-org-name" in link
+        assert "test-app.example.com" in link
+        assert "test-exp-id" in link
+    finally:
+        for k, v in originals.items():
+            os.environ.pop(k, None)
+            if v:
+                os.environ[k] = v
 def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_logger):
     logger = init_logger(
         project="test-project",

braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl