PyPI - braintrust - Versions diffs - 0.3.11__tar.gz → 0.3.13__tar.gz - Mend

braintrust 0.3.11__tar.gz → 0.3.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (123) hide show

{braintrust-0.3.11 → braintrust-0.3.13}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: braintrust
-Version: 0.3.11
+Version: 0.3.13
 Summary: SDK for integrating Braintrust
 Home-page: https://www.braintrust.dev
 Author: Braintrust
@@ -75,6 +75,20 @@ Install the library with pip.
 pip install braintrust
 ```
+**Performance tip**: For 3-5x faster JSON serialization, install with the optional `performance` extra:
+```bash
+pip install braintrust[performance]
+```
+Or install `orjson` separately:
+```bash
+pip install orjson
+```
+The SDK automatically detects and uses orjson if available, with seamless fallback to standard json. See [ORJSON_OPTIMIZATION.md](ORJSON_OPTIMIZATION.md) for details.
 Then, run a simple experiment with the following code (replace `YOUR_API_KEY` with
 your Braintrust API key):
@@ -100,3 +114,7 @@ Eval(
   scores=[is_equal],
 )
 ```
+# Performance Optimization
+For 3-5x faster JSON serialization, install `orjson`. The SDK automatically detects and uses orjson if available, with seamless fallback to standard json.

{braintrust-0.3.11 → braintrust-0.3.13}/README.md RENAMED Viewed

@@ -12,6 +12,20 @@ Install the library with pip.
 pip install braintrust
 ```
+**Performance tip**: For 3-5x faster JSON serialization, install with the optional `performance` extra:
+```bash
+pip install braintrust[performance]
+```
+Or install `orjson` separately:
+```bash
+pip install orjson
+```
+The SDK automatically detects and uses orjson if available, with seamless fallback to standard json. See [ORJSON_OPTIMIZATION.md](ORJSON_OPTIMIZATION.md) for details.
 Then, run a simple experiment with the following code (replace `YOUR_API_KEY` with
 your Braintrust API key):
@@ -37,3 +51,7 @@ Eval(
   scores=[is_equal],
 )
 ```
+# Performance Optimization
+For 3-5x faster JSON serialization, install `orjson`. The SDK automatically detects and uses orjson if available, with seamless fallback to standard json.

braintrust-0.3.13/src/braintrust/bt_json.py ADDED Viewed

@@ -0,0 +1,116 @@
+import dataclasses
+import json
+from typing import Any, cast
+# Try to import orjson for better performance
+# If not available, we'll use standard json
+try:
+    import orjson
+    _HAS_ORJSON = True
+except ImportError:
+    _HAS_ORJSON = False
+def _to_dict(obj: Any) -> Any:
+    """
+    Function-based default handler for non-JSON-serializable objects.
+    Handles:
+    - dataclasses
+    - Pydantic v2 BaseModel
+    - Pydantic v1 BaseModel
+    - Falls back to str() for unknown types
+    """
+    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
+        return dataclasses.asdict(obj)
+    # Attempt to dump a Pydantic v2 `BaseModel`.
+    try:
+        return cast(Any, obj).model_dump()
+    except (AttributeError, TypeError):
+        pass
+    # Attempt to dump a Pydantic v1 `BaseModel`.
+    try:
+        return cast(Any, obj).dict()
+    except (AttributeError, TypeError):
+        pass
+    # When everything fails, try to return the string representation of the object
+    try:
+        return str(obj)
+    except Exception:
+        # If str() fails, return an error placeholder
+        return f"<non-serializable: {type(obj).__name__}>"
+class BraintrustJSONEncoder(json.JSONEncoder):
+    """
+    Custom JSON encoder for standard json library.
+    This is used as a fallback when orjson is not available or fails.
+    """
+    def default(self, o: Any):
+        return _to_dict(o)
+def bt_dumps(obj, **kwargs) -> str:
+    """
+    Serialize obj to a JSON-formatted string.
+    Automatically uses orjson if available for better performance (3-5x faster),
+    with fallback to standard json library if orjson is not installed or fails.
+    Args:
+        obj: Object to serialize
+        **kwargs: Additional arguments (passed to json.dumps in fallback path)
+    Returns:
+        JSON string representation of obj
+    """
+    if _HAS_ORJSON:
+        # Try orjson first for better performance
+        try:
+            # pylint: disable=no-member  # orjson is a C extension, pylint can't introspect it
+            return orjson.dumps(  # type: ignore[possibly-unbound]
+                obj,
+                default=_to_dict,
+                # options match json.dumps behavior for bc
+                option=orjson.OPT_SORT_KEYS | orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS,  # type: ignore[possibly-unbound]
+            ).decode("utf-8")
+        except Exception:
+            # If orjson fails, fall back to standard json
+            pass
+    # Use standard json (either orjson not available or it failed)
+    # Use sort_keys=True for deterministic output (matches orjson OPT_SORT_KEYS)
+    return json.dumps(obj, cls=BraintrustJSONEncoder, allow_nan=False, sort_keys=True, **kwargs)
+def bt_loads(s: str, **kwargs) -> Any:
+    """
+    Deserialize s (a str containing a JSON document) to a Python object.
+    Automatically uses orjson if available for better performance (2-3x faster),
+    with fallback to standard json library if orjson is not installed or fails.
+    Args:
+        s: JSON string to deserialize
+        **kwargs: Additional arguments (passed to json.loads in fallback path)
+    Returns:
+        Python object representation of JSON string
+    """
+    if _HAS_ORJSON:
+        # Try orjson first for better performance
+        try:
+            # pylint: disable=no-member  # orjson is a C extension, pylint can't introspect it
+            return orjson.loads(s)  # type: ignore[possibly-unbound]
+        except Exception:
+            # If orjson fails, fall back to standard json
+            pass
+    # Use standard json (either orjson not available or it failed)
+    return json.loads(s, **kwargs)

{braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/auth.py RENAMED Viewed

@@ -10,6 +10,7 @@ from ..logger import BraintrustState
 ORIGIN_HEADER = "origin"
 BRAINTRUST_AUTH_TOKEN_HEADER = "x-bt-auth-token"
 BRAINTRUST_ORG_NAME_HEADER = "x-bt-org-name"
+BRAINTRUST_PROJECT_ID_HEADER = "x-bt-project-id"
 @dataclass
@@ -17,6 +18,7 @@ class RequestContext:
     app_origin: Optional[str]
     token: Optional[str]
     org_name: Optional[str]
+    project_id: Optional[str]
     state: Optional[BraintrustState]
@@ -56,6 +58,7 @@ class AuthorizationMiddleware(BaseHTTPMiddleware):
                 app_origin=extract_allowed_origin(request.headers.get(ORIGIN_HEADER)),
                 token=None,
                 org_name=request.headers.get(BRAINTRUST_ORG_NAME_HEADER),
+                project_id=request.headers.get(BRAINTRUST_PROJECT_ID_HEADER),
                 state=None,
             )

{braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/cors.py RENAMED Viewed

@@ -18,6 +18,7 @@ ALLOWED_HEADERS = [
     "x-bt-auth-token",
     "x-bt-parent",
     "x-bt-org-name",
+    "x-bt-project-id",
     "x-bt-stream-fmt",
     "x-bt-use-cache",
     "x-stainless-os",

{braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/server.py RENAMED Viewed

@@ -196,7 +196,7 @@ async def run_eval(request: Request) -> Union[JSONResponse, StreamingResponse]:
                     "state": state,
                     "scores": evaluator.scores
                     + [
-                        make_scorer(state, score["name"], score["function_id"])
+                        make_scorer(state, score["name"], score["function_id"], ctx.project_id)
                         for score in eval_data.get("scores", [])
                     ],
                     "stream": stream_fn,
@@ -305,7 +305,7 @@ def snake_to_camel(snake_str: str) -> str:
     return components[0] + "".join(x.title() for x in components[1:]) if components else snake_str
-def make_scorer(state: BraintrustState, name: str, score: FunctionId) -> EvalScorer[Any, Any]:
+def make_scorer(state: BraintrustState, name: str, score: FunctionId, project_id: Optional[str] = None) -> EvalScorer[Any, Any]:
     def scorer_fn(input, output, expected, metadata):
         request = {
             **score,
@@ -315,7 +315,10 @@ def make_scorer(state: BraintrustState, name: str, score: FunctionId) -> EvalSco
             "mode": "auto",
             "strict": True,
         }
-        result = state.proxy_conn().post("function/invoke", json=request, headers={"Accept": "application/json"})
+        headers = {"Accept": "application/json"}
+        if project_id:
+            headers["x-bt-project-id"] = project_id
+        result = state.proxy_conn().post("function/invoke", json=request, headers=headers)
         result.raise_for_status()
         data = result.json()
         return data

{braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/logger.py RENAMED Viewed

@@ -9,6 +9,7 @@ import inspect
 import io
 import json
 import logging
+import math
 import os
 import sys
 import textwrap
@@ -53,7 +54,7 @@ from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 from . import context, id_gen
-from .bt_json import bt_dumps
+from .bt_json import bt_dumps, bt_loads
 from .db_fields import (
     ASYNC_SCORING_CONTROL_FIELD,
     AUDIT_METADATA_FIELD,
@@ -2471,7 +2472,15 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
                     # `json.dumps`. However, that runs at log upload time, while we want to
                     # cut out all the references to user objects synchronously in this
                     # function.
-                    return {str(k): _deep_copy_object(v[k], depth + 1) for k in v}
+                    result = {}
+                    for k in v:
+                        try:
+                            key_str = str(k)
+                        except Exception:
+                            # If str() fails on the key, use a fallback representation
+                            key_str = f"<non-stringifiable-key: {type(k).__name__}>"
+                        result[key_str] = _deep_copy_object(v[k], depth + 1)
+                    return result
                 elif isinstance(v, (List, Tuple, Set)):
                     return [_deep_copy_object(x, depth + 1) for x in v]
             finally:
@@ -2491,7 +2500,14 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
             return v
         elif isinstance(v, ReadonlyAttachment):
             return v.reference
-        elif isinstance(v, (int, float, str, bool)) or v is None:
+        elif isinstance(v, float):
+            # Handle NaN and Infinity for JSON compatibility
+            if math.isnan(v):
+                return "NaN"
+            elif math.isinf(v):
+                return "Infinity" if v > 0 else "-Infinity"
+            return v
+        elif isinstance(v, (int, str, bool)) or v is None:
             # Skip roundtrip for primitive types.
             return v
         else:
@@ -2500,7 +2516,7 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
             # E.g. the original type could have a `__del__` method that alters
             # some shared internal state, and we need this deep copy to be
             # fully-independent from the original.
-            return json.loads(bt_dumps(v))
+            return bt_loads(bt_dumps(v))
     return _deep_copy_object(event)
@@ -2523,7 +2539,7 @@ class ObjectIterator(Generic[T]):
         return value
-INTERNAL_BTQL_LIMIT = 1000
+DEFAULT_FETCH_BATCH_SIZE = 1000
 MAX_BTQL_ITERATIONS = 10000
@@ -2550,7 +2566,7 @@ class ObjectFetcher(ABC, Generic[TMapping]):
         self._fetched_data: Optional[List[TMapping]] = None
         self._internal_btql = _internal_btql
-    def fetch(self) -> Iterator[TMapping]:
+    def fetch(self, batch_size: Optional[int] = None) -> Iterator[TMapping]:
         """
         Fetch all records.
@@ -2563,9 +2579,10 @@ class ObjectFetcher(ABC, Generic[TMapping]):
             print(record)
         ```
+        :param batch_size: The number of records to fetch per request. Defaults to 1000.
         :returns: An iterator over the records.
         """
-        return ObjectIterator(self._refetch)
+        return ObjectIterator(lambda: self._refetch(batch_size=batch_size))
     def __iter__(self) -> Iterator[TMapping]:
         return self.fetch()
@@ -2584,8 +2601,9 @@ class ObjectFetcher(ABC, Generic[TMapping]):
     @abstractmethod
     def id(self) -> str: ...
-    def _refetch(self) -> List[TMapping]:
+    def _refetch(self, batch_size: Optional[int] = None) -> List[TMapping]:
         state = self._get_state()
+        limit = batch_size if batch_size is not None else DEFAULT_FETCH_BATCH_SIZE
         if self._fetched_data is None:
             cursor = None
             data = None
@@ -2610,7 +2628,7 @@ class ObjectFetcher(ABC, Generic[TMapping]):
                                 ],
                             },
                             "cursor": cursor,
-                            "limit": INTERNAL_BTQL_LIMIT,
+                            "limit": limit,
                             **(self._internal_btql or {}),
                         },
                         "use_columnstore": False,
@@ -3761,8 +3779,14 @@ class ReadonlyExperiment(ObjectFetcher[ExperimentEvent]):
         self._lazy_metadata.get()
         return self.state
-    def as_dataset(self) -> Iterator[_ExperimentDatasetEvent]:
-        return ExperimentDatasetIterator(self.fetch())
+    def as_dataset(self, batch_size: Optional[int] = None) -> Iterator[_ExperimentDatasetEvent]:
+        """
+        Return the experiment's data as a dataset iterator.
+        :param batch_size: The number of records to fetch per request. Defaults to 1000.
+        :returns: An iterator over the experiment data as dataset records.
+        """
+        return ExperimentDatasetIterator(self.fetch(batch_size=batch_size))
 _EXEC_COUNTER_LOCK = threading.Lock()

{braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_logger.py RENAMED Viewed

@@ -716,6 +716,107 @@ def test_span_log_with_large_document_many_pages(with_memory_logger):
     assert logged_output["pages"][0]["lines"][0]["words"][0]["content"] == "word_0"
+def test_span_log_handles_nan_gracefully(with_memory_logger):
+    """Test that span.log() handles NaN values by converting them to "NaN" string."""
+    logger = init_test_logger(__name__)
+    with logger.start_span(name="test_span") as span:
+        # Should NOT raise - should handle NaN gracefully
+        span.log(
+            input={"test": "input"},
+            output={"value": float("nan")},
+        )
+    # Verify the log was recorded with NaN handled appropriately
+    logs = with_memory_logger.pop()
+    assert len(logs) == 1
+    assert logs[0]["input"]["test"] == "input"
+    # NaN should be converted to "NaN" string for JSON compatibility
+    output_value = logs[0]["output"]["value"]
+    assert output_value == "NaN"
+def test_span_log_handles_infinity_gracefully(with_memory_logger):
+    """Test that span.log() handles Infinity values by converting them to "Infinity"/"-Infinity" strings."""
+    logger = init_test_logger(__name__)
+    with logger.start_span(name="test_span") as span:
+        # Should NOT raise - should handle Infinity gracefully
+        span.log(
+            input={"test": "input"},
+            output={"value": float("inf"), "neg": float("-inf")},
+        )
+    # Verify the log was recorded with Infinity handled appropriately
+    logs = with_memory_logger.pop()
+    assert len(logs) == 1
+    assert logs[0]["input"]["test"] == "input"
+    # Infinity should be converted to string representations for JSON compatibility
+    assert logs[0]["output"]["value"] == "Infinity"
+    assert logs[0]["output"]["neg"] == "-Infinity"
+def test_span_log_handles_unstringifiable_object_gracefully(with_memory_logger):
+    """Test that span.log() should handle objects with bad __str__ gracefully without raising.
+    This test currently FAILS - it demonstrates the desired behavior after the fix.
+    """
+    logger = init_test_logger(__name__)
+    class BadStrObject:
+        def __str__(self):
+            raise RuntimeError("Cannot convert to string!")
+        def __repr__(self):
+            raise RuntimeError("Cannot convert to repr!")
+    with logger.start_span(name="test_span") as span:
+        # Should NOT raise - should handle gracefully
+        span.log(
+            input={"test": "input"},
+            output={"result": BadStrObject()},
+        )
+    # Verify the log was recorded with a fallback representation
+    logs = with_memory_logger.pop()
+    assert len(logs) == 1
+    assert logs[0]["input"]["test"] == "input"
+    # The bad object should have been replaced with some error placeholder
+    assert "result" in logs[0]["output"]
+    output_str = str(logs[0]["output"]["result"])
+    # Should contain some indication of serialization failure
+    assert "error" in output_str.lower() or "serializ" in output_str.lower()
+def test_span_log_handles_bad_dict_keys_gracefully(with_memory_logger):
+    """Test that span.log() should handle non-stringifiable dict keys gracefully.
+    This test currently FAILS - it demonstrates the desired behavior after the fix.
+    """
+    logger = init_test_logger(__name__)
+    class BadKey:
+        def __str__(self):
+            raise ValueError("Key cannot be stringified!")
+        def __repr__(self):
+            raise ValueError("Key cannot be stringified!")
+    with logger.start_span(name="test_span") as span:
+        # Should NOT raise - should handle gracefully
+        span.log(
+            input={"test": "input"},
+            output={BadKey(): "value"},
+        )
+    # Verify the log was recorded with the problematic key handled
+    logs = with_memory_logger.pop()
+    assert len(logs) == 1
+    assert logs[0]["input"]["test"] == "input"
+    # The output should exist but the bad key should be replaced
+    assert "output" in logs[0]
 def test_span_link_logged_out(with_memory_logger):
     simulate_logout()
     assert_logged_out()
@@ -2491,7 +2592,7 @@ class TestDatasetInternalBtql(TestCase):
     @patch("braintrust.logger.BraintrustState")
     def test_dataset_internal_btql_limit_not_overwritten(self, mock_state_class):
-        """Test that custom limit in _internal_btql is not overwritten by INTERNAL_BTQL_LIMIT."""
+        """Test that custom limit in _internal_btql is not overwritten by DEFAULT_FETCH_BATCH_SIZE."""
         # Set up mock state
         mock_state = MagicMock()
         mock_state_class.return_value = mock_state
@@ -2538,7 +2639,7 @@ class TestDatasetInternalBtql(TestCase):
         call_args = mock_api_conn.post.call_args
         query_json = call_args[1]["json"]["query"]
-        # Verify that the custom limit is present (not overwritten by INTERNAL_BTQL_LIMIT)
+        # Verify that the custom limit is present (not overwritten by DEFAULT_FETCH_BATCH_SIZE)
         self.assertEqual(query_json["limit"], custom_limit)
         # Verify that other _internal_btql fields are also present
@@ -2546,8 +2647,14 @@ class TestDatasetInternalBtql(TestCase):
     @patch("braintrust.logger.BraintrustState")
     def test_dataset_default_limit_when_not_specified(self, mock_state_class):
-        """Test that INTERNAL_BTQL_LIMIT is used when no custom limit is specified."""
-        from braintrust.logger import INTERNAL_BTQL_LIMIT, Dataset, LazyValue, ObjectMetadata, ProjectDatasetMetadata
+        """Test that DEFAULT_FETCH_BATCH_SIZE is used when no custom limit is specified."""
+        from braintrust.logger import (
+            DEFAULT_FETCH_BATCH_SIZE,
+            Dataset,
+            LazyValue,
+            ObjectMetadata,
+            ProjectDatasetMetadata,
+        )
         # Set up mock state
         mock_state = MagicMock()
@@ -2590,4 +2697,52 @@ class TestDatasetInternalBtql(TestCase):
         query_json = call_args[1]["json"]["query"]
         # Verify that the default limit is used
-        self.assertEqual(query_json["limit"], INTERNAL_BTQL_LIMIT)
+        self.assertEqual(query_json["limit"], DEFAULT_FETCH_BATCH_SIZE)
+    @patch("braintrust.logger.BraintrustState")
+    def test_dataset_custom_batch_size_in_fetch(self, mock_state_class):
+        """Test that custom batch_size in fetch() is properly passed to BTQL query."""
+        from braintrust.logger import Dataset, LazyValue, ObjectMetadata, ProjectDatasetMetadata
+        # Set up mock state
+        mock_state = MagicMock()
+        mock_state_class.return_value = mock_state
+        # Mock the API connection and response
+        mock_api_conn = MagicMock()
+        mock_state.api_conn.return_value = mock_api_conn
+        # Mock response object
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "data": [{"id": "1", "input": "test1", "expected": "output1"}],
+            "cursor": None,
+        }
+        mock_api_conn.post.return_value = mock_response
+        # Create dataset
+        project_metadata = ObjectMetadata(id="test-project", name="test-project", full_info={})
+        dataset_metadata = ObjectMetadata(id="test-dataset", name="test-dataset", full_info={})
+        lazy_metadata = LazyValue(
+            lambda: ProjectDatasetMetadata(project=project_metadata, dataset=dataset_metadata),
+            use_mutex=False,
+        )
+        dataset = Dataset(
+            lazy_metadata=lazy_metadata,
+            state=mock_state,
+        )
+        # Trigger a fetch with custom batch_size
+        custom_batch_size = 250
+        list(dataset.fetch(batch_size=custom_batch_size))
+        # Verify the API was called
+        mock_api_conn.post.assert_called_once()
+        # Get the actual call arguments
+        call_args = mock_api_conn.post.call_args
+        query_json = call_args[1]["json"]["query"]
+        # Verify that the custom batch_size is used
+        self.assertEqual(query_json["limit"], custom_batch_size)

braintrust-0.3.13/src/braintrust/version.py ADDED Viewed

@@ -0,0 +1,4 @@
+VERSION = "0.3.13"
+# this will be templated during the build
+GIT_COMMIT = "cef88a007fa60f4cd873f1d891a54ce5e173f3aa"

{braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_openai.py RENAMED Viewed

@@ -1690,35 +1690,3 @@ def test_braintrust_tracing_processor_trace_metadata_logging(memory_logger):
     spans = memory_logger.pop()
     root_span = spans[0]
     assert root_span["metadata"]["conversation_id"] == "test-12345", "Should log trace metadata"
-def test_parse_metrics_excludes_booleans():
-    """Test that boolean fields in usage objects are excluded from metrics.
-    Reproduces issue where OpenRouter returns is_byok (a boolean) in the usage
-    object, which caused API validation errors: "Expected number, received boolean".
-    In Python, bool is a subclass of int, so isinstance(True, int) returns True.
-    The fix ensures _is_numeric explicitly excludes booleans.
-    """
-    from braintrust.oai import _parse_metrics_from_usage
-    # Simulate OpenRouter's usage object with boolean field
-    usage = {
-        "completion_tokens": 11,
-        "prompt_tokens": 8,
-        "total_tokens": 19,
-        "cost": 0.000104,
-        "is_byok": False,  # This boolean should be filtered out
-    }
-    metrics = _parse_metrics_from_usage(usage)
-    # Numeric fields should be included
-    assert metrics["completion_tokens"] == 11
-    assert metrics["prompt_tokens"] == 8
-    assert metrics["tokens"] == 19  # total_tokens gets renamed
-    assert metrics["cost"] == 0.000104
-    # Boolean field should NOT be in metrics (this was the bug)
-    assert "is_byok" not in metrics

braintrust 0.3.11__tar.gz → 0.3.13__tar.gz

braintrust 0.3.11__tar.gz → 0.3.13__tar.gz