PyPI - braintrust - Versions diffs - 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

braintrust 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

braintrust/_generated_types.py +737 -672
braintrust/audit.py +2 -2
braintrust/bt_json.py +178 -19
braintrust/cli/eval.py +6 -7
braintrust/cli/push.py +11 -11
braintrust/context.py +12 -17
braintrust/contrib/temporal/__init__.py +16 -27
braintrust/contrib/temporal/test_temporal.py +8 -3
braintrust/devserver/auth.py +8 -8
braintrust/devserver/cache.py +3 -4
braintrust/devserver/cors.py +8 -7
braintrust/devserver/dataset.py +3 -5
braintrust/devserver/eval_hooks.py +7 -6
braintrust/devserver/schemas.py +22 -19
braintrust/devserver/server.py +19 -12
braintrust/devserver/test_cached_login.py +4 -4
braintrust/framework.py +139 -142
braintrust/framework2.py +88 -87
braintrust/functions/invoke.py +66 -59
braintrust/functions/stream.py +3 -2
braintrust/generated_types.py +3 -1
braintrust/git_fields.py +11 -11
braintrust/gitutil.py +2 -3
braintrust/graph_util.py +10 -10
braintrust/id_gen.py +2 -2
braintrust/logger.py +373 -471
braintrust/merge_row_batch.py +10 -9
braintrust/oai.py +21 -20
braintrust/otel/__init__.py +49 -49
braintrust/otel/context.py +16 -30
braintrust/otel/test_distributed_tracing.py +14 -11
braintrust/otel/test_otel_bt_integration.py +32 -31
braintrust/parameters.py +8 -8
braintrust/prompt.py +14 -14
braintrust/prompt_cache/disk_cache.py +5 -4
braintrust/prompt_cache/lru_cache.py +3 -2
braintrust/prompt_cache/prompt_cache.py +13 -14
braintrust/queue.py +4 -4
braintrust/score.py +4 -4
braintrust/serializable_data_class.py +4 -4
braintrust/span_identifier_v1.py +1 -2
braintrust/span_identifier_v2.py +3 -4
braintrust/span_identifier_v3.py +23 -20
braintrust/span_identifier_v4.py +34 -25
braintrust/test_bt_json.py +644 -0
braintrust/test_framework.py +72 -6
braintrust/test_helpers.py +5 -5
braintrust/test_id_gen.py +2 -3
braintrust/test_logger.py +211 -107
braintrust/test_otel.py +61 -53
braintrust/test_queue.py +0 -1
braintrust/test_score.py +1 -3
braintrust/test_span_components.py +29 -44
braintrust/util.py +9 -8
braintrust/version.py +2 -2
braintrust/wrappers/_anthropic_utils.py +4 -4
braintrust/wrappers/agno/__init__.py +3 -4
braintrust/wrappers/agno/agent.py +1 -2
braintrust/wrappers/agno/function_call.py +1 -2
braintrust/wrappers/agno/model.py +1 -2
braintrust/wrappers/agno/team.py +1 -2
braintrust/wrappers/agno/utils.py +12 -12
braintrust/wrappers/anthropic.py +7 -8
braintrust/wrappers/claude_agent_sdk/__init__.py +3 -4
braintrust/wrappers/claude_agent_sdk/_wrapper.py +29 -27
braintrust/wrappers/dspy.py +15 -17
braintrust/wrappers/google_genai/__init__.py +17 -30
braintrust/wrappers/langchain.py +22 -24
braintrust/wrappers/litellm.py +4 -3
braintrust/wrappers/openai.py +15 -15
braintrust/wrappers/pydantic_ai.py +225 -110
braintrust/wrappers/test_agno.py +0 -1
braintrust/wrappers/test_dspy.py +0 -1
braintrust/wrappers/test_google_genai.py +64 -4
braintrust/wrappers/test_litellm.py +0 -1
braintrust/wrappers/test_pydantic_ai_integration.py +819 -22
{braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/METADATA +3 -2
braintrust-0.4.1.dist-info/RECORD +121 -0
braintrust-0.3.15.dist-info/RECORD +0 -120
{braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/WHEEL +0 -0
{braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/entry_points.txt +0 -0
{braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/top_level.txt +0 -0

braintrust/devserver/auth.py CHANGED Viewed

@@ -1,5 +1,5 @@
+from collections.abc import Awaitable, Callable
 from dataclasses import dataclass
-from typing import Awaitable, Callable, Dict, Optional
 from starlette.middleware.base import BaseHTTPMiddleware
 from starlette.requests import Request
@@ -15,14 +15,14 @@ BRAINTRUST_PROJECT_ID_HEADER = "x-bt-project-id"
 @dataclass
 class RequestContext:
-    app_origin: Optional[str]
-    token: Optional[str]
-    org_name: Optional[str]
-    project_id: Optional[str]
-    state: Optional[BraintrustState]
+    app_origin: str | None
+    token: str | None
+    org_name: str | None
+    project_id: str | None
+    state: BraintrustState | None
-def extract_allowed_origin(origin: Optional[str]) -> Optional[str]:
+def extract_allowed_origin(origin: str | None) -> str | None:
     """Extract and validate the origin header."""
     # This should use the same check_origin logic from cors.py
     from .cors import check_origin
@@ -32,7 +32,7 @@ def extract_allowed_origin(origin: Optional[str]) -> Optional[str]:
     return None
-def parse_braintrust_auth_header(headers: Dict[str, str]) -> Optional[str]:
+def parse_braintrust_auth_header(headers: dict[str, str]) -> str | None:
     """Parse the authorization token from headers."""
     # Check x-bt-auth-token first
     token = headers.get(BRAINTRUST_AUTH_TOKEN_HEADER)

braintrust/devserver/cache.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """LRU cache implementation for the dev server."""
 import json
-from typing import Dict, Optional
 from ..logger import BraintrustState, login_to_state
@@ -11,10 +10,10 @@ class LRUCache:
     def __init__(self, max_size: int = 32):
         self.max_size = max_size
-        self.cache: Dict[str, BraintrustState] = {}
+        self.cache: dict[str, BraintrustState] = {}
         self.access_order: list[str] = []
-    def get(self, key: str) -> Optional[BraintrustState]:
+    def get(self, key: str) -> BraintrustState | None:
         """Get a value from the cache, updating access order."""
         if key in self.cache:
             # Move to end to mark as recently used
@@ -41,7 +40,7 @@ class LRUCache:
 _login_cache = LRUCache(max_size=32)  # TODO: Make this configurable
-async def cached_login(api_key: str, app_url: str, org_name: Optional[str] = None) -> BraintrustState:
+async def cached_login(api_key: str, app_url: str, org_name: str | None = None) -> BraintrustState:
     """Login with caching to avoid repeated API calls."""
     cache_key = json.dumps({"api_key": api_key, "app_url": app_url, "org_name": org_name})

braintrust/devserver/cors.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import os
 import re
-from typing import Any, Awaitable, Callable, Dict, List, Union
+from collections.abc import Awaitable, Callable
+from typing import Any
 # CORS configuration
-ALLOWED_ORIGINS: List[Union[str, re.Pattern]] = [
+ALLOWED_ORIGINS: list[str | re.Pattern] = [
     "https://www.braintrust.dev",
     "https://www.braintrustdata.com",
     re.compile(r"https://.*\.preview\.braintrust\.dev"),
@@ -70,9 +71,9 @@ def create_cors_middleware() -> type:
         async def __call__(
             self,
-            scope: Dict[str, Any],
-            receive: Callable[[], Awaitable[Dict[str, Any]]],
-            send: Callable[[Dict[str, Any]], Awaitable[None]],
+            scope: dict[str, Any],
+            receive: Callable[[], Awaitable[dict[str, Any]]],
+            send: Callable[[dict[str, Any]], Awaitable[None]],
         ) -> None:
             if scope["type"] == "http":
                 headers = dict(scope["headers"])
@@ -81,7 +82,7 @@ def create_cors_middleware() -> type:
                 # Handle OPTIONS requests
                 if scope["method"] == "OPTIONS":
-                    async def send_options_wrapper(message: Dict[str, Any]) -> None:
+                    async def send_options_wrapper(message: dict[str, Any]) -> None:
                         if message["type"] == "http.response.start":
                             headers_dict = dict(message.get("headers", []))
@@ -120,7 +121,7 @@ def create_cors_middleware() -> type:
                     return
                 # For other requests, add CORS headers if origin is valid
-                async def send_wrapper(message: Dict[str, Any]) -> None:
+                async def send_wrapper(message: dict[str, Any]) -> None:
                     if message["type"] == "http.response.start" and origin and check_origin(origin):
                         headers_dict = dict(message.get("headers", []))

braintrust/devserver/dataset.py CHANGED Viewed

@@ -1,11 +1,11 @@
-from typing import Any, Dict, Union
+from typing import Any
 from braintrust import init_dataset
 from braintrust._generated_types import RunEvalData, RunEvalData1, RunEvalData2
 from braintrust.logger import BraintrustState
-async def get_dataset_by_id(state: BraintrustState, dataset_id: str) -> Dict[str, str]:
+async def get_dataset_by_id(state: BraintrustState, dataset_id: str) -> dict[str, str]:
     """Fetch dataset information by ID."""
     # Make API call to get dataset info
     conn = state.api_conn()
@@ -23,9 +23,7 @@ async def get_dataset_by_id(state: BraintrustState, dataset_id: str) -> Dict[str
 # NOTE: To make this performant, we'll have to make these functions work with async i/o
-async def get_dataset(
-    state: BraintrustState, data: Union[RunEvalData, RunEvalData1, RunEvalData2, Dict[str, Any]]
-) -> Any:
+async def get_dataset(state: BraintrustState, data: RunEvalData | RunEvalData1 | RunEvalData2 | dict[str, Any]) -> Any:
     """
     Get dataset from various data sources.

braintrust/devserver/eval_hooks.py CHANGED Viewed

@@ -7,7 +7,8 @@ for reporting progress during evaluation execution.
 import asyncio
 import json
-from typing import Any, Callable, Dict, Optional
+from collections.abc import Callable
+from typing import Any
 class EvalHooks:
@@ -15,13 +16,13 @@ class EvalHooks:
     def __init__(
         self,
-        report_progress: Optional[Callable[[Dict[str, Any]], None]] = None,
-        parameters: Optional[Dict[str, Any]] = None,
+        report_progress: Callable[[dict[str, Any]], None] | None = None,
+        parameters: dict[str, Any] | None = None,
     ):
         self._report_progress = report_progress
         self.parameters = parameters or {}
-    def report_progress(self, event: Dict[str, Any]) -> None:
+    def report_progress(self, event: dict[str, Any]) -> None:
         """Report progress during task execution."""
         if self._report_progress:
             self._report_progress(event)
@@ -45,7 +46,7 @@ class SSEQueue:
     """Simple wrapper around asyncio.Queue for SSE events."""
     def __init__(self):
-        self.queue: asyncio.Queue[Optional[str]] = asyncio.Queue()
+        self.queue: asyncio.Queue[str | None] = asyncio.Queue()
     async def put_event(self, event: str, data: Any) -> None:
         """Add an SSE event to the queue."""
@@ -56,6 +57,6 @@ class SSEQueue:
         """Signal end of stream."""
         await self.queue.put(None)
-    async def get(self) -> Optional[str]:
+    async def get(self) -> str | None:
         """Get the next event from the queue."""
         return await self.queue.get()

braintrust/devserver/schemas.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import json
-from typing import Any, Dict, List, Optional, Sequence, Union, get_args, get_origin
+from collections.abc import Sequence
+from typing import Any, Union, get_args, get_origin, get_type_hints
-from typing_extensions import TypedDict, get_type_hints
+from typing_extensions import TypedDict
 # This is not beautiful code, but it saves us from introducing Pydantic as a dependency, and it is fairly
 # straightforward for an LLM to keep it up to date with runEvalBodySchema in JS.
@@ -16,12 +17,12 @@ class ValidationError(Exception):
 class ParsedFunctionId(TypedDict, total=False):
     """Parsed function identifier."""
-    function_id: Optional[str]
-    version: Optional[str]
-    name: Optional[str]
-    prompt_session_id: Optional[str]
-    inline_code: Optional[str]
-    global_function: Optional[str]
+    function_id: str | None
+    version: str | None
+    name: str | None
+    prompt_session_id: str | None
+    inline_code: str | None
+    global_function: str | None
 class ParsedParent(TypedDict):
@@ -35,16 +36,16 @@ class ParsedEvalBody(TypedDict, total=False):
     """Type for parsed eval request body."""
     name: str  # Required
-    parameters: Dict[str, Any]
+    parameters: dict[str, Any]
     data: Any
-    scores: List[ParsedFunctionId]
+    scores: list[ParsedFunctionId]
     experiment_name: str
     project_id: str
-    parent: Union[str, ParsedParent]
+    parent: str | ParsedParent
     stream: bool
-def validate_typed_dict(data: Any, typed_dict_class: type, path: str = "") -> Dict[str, Any]:
+def validate_typed_dict(data: Any, typed_dict_class: type, path: str = "") -> dict[str, Any]:
     """Validate data against a TypedDict definition."""
     if not isinstance(data, dict):
         raise ValidationError(f"{path or 'Root'} must be a dictionary, got {type(data).__name__}")
@@ -107,7 +108,7 @@ def validate_value(value: Any, expected_type: type, path: str) -> Any:
         return validate_value(value, inner_type, path)
     # Handle List/Sequence
-    if origin in (list, List, Sequence):
+    if origin in (list, list, Sequence):
         if not isinstance(value, list):
             raise ValidationError(f"{path} must be a list, got {type(value).__name__}")
@@ -115,7 +116,7 @@ def validate_value(value: Any, expected_type: type, path: str) -> Any:
         return [validate_value(item, item_type, f"{path}[{i}]") for i, item in enumerate(value)]
     # Handle Dict/Mapping
-    if origin in (dict, Dict):
+    if origin in (dict, dict):
         if not isinstance(value, dict):
             raise ValidationError(f"{path} must be a dict, got {type(value).__name__}")
@@ -172,7 +173,7 @@ def parse_function_id(data: Any, path: str = "function") -> ParsedFunctionId:
     raise ValidationError(f"{path} must specify function_id, name, prompt_session_id, or inline_code")
-def parse_eval_body(request_data: Union[str, bytes, dict]) -> ParsedEvalBody:
+def parse_eval_body(request_data: str | bytes | dict) -> ParsedEvalBody:
     """
     Parse request body for eval execution.
@@ -221,10 +222,12 @@ def parse_eval_body(request_data: Union[str, bytes, dict]) -> ParsedEvalBody:
         parsed_scores = []
         for i, score in enumerate(scores_data):
             try:
-                parsed_scores.append({
-                    "name": score["name"],
-                    "function_id": parse_function_id(score["function_id"], f"scores[{i}]"),
-                })
+                parsed_scores.append(
+                    {
+                        "name": score["name"],
+                        "function_id": parse_function_id(score["function_id"], f"scores[{i}]"),
+                    }
+                )
             except ValidationError as e:
                 raise ValidationError(f"Invalid score at index {i}: {e}")

braintrust/devserver/server.py CHANGED Viewed

@@ -2,7 +2,7 @@ import asyncio
 import json
 import sys
 import textwrap
-from typing import Any, Optional, Union
+from typing import Any
 try:
     import uvicorn
@@ -40,7 +40,7 @@ _all_evaluators: dict[str, Evaluator[Any, Any]] = {}
 class CheckAuthorizedMiddleware(BaseHTTPMiddleware):
-    def __init__(self, app, allowed_org_name: Optional[str] = None):
+    def __init__(self, app, allowed_org_name: str | None = None):
         super().__init__(app)
         self.allowed_org_name = allowed_org_name
         self.protected_paths = ["/list", "/eval"]
@@ -100,7 +100,7 @@ async def list_evaluators(request: Request) -> JSONResponse:
     return JSONResponse(evaluator_list)
-async def run_eval(request: Request) -> Union[JSONResponse, StreamingResponse]:
+async def run_eval(request: Request) -> JSONResponse | StreamingResponse:
     """Handle eval execution requests."""
     try:
         # Get request body
@@ -157,12 +157,14 @@ async def run_eval(request: Request) -> Union[JSONResponse, StreamingResponse]:
             result = await evaluator.task(input, hooks)
         else:
             result = evaluator.task(input, hooks)
-        hooks.report_progress({
-            "format": "code",
-            "output_type": "completion",
-            "event": "json_delta",
-            "data": json.dumps(result),
-        })
+        hooks.report_progress(
+            {
+                "format": "code",
+                "output_type": "completion",
+                "event": "json_delta",
+                "data": json.dumps(result),
+            }
+        )
         return result
     def on_start_fn(summary: ExperimentSummary):
@@ -214,6 +216,7 @@ async def run_eval(request: Request) -> Union[JSONResponse, StreamingResponse]:
             async def event_generator():
                 """Generate SSE events from the queue."""
                 # Create a task to run the eval and signal completion
                 async def run_and_complete():
                     try:
@@ -255,7 +258,7 @@ async def run_eval(request: Request) -> Union[JSONResponse, StreamingResponse]:
         return JSONResponse({"error": f"Failed to run evaluation: {str(e)}"}, status_code=500)
-def create_app(evaluators: list[Evaluator[Any, Any]], org_name: Optional[str] = None):
+def create_app(evaluators: list[Evaluator[Any, Any]], org_name: str | None = None):
     """Create and configure the Starlette app for the dev server.
     Args:
@@ -283,7 +286,9 @@ def create_app(evaluators: list[Evaluator[Any, Any]], org_name: Optional[str] =
     return app
-def run_dev_server(evaluators: list[Evaluator[Any, Any]], host: str = "localhost", port: int = 8300, org_name: Optional[str] = None):
+def run_dev_server(
+    evaluators: list[Evaluator[Any, Any]], host: str = "localhost", port: int = 8300, org_name: str | None = None
+):
     """Start the dev server.
     Args:
@@ -305,7 +310,9 @@ def snake_to_camel(snake_str: str) -> str:
     return components[0] + "".join(x.title() for x in components[1:]) if components else snake_str
-def make_scorer(state: BraintrustState, name: str, score: FunctionId, project_id: Optional[str] = None) -> EvalScorer[Any, Any]:
+def make_scorer(
+    state: BraintrustState, name: str, score: FunctionId, project_id: str | None = None
+) -> EvalScorer[Any, Any]:
     def scorer_fn(input, output, expected, metadata):
         request = {
             **score,

braintrust/devserver/test_cached_login.py CHANGED Viewed

@@ -10,7 +10,7 @@ class TestCachedLogin(unittest.TestCase):
         """Clear the cache before each test."""
         cache._login_cache = cache.LRUCache(max_size=32)
-    @patch('braintrust.devserver.cache.login_to_state')
+    @patch("braintrust.devserver.cache.login_to_state")
     def test_cached_login_caches_results(self, mock_login):
         """Test that cached_login caches and reuses results."""
         mock_state = MagicMock()
@@ -26,7 +26,7 @@ class TestCachedLogin(unittest.TestCase):
         self.assertEqual(result2, mock_state)
         self.assertEqual(mock_login.call_count, 1)  # Still 1, not called again
-    @patch('braintrust.devserver.cache.login_to_state')
+    @patch("braintrust.devserver.cache.login_to_state")
     def test_cached_login_different_keys(self, mock_login):
         """Test that different cache keys create separate entries."""
         mock_state1 = MagicMock()
@@ -48,7 +48,7 @@ class TestCachedLogin(unittest.TestCase):
         self.assertEqual(result3, mock_state3)
         self.assertEqual(mock_login.call_count, 3)
-    @patch('braintrust.devserver.cache.login_to_state')
+    @patch("braintrust.devserver.cache.login_to_state")
     def test_cached_login_with_org_name(self, mock_login):
         """Test caching with org_name parameter."""
         mock_state = MagicMock()
@@ -68,7 +68,7 @@ class TestCachedLogin(unittest.TestCase):
         result3 = asyncio.run(cache.cached_login("api_key_1", "https://app.braintrust.com", org_name="other_org"))
         self.assertEqual(mock_login.call_count, 2)
-    @patch('braintrust.devserver.cache.login_to_state')
+    @patch("braintrust.devserver.cache.login_to_state")
     def test_cached_login_propagates_exceptions(self, mock_login):
         """Test that exceptions from login_to_state are propagated."""
         mock_login.side_effect = ValueError("Invalid API key")

braintrust 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl

braintrust 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl