braintrust 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/__init__.py +3 -0
- braintrust/auto.py +179 -0
- braintrust/conftest.py +23 -4
- braintrust/framework.py +18 -5
- braintrust/logger.py +49 -13
- braintrust/oai.py +51 -0
- braintrust/test_bt_json.py +0 -5
- braintrust/test_framework.py +37 -0
- braintrust/test_http.py +444 -0
- braintrust/test_logger.py +179 -5
- braintrust/test_util.py +58 -1
- braintrust/util.py +20 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/agno/__init__.py +2 -3
- braintrust/wrappers/anthropic.py +64 -0
- braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +9 -0
- braintrust/wrappers/dspy.py +52 -1
- braintrust/wrappers/google_genai/__init__.py +9 -6
- braintrust/wrappers/litellm.py +6 -43
- braintrust/wrappers/pydantic_ai.py +2 -3
- braintrust/wrappers/test_agno.py +9 -0
- braintrust/wrappers/test_anthropic.py +156 -0
- braintrust/wrappers/test_dspy.py +117 -0
- braintrust/wrappers/test_google_genai.py +9 -0
- braintrust/wrappers/test_litellm.py +57 -55
- braintrust/wrappers/test_openai.py +253 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
- braintrust/wrappers/test_utils.py +79 -0
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/RECORD +34 -32
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
braintrust/test_http.py
ADDED
@@ -0,0 +1,444 @@
+"""Tests for HTTP connection handling, retries, and timeouts."""
+
+import http.server
+import os
+import socketserver
+import threading
+import time
+
+import pytest
+import requests
+from braintrust.logger import HTTPConnection, RetryRequestExceptionsAdapter
+
+
+class HangingConnectionHandler(http.server.BaseHTTPRequestHandler):
+    """HTTP handler that simulates stale connections by HANGING (not responding).
+
+    This simulates what happens when a NAT gateway silently drops packets:
+    - The TCP connection appears open
+    - Packets are sent but never acknowledged
+    - The client waits forever for a response
+    """
+
+    request_count = 0
+    hang_count = 1
+
+    def log_message(self, format, *args):
+        pass
+
+    def do_POST(self):
+        HangingConnectionHandler.request_count += 1
+
+        if HangingConnectionHandler.request_count <= HangingConnectionHandler.hang_count:
+            # Simulate stale connection: hang long enough for client to timeout
+            for _ in range(100):  # 10 seconds total, interruptible
+                time.sleep(0.1)
+            return
+
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(b'{"status": "ok"}')
+
+    def do_GET(self):
+        self.do_POST()
+
+
+class CloseConnectionHandler(http.server.BaseHTTPRequestHandler):
+    """HTTP handler that closes connection immediately (triggers ConnectionError)."""
+
+    request_count = 0
+    fail_count = 1
+
+    def log_message(self, format, *args):
+        pass
+
+    def do_POST(self):
+        CloseConnectionHandler.request_count += 1
+
+        if CloseConnectionHandler.request_count <= CloseConnectionHandler.fail_count:
+            self.connection.close()
+            return
+
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(b'{"status": "ok"}')
+
+    def do_GET(self):
+        self.do_POST()
+
+
+@pytest.fixture
+def hanging_server():
+    """Fixture that creates a server that HANGS on first request (simulates stale NAT)."""
+    HangingConnectionHandler.request_count = 0
+    HangingConnectionHandler.hang_count = 1
+
+    server = socketserver.ThreadingTCPServer(("127.0.0.1", 0), HangingConnectionHandler)
+    server.daemon_threads = True
+    port = server.server_address[1]
+
+    thread = threading.Thread(target=server.serve_forever)
+    thread.daemon = True
+    thread.start()
+
+    yield f"http://127.0.0.1:{port}"
+
+    server.shutdown()
+    server.server_close()
+
+
+@pytest.fixture
+def closing_server():
+    """Fixture that creates a server that CLOSES connection on first request."""
+    CloseConnectionHandler.request_count = 0
+    CloseConnectionHandler.fail_count = 1
+
+    server = socketserver.ThreadingTCPServer(("127.0.0.1", 0), CloseConnectionHandler)
+    server.daemon_threads = True
+    port = server.server_address[1]
+
+    thread = threading.Thread(target=server.serve_forever)
+    thread.daemon = True
+    thread.start()
+
+    yield f"http://127.0.0.1:{port}"
+
+    server.shutdown()
+    server.server_close()
+
+
+class TestRetryRequestExceptionsAdapter:
+    """Tests for RetryRequestExceptionsAdapter timeout and retry behavior."""
+
+    def test_adapter_has_default_timeout(self):
+        """Adapter should have a default_timeout_secs attribute."""
+        adapter = RetryRequestExceptionsAdapter(base_num_retries=3, backoff_factor=0.1)
+
+        assert hasattr(adapter, "default_timeout_secs")
+        assert adapter.default_timeout_secs == 60
+
+    def test_adapter_applies_default_timeout_to_requests(self, hanging_server):
+        """Requests without explicit timeout should use default_timeout_secs."""
+        adapter = RetryRequestExceptionsAdapter(
+            base_num_retries=3,
+            backoff_factor=0.05,
+            default_timeout_secs=0.2,
+        )
+        session = requests.Session()
+        session.mount("http://", adapter)
+
+        start = time.time()
+        resp = session.post(f"{hanging_server}/test", json={"hello": "world"})
+        elapsed = time.time() - start
+
+        assert resp.status_code == 200
+        assert elapsed < 2.0, f"Should complete within 2s, took {elapsed:.2f}s"
+        assert HangingConnectionHandler.request_count >= 2
+
+    def test_adapter_retries_on_connection_close(self, closing_server):
+        """Adapter retries on connection close errors."""
+        adapter = RetryRequestExceptionsAdapter(base_num_retries=5, backoff_factor=0.05)
+        session = requests.Session()
+        session.mount("http://", adapter)
+
+        start = time.time()
+        resp = session.post(f"{closing_server}/test", json={"hello": "world"})
+        elapsed = time.time() - start
+
+        assert resp.status_code == 200
+        assert elapsed < 5.0
+        assert CloseConnectionHandler.request_count >= 2
+
+    def test_adapter_resets_pool_on_timeout(self, hanging_server):
+        """Adapter resets connection pool on timeout errors via self.close().
+
+        This is the key fix for stale NAT connections: when a request times out,
+        we reset the connection pool to ensure the next retry uses a fresh connection.
+        """
+        adapter = RetryRequestExceptionsAdapter(
+            base_num_retries=10,
+            backoff_factor=0.05,
+            default_timeout_secs=0.2,
+        )
+        session = requests.Session()
+        session.mount("http://", adapter)
+
+        start = time.time()
+        resp = session.post(f"{hanging_server}/test", json={"hello": "world"})
+        elapsed = time.time() - start
+
+        assert resp.status_code == 200
+        assert elapsed < 10.0, f"Request took too long: {elapsed:.2f}s"
+        assert HangingConnectionHandler.request_count >= 2
+
+
+class TestHTTPConnection:
+    """Tests for HTTPConnection timeout configuration."""
+
+    def test_make_long_lived_uses_default_timeout(self, hanging_server):
+        """HTTPConnection.make_long_lived() should use default_timeout_secs.
+
+        This tests the exact scenario from the stale connection bug:
+        - Long eval run (15+ minutes)
+        - app_conn() becomes stale due to NAT gateway idle timeout
+        - summarize() calls fetch_base_experiment()
+        - Request hangs forever because no timeout
+
+        With the fix, make_long_lived() uses default_timeout_secs (60s by default).
+        """
+        os.environ["BRAINTRUST_HTTP_TIMEOUT"] = "0.2"
+        try:
+            conn = HTTPConnection(hanging_server)
+            conn.make_long_lived()
+
+            assert hasattr(conn.adapter, "default_timeout_secs")
+            assert conn.adapter.default_timeout_secs == 0.2
+
+            start = time.time()
+            resp = conn.post("/test", json={"hello": "world"})
+            elapsed = time.time() - start
+
+            assert resp.status_code == 200
+            # Allow more time due to backoff_factor=0.5 in make_long_lived()
+            assert elapsed < 15.0, f"Should complete within 15s, took {elapsed:.2f}s"
+        finally:
+            del os.environ["BRAINTRUST_HTTP_TIMEOUT"]
+
+    def test_env_var_configures_timeout(self):
+        """BRAINTRUST_HTTP_TIMEOUT env var configures timeout via make_long_lived()."""
+        os.environ["BRAINTRUST_HTTP_TIMEOUT"] = "30"
+        try:
+            conn = HTTPConnection("http://localhost:8080")
+            conn.make_long_lived()
+
+            assert hasattr(conn.adapter, "default_timeout_secs")
+            assert conn.adapter.default_timeout_secs == 30
+        finally:
+            del os.environ["BRAINTRUST_HTTP_TIMEOUT"]
+
+
+class TestAdapterCloseAndReuse:
+    """Tests verifying that adapter.close() allows subsequent requests.
+
+    This addresses the review concern about whether calling self.close()
+    (which calls PoolManager.clear()) breaks subsequent request handling.
+    """
+
+    @pytest.fixture
+    def simple_server(self):
+        """Fixture that creates a server that always succeeds."""
+
+        class SimpleHandler(http.server.BaseHTTPRequestHandler):
+            request_count = 0
+
+            def log_message(self, format, *args):
+                pass
+
+            def do_GET(self):
+                SimpleHandler.request_count += 1
+                self.send_response(200)
+                self.send_header("Content-Type", "application/json")
+                self.end_headers()
+                self.wfile.write(b'{"status": "ok"}')
+
+        SimpleHandler.request_count = 0
+        server = socketserver.ThreadingTCPServer(("127.0.0.1", 0), SimpleHandler)
+        server.daemon_threads = True
+        port = server.server_address[1]
+
+        thread = threading.Thread(target=server.serve_forever)
+        thread.daemon = True
+        thread.start()
+
+        yield f"http://127.0.0.1:{port}", SimpleHandler
+
+        server.shutdown()
+        server.server_close()
+
+    def test_adapter_works_after_close(self, simple_server):
+        """Verify adapter.close() does not break subsequent requests.
+
+        This is the key test for the PR feedback: after calling close(),
+        the PoolManager should create new connection pools on demand.
+        """
+        url, handler = simple_server
+
+        adapter = RetryRequestExceptionsAdapter(base_num_retries=3, backoff_factor=0.1)
+        session = requests.Session()
+        session.mount("http://", adapter)
+
+        # First request works
+        resp1 = session.get(f"{url}/test1")
+        assert resp1.status_code == 200
+        assert handler.request_count == 1
+
+        # Explicitly close the adapter (simulates what happens on timeout)
+        adapter.close()
+
+        # Second request should still work after close()
+        resp2 = session.get(f"{url}/test2")
+        assert resp2.status_code == 200
+        assert handler.request_count == 2
+
+    def test_adapter_works_after_multiple_closes(self, simple_server):
+        """Verify adapter works even after multiple close() calls."""
+        url, handler = simple_server
+
+        adapter = RetryRequestExceptionsAdapter(base_num_retries=3, backoff_factor=0.1)
+        session = requests.Session()
+        session.mount("http://", adapter)
+
+        for i in range(3):
+            resp = session.get(f"{url}/test{i}")
+            assert resp.status_code == 200
+            adapter.close()
+
+        assert handler.request_count == 3
+
+    def test_concurrent_requests_with_close(self):
+        """Test thread safety: close() called while requests are in-flight.
+
+        This tests a potential race condition where one thread calls close()
+        while another thread is mid-request. Requests are staggered to ensure
+        close() happens while some requests are in-flight.
+        """
+        import concurrent.futures
+
+        class SlowHandler(http.server.BaseHTTPRequestHandler):
+            request_count = 0
+
+            def log_message(self, format, *args):
+                pass
+
+            def do_GET(self):
+                SlowHandler.request_count += 1
+                # Simulate slow response
+                time.sleep(0.1)
+                self.send_response(200)
+                self.send_header("Content-Type", "application/json")
+                self.end_headers()
+                self.wfile.write(b'{"status": "ok"}')
+
+        SlowHandler.request_count = 0
+        server = socketserver.ThreadingTCPServer(("127.0.0.1", 0), SlowHandler)
+        server.daemon_threads = True
+        port = server.server_address[1]
+        url = f"http://127.0.0.1:{port}"
+
+        server_thread = threading.Thread(target=server.serve_forever)
+        server_thread.daemon = True
+        server_thread.start()
+
+        try:
+            adapter = RetryRequestExceptionsAdapter(base_num_retries=3, backoff_factor=0.1)
+            session = requests.Session()
+            session.mount("http://", adapter)
+
+            errors = []
+
+            def make_request(i):
+                try:
+                    time.sleep(i * 0.02)  # Stagger requests
+                    resp = session.get(f"{url}/test{i}")
+                    return resp.status_code
+                except Exception as e:
+                    errors.append(e)
+                    return None
+
+            def close_adapter():
+                time.sleep(0.05)  # Close while requests are in-flight
+                adapter.close()
+
+            # Launch concurrent requests and a close() call
+            with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+                # Start several requests (staggered)
+                request_futures = [executor.submit(make_request, i) for i in range(5)]
+                # Start close() call mid-flight
+                close_future = executor.submit(close_adapter)
+
+                close_future.result()
+                results = [f.result() for f in request_futures]
+
+            # All requests should succeed (retry on failure)
+            assert all(r == 200 for r in results), f"Some requests failed: {results}, errors: {errors}"
+
+        finally:
+            server.shutdown()
+            server.server_close()
+
+    def test_stress_concurrent_close_and_requests(self):
+        """Stress test: many close() calls interleaved with requests.
+
+        Requests are staggered to ensure close() calls happen during requests.
+        """
+        import concurrent.futures
+
+        class FastHandler(http.server.BaseHTTPRequestHandler):
+            request_count = 0
+
+            def log_message(self, format, *args):
+                pass
+
+            def do_GET(self):
+                FastHandler.request_count += 1
+                self.send_response(200)
+                self.send_header("Content-Type", "application/json")
+                self.end_headers()
+                self.wfile.write(b'{"status": "ok"}')
+
+        FastHandler.request_count = 0
+        server = socketserver.ThreadingTCPServer(("127.0.0.1", 0), FastHandler)
+        server.daemon_threads = True
+        port = server.server_address[1]
+        url = f"http://127.0.0.1:{port}"
+
+        server_thread = threading.Thread(target=server.serve_forever)
+        server_thread.daemon = True
+        server_thread.start()
+
+        try:
+            adapter = RetryRequestExceptionsAdapter(base_num_retries=5, backoff_factor=0.01)
+            session = requests.Session()
+            session.mount("http://", adapter)
+
+            errors = []
+            success_count = 0
+            lock = threading.Lock()
+
+            def make_request(i):
+                nonlocal success_count
+                try:
+                    time.sleep(i * 0.005)  # Stagger requests
+                    resp = session.get(f"{url}/test{i}")
+                    if resp.status_code == 200:
+                        with lock:
+                            success_count += 1
+                    return resp.status_code
+                except Exception as e:
+                    with lock:
+                        errors.append(str(e))
+                    return None
+
+            def close_repeatedly():
+                for _ in range(20):
+                    time.sleep(0.01)  # Close throughout the request window
+                    adapter.close()
+
+            # Launch many concurrent requests while repeatedly closing
+            with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
+                request_futures = [executor.submit(make_request, i) for i in range(50)]
+                close_futures = [executor.submit(close_repeatedly) for _ in range(3)]
+
+                # Wait for all
+                for f in close_futures:
+                    f.result()
+                results = [f.result() for f in request_futures]
+
+            failed = [r for r in results if r != 200]
+            assert len(failed) == 0, f"Failed requests: {len(failed)}, errors: {errors[:5]}"
+
+        finally:
+            server.shutdown()
+            server.server_close()
braintrust/test_logger.py
CHANGED
@@ -833,6 +833,16 @@ def test_span_project_id_logged_in(with_memory_logger, with_simulate_login):
     )
 
 
+def test_span_export_disables_cache(with_memory_logger):
+    """Test that span.export() disables the span cache."""
+    logger = init_test_logger(__name__)
+
+    with logger.start_span(name="test_span") as span:
+        # Exporting should disable the span cache
+        span.export()
+        assert logger.state.span_cache.disabled
+
+
 def test_span_project_name_logged_in(with_simulate_login, with_memory_logger):
     init_logger(project="test-project")
     span = logger.start_span(name="test-span")
@@ -929,11 +939,7 @@ def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_lo
 
 @pytest.mark.asyncio
 async def test_span_link_in_async_context(with_simulate_login, with_memory_logger):
-    """Test that span.link() works correctly when called from within an async function.
-
-    This tests the bug where current_logger was a plain attribute instead of a ContextVar,
-    causing span.link() to return a noop link in async contexts even though the span was valid.
-    """
+    """Test that span.link() works correctly when called from within an async function."""
     import asyncio
 
     logger = init_logger(
@@ -966,6 +972,174 @@ async def test_span_link_in_async_context(with_simulate_login, with_memory_logge
     assert "test-project-id" in link
 
 
+@pytest.mark.asyncio
+async def test_current_logger_after_multiple_awaits(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works after multiple await points."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+
+    async def check_logger_after_awaits():
+        assert braintrust.current_logger() is logger
+        await asyncio.sleep(0.01)
+        assert braintrust.current_logger() is logger
+        await asyncio.sleep(0.01)
+        assert braintrust.current_logger() is logger
+        return braintrust.current_logger()
+
+    result = await check_logger_after_awaits()
+    assert result is logger
+
+
+@pytest.mark.asyncio
+async def test_current_logger_in_async_generator(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works within an async generator (yield)."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+
+    async def logger_generator():
+        for i in range(3):
+            await asyncio.sleep(0.01)
+            yield braintrust.current_logger()
+
+    results = []
+    async for log in logger_generator():
+        results.append(log)
+
+    assert len(results) == 3
+    assert all(r is logger for r in results)
+
+
+@pytest.mark.asyncio
+async def test_current_logger_in_separate_task(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works in a separately created asyncio task."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+
+    async def get_logger_in_task():
+        await asyncio.sleep(0.01)
+        return braintrust.current_logger()
+
+    # Create a separate task
+    task = asyncio.create_task(get_logger_in_task())
+    result = await task
+
+    assert result is logger
+
+
+@pytest.mark.asyncio
+async def test_span_link_in_nested_async(with_simulate_login, with_memory_logger):
+    """Test that span.link() works in deeply nested async calls."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+    span = logger.start_span(name="test-span")
+
+    async def level3():
+        await asyncio.sleep(0.01)
+        return span.link()
+
+    async def level2():
+        await asyncio.sleep(0.01)
+        return await level3()
+
+    async def level1():
+        await asyncio.sleep(0.01)
+        return await level2()
+
+    link = await level1()
+    span.end()
+
+    assert link != "https://www.braintrust.dev/noop-span"
+    assert span._id in link
+
+
+def test_current_logger_in_thread(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works correctly when called from a new thread.
+
+    Regression test: ContextVar values don't propagate to new threads,
+    so current_logger must be a plain attribute for thread access.
+    """
+    import threading
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+    assert braintrust.current_logger() is logger
+
+    thread_result = {}
+
+    def check_logger_in_thread():
+        thread_result["logger"] = braintrust.current_logger()
+
+    thread = threading.Thread(target=check_logger_in_thread)
+    thread.start()
+    thread.join()
+
+    assert thread_result["logger"] is logger
+
+
+def test_span_link_in_thread(with_simulate_login, with_memory_logger):
+    """Test that span.link() works correctly when called from a new thread.
+
+    The span should be able to generate a valid link even when link() is called
+    from a different thread than where the span was created.
+    """
+    import threading
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+    span = logger.start_span(name="test-span")
+
+    thread_result = {}
+
+    def get_link_in_thread():
+        # Call link() on the span directly (not via current_span() which uses ContextVar)
+        thread_result["link"] = span.link()
+
+    thread = threading.Thread(target=get_link_in_thread)
+    thread.start()
+    thread.join()
+    span.end()
+
+    # The link should NOT be the noop link
+    assert thread_result["link"] != "https://www.braintrust.dev/noop-span"
+    # The link should contain the span ID
+    assert span._id in thread_result["link"]
+
+
+@pytest.mark.asyncio
+async def test_current_logger_async_context_isolation(with_simulate_login, with_memory_logger):
+    """Test that different async contexts can have different loggers.
+
+    When a child task sets its own logger, it should not affect the parent context.
+    This ensures async context isolation via ContextVar.
+    """
+    import asyncio
+
+    parent_logger = init_logger(project="parent-project", project_id="parent-project-id")
+    assert braintrust.current_logger() is parent_logger
+
+    child_result = {}
+
+    async def child_task():
+        # Child initially inherits parent's logger
+        assert braintrust.current_logger() is parent_logger
+
+        # Child sets its own logger
+        child_logger = init_logger(project="child-project", project_id="child-project-id")
+        child_result["logger"] = braintrust.current_logger()
+        return child_logger
+
+    # Run child task
+    child_logger = await asyncio.create_task(child_task())
+
+    # Child should have seen its own logger
+    assert child_result["logger"] is child_logger
+
+    # Parent should still see parent logger (not affected by child)
+    assert braintrust.current_logger() is parent_logger
+
+
 def test_span_set_current(with_memory_logger):
     """Test that span.set_current() makes the span accessible via current_span()."""
     init_test_logger(__name__)