PyPI - braintrust - Versions diffs - 0.5.0__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

braintrust 0.5.0__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

braintrust/__init__.py +14 -0
braintrust/_generated_types.py +56 -3
braintrust/auto.py +179 -0
braintrust/conftest.py +23 -4
braintrust/db_fields.py +10 -0
braintrust/framework.py +18 -5
braintrust/generated_types.py +3 -1
braintrust/logger.py +369 -134
braintrust/merge_row_batch.py +49 -109
braintrust/oai.py +51 -0
braintrust/test_bt_json.py +0 -5
braintrust/test_context.py +1264 -0
braintrust/test_framework.py +37 -0
braintrust/test_http.py +444 -0
braintrust/test_logger.py +179 -5
braintrust/test_merge_row_batch.py +160 -0
braintrust/test_util.py +58 -1
braintrust/util.py +20 -0
braintrust/version.py +2 -2
braintrust/wrappers/agno/__init__.py +2 -3
braintrust/wrappers/anthropic.py +64 -0
braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
braintrust/wrappers/claude_agent_sdk/test_wrapper.py +9 -0
braintrust/wrappers/dspy.py +52 -1
braintrust/wrappers/google_genai/__init__.py +9 -6
braintrust/wrappers/litellm.py +6 -43
braintrust/wrappers/pydantic_ai.py +2 -3
braintrust/wrappers/test_agno.py +9 -0
braintrust/wrappers/test_anthropic.py +156 -0
braintrust/wrappers/test_dspy.py +117 -0
braintrust/wrappers/test_google_genai.py +9 -0
braintrust/wrappers/test_litellm.py +57 -55
braintrust/wrappers/test_openai.py +253 -1
braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
braintrust/wrappers/test_utils.py +79 -0
braintrust/wrappers/threads.py +114 -0
{braintrust-0.5.0.dist-info → braintrust-0.5.3.dist-info}/METADATA +1 -1
{braintrust-0.5.0.dist-info → braintrust-0.5.3.dist-info}/RECORD +41 -37
{braintrust-0.5.0.dist-info → braintrust-0.5.3.dist-info}/WHEEL +1 -1
braintrust/graph_util.py +0 -147
{braintrust-0.5.0.dist-info → braintrust-0.5.3.dist-info}/entry_points.txt +0 -0
{braintrust-0.5.0.dist-info → braintrust-0.5.3.dist-info}/top_level.txt +0 -0

braintrust/wrappers/test_anthropic.py CHANGED Viewed

@@ -9,6 +9,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.anthropic import wrap_anthropic
+from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
 TEST_ORG_ID = "test-org-123"
 PROJECT_NAME = "test-anthropic-app"
@@ -481,3 +482,158 @@ async def test_anthropic_beta_messages_streaming_async(memory_logger):
     assert metrics["prompt_tokens"] == usage.input_tokens
     assert metrics["completion_tokens"] == usage.output_tokens
     assert metrics["tokens"] == usage.input_tokens + usage.output_tokens
+class TestPatchAnthropic:
+    """Tests for patch_anthropic() / unpatch_anthropic()."""
+    def test_patch_anthropic_sets_wrapped_flag(self):
+        """patch_anthropic() should set __braintrust_wrapped__ on anthropic module."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            import anthropic
+            assert not hasattr(anthropic, "__braintrust_wrapped__")
+            patch_anthropic()
+            assert hasattr(anthropic, "__braintrust_wrapped__")
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+    def test_patch_anthropic_wraps_new_clients(self):
+        """After patch_anthropic(), new Anthropic() clients should be wrapped."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            patch_anthropic()
+            import anthropic
+            client = anthropic.Anthropic(api_key="test-key")
+            # Check that messages is wrapped
+            messages_type = type(client.messages).__name__
+            print(f"messages_type={messages_type}")
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+    def test_patch_anthropic_idempotent(self):
+        """Multiple patch_anthropic() calls should be safe."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            import anthropic
+            patch_anthropic()
+            first_class = anthropic.Anthropic
+            patch_anthropic()  # Second call
+            second_class = anthropic.Anthropic
+            assert first_class is second_class
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+    def test_patch_anthropic_creates_spans(self):
+        """patch_anthropic() should create spans when making API calls."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            from braintrust.test_helpers import init_test_logger
+            from braintrust import logger
+            # Set up memory logger
+            init_test_logger("test-auto")
+            with logger._internal_with_memory_background_logger() as memory_logger:
+                patch_anthropic()
+                import anthropic
+                client = anthropic.Anthropic()
+                # Make a call within a span context
+                import braintrust
+                with braintrust.start_span(name="test") as span:
+                    try:
+                        # This will fail without API key, but span should still be created
+                        client.messages.create(
+                            model="claude-3-5-haiku-latest",
+                            max_tokens=100,
+                            messages=[{"role": "user", "content": "hi"}],
+                        )
+                    except Exception:
+                        pass  # Expected without API key
+                # Check that spans were logged
+                spans = memory_logger.pop()
+                # Should have at least the parent span
+                assert len(spans) >= 1, f"Expected spans, got {spans}"
+                print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+class TestPatchAnthropicSpans:
+    """VCR-based tests verifying that patch_anthropic() produces spans."""
+    @pytest.mark.vcr
+    def test_patch_anthropic_creates_spans(self, memory_logger):
+        """patch_anthropic() should create spans when making API calls."""
+        from braintrust.wrappers.anthropic import patch_anthropic
+        assert not memory_logger.pop()
+        patch_anthropic()
+        client = anthropic.Anthropic()
+        response = client.messages.create(
+            model="claude-3-5-haiku-latest",
+            max_tokens=100,
+            messages=[{"role": "user", "content": "Say hi"}],
+        )
+        assert response.content[0].text
+        # Verify span was created
+        spans = memory_logger.pop()
+        assert len(spans) == 1
+        span = spans[0]
+        assert span["metadata"]["provider"] == "anthropic"
+        assert "claude" in span["metadata"]["model"]
+        assert span["input"]
+class TestPatchAnthropicAsyncSpans:
+    """VCR-based tests verifying that patch_anthropic() produces spans for async clients."""
+    @pytest.mark.vcr
+    @pytest.mark.asyncio
+    async def test_patch_anthropic_async_creates_spans(self, memory_logger):
+        """patch_anthropic() should create spans for async API calls."""
+        from braintrust.wrappers.anthropic import patch_anthropic
+        assert not memory_logger.pop()
+        patch_anthropic()
+        client = anthropic.AsyncAnthropic()
+        response = await client.messages.create(
+            model="claude-3-5-haiku-latest",
+            max_tokens=100,
+            messages=[{"role": "user", "content": "Say hi async"}],
+        )
+        assert response.content[0].text
+        # Verify span was created
+        spans = memory_logger.pop()
+        assert len(spans) == 1
+        span = spans[0]
+        assert span["metadata"]["provider"] == "anthropic"
+        assert "claude" in span["metadata"]["model"]
+        assert span["input"]
+class TestAutoInstrumentAnthropic:
+    """Tests for auto_instrument() with Anthropic."""
+    def test_auto_instrument_anthropic(self):
+        """Test auto_instrument patches Anthropic, creates spans, and uninstrument works."""
+        verify_autoinstrument_script("test_auto_anthropic.py")

braintrust/wrappers/test_dspy.py CHANGED Viewed

@@ -7,6 +7,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.dspy import BraintrustDSpyCallback
+from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
 PROJECT_NAME = "test-dspy-app"
 MODEL = "openai/gpt-4o-mini"
@@ -58,3 +59,119 @@ def test_dspy_callback(memory_logger):
     # Verify span parenting (LM span should have parent)
     assert lm_span.get("span_parents")  # LM span should have parent
+class TestPatchDSPy:
+    """Tests for patch_dspy() / unpatch_dspy()."""
+    def test_patch_dspy_sets_wrapped_flag(self):
+        """patch_dspy() should set __braintrust_wrapped__ on dspy module."""
+        result = run_in_subprocess("""
+            dspy = __import__("dspy")
+            from braintrust.wrappers.dspy import patch_dspy
+            assert not hasattr(dspy, "__braintrust_wrapped__")
+            patch_dspy()
+            assert hasattr(dspy, "__braintrust_wrapped__")
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+    def test_patch_dspy_wraps_configure(self):
+        """After patch_dspy(), dspy.configure() should auto-add BraintrustDSpyCallback."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
+            patch_dspy()
+            import dspy
+            # Configure without explicitly adding callback
+            dspy.configure(lm=None)
+            # Check that BraintrustDSpyCallback was auto-added
+            from dspy.dsp.utils.settings import settings
+            callbacks = settings.callbacks
+            has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
+            assert has_bt_callback, f"Expected BraintrustDSpyCallback in {callbacks}"
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+    def test_patch_dspy_preserves_existing_callbacks(self):
+        """patch_dspy() should preserve user-provided callbacks."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
+            patch_dspy()
+            import dspy
+            from dspy.utils.callback import BaseCallback
+            class MyCallback(BaseCallback):
+                pass
+            my_callback = MyCallback()
+            dspy.configure(lm=None, callbacks=[my_callback])
+            from dspy.dsp.utils.settings import settings
+            callbacks = settings.callbacks
+            # Should have both callbacks
+            has_my_callback = any(cb is my_callback for cb in callbacks)
+            has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
+            assert has_my_callback, "User callback should be preserved"
+            assert has_bt_callback, "BraintrustDSpyCallback should be added"
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+    def test_patch_dspy_does_not_duplicate_callback(self):
+        """patch_dspy() should not add duplicate BraintrustDSpyCallback."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
+            patch_dspy()
+            import dspy
+            # User explicitly adds BraintrustDSpyCallback
+            bt_callback = BraintrustDSpyCallback()
+            dspy.configure(lm=None, callbacks=[bt_callback])
+            from dspy.dsp.utils.settings import settings
+            callbacks = settings.callbacks
+            # Should only have one BraintrustDSpyCallback
+            bt_callbacks = [cb for cb in callbacks if isinstance(cb, BraintrustDSpyCallback)]
+            assert len(bt_callbacks) == 1, f"Expected 1 BraintrustDSpyCallback, got {len(bt_callbacks)}"
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+    def test_patch_dspy_idempotent(self):
+        """Multiple patch_dspy() calls should be safe."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy
+            import dspy
+            patch_dspy()
+            patch_dspy()  # Second call - should be no-op, not double-wrap
+            # Verify configure still works
+            lm = dspy.LM("openai/gpt-4o-mini")
+            dspy.configure(lm=lm)
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+class TestAutoInstrumentDSPy:
+    """Tests for auto_instrument() with DSPy."""
+    def test_auto_instrument_dspy(self):
+        """Test auto_instrument patches DSPy, creates spans, and uninstrument works."""
+        verify_autoinstrument_script("test_auto_dspy.py")

braintrust/wrappers/test_google_genai.py CHANGED Viewed

@@ -6,6 +6,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.google_genai import setup_genai
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
 from google.genai import types
 from google.genai.client import Client
@@ -637,3 +638,11 @@ def test_attachment_with_pydantic_model(memory_logger):
     # Attachment should be preserved
     assert copied["context_file"] is attachment
+class TestAutoInstrumentGoogleGenAI:
+    """Tests for auto_instrument() with Google GenAI."""
+    def test_auto_instrument_google_genai(self):
+        """Test auto_instrument patches Google GenAI and creates spans."""
+        verify_autoinstrument_script("test_auto_google_genai.py")

braintrust/wrappers/test_litellm.py CHANGED Viewed

@@ -6,7 +6,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import assert_dict_matches, init_test_logger
 from braintrust.wrappers.litellm import wrap_litellm
-from braintrust.wrappers.test_utils import assert_metrics_are_valid
+from braintrust.wrappers.test_utils import assert_metrics_are_valid, verify_autoinstrument_script
 TEST_ORG_ID = "test-org-litellm-py-tracing"
 PROJECT_NAME = "test-project-litellm-py-tracing"
@@ -697,71 +697,73 @@ async def test_litellm_async_streaming_with_break(memory_logger):
 @pytest.mark.vcr
 def test_patch_litellm_responses(memory_logger):
     """Test that patch_litellm() patches responses."""
-    from braintrust.wrappers.litellm import patch_litellm, unpatch_litellm
+    from braintrust.wrappers.litellm import patch_litellm
     assert not memory_logger.pop()
     patch_litellm()
-    try:
-        start = time.time()
-        # Call litellm.responses directly (not wrapped_litellm.responses)
-        response = litellm.responses(
-            model=TEST_MODEL,
-            input=TEST_PROMPT,
-            instructions="Just the number please",
-        )
-        end = time.time()
-        assert response
-        assert response.output
-        assert len(response.output) > 0
-        content = response.output[0].content[0].text
-        assert "24" in content or "twenty-four" in content.lower()
-        # Verify span was created
-        spans = memory_logger.pop()
-        assert len(spans) == 1
-        span = spans[0]
-        assert_metrics_are_valid(span["metrics"], start, end)
-        assert span["metadata"]["model"] == TEST_MODEL
-        assert span["metadata"]["provider"] == "litellm"
-        assert TEST_PROMPT in str(span["input"])
-    finally:
-        unpatch_litellm()
+    start = time.time()
+    # Call litellm.responses directly (not wrapped_litellm.responses)
+    response = litellm.responses(
+        model=TEST_MODEL,
+        input=TEST_PROMPT,
+        instructions="Just the number please",
+    )
+    end = time.time()
+    assert response
+    assert response.output
+    assert len(response.output) > 0
+    content = response.output[0].content[0].text
+    assert "24" in content or "twenty-four" in content.lower()
+    # Verify span was created
+    spans = memory_logger.pop()
+    assert len(spans) == 1
+    span = spans[0]
+    assert_metrics_are_valid(span["metrics"], start, end)
+    assert span["metadata"]["model"] == TEST_MODEL
+    assert span["metadata"]["provider"] == "litellm"
+    assert TEST_PROMPT in str(span["input"])
 @pytest.mark.vcr
 @pytest.mark.asyncio
 async def test_patch_litellm_aresponses(memory_logger):
     """Test that patch_litellm() patches aresponses."""
-    from braintrust.wrappers.litellm import patch_litellm, unpatch_litellm
+    from braintrust.wrappers.litellm import patch_litellm
     assert not memory_logger.pop()
     patch_litellm()
-    try:
-        start = time.time()
-        # Call litellm.aresponses directly (not wrapped_litellm.aresponses)
-        response = await litellm.aresponses(
-            model=TEST_MODEL,
-            input=TEST_PROMPT,
-            instructions="Just the number please",
-        )
-        end = time.time()
-        assert response
-        assert response.output
-        assert len(response.output) > 0
-        content = response.output[0].content[0].text
-        assert "24" in content or "twenty-four" in content.lower()
-        # Verify span was created
-        spans = memory_logger.pop()
-        assert len(spans) == 1
-        span = spans[0]
-        assert_metrics_are_valid(span["metrics"], start, end)
-        assert span["metadata"]["model"] == TEST_MODEL
-        assert span["metadata"]["provider"] == "litellm"
-        assert TEST_PROMPT in str(span["input"])
-    finally:
-        unpatch_litellm()
+    start = time.time()
+    # Call litellm.aresponses directly (not wrapped_litellm.aresponses)
+    response = await litellm.aresponses(
+        model=TEST_MODEL,
+        input=TEST_PROMPT,
+        instructions="Just the number please",
+    )
+    end = time.time()
+    assert response
+    assert response.output
+    assert len(response.output) > 0
+    content = response.output[0].content[0].text
+    assert "24" in content or "twenty-four" in content.lower()
+    # Verify span was created
+    spans = memory_logger.pop()
+    assert len(spans) == 1
+    span = spans[0]
+    assert_metrics_are_valid(span["metrics"], start, end)
+    assert span["metadata"]["model"] == TEST_MODEL
+    assert span["metadata"]["provider"] == "litellm"
+    assert TEST_PROMPT in str(span["input"])
+class TestAutoInstrumentLiteLLM:
+    """Tests for auto_instrument() with LiteLLM."""
+    def test_auto_instrument_litellm(self):
+        """Test auto_instrument patches LiteLLM, creates spans, and uninstrument works."""
+        verify_autoinstrument_script("test_auto_litellm.py")

braintrust 0.5.0__py3-none-any.whl → 0.5.3__py3-none-any.whl

braintrust 0.5.0__py3-none-any.whl → 0.5.3__py3-none-any.whl