braintrust 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. braintrust/__init__.py +3 -0
  2. braintrust/auto.py +179 -0
  3. braintrust/conftest.py +23 -4
  4. braintrust/framework.py +18 -5
  5. braintrust/logger.py +49 -13
  6. braintrust/oai.py +51 -0
  7. braintrust/test_bt_json.py +0 -5
  8. braintrust/test_framework.py +37 -0
  9. braintrust/test_http.py +444 -0
  10. braintrust/test_logger.py +179 -5
  11. braintrust/test_util.py +58 -1
  12. braintrust/util.py +20 -0
  13. braintrust/version.py +2 -2
  14. braintrust/wrappers/agno/__init__.py +2 -3
  15. braintrust/wrappers/anthropic.py +64 -0
  16. braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
  17. braintrust/wrappers/claude_agent_sdk/test_wrapper.py +9 -0
  18. braintrust/wrappers/dspy.py +52 -1
  19. braintrust/wrappers/google_genai/__init__.py +9 -6
  20. braintrust/wrappers/litellm.py +6 -43
  21. braintrust/wrappers/pydantic_ai.py +2 -3
  22. braintrust/wrappers/test_agno.py +9 -0
  23. braintrust/wrappers/test_anthropic.py +156 -0
  24. braintrust/wrappers/test_dspy.py +117 -0
  25. braintrust/wrappers/test_google_genai.py +9 -0
  26. braintrust/wrappers/test_litellm.py +57 -55
  27. braintrust/wrappers/test_openai.py +253 -1
  28. braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
  29. braintrust/wrappers/test_utils.py +79 -0
  30. {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
  31. {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/RECORD +34 -32
  32. {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
  33. {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
  34. {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ import pytest
7
7
  from braintrust import logger
8
8
  from braintrust.test_helpers import init_test_logger
9
9
  from braintrust.wrappers.dspy import BraintrustDSpyCallback
10
+ from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
10
11
 
11
12
  PROJECT_NAME = "test-dspy-app"
12
13
  MODEL = "openai/gpt-4o-mini"
@@ -58,3 +59,119 @@ def test_dspy_callback(memory_logger):
58
59
 
59
60
  # Verify span parenting (LM span should have parent)
60
61
  assert lm_span.get("span_parents") # LM span should have parent
62
+
63
+
64
+ class TestPatchDSPy:
65
+ """Tests for patch_dspy() / unpatch_dspy()."""
66
+
67
+ def test_patch_dspy_sets_wrapped_flag(self):
68
+ """patch_dspy() should set __braintrust_wrapped__ on dspy module."""
69
+ result = run_in_subprocess("""
70
+ dspy = __import__("dspy")
71
+ from braintrust.wrappers.dspy import patch_dspy
72
+
73
+ assert not hasattr(dspy, "__braintrust_wrapped__")
74
+ patch_dspy()
75
+ assert hasattr(dspy, "__braintrust_wrapped__")
76
+ print("SUCCESS")
77
+ """)
78
+ assert result.returncode == 0, f"Failed: {result.stderr}"
79
+ assert "SUCCESS" in result.stdout
80
+
81
+ def test_patch_dspy_wraps_configure(self):
82
+ """After patch_dspy(), dspy.configure() should auto-add BraintrustDSpyCallback."""
83
+ result = run_in_subprocess("""
84
+ from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
85
+ patch_dspy()
86
+
87
+ import dspy
88
+
89
+ # Configure without explicitly adding callback
90
+ dspy.configure(lm=None)
91
+
92
+ # Check that BraintrustDSpyCallback was auto-added
93
+ from dspy.dsp.utils.settings import settings
94
+ callbacks = settings.callbacks
95
+ has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
96
+ assert has_bt_callback, f"Expected BraintrustDSpyCallback in {callbacks}"
97
+ print("SUCCESS")
98
+ """)
99
+ assert result.returncode == 0, f"Failed: {result.stderr}"
100
+ assert "SUCCESS" in result.stdout
101
+
102
+ def test_patch_dspy_preserves_existing_callbacks(self):
103
+ """patch_dspy() should preserve user-provided callbacks."""
104
+ result = run_in_subprocess("""
105
+ from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
106
+ patch_dspy()
107
+
108
+ import dspy
109
+ from dspy.utils.callback import BaseCallback
110
+
111
+ class MyCallback(BaseCallback):
112
+ pass
113
+
114
+ my_callback = MyCallback()
115
+ dspy.configure(lm=None, callbacks=[my_callback])
116
+
117
+ from dspy.dsp.utils.settings import settings
118
+ callbacks = settings.callbacks
119
+
120
+ # Should have both callbacks
121
+ has_my_callback = any(cb is my_callback for cb in callbacks)
122
+ has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
123
+
124
+ assert has_my_callback, "User callback should be preserved"
125
+ assert has_bt_callback, "BraintrustDSpyCallback should be added"
126
+ print("SUCCESS")
127
+ """)
128
+ assert result.returncode == 0, f"Failed: {result.stderr}"
129
+ assert "SUCCESS" in result.stdout
130
+
131
+ def test_patch_dspy_does_not_duplicate_callback(self):
132
+ """patch_dspy() should not add duplicate BraintrustDSpyCallback."""
133
+ result = run_in_subprocess("""
134
+ from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
135
+ patch_dspy()
136
+
137
+ import dspy
138
+
139
+ # User explicitly adds BraintrustDSpyCallback
140
+ bt_callback = BraintrustDSpyCallback()
141
+ dspy.configure(lm=None, callbacks=[bt_callback])
142
+
143
+ from dspy.dsp.utils.settings import settings
144
+ callbacks = settings.callbacks
145
+
146
+ # Should only have one BraintrustDSpyCallback
147
+ bt_callbacks = [cb for cb in callbacks if isinstance(cb, BraintrustDSpyCallback)]
148
+ assert len(bt_callbacks) == 1, f"Expected 1 BraintrustDSpyCallback, got {len(bt_callbacks)}"
149
+ print("SUCCESS")
150
+ """)
151
+ assert result.returncode == 0, f"Failed: {result.stderr}"
152
+ assert "SUCCESS" in result.stdout
153
+
154
+ def test_patch_dspy_idempotent(self):
155
+ """Multiple patch_dspy() calls should be safe."""
156
+ result = run_in_subprocess("""
157
+ from braintrust.wrappers.dspy import patch_dspy
158
+ import dspy
159
+
160
+ patch_dspy()
161
+ patch_dspy() # Second call - should be no-op, not double-wrap
162
+
163
+ # Verify configure still works
164
+ lm = dspy.LM("openai/gpt-4o-mini")
165
+ dspy.configure(lm=lm)
166
+ print("SUCCESS")
167
+ """)
168
+ assert result.returncode == 0, f"Failed: {result.stderr}"
169
+ assert "SUCCESS" in result.stdout
170
+
171
+
172
+ class TestAutoInstrumentDSPy:
173
+ """Tests for auto_instrument() with DSPy."""
174
+
175
+ def test_auto_instrument_dspy(self):
176
+ """Test auto_instrument patches DSPy, creates spans, and uninstrument works."""
177
+ verify_autoinstrument_script("test_auto_dspy.py")
@@ -6,6 +6,7 @@ import pytest
6
6
  from braintrust import logger
7
7
  from braintrust.test_helpers import init_test_logger
8
8
  from braintrust.wrappers.google_genai import setup_genai
9
+ from braintrust.wrappers.test_utils import verify_autoinstrument_script
9
10
  from google.genai import types
10
11
  from google.genai.client import Client
11
12
 
@@ -637,3 +638,11 @@ def test_attachment_with_pydantic_model(memory_logger):
637
638
 
638
639
  # Attachment should be preserved
639
640
  assert copied["context_file"] is attachment
641
+
642
+
643
+ class TestAutoInstrumentGoogleGenAI:
644
+ """Tests for auto_instrument() with Google GenAI."""
645
+
646
+ def test_auto_instrument_google_genai(self):
647
+ """Test auto_instrument patches Google GenAI and creates spans."""
648
+ verify_autoinstrument_script("test_auto_google_genai.py")
@@ -6,7 +6,7 @@ import pytest
6
6
  from braintrust import logger
7
7
  from braintrust.test_helpers import assert_dict_matches, init_test_logger
8
8
  from braintrust.wrappers.litellm import wrap_litellm
9
- from braintrust.wrappers.test_utils import assert_metrics_are_valid
9
+ from braintrust.wrappers.test_utils import assert_metrics_are_valid, verify_autoinstrument_script
10
10
 
11
11
  TEST_ORG_ID = "test-org-litellm-py-tracing"
12
12
  PROJECT_NAME = "test-project-litellm-py-tracing"
@@ -697,71 +697,73 @@ async def test_litellm_async_streaming_with_break(memory_logger):
697
697
  @pytest.mark.vcr
698
698
  def test_patch_litellm_responses(memory_logger):
699
699
  """Test that patch_litellm() patches responses."""
700
- from braintrust.wrappers.litellm import patch_litellm, unpatch_litellm
700
+ from braintrust.wrappers.litellm import patch_litellm
701
701
 
702
702
  assert not memory_logger.pop()
703
703
 
704
704
  patch_litellm()
705
- try:
706
- start = time.time()
707
- # Call litellm.responses directly (not wrapped_litellm.responses)
708
- response = litellm.responses(
709
- model=TEST_MODEL,
710
- input=TEST_PROMPT,
711
- instructions="Just the number please",
712
- )
713
- end = time.time()
714
-
715
- assert response
716
- assert response.output
717
- assert len(response.output) > 0
718
- content = response.output[0].content[0].text
719
- assert "24" in content or "twenty-four" in content.lower()
720
-
721
- # Verify span was created
722
- spans = memory_logger.pop()
723
- assert len(spans) == 1
724
- span = spans[0]
725
- assert_metrics_are_valid(span["metrics"], start, end)
726
- assert span["metadata"]["model"] == TEST_MODEL
727
- assert span["metadata"]["provider"] == "litellm"
728
- assert TEST_PROMPT in str(span["input"])
729
- finally:
730
- unpatch_litellm()
705
+ start = time.time()
706
+ # Call litellm.responses directly (not wrapped_litellm.responses)
707
+ response = litellm.responses(
708
+ model=TEST_MODEL,
709
+ input=TEST_PROMPT,
710
+ instructions="Just the number please",
711
+ )
712
+ end = time.time()
713
+
714
+ assert response
715
+ assert response.output
716
+ assert len(response.output) > 0
717
+ content = response.output[0].content[0].text
718
+ assert "24" in content or "twenty-four" in content.lower()
719
+
720
+ # Verify span was created
721
+ spans = memory_logger.pop()
722
+ assert len(spans) == 1
723
+ span = spans[0]
724
+ assert_metrics_are_valid(span["metrics"], start, end)
725
+ assert span["metadata"]["model"] == TEST_MODEL
726
+ assert span["metadata"]["provider"] == "litellm"
727
+ assert TEST_PROMPT in str(span["input"])
731
728
 
732
729
 
733
730
  @pytest.mark.vcr
734
731
  @pytest.mark.asyncio
735
732
  async def test_patch_litellm_aresponses(memory_logger):
736
733
  """Test that patch_litellm() patches aresponses."""
737
- from braintrust.wrappers.litellm import patch_litellm, unpatch_litellm
734
+ from braintrust.wrappers.litellm import patch_litellm
738
735
 
739
736
  assert not memory_logger.pop()
740
737
 
741
738
  patch_litellm()
742
- try:
743
- start = time.time()
744
- # Call litellm.aresponses directly (not wrapped_litellm.aresponses)
745
- response = await litellm.aresponses(
746
- model=TEST_MODEL,
747
- input=TEST_PROMPT,
748
- instructions="Just the number please",
749
- )
750
- end = time.time()
751
-
752
- assert response
753
- assert response.output
754
- assert len(response.output) > 0
755
- content = response.output[0].content[0].text
756
- assert "24" in content or "twenty-four" in content.lower()
757
-
758
- # Verify span was created
759
- spans = memory_logger.pop()
760
- assert len(spans) == 1
761
- span = spans[0]
762
- assert_metrics_are_valid(span["metrics"], start, end)
763
- assert span["metadata"]["model"] == TEST_MODEL
764
- assert span["metadata"]["provider"] == "litellm"
765
- assert TEST_PROMPT in str(span["input"])
766
- finally:
767
- unpatch_litellm()
739
+ start = time.time()
740
+ # Call litellm.aresponses directly (not wrapped_litellm.aresponses)
741
+ response = await litellm.aresponses(
742
+ model=TEST_MODEL,
743
+ input=TEST_PROMPT,
744
+ instructions="Just the number please",
745
+ )
746
+ end = time.time()
747
+
748
+ assert response
749
+ assert response.output
750
+ assert len(response.output) > 0
751
+ content = response.output[0].content[0].text
752
+ assert "24" in content or "twenty-four" in content.lower()
753
+
754
+ # Verify span was created
755
+ spans = memory_logger.pop()
756
+ assert len(spans) == 1
757
+ span = spans[0]
758
+ assert_metrics_are_valid(span["metrics"], start, end)
759
+ assert span["metadata"]["model"] == TEST_MODEL
760
+ assert span["metadata"]["provider"] == "litellm"
761
+ assert TEST_PROMPT in str(span["input"])
762
+
763
+
764
+ class TestAutoInstrumentLiteLLM:
765
+ """Tests for auto_instrument() with LiteLLM."""
766
+
767
+ def test_auto_instrument_litellm(self):
768
+ """Test auto_instrument patches LiteLLM, creates spans, and uninstrument works."""
769
+ verify_autoinstrument_script("test_auto_litellm.py")
@@ -6,7 +6,7 @@ import openai
6
6
  import pytest
7
7
  from braintrust import logger, wrap_openai
8
8
  from braintrust.test_helpers import assert_dict_matches, init_test_logger
9
- from braintrust.wrappers.test_utils import assert_metrics_are_valid
9
+ from braintrust.wrappers.test_utils import assert_metrics_are_valid, run_in_subprocess, verify_autoinstrument_script
10
10
  from openai import AsyncOpenAI
11
11
  from openai._types import NOT_GIVEN
12
12
  from pydantic import BaseModel
@@ -1681,3 +1681,255 @@ def test_braintrust_tracing_processor_trace_metadata_logging(memory_logger):
1681
1681
  spans = memory_logger.pop()
1682
1682
  root_span = spans[0]
1683
1683
  assert root_span["metadata"]["conversation_id"] == "test-12345", "Should log trace metadata"
1684
+
1685
+
1686
+ class TestPatchOpenAI:
1687
+ """Tests for patch_openai()."""
1688
+
1689
+ def test_patch_openai_sets_wrapped_flag(self):
1690
+ """patch_openai() should set __braintrust_wrapped__ on openai module."""
1691
+ result = run_in_subprocess("""
1692
+ from braintrust.oai import patch_openai
1693
+ import openai
1694
+
1695
+ assert not hasattr(openai, "__braintrust_wrapped__")
1696
+ patch_openai()
1697
+ assert hasattr(openai, "__braintrust_wrapped__")
1698
+ print("SUCCESS")
1699
+ """)
1700
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1701
+ assert "SUCCESS" in result.stdout
1702
+
1703
+ def test_patch_openai_wraps_new_clients(self):
1704
+ """After patch_openai(), new OpenAI() clients should be wrapped."""
1705
+ result = run_in_subprocess("""
1706
+ from braintrust.oai import patch_openai
1707
+ patch_openai()
1708
+
1709
+ import openai
1710
+ client = openai.OpenAI(api_key="test-key")
1711
+
1712
+ # Check that chat completions is wrapped (our wrapper adds tracing)
1713
+ # The wrapper replaces client.chat with a wrapped version
1714
+ chat_type = type(client.chat).__name__
1715
+ print(f"chat_type={chat_type}")
1716
+ print("SUCCESS")
1717
+ """)
1718
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1719
+ assert "SUCCESS" in result.stdout
1720
+
1721
+ def test_patch_openai_creates_spans(self):
1722
+ """patch_openai() should create spans when making API calls."""
1723
+ result = run_in_subprocess("""
1724
+ from braintrust.oai import patch_openai
1725
+ from braintrust.test_helpers import init_test_logger
1726
+ from braintrust import logger
1727
+
1728
+ # Set up memory logger
1729
+ init_test_logger("test-auto")
1730
+ with logger._internal_with_memory_background_logger() as memory_logger:
1731
+ patch_openai()
1732
+
1733
+ import openai
1734
+ client = openai.OpenAI()
1735
+
1736
+ # Make a call within a span context
1737
+ import braintrust
1738
+ with braintrust.start_span(name="test") as span:
1739
+ try:
1740
+ # This will fail without API key, but span should still be created
1741
+ client.chat.completions.create(
1742
+ model="gpt-4o-mini",
1743
+ messages=[{"role": "user", "content": "hi"}],
1744
+ )
1745
+ except Exception:
1746
+ pass # Expected without API key
1747
+
1748
+ # Check that spans were logged
1749
+ spans = memory_logger.pop()
1750
+ # Should have at least the parent span
1751
+ assert len(spans) >= 1, f"Expected spans, got {spans}"
1752
+ print("SUCCESS")
1753
+ """)
1754
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1755
+ assert "SUCCESS" in result.stdout
1756
+
1757
+ def test_patch_openai_before_import(self):
1758
+ """patch_openai() should work when called before importing openai."""
1759
+ result = run_in_subprocess("""
1760
+ from braintrust.oai import patch_openai
1761
+
1762
+ # Patch BEFORE importing openai
1763
+ patch_openai()
1764
+
1765
+ import openai
1766
+ assert hasattr(openai, "__braintrust_wrapped__")
1767
+
1768
+ client = openai.OpenAI(api_key="test-key")
1769
+ print("SUCCESS")
1770
+ """)
1771
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1772
+ assert "SUCCESS" in result.stdout
1773
+
1774
+ def test_patch_openai_after_import(self):
1775
+ """patch_openai() should work when called after importing openai."""
1776
+ result = run_in_subprocess("""
1777
+ import openai
1778
+ from braintrust.oai import patch_openai
1779
+
1780
+ # Patch AFTER importing openai
1781
+ patch_openai()
1782
+
1783
+ assert hasattr(openai, "__braintrust_wrapped__")
1784
+
1785
+ client = openai.OpenAI(api_key="test-key")
1786
+ print("SUCCESS")
1787
+ """)
1788
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1789
+ assert "SUCCESS" in result.stdout
1790
+
1791
+ def test_patch_openai_idempotent(self):
1792
+ """Multiple patch_openai() calls should be safe."""
1793
+ result = run_in_subprocess("""
1794
+ from braintrust.oai import patch_openai
1795
+ import openai
1796
+
1797
+ patch_openai()
1798
+ patch_openai() # Second call - should be no-op, not double-wrap
1799
+
1800
+ # Verify we can still create clients
1801
+ client = openai.OpenAI(api_key="test-key")
1802
+ assert hasattr(client, "chat")
1803
+ print("SUCCESS")
1804
+ """)
1805
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1806
+ assert "SUCCESS" in result.stdout
1807
+
1808
+ def test_patch_openai_chains_with_other_patches(self):
1809
+ """patch_openai() should chain with other libraries that patch OpenAI."""
1810
+ result = run_in_subprocess("""
1811
+ import openai
1812
+
1813
+ # Simulate another library (like Datadog) patching OpenAI first
1814
+ other_library_init_called = []
1815
+
1816
+ class OtherLibraryOpenAI(openai.OpenAI):
1817
+ def __init__(self, *args, **kwargs):
1818
+ other_library_init_called.append(True)
1819
+ super().__init__(*args, **kwargs)
1820
+
1821
+ openai.OpenAI = OtherLibraryOpenAI
1822
+
1823
+ # Now apply our patch - should subclass OtherLibraryOpenAI
1824
+ from braintrust.oai import patch_openai
1825
+ patch_openai()
1826
+
1827
+ # Create a client - both patches should run
1828
+ client = openai.OpenAI(api_key="test-key")
1829
+
1830
+ # Verify other library's __init__ was called (chaining works)
1831
+ assert len(other_library_init_called) == 1, "Other library's patch should have run"
1832
+
1833
+ # Verify our patch was applied (client has wrapped chat)
1834
+ assert hasattr(client, "chat"), "Client should have chat attribute"
1835
+
1836
+ print("SUCCESS")
1837
+ """)
1838
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1839
+ assert "SUCCESS" in result.stdout
1840
+
1841
+ def test_patch_openai_chains_async_client(self):
1842
+ """patch_openai() should chain with other libraries for AsyncOpenAI too."""
1843
+ result = run_in_subprocess("""
1844
+ import openai
1845
+
1846
+ # Simulate another library patching AsyncOpenAI first
1847
+ other_library_init_called = []
1848
+
1849
+ class OtherLibraryAsyncOpenAI(openai.AsyncOpenAI):
1850
+ def __init__(self, *args, **kwargs):
1851
+ other_library_init_called.append(True)
1852
+ super().__init__(*args, **kwargs)
1853
+
1854
+ openai.AsyncOpenAI = OtherLibraryAsyncOpenAI
1855
+
1856
+ # Now apply our patch
1857
+ from braintrust.oai import patch_openai
1858
+ patch_openai()
1859
+
1860
+ # Create an async client - both patches should run
1861
+ client = openai.AsyncOpenAI(api_key="test-key")
1862
+
1863
+ # Verify other library's __init__ was called
1864
+ assert len(other_library_init_called) == 1, "Other library's patch should have run"
1865
+
1866
+ # Verify our patch was applied
1867
+ assert hasattr(client, "chat"), "Client should have chat attribute"
1868
+
1869
+ print("SUCCESS")
1870
+ """)
1871
+ assert result.returncode == 0, f"Failed: {result.stderr}"
1872
+ assert "SUCCESS" in result.stdout
1873
+
1874
+
1875
+ class TestPatchOpenAISpans:
1876
+ """VCR-based tests verifying that patch_openai() produces spans."""
1877
+
1878
+ @pytest.mark.vcr
1879
+ def test_patch_openai_creates_spans(self, memory_logger):
1880
+ """patch_openai() should create spans when making API calls."""
1881
+ from braintrust.oai import patch_openai
1882
+
1883
+ assert not memory_logger.pop()
1884
+
1885
+ patch_openai()
1886
+ client = openai.OpenAI()
1887
+ response = client.chat.completions.create(
1888
+ model="gpt-4o-mini",
1889
+ messages=[{"role": "user", "content": "Say hi"}],
1890
+ )
1891
+ assert response.choices[0].message.content
1892
+
1893
+ # Verify span was created
1894
+ spans = memory_logger.pop()
1895
+ assert len(spans) == 1
1896
+ span = spans[0]
1897
+ assert span["metadata"]["provider"] == "openai"
1898
+ assert "gpt-4o-mini" in span["metadata"]["model"]
1899
+ assert span["input"]
1900
+
1901
+
1902
+ class TestPatchOpenAIAsyncSpans:
1903
+ """VCR-based tests verifying that patch_openai() produces spans for async clients."""
1904
+
1905
+ @pytest.mark.vcr
1906
+ @pytest.mark.asyncio
1907
+ async def test_patch_openai_async_creates_spans(self, memory_logger):
1908
+ """patch_openai() should create spans for async API calls."""
1909
+ from braintrust.oai import patch_openai
1910
+
1911
+ assert not memory_logger.pop()
1912
+
1913
+ patch_openai()
1914
+ client = openai.AsyncOpenAI()
1915
+ response = await client.chat.completions.create(
1916
+ model="gpt-4o-mini",
1917
+ messages=[{"role": "user", "content": "Say hi async"}],
1918
+ )
1919
+ assert response.choices[0].message.content
1920
+
1921
+ # Verify span was created
1922
+ spans = memory_logger.pop()
1923
+ assert len(spans) == 1
1924
+ span = spans[0]
1925
+ assert span["metadata"]["provider"] == "openai"
1926
+ assert "gpt-4o-mini" in span["metadata"]["model"]
1927
+ assert span["input"]
1928
+
1929
+
1930
+ class TestAutoInstrumentOpenAI:
1931
+ """Tests for auto_instrument() with OpenAI."""
1932
+
1933
+ def test_auto_instrument_openai(self):
1934
+ """Test auto_instrument patches OpenAI, creates spans, and uninstrument works."""
1935
+ verify_autoinstrument_script("test_auto_openai.py")
@@ -9,6 +9,7 @@ import pytest
9
9
  from braintrust import logger, setup_pydantic_ai, traced
10
10
  from braintrust.span_types import SpanTypeAttribute
11
11
  from braintrust.test_helpers import init_test_logger
12
+ from braintrust.wrappers.test_utils import verify_autoinstrument_script
12
13
  from pydantic import BaseModel
13
14
  from pydantic_ai import Agent, ModelSettings
14
15
  from pydantic_ai.messages import ModelRequest, UserPromptPart
@@ -2572,3 +2573,11 @@ async def test_attachment_in_result_data(memory_logger):
2572
2573
  copied = bt_safe_deep_copy(result_data)
2573
2574
  assert copied["output_file"] is ext_attachment
2574
2575
  assert copied["success"] is True
2576
+
2577
+
2578
+ class TestAutoInstrumentPydanticAI:
2579
+ """Tests for auto_instrument() with Pydantic AI."""
2580
+
2581
+ def test_auto_instrument_pydantic_ai(self):
2582
+ """Test auto_instrument patches Pydantic AI and creates spans."""
2583
+ verify_autoinstrument_script("test_auto_pydantic_ai.py")
@@ -1,3 +1,59 @@
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import textwrap
5
+ from contextlib import contextmanager
6
+ from pathlib import Path
7
+
8
+ import vcr
9
+ from braintrust import logger
10
+ from braintrust.conftest import get_vcr_config
11
+ from braintrust.test_helpers import init_test_logger
12
+
13
+ # Source directory paths (resolved to handle installed vs source locations)
14
+ _SOURCE_DIR = Path(__file__).resolve().parent
15
+ AUTO_TEST_SCRIPTS_DIR = _SOURCE_DIR / "auto_test_scripts"
16
+
17
+ # Cassettes dir can be overridden via env var for subprocess tests
18
+ CASSETTES_DIR = Path(os.environ.get("BRAINTRUST_CASSETTES_DIR", _SOURCE_DIR / "cassettes"))
19
+
20
+
21
+ def run_in_subprocess(
22
+ code: str, timeout: int = 30, env: dict[str, str] | None = None
23
+ ) -> subprocess.CompletedProcess:
24
+ """Run Python code in a fresh subprocess."""
25
+ run_env = os.environ.copy()
26
+ if env:
27
+ run_env.update(env)
28
+ return subprocess.run(
29
+ [sys.executable, "-c", textwrap.dedent(code)],
30
+ capture_output=True,
31
+ text=True,
32
+ timeout=timeout,
33
+ env=run_env,
34
+ )
35
+
36
+
37
+ def verify_autoinstrument_script(script_name: str, timeout: int = 30) -> subprocess.CompletedProcess:
38
+ """Run a test script from the auto_test_scripts directory.
39
+
40
+ Raises AssertionError if the script exits with non-zero code.
41
+ """
42
+ script_path = AUTO_TEST_SCRIPTS_DIR / script_name
43
+ # Pass cassettes dir to subprocess since it may use installed package
44
+ env = os.environ.copy()
45
+ env["BRAINTRUST_CASSETTES_DIR"] = str(_SOURCE_DIR / "cassettes")
46
+ result = subprocess.run(
47
+ [sys.executable, str(script_path)],
48
+ capture_output=True,
49
+ text=True,
50
+ timeout=timeout,
51
+ env=env,
52
+ )
53
+ assert result.returncode == 0, f"Script {script_name} failed:\n{result.stderr}"
54
+ return result
55
+
56
+
1
57
  def assert_metrics_are_valid(metrics, start=None, end=None):
2
58
  assert metrics
3
59
  # assert 0 < metrics["time_to_first_token"]
@@ -10,3 +66,26 @@ def assert_metrics_are_valid(metrics, start=None, end=None):
10
66
  assert start <= metrics["start"] <= metrics["end"] <= end
11
67
  else:
12
68
  assert metrics["start"] <= metrics["end"]
69
+
70
+
71
+ @contextmanager
72
+ def autoinstrument_test_context(cassette_name: str):
73
+ """Context manager for auto_instrument tests.
74
+
75
+ Sets up VCR and memory_logger, yields memory_logger for direct use.
76
+
77
+ Usage:
78
+ with autoinstrument_test_context("test_auto_openai") as memory_logger:
79
+ # make API call
80
+ spans = memory_logger.pop()
81
+ """
82
+ cassette_path = CASSETTES_DIR / f"{cassette_name}.yaml"
83
+
84
+ init_test_logger("test-auto-instrument")
85
+
86
+ with logger._internal_with_memory_background_logger() as memory_logger:
87
+ memory_logger.pop() # Clear any prior spans
88
+
89
+ my_vcr = vcr.VCR(**get_vcr_config())
90
+ with my_vcr.use_cassette(str(cassette_path)):
91
+ yield memory_logger
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: braintrust
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: SDK for integrating Braintrust
5
5
  Home-page: https://www.braintrust.dev
6
6
  Author: Braintrust