braintrust 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/__init__.py +3 -0
- braintrust/auto.py +179 -0
- braintrust/conftest.py +23 -4
- braintrust/framework.py +18 -5
- braintrust/logger.py +49 -13
- braintrust/oai.py +51 -0
- braintrust/test_bt_json.py +0 -5
- braintrust/test_framework.py +37 -0
- braintrust/test_http.py +444 -0
- braintrust/test_logger.py +179 -5
- braintrust/test_util.py +58 -1
- braintrust/util.py +20 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/agno/__init__.py +2 -3
- braintrust/wrappers/anthropic.py +64 -0
- braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +9 -0
- braintrust/wrappers/dspy.py +52 -1
- braintrust/wrappers/google_genai/__init__.py +9 -6
- braintrust/wrappers/litellm.py +6 -43
- braintrust/wrappers/pydantic_ai.py +2 -3
- braintrust/wrappers/test_agno.py +9 -0
- braintrust/wrappers/test_anthropic.py +156 -0
- braintrust/wrappers/test_dspy.py +117 -0
- braintrust/wrappers/test_google_genai.py +9 -0
- braintrust/wrappers/test_litellm.py +57 -55
- braintrust/wrappers/test_openai.py +253 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
- braintrust/wrappers/test_utils.py +79 -0
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/RECORD +34 -32
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
- {braintrust-0.5.0.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
braintrust/wrappers/test_dspy.py
CHANGED
|
@@ -7,6 +7,7 @@ import pytest
|
|
|
7
7
|
from braintrust import logger
|
|
8
8
|
from braintrust.test_helpers import init_test_logger
|
|
9
9
|
from braintrust.wrappers.dspy import BraintrustDSpyCallback
|
|
10
|
+
from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
|
|
10
11
|
|
|
11
12
|
PROJECT_NAME = "test-dspy-app"
|
|
12
13
|
MODEL = "openai/gpt-4o-mini"
|
|
@@ -58,3 +59,119 @@ def test_dspy_callback(memory_logger):
|
|
|
58
59
|
|
|
59
60
|
# Verify span parenting (LM span should have parent)
|
|
60
61
|
assert lm_span.get("span_parents") # LM span should have parent
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class TestPatchDSPy:
|
|
65
|
+
"""Tests for patch_dspy() / unpatch_dspy()."""
|
|
66
|
+
|
|
67
|
+
def test_patch_dspy_sets_wrapped_flag(self):
|
|
68
|
+
"""patch_dspy() should set __braintrust_wrapped__ on dspy module."""
|
|
69
|
+
result = run_in_subprocess("""
|
|
70
|
+
dspy = __import__("dspy")
|
|
71
|
+
from braintrust.wrappers.dspy import patch_dspy
|
|
72
|
+
|
|
73
|
+
assert not hasattr(dspy, "__braintrust_wrapped__")
|
|
74
|
+
patch_dspy()
|
|
75
|
+
assert hasattr(dspy, "__braintrust_wrapped__")
|
|
76
|
+
print("SUCCESS")
|
|
77
|
+
""")
|
|
78
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
79
|
+
assert "SUCCESS" in result.stdout
|
|
80
|
+
|
|
81
|
+
def test_patch_dspy_wraps_configure(self):
|
|
82
|
+
"""After patch_dspy(), dspy.configure() should auto-add BraintrustDSpyCallback."""
|
|
83
|
+
result = run_in_subprocess("""
|
|
84
|
+
from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
|
|
85
|
+
patch_dspy()
|
|
86
|
+
|
|
87
|
+
import dspy
|
|
88
|
+
|
|
89
|
+
# Configure without explicitly adding callback
|
|
90
|
+
dspy.configure(lm=None)
|
|
91
|
+
|
|
92
|
+
# Check that BraintrustDSpyCallback was auto-added
|
|
93
|
+
from dspy.dsp.utils.settings import settings
|
|
94
|
+
callbacks = settings.callbacks
|
|
95
|
+
has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
|
|
96
|
+
assert has_bt_callback, f"Expected BraintrustDSpyCallback in {callbacks}"
|
|
97
|
+
print("SUCCESS")
|
|
98
|
+
""")
|
|
99
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
100
|
+
assert "SUCCESS" in result.stdout
|
|
101
|
+
|
|
102
|
+
def test_patch_dspy_preserves_existing_callbacks(self):
|
|
103
|
+
"""patch_dspy() should preserve user-provided callbacks."""
|
|
104
|
+
result = run_in_subprocess("""
|
|
105
|
+
from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
|
|
106
|
+
patch_dspy()
|
|
107
|
+
|
|
108
|
+
import dspy
|
|
109
|
+
from dspy.utils.callback import BaseCallback
|
|
110
|
+
|
|
111
|
+
class MyCallback(BaseCallback):
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
my_callback = MyCallback()
|
|
115
|
+
dspy.configure(lm=None, callbacks=[my_callback])
|
|
116
|
+
|
|
117
|
+
from dspy.dsp.utils.settings import settings
|
|
118
|
+
callbacks = settings.callbacks
|
|
119
|
+
|
|
120
|
+
# Should have both callbacks
|
|
121
|
+
has_my_callback = any(cb is my_callback for cb in callbacks)
|
|
122
|
+
has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
|
|
123
|
+
|
|
124
|
+
assert has_my_callback, "User callback should be preserved"
|
|
125
|
+
assert has_bt_callback, "BraintrustDSpyCallback should be added"
|
|
126
|
+
print("SUCCESS")
|
|
127
|
+
""")
|
|
128
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
129
|
+
assert "SUCCESS" in result.stdout
|
|
130
|
+
|
|
131
|
+
def test_patch_dspy_does_not_duplicate_callback(self):
|
|
132
|
+
"""patch_dspy() should not add duplicate BraintrustDSpyCallback."""
|
|
133
|
+
result = run_in_subprocess("""
|
|
134
|
+
from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
|
|
135
|
+
patch_dspy()
|
|
136
|
+
|
|
137
|
+
import dspy
|
|
138
|
+
|
|
139
|
+
# User explicitly adds BraintrustDSpyCallback
|
|
140
|
+
bt_callback = BraintrustDSpyCallback()
|
|
141
|
+
dspy.configure(lm=None, callbacks=[bt_callback])
|
|
142
|
+
|
|
143
|
+
from dspy.dsp.utils.settings import settings
|
|
144
|
+
callbacks = settings.callbacks
|
|
145
|
+
|
|
146
|
+
# Should only have one BraintrustDSpyCallback
|
|
147
|
+
bt_callbacks = [cb for cb in callbacks if isinstance(cb, BraintrustDSpyCallback)]
|
|
148
|
+
assert len(bt_callbacks) == 1, f"Expected 1 BraintrustDSpyCallback, got {len(bt_callbacks)}"
|
|
149
|
+
print("SUCCESS")
|
|
150
|
+
""")
|
|
151
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
152
|
+
assert "SUCCESS" in result.stdout
|
|
153
|
+
|
|
154
|
+
def test_patch_dspy_idempotent(self):
|
|
155
|
+
"""Multiple patch_dspy() calls should be safe."""
|
|
156
|
+
result = run_in_subprocess("""
|
|
157
|
+
from braintrust.wrappers.dspy import patch_dspy
|
|
158
|
+
import dspy
|
|
159
|
+
|
|
160
|
+
patch_dspy()
|
|
161
|
+
patch_dspy() # Second call - should be no-op, not double-wrap
|
|
162
|
+
|
|
163
|
+
# Verify configure still works
|
|
164
|
+
lm = dspy.LM("openai/gpt-4o-mini")
|
|
165
|
+
dspy.configure(lm=lm)
|
|
166
|
+
print("SUCCESS")
|
|
167
|
+
""")
|
|
168
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
169
|
+
assert "SUCCESS" in result.stdout
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class TestAutoInstrumentDSPy:
|
|
173
|
+
"""Tests for auto_instrument() with DSPy."""
|
|
174
|
+
|
|
175
|
+
def test_auto_instrument_dspy(self):
|
|
176
|
+
"""Test auto_instrument patches DSPy, creates spans, and uninstrument works."""
|
|
177
|
+
verify_autoinstrument_script("test_auto_dspy.py")
|
|
@@ -6,6 +6,7 @@ import pytest
|
|
|
6
6
|
from braintrust import logger
|
|
7
7
|
from braintrust.test_helpers import init_test_logger
|
|
8
8
|
from braintrust.wrappers.google_genai import setup_genai
|
|
9
|
+
from braintrust.wrappers.test_utils import verify_autoinstrument_script
|
|
9
10
|
from google.genai import types
|
|
10
11
|
from google.genai.client import Client
|
|
11
12
|
|
|
@@ -637,3 +638,11 @@ def test_attachment_with_pydantic_model(memory_logger):
|
|
|
637
638
|
|
|
638
639
|
# Attachment should be preserved
|
|
639
640
|
assert copied["context_file"] is attachment
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
class TestAutoInstrumentGoogleGenAI:
|
|
644
|
+
"""Tests for auto_instrument() with Google GenAI."""
|
|
645
|
+
|
|
646
|
+
def test_auto_instrument_google_genai(self):
|
|
647
|
+
"""Test auto_instrument patches Google GenAI and creates spans."""
|
|
648
|
+
verify_autoinstrument_script("test_auto_google_genai.py")
|
|
@@ -6,7 +6,7 @@ import pytest
|
|
|
6
6
|
from braintrust import logger
|
|
7
7
|
from braintrust.test_helpers import assert_dict_matches, init_test_logger
|
|
8
8
|
from braintrust.wrappers.litellm import wrap_litellm
|
|
9
|
-
from braintrust.wrappers.test_utils import assert_metrics_are_valid
|
|
9
|
+
from braintrust.wrappers.test_utils import assert_metrics_are_valid, verify_autoinstrument_script
|
|
10
10
|
|
|
11
11
|
TEST_ORG_ID = "test-org-litellm-py-tracing"
|
|
12
12
|
PROJECT_NAME = "test-project-litellm-py-tracing"
|
|
@@ -697,71 +697,73 @@ async def test_litellm_async_streaming_with_break(memory_logger):
|
|
|
697
697
|
@pytest.mark.vcr
|
|
698
698
|
def test_patch_litellm_responses(memory_logger):
|
|
699
699
|
"""Test that patch_litellm() patches responses."""
|
|
700
|
-
from braintrust.wrappers.litellm import patch_litellm
|
|
700
|
+
from braintrust.wrappers.litellm import patch_litellm
|
|
701
701
|
|
|
702
702
|
assert not memory_logger.pop()
|
|
703
703
|
|
|
704
704
|
patch_litellm()
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
assert TEST_PROMPT in str(span["input"])
|
|
729
|
-
finally:
|
|
730
|
-
unpatch_litellm()
|
|
705
|
+
start = time.time()
|
|
706
|
+
# Call litellm.responses directly (not wrapped_litellm.responses)
|
|
707
|
+
response = litellm.responses(
|
|
708
|
+
model=TEST_MODEL,
|
|
709
|
+
input=TEST_PROMPT,
|
|
710
|
+
instructions="Just the number please",
|
|
711
|
+
)
|
|
712
|
+
end = time.time()
|
|
713
|
+
|
|
714
|
+
assert response
|
|
715
|
+
assert response.output
|
|
716
|
+
assert len(response.output) > 0
|
|
717
|
+
content = response.output[0].content[0].text
|
|
718
|
+
assert "24" in content or "twenty-four" in content.lower()
|
|
719
|
+
|
|
720
|
+
# Verify span was created
|
|
721
|
+
spans = memory_logger.pop()
|
|
722
|
+
assert len(spans) == 1
|
|
723
|
+
span = spans[0]
|
|
724
|
+
assert_metrics_are_valid(span["metrics"], start, end)
|
|
725
|
+
assert span["metadata"]["model"] == TEST_MODEL
|
|
726
|
+
assert span["metadata"]["provider"] == "litellm"
|
|
727
|
+
assert TEST_PROMPT in str(span["input"])
|
|
731
728
|
|
|
732
729
|
|
|
733
730
|
@pytest.mark.vcr
|
|
734
731
|
@pytest.mark.asyncio
|
|
735
732
|
async def test_patch_litellm_aresponses(memory_logger):
|
|
736
733
|
"""Test that patch_litellm() patches aresponses."""
|
|
737
|
-
from braintrust.wrappers.litellm import patch_litellm
|
|
734
|
+
from braintrust.wrappers.litellm import patch_litellm
|
|
738
735
|
|
|
739
736
|
assert not memory_logger.pop()
|
|
740
737
|
|
|
741
738
|
patch_litellm()
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
739
|
+
start = time.time()
|
|
740
|
+
# Call litellm.aresponses directly (not wrapped_litellm.aresponses)
|
|
741
|
+
response = await litellm.aresponses(
|
|
742
|
+
model=TEST_MODEL,
|
|
743
|
+
input=TEST_PROMPT,
|
|
744
|
+
instructions="Just the number please",
|
|
745
|
+
)
|
|
746
|
+
end = time.time()
|
|
747
|
+
|
|
748
|
+
assert response
|
|
749
|
+
assert response.output
|
|
750
|
+
assert len(response.output) > 0
|
|
751
|
+
content = response.output[0].content[0].text
|
|
752
|
+
assert "24" in content or "twenty-four" in content.lower()
|
|
753
|
+
|
|
754
|
+
# Verify span was created
|
|
755
|
+
spans = memory_logger.pop()
|
|
756
|
+
assert len(spans) == 1
|
|
757
|
+
span = spans[0]
|
|
758
|
+
assert_metrics_are_valid(span["metrics"], start, end)
|
|
759
|
+
assert span["metadata"]["model"] == TEST_MODEL
|
|
760
|
+
assert span["metadata"]["provider"] == "litellm"
|
|
761
|
+
assert TEST_PROMPT in str(span["input"])
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
class TestAutoInstrumentLiteLLM:
|
|
765
|
+
"""Tests for auto_instrument() with LiteLLM."""
|
|
766
|
+
|
|
767
|
+
def test_auto_instrument_litellm(self):
|
|
768
|
+
"""Test auto_instrument patches LiteLLM, creates spans, and uninstrument works."""
|
|
769
|
+
verify_autoinstrument_script("test_auto_litellm.py")
|
|
@@ -6,7 +6,7 @@ import openai
|
|
|
6
6
|
import pytest
|
|
7
7
|
from braintrust import logger, wrap_openai
|
|
8
8
|
from braintrust.test_helpers import assert_dict_matches, init_test_logger
|
|
9
|
-
from braintrust.wrappers.test_utils import assert_metrics_are_valid
|
|
9
|
+
from braintrust.wrappers.test_utils import assert_metrics_are_valid, run_in_subprocess, verify_autoinstrument_script
|
|
10
10
|
from openai import AsyncOpenAI
|
|
11
11
|
from openai._types import NOT_GIVEN
|
|
12
12
|
from pydantic import BaseModel
|
|
@@ -1681,3 +1681,255 @@ def test_braintrust_tracing_processor_trace_metadata_logging(memory_logger):
|
|
|
1681
1681
|
spans = memory_logger.pop()
|
|
1682
1682
|
root_span = spans[0]
|
|
1683
1683
|
assert root_span["metadata"]["conversation_id"] == "test-12345", "Should log trace metadata"
|
|
1684
|
+
|
|
1685
|
+
|
|
1686
|
+
class TestPatchOpenAI:
|
|
1687
|
+
"""Tests for patch_openai()."""
|
|
1688
|
+
|
|
1689
|
+
def test_patch_openai_sets_wrapped_flag(self):
|
|
1690
|
+
"""patch_openai() should set __braintrust_wrapped__ on openai module."""
|
|
1691
|
+
result = run_in_subprocess("""
|
|
1692
|
+
from braintrust.oai import patch_openai
|
|
1693
|
+
import openai
|
|
1694
|
+
|
|
1695
|
+
assert not hasattr(openai, "__braintrust_wrapped__")
|
|
1696
|
+
patch_openai()
|
|
1697
|
+
assert hasattr(openai, "__braintrust_wrapped__")
|
|
1698
|
+
print("SUCCESS")
|
|
1699
|
+
""")
|
|
1700
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1701
|
+
assert "SUCCESS" in result.stdout
|
|
1702
|
+
|
|
1703
|
+
def test_patch_openai_wraps_new_clients(self):
|
|
1704
|
+
"""After patch_openai(), new OpenAI() clients should be wrapped."""
|
|
1705
|
+
result = run_in_subprocess("""
|
|
1706
|
+
from braintrust.oai import patch_openai
|
|
1707
|
+
patch_openai()
|
|
1708
|
+
|
|
1709
|
+
import openai
|
|
1710
|
+
client = openai.OpenAI(api_key="test-key")
|
|
1711
|
+
|
|
1712
|
+
# Check that chat completions is wrapped (our wrapper adds tracing)
|
|
1713
|
+
# The wrapper replaces client.chat with a wrapped version
|
|
1714
|
+
chat_type = type(client.chat).__name__
|
|
1715
|
+
print(f"chat_type={chat_type}")
|
|
1716
|
+
print("SUCCESS")
|
|
1717
|
+
""")
|
|
1718
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1719
|
+
assert "SUCCESS" in result.stdout
|
|
1720
|
+
|
|
1721
|
+
def test_patch_openai_creates_spans(self):
|
|
1722
|
+
"""patch_openai() should create spans when making API calls."""
|
|
1723
|
+
result = run_in_subprocess("""
|
|
1724
|
+
from braintrust.oai import patch_openai
|
|
1725
|
+
from braintrust.test_helpers import init_test_logger
|
|
1726
|
+
from braintrust import logger
|
|
1727
|
+
|
|
1728
|
+
# Set up memory logger
|
|
1729
|
+
init_test_logger("test-auto")
|
|
1730
|
+
with logger._internal_with_memory_background_logger() as memory_logger:
|
|
1731
|
+
patch_openai()
|
|
1732
|
+
|
|
1733
|
+
import openai
|
|
1734
|
+
client = openai.OpenAI()
|
|
1735
|
+
|
|
1736
|
+
# Make a call within a span context
|
|
1737
|
+
import braintrust
|
|
1738
|
+
with braintrust.start_span(name="test") as span:
|
|
1739
|
+
try:
|
|
1740
|
+
# This will fail without API key, but span should still be created
|
|
1741
|
+
client.chat.completions.create(
|
|
1742
|
+
model="gpt-4o-mini",
|
|
1743
|
+
messages=[{"role": "user", "content": "hi"}],
|
|
1744
|
+
)
|
|
1745
|
+
except Exception:
|
|
1746
|
+
pass # Expected without API key
|
|
1747
|
+
|
|
1748
|
+
# Check that spans were logged
|
|
1749
|
+
spans = memory_logger.pop()
|
|
1750
|
+
# Should have at least the parent span
|
|
1751
|
+
assert len(spans) >= 1, f"Expected spans, got {spans}"
|
|
1752
|
+
print("SUCCESS")
|
|
1753
|
+
""")
|
|
1754
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1755
|
+
assert "SUCCESS" in result.stdout
|
|
1756
|
+
|
|
1757
|
+
def test_patch_openai_before_import(self):
|
|
1758
|
+
"""patch_openai() should work when called before importing openai."""
|
|
1759
|
+
result = run_in_subprocess("""
|
|
1760
|
+
from braintrust.oai import patch_openai
|
|
1761
|
+
|
|
1762
|
+
# Patch BEFORE importing openai
|
|
1763
|
+
patch_openai()
|
|
1764
|
+
|
|
1765
|
+
import openai
|
|
1766
|
+
assert hasattr(openai, "__braintrust_wrapped__")
|
|
1767
|
+
|
|
1768
|
+
client = openai.OpenAI(api_key="test-key")
|
|
1769
|
+
print("SUCCESS")
|
|
1770
|
+
""")
|
|
1771
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1772
|
+
assert "SUCCESS" in result.stdout
|
|
1773
|
+
|
|
1774
|
+
def test_patch_openai_after_import(self):
|
|
1775
|
+
"""patch_openai() should work when called after importing openai."""
|
|
1776
|
+
result = run_in_subprocess("""
|
|
1777
|
+
import openai
|
|
1778
|
+
from braintrust.oai import patch_openai
|
|
1779
|
+
|
|
1780
|
+
# Patch AFTER importing openai
|
|
1781
|
+
patch_openai()
|
|
1782
|
+
|
|
1783
|
+
assert hasattr(openai, "__braintrust_wrapped__")
|
|
1784
|
+
|
|
1785
|
+
client = openai.OpenAI(api_key="test-key")
|
|
1786
|
+
print("SUCCESS")
|
|
1787
|
+
""")
|
|
1788
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1789
|
+
assert "SUCCESS" in result.stdout
|
|
1790
|
+
|
|
1791
|
+
def test_patch_openai_idempotent(self):
|
|
1792
|
+
"""Multiple patch_openai() calls should be safe."""
|
|
1793
|
+
result = run_in_subprocess("""
|
|
1794
|
+
from braintrust.oai import patch_openai
|
|
1795
|
+
import openai
|
|
1796
|
+
|
|
1797
|
+
patch_openai()
|
|
1798
|
+
patch_openai() # Second call - should be no-op, not double-wrap
|
|
1799
|
+
|
|
1800
|
+
# Verify we can still create clients
|
|
1801
|
+
client = openai.OpenAI(api_key="test-key")
|
|
1802
|
+
assert hasattr(client, "chat")
|
|
1803
|
+
print("SUCCESS")
|
|
1804
|
+
""")
|
|
1805
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1806
|
+
assert "SUCCESS" in result.stdout
|
|
1807
|
+
|
|
1808
|
+
def test_patch_openai_chains_with_other_patches(self):
|
|
1809
|
+
"""patch_openai() should chain with other libraries that patch OpenAI."""
|
|
1810
|
+
result = run_in_subprocess("""
|
|
1811
|
+
import openai
|
|
1812
|
+
|
|
1813
|
+
# Simulate another library (like Datadog) patching OpenAI first
|
|
1814
|
+
other_library_init_called = []
|
|
1815
|
+
|
|
1816
|
+
class OtherLibraryOpenAI(openai.OpenAI):
|
|
1817
|
+
def __init__(self, *args, **kwargs):
|
|
1818
|
+
other_library_init_called.append(True)
|
|
1819
|
+
super().__init__(*args, **kwargs)
|
|
1820
|
+
|
|
1821
|
+
openai.OpenAI = OtherLibraryOpenAI
|
|
1822
|
+
|
|
1823
|
+
# Now apply our patch - should subclass OtherLibraryOpenAI
|
|
1824
|
+
from braintrust.oai import patch_openai
|
|
1825
|
+
patch_openai()
|
|
1826
|
+
|
|
1827
|
+
# Create a client - both patches should run
|
|
1828
|
+
client = openai.OpenAI(api_key="test-key")
|
|
1829
|
+
|
|
1830
|
+
# Verify other library's __init__ was called (chaining works)
|
|
1831
|
+
assert len(other_library_init_called) == 1, "Other library's patch should have run"
|
|
1832
|
+
|
|
1833
|
+
# Verify our patch was applied (client has wrapped chat)
|
|
1834
|
+
assert hasattr(client, "chat"), "Client should have chat attribute"
|
|
1835
|
+
|
|
1836
|
+
print("SUCCESS")
|
|
1837
|
+
""")
|
|
1838
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1839
|
+
assert "SUCCESS" in result.stdout
|
|
1840
|
+
|
|
1841
|
+
def test_patch_openai_chains_async_client(self):
|
|
1842
|
+
"""patch_openai() should chain with other libraries for AsyncOpenAI too."""
|
|
1843
|
+
result = run_in_subprocess("""
|
|
1844
|
+
import openai
|
|
1845
|
+
|
|
1846
|
+
# Simulate another library patching AsyncOpenAI first
|
|
1847
|
+
other_library_init_called = []
|
|
1848
|
+
|
|
1849
|
+
class OtherLibraryAsyncOpenAI(openai.AsyncOpenAI):
|
|
1850
|
+
def __init__(self, *args, **kwargs):
|
|
1851
|
+
other_library_init_called.append(True)
|
|
1852
|
+
super().__init__(*args, **kwargs)
|
|
1853
|
+
|
|
1854
|
+
openai.AsyncOpenAI = OtherLibraryAsyncOpenAI
|
|
1855
|
+
|
|
1856
|
+
# Now apply our patch
|
|
1857
|
+
from braintrust.oai import patch_openai
|
|
1858
|
+
patch_openai()
|
|
1859
|
+
|
|
1860
|
+
# Create an async client - both patches should run
|
|
1861
|
+
client = openai.AsyncOpenAI(api_key="test-key")
|
|
1862
|
+
|
|
1863
|
+
# Verify other library's __init__ was called
|
|
1864
|
+
assert len(other_library_init_called) == 1, "Other library's patch should have run"
|
|
1865
|
+
|
|
1866
|
+
# Verify our patch was applied
|
|
1867
|
+
assert hasattr(client, "chat"), "Client should have chat attribute"
|
|
1868
|
+
|
|
1869
|
+
print("SUCCESS")
|
|
1870
|
+
""")
|
|
1871
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
1872
|
+
assert "SUCCESS" in result.stdout
|
|
1873
|
+
|
|
1874
|
+
|
|
1875
|
+
class TestPatchOpenAISpans:
|
|
1876
|
+
"""VCR-based tests verifying that patch_openai() produces spans."""
|
|
1877
|
+
|
|
1878
|
+
@pytest.mark.vcr
|
|
1879
|
+
def test_patch_openai_creates_spans(self, memory_logger):
|
|
1880
|
+
"""patch_openai() should create spans when making API calls."""
|
|
1881
|
+
from braintrust.oai import patch_openai
|
|
1882
|
+
|
|
1883
|
+
assert not memory_logger.pop()
|
|
1884
|
+
|
|
1885
|
+
patch_openai()
|
|
1886
|
+
client = openai.OpenAI()
|
|
1887
|
+
response = client.chat.completions.create(
|
|
1888
|
+
model="gpt-4o-mini",
|
|
1889
|
+
messages=[{"role": "user", "content": "Say hi"}],
|
|
1890
|
+
)
|
|
1891
|
+
assert response.choices[0].message.content
|
|
1892
|
+
|
|
1893
|
+
# Verify span was created
|
|
1894
|
+
spans = memory_logger.pop()
|
|
1895
|
+
assert len(spans) == 1
|
|
1896
|
+
span = spans[0]
|
|
1897
|
+
assert span["metadata"]["provider"] == "openai"
|
|
1898
|
+
assert "gpt-4o-mini" in span["metadata"]["model"]
|
|
1899
|
+
assert span["input"]
|
|
1900
|
+
|
|
1901
|
+
|
|
1902
|
+
class TestPatchOpenAIAsyncSpans:
|
|
1903
|
+
"""VCR-based tests verifying that patch_openai() produces spans for async clients."""
|
|
1904
|
+
|
|
1905
|
+
@pytest.mark.vcr
|
|
1906
|
+
@pytest.mark.asyncio
|
|
1907
|
+
async def test_patch_openai_async_creates_spans(self, memory_logger):
|
|
1908
|
+
"""patch_openai() should create spans for async API calls."""
|
|
1909
|
+
from braintrust.oai import patch_openai
|
|
1910
|
+
|
|
1911
|
+
assert not memory_logger.pop()
|
|
1912
|
+
|
|
1913
|
+
patch_openai()
|
|
1914
|
+
client = openai.AsyncOpenAI()
|
|
1915
|
+
response = await client.chat.completions.create(
|
|
1916
|
+
model="gpt-4o-mini",
|
|
1917
|
+
messages=[{"role": "user", "content": "Say hi async"}],
|
|
1918
|
+
)
|
|
1919
|
+
assert response.choices[0].message.content
|
|
1920
|
+
|
|
1921
|
+
# Verify span was created
|
|
1922
|
+
spans = memory_logger.pop()
|
|
1923
|
+
assert len(spans) == 1
|
|
1924
|
+
span = spans[0]
|
|
1925
|
+
assert span["metadata"]["provider"] == "openai"
|
|
1926
|
+
assert "gpt-4o-mini" in span["metadata"]["model"]
|
|
1927
|
+
assert span["input"]
|
|
1928
|
+
|
|
1929
|
+
|
|
1930
|
+
class TestAutoInstrumentOpenAI:
|
|
1931
|
+
"""Tests for auto_instrument() with OpenAI."""
|
|
1932
|
+
|
|
1933
|
+
def test_auto_instrument_openai(self):
|
|
1934
|
+
"""Test auto_instrument patches OpenAI, creates spans, and uninstrument works."""
|
|
1935
|
+
verify_autoinstrument_script("test_auto_openai.py")
|
|
@@ -9,6 +9,7 @@ import pytest
|
|
|
9
9
|
from braintrust import logger, setup_pydantic_ai, traced
|
|
10
10
|
from braintrust.span_types import SpanTypeAttribute
|
|
11
11
|
from braintrust.test_helpers import init_test_logger
|
|
12
|
+
from braintrust.wrappers.test_utils import verify_autoinstrument_script
|
|
12
13
|
from pydantic import BaseModel
|
|
13
14
|
from pydantic_ai import Agent, ModelSettings
|
|
14
15
|
from pydantic_ai.messages import ModelRequest, UserPromptPart
|
|
@@ -2572,3 +2573,11 @@ async def test_attachment_in_result_data(memory_logger):
|
|
|
2572
2573
|
copied = bt_safe_deep_copy(result_data)
|
|
2573
2574
|
assert copied["output_file"] is ext_attachment
|
|
2574
2575
|
assert copied["success"] is True
|
|
2576
|
+
|
|
2577
|
+
|
|
2578
|
+
class TestAutoInstrumentPydanticAI:
|
|
2579
|
+
"""Tests for auto_instrument() with Pydantic AI."""
|
|
2580
|
+
|
|
2581
|
+
def test_auto_instrument_pydantic_ai(self):
|
|
2582
|
+
"""Test auto_instrument patches Pydantic AI and creates spans."""
|
|
2583
|
+
verify_autoinstrument_script("test_auto_pydantic_ai.py")
|
|
@@ -1,3 +1,59 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
import textwrap
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import vcr
|
|
9
|
+
from braintrust import logger
|
|
10
|
+
from braintrust.conftest import get_vcr_config
|
|
11
|
+
from braintrust.test_helpers import init_test_logger
|
|
12
|
+
|
|
13
|
+
# Source directory paths (resolved to handle installed vs source locations)
|
|
14
|
+
_SOURCE_DIR = Path(__file__).resolve().parent
|
|
15
|
+
AUTO_TEST_SCRIPTS_DIR = _SOURCE_DIR / "auto_test_scripts"
|
|
16
|
+
|
|
17
|
+
# Cassettes dir can be overridden via env var for subprocess tests
|
|
18
|
+
CASSETTES_DIR = Path(os.environ.get("BRAINTRUST_CASSETTES_DIR", _SOURCE_DIR / "cassettes"))
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_in_subprocess(
|
|
22
|
+
code: str, timeout: int = 30, env: dict[str, str] | None = None
|
|
23
|
+
) -> subprocess.CompletedProcess:
|
|
24
|
+
"""Run Python code in a fresh subprocess."""
|
|
25
|
+
run_env = os.environ.copy()
|
|
26
|
+
if env:
|
|
27
|
+
run_env.update(env)
|
|
28
|
+
return subprocess.run(
|
|
29
|
+
[sys.executable, "-c", textwrap.dedent(code)],
|
|
30
|
+
capture_output=True,
|
|
31
|
+
text=True,
|
|
32
|
+
timeout=timeout,
|
|
33
|
+
env=run_env,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def verify_autoinstrument_script(script_name: str, timeout: int = 30) -> subprocess.CompletedProcess:
|
|
38
|
+
"""Run a test script from the auto_test_scripts directory.
|
|
39
|
+
|
|
40
|
+
Raises AssertionError if the script exits with non-zero code.
|
|
41
|
+
"""
|
|
42
|
+
script_path = AUTO_TEST_SCRIPTS_DIR / script_name
|
|
43
|
+
# Pass cassettes dir to subprocess since it may use installed package
|
|
44
|
+
env = os.environ.copy()
|
|
45
|
+
env["BRAINTRUST_CASSETTES_DIR"] = str(_SOURCE_DIR / "cassettes")
|
|
46
|
+
result = subprocess.run(
|
|
47
|
+
[sys.executable, str(script_path)],
|
|
48
|
+
capture_output=True,
|
|
49
|
+
text=True,
|
|
50
|
+
timeout=timeout,
|
|
51
|
+
env=env,
|
|
52
|
+
)
|
|
53
|
+
assert result.returncode == 0, f"Script {script_name} failed:\n{result.stderr}"
|
|
54
|
+
return result
|
|
55
|
+
|
|
56
|
+
|
|
1
57
|
def assert_metrics_are_valid(metrics, start=None, end=None):
|
|
2
58
|
assert metrics
|
|
3
59
|
# assert 0 < metrics["time_to_first_token"]
|
|
@@ -10,3 +66,26 @@ def assert_metrics_are_valid(metrics, start=None, end=None):
|
|
|
10
66
|
assert start <= metrics["start"] <= metrics["end"] <= end
|
|
11
67
|
else:
|
|
12
68
|
assert metrics["start"] <= metrics["end"]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@contextmanager
|
|
72
|
+
def autoinstrument_test_context(cassette_name: str):
|
|
73
|
+
"""Context manager for auto_instrument tests.
|
|
74
|
+
|
|
75
|
+
Sets up VCR and memory_logger, yields memory_logger for direct use.
|
|
76
|
+
|
|
77
|
+
Usage:
|
|
78
|
+
with autoinstrument_test_context("test_auto_openai") as memory_logger:
|
|
79
|
+
# make API call
|
|
80
|
+
spans = memory_logger.pop()
|
|
81
|
+
"""
|
|
82
|
+
cassette_path = CASSETTES_DIR / f"{cassette_name}.yaml"
|
|
83
|
+
|
|
84
|
+
init_test_logger("test-auto-instrument")
|
|
85
|
+
|
|
86
|
+
with logger._internal_with_memory_background_logger() as memory_logger:
|
|
87
|
+
memory_logger.pop() # Clear any prior spans
|
|
88
|
+
|
|
89
|
+
my_vcr = vcr.VCR(**get_vcr_config())
|
|
90
|
+
with my_vcr.use_cassette(str(cassette_path)):
|
|
91
|
+
yield memory_logger
|