braintrust 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. braintrust/__init__.py +3 -0
  2. braintrust/_generated_types.py +106 -6
  3. braintrust/auto.py +179 -0
  4. braintrust/conftest.py +23 -4
  5. braintrust/framework.py +113 -3
  6. braintrust/functions/invoke.py +3 -1
  7. braintrust/functions/test_invoke.py +61 -0
  8. braintrust/generated_types.py +7 -1
  9. braintrust/logger.py +127 -45
  10. braintrust/oai.py +51 -0
  11. braintrust/span_cache.py +337 -0
  12. braintrust/span_identifier_v3.py +21 -0
  13. braintrust/test_bt_json.py +0 -5
  14. braintrust/test_framework.py +37 -0
  15. braintrust/test_http.py +444 -0
  16. braintrust/test_logger.py +295 -5
  17. braintrust/test_span_cache.py +344 -0
  18. braintrust/test_trace.py +267 -0
  19. braintrust/test_util.py +58 -1
  20. braintrust/trace.py +385 -0
  21. braintrust/util.py +20 -0
  22. braintrust/version.py +2 -2
  23. braintrust/wrappers/agno/__init__.py +2 -3
  24. braintrust/wrappers/anthropic.py +64 -0
  25. braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
  26. braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
  27. braintrust/wrappers/claude_agent_sdk/test_wrapper.py +115 -0
  28. braintrust/wrappers/dspy.py +52 -1
  29. braintrust/wrappers/google_genai/__init__.py +9 -6
  30. braintrust/wrappers/litellm.py +6 -43
  31. braintrust/wrappers/pydantic_ai.py +2 -3
  32. braintrust/wrappers/test_agno.py +9 -0
  33. braintrust/wrappers/test_anthropic.py +156 -0
  34. braintrust/wrappers/test_dspy.py +117 -0
  35. braintrust/wrappers/test_google_genai.py +9 -0
  36. braintrust/wrappers/test_litellm.py +57 -55
  37. braintrust/wrappers/test_openai.py +253 -1
  38. braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
  39. braintrust/wrappers/test_utils.py +79 -0
  40. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
  41. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/RECORD +44 -37
  42. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
  43. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
  44. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,267 @@
1
+ """Tests for Trace functionality."""
2
+
3
+ import pytest
4
+ from braintrust.trace import CachedSpanFetcher, SpanData
5
+
6
+
7
+ # Helper to create mock spans
8
+ def make_span(span_id: str, span_type: str, **extra) -> SpanData:
9
+ return SpanData(
10
+ span_id=span_id,
11
+ input={"text": f"input-{span_id}"},
12
+ output={"text": f"output-{span_id}"},
13
+ span_attributes={"type": span_type},
14
+ **extra,
15
+ )
16
+
17
+
18
+ class TestCachedSpanFetcher:
19
+ """Test CachedSpanFetcher caching behavior."""
20
+
21
+ @pytest.mark.asyncio
22
+ async def test_fetch_all_spans_without_filter(self):
23
+ """Test fetching all spans when no filter specified."""
24
+ mock_spans = [
25
+ make_span("span-1", "llm"),
26
+ make_span("span-2", "function"),
27
+ make_span("span-3", "llm"),
28
+ ]
29
+
30
+ call_count = 0
31
+
32
+ async def fetch_fn(span_type):
33
+ nonlocal call_count
34
+ call_count += 1
35
+ return mock_spans
36
+
37
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
38
+ result = await fetcher.get_spans()
39
+
40
+ assert call_count == 1
41
+ assert len(result) == 3
42
+ assert {s.span_id for s in result} == {"span-1", "span-2", "span-3"}
43
+
44
+ @pytest.mark.asyncio
45
+ async def test_fetch_specific_span_types(self):
46
+ """Test fetching specific span types when filter specified."""
47
+ llm_spans = [make_span("span-1", "llm"), make_span("span-2", "llm")]
48
+
49
+ call_count = 0
50
+
51
+ async def fetch_fn(span_type):
52
+ nonlocal call_count
53
+ call_count += 1
54
+ assert span_type == ["llm"]
55
+ return llm_spans
56
+
57
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
58
+ result = await fetcher.get_spans(span_type=["llm"])
59
+
60
+ assert call_count == 1
61
+ assert len(result) == 2
62
+
63
+ @pytest.mark.asyncio
64
+ async def test_return_cached_spans_after_fetching_all(self):
65
+ """Test that cached spans are returned without re-fetching after fetching all."""
66
+ mock_spans = [
67
+ make_span("span-1", "llm"),
68
+ make_span("span-2", "function"),
69
+ ]
70
+
71
+ call_count = 0
72
+
73
+ async def fetch_fn(span_type):
74
+ nonlocal call_count
75
+ call_count += 1
76
+ return mock_spans
77
+
78
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
79
+
80
+ # First call - fetches
81
+ await fetcher.get_spans()
82
+ assert call_count == 1
83
+
84
+ # Second call - should use cache
85
+ result = await fetcher.get_spans()
86
+ assert call_count == 1 # Still 1
87
+ assert len(result) == 2
88
+
89
+ @pytest.mark.asyncio
90
+ async def test_return_cached_spans_for_previously_fetched_types(self):
91
+ """Test that previously fetched types are returned from cache."""
92
+ llm_spans = [make_span("span-1", "llm"), make_span("span-2", "llm")]
93
+
94
+ call_count = 0
95
+
96
+ async def fetch_fn(span_type):
97
+ nonlocal call_count
98
+ call_count += 1
99
+ return llm_spans
100
+
101
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
102
+
103
+ # First call - fetches llm spans
104
+ await fetcher.get_spans(span_type=["llm"])
105
+ assert call_count == 1
106
+
107
+ # Second call for same type - should use cache
108
+ result = await fetcher.get_spans(span_type=["llm"])
109
+ assert call_count == 1 # Still 1
110
+ assert len(result) == 2
111
+
112
+ @pytest.mark.asyncio
113
+ async def test_only_fetch_missing_span_types(self):
114
+ """Test that only missing span types are fetched."""
115
+ llm_spans = [make_span("span-1", "llm")]
116
+ function_spans = [make_span("span-2", "function")]
117
+
118
+ call_count = 0
119
+
120
+ async def fetch_fn(span_type):
121
+ nonlocal call_count
122
+ call_count += 1
123
+ if span_type == ["llm"]:
124
+ return llm_spans
125
+ elif span_type == ["function"]:
126
+ return function_spans
127
+ return []
128
+
129
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
130
+
131
+ # First call - fetches llm spans
132
+ await fetcher.get_spans(span_type=["llm"])
133
+ assert call_count == 1
134
+
135
+ # Second call for both types - should only fetch function
136
+ result = await fetcher.get_spans(span_type=["llm", "function"])
137
+ assert call_count == 2
138
+ assert len(result) == 2
139
+
140
+ @pytest.mark.asyncio
141
+ async def test_no_refetch_after_fetching_all_spans(self):
142
+ """Test that no re-fetching occurs after fetching all spans."""
143
+ all_spans = [
144
+ make_span("span-1", "llm"),
145
+ make_span("span-2", "function"),
146
+ make_span("span-3", "tool"),
147
+ ]
148
+
149
+ call_count = 0
150
+
151
+ async def fetch_fn(span_type):
152
+ nonlocal call_count
153
+ call_count += 1
154
+ return all_spans
155
+
156
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
157
+
158
+ # Fetch all spans
159
+ await fetcher.get_spans()
160
+ assert call_count == 1
161
+
162
+ # Subsequent filtered calls should use cache
163
+ llm_result = await fetcher.get_spans(span_type=["llm"])
164
+ assert call_count == 1 # Still 1
165
+ assert len(llm_result) == 1
166
+ assert llm_result[0].span_id == "span-1"
167
+
168
+ function_result = await fetcher.get_spans(span_type=["function"])
169
+ assert call_count == 1 # Still 1
170
+ assert len(function_result) == 1
171
+ assert function_result[0].span_id == "span-2"
172
+
173
+ @pytest.mark.asyncio
174
+ async def test_filter_by_multiple_span_types_from_cache(self):
175
+ """Test filtering by multiple span types from cache."""
176
+ all_spans = [
177
+ make_span("span-1", "llm"),
178
+ make_span("span-2", "function"),
179
+ make_span("span-3", "tool"),
180
+ make_span("span-4", "llm"),
181
+ ]
182
+
183
+ async def fetch_fn(span_type):
184
+ return all_spans
185
+
186
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
187
+
188
+ # Fetch all first
189
+ await fetcher.get_spans()
190
+
191
+ # Filter for llm and tool
192
+ result = await fetcher.get_spans(span_type=["llm", "tool"])
193
+ assert len(result) == 3
194
+ assert {s.span_id for s in result} == {"span-1", "span-3", "span-4"}
195
+
196
+ @pytest.mark.asyncio
197
+ async def test_return_empty_for_nonexistent_span_type(self):
198
+ """Test that empty array is returned for non-existent span type."""
199
+ all_spans = [make_span("span-1", "llm")]
200
+
201
+ async def fetch_fn(span_type):
202
+ return all_spans
203
+
204
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
205
+
206
+ # Fetch all first
207
+ await fetcher.get_spans()
208
+
209
+ # Query for non-existent type
210
+ result = await fetcher.get_spans(span_type=["nonexistent"])
211
+ assert len(result) == 0
212
+
213
+ @pytest.mark.asyncio
214
+ async def test_handle_spans_with_no_type(self):
215
+ """Test handling spans without type (empty string type)."""
216
+ spans = [
217
+ make_span("span-1", "llm"),
218
+ SpanData(span_id="span-2", input={}, span_attributes={}), # No type
219
+ SpanData(span_id="span-3", input={}), # No span_attributes
220
+ ]
221
+
222
+ async def fetch_fn(span_type):
223
+ return spans
224
+
225
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
226
+
227
+ # Fetch all
228
+ result = await fetcher.get_spans()
229
+ assert len(result) == 3
230
+
231
+ # Spans without type go into "" bucket
232
+ no_type_result = await fetcher.get_spans(span_type=[""])
233
+ assert len(no_type_result) == 2
234
+
235
+ @pytest.mark.asyncio
236
+ async def test_handle_empty_results(self):
237
+ """Test handling empty results."""
238
+
239
+ async def fetch_fn(span_type):
240
+ return []
241
+
242
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
243
+
244
+ result = await fetcher.get_spans()
245
+ assert len(result) == 0
246
+
247
+ # Should still mark as fetched
248
+ await fetcher.get_spans(span_type=["llm"])
249
+ # No additional assertions, just making sure it doesn't crash
250
+
251
+ @pytest.mark.asyncio
252
+ async def test_handle_empty_span_type_array(self):
253
+ """Test that empty spanType array is handled same as undefined."""
254
+ mock_spans = [make_span("span-1", "llm")]
255
+
256
+ call_args = []
257
+
258
+ async def fetch_fn(span_type):
259
+ call_args.append(span_type)
260
+ return mock_spans
261
+
262
+ fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
263
+
264
+ result = await fetcher.get_spans(span_type=[])
265
+
266
+ assert call_args[0] is None or call_args[0] == []
267
+ assert len(result) == 1
braintrust/test_util.py CHANGED
@@ -1,9 +1,66 @@
1
+ import os
1
2
  import unittest
2
3
  from typing import List
3
4
 
4
5
  import pytest
5
6
 
6
- from .util import LazyValue, mask_api_key, merge_dicts_with_paths
7
+ from .util import LazyValue, mask_api_key, merge_dicts_with_paths, parse_env_var_float
8
+
9
+
10
+ class TestParseEnvVarFloat:
11
+ """Tests for parse_env_var_float helper."""
12
+
13
+ def test_returns_default_when_env_not_set(self):
14
+ assert parse_env_var_float("NONEXISTENT_VAR_12345", 42.0) == 42.0
15
+
16
+ def test_parses_valid_float(self):
17
+ os.environ["TEST_FLOAT"] = "123.45"
18
+ try:
19
+ assert parse_env_var_float("TEST_FLOAT", 0.0) == 123.45
20
+ finally:
21
+ del os.environ["TEST_FLOAT"]
22
+
23
+ def test_returns_default_for_nan(self):
24
+ os.environ["TEST_FLOAT"] = "nan"
25
+ try:
26
+ assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0
27
+ finally:
28
+ del os.environ["TEST_FLOAT"]
29
+
30
+ def test_returns_default_for_inf(self):
31
+ os.environ["TEST_FLOAT"] = "inf"
32
+ try:
33
+ assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0
34
+ finally:
35
+ del os.environ["TEST_FLOAT"]
36
+
37
+ def test_returns_default_for_negative_inf(self):
38
+ os.environ["TEST_FLOAT"] = "-inf"
39
+ try:
40
+ assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0
41
+ finally:
42
+ del os.environ["TEST_FLOAT"]
43
+
44
+ def test_returns_default_for_empty_string(self):
45
+ os.environ["TEST_FLOAT"] = ""
46
+ try:
47
+ assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0
48
+ finally:
49
+ del os.environ["TEST_FLOAT"]
50
+
51
+ def test_returns_default_for_invalid_string(self):
52
+ os.environ["TEST_FLOAT"] = "not_a_number"
53
+ try:
54
+ assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0
55
+ finally:
56
+ del os.environ["TEST_FLOAT"]
57
+
58
+ def test_allows_negative_values(self):
59
+ os.environ["TEST_FLOAT"] = "-5.5"
60
+ try:
61
+ assert parse_env_var_float("TEST_FLOAT", 0.0) == -5.5
62
+ finally:
63
+ del os.environ["TEST_FLOAT"]
7
64
 
8
65
 
9
66
  class TestLazyValue(unittest.TestCase):