braintrust 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/__init__.py +3 -0
- braintrust/_generated_types.py +106 -6
- braintrust/auto.py +179 -0
- braintrust/conftest.py +23 -4
- braintrust/framework.py +113 -3
- braintrust/functions/invoke.py +3 -1
- braintrust/functions/test_invoke.py +61 -0
- braintrust/generated_types.py +7 -1
- braintrust/logger.py +127 -45
- braintrust/oai.py +51 -0
- braintrust/span_cache.py +337 -0
- braintrust/span_identifier_v3.py +21 -0
- braintrust/test_bt_json.py +0 -5
- braintrust/test_framework.py +37 -0
- braintrust/test_http.py +444 -0
- braintrust/test_logger.py +295 -5
- braintrust/test_span_cache.py +344 -0
- braintrust/test_trace.py +267 -0
- braintrust/test_util.py +58 -1
- braintrust/trace.py +385 -0
- braintrust/util.py +20 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/agno/__init__.py +2 -3
- braintrust/wrappers/anthropic.py +64 -0
- braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +115 -0
- braintrust/wrappers/dspy.py +52 -1
- braintrust/wrappers/google_genai/__init__.py +9 -6
- braintrust/wrappers/litellm.py +6 -43
- braintrust/wrappers/pydantic_ai.py +2 -3
- braintrust/wrappers/test_agno.py +9 -0
- braintrust/wrappers/test_anthropic.py +156 -0
- braintrust/wrappers/test_dspy.py +117 -0
- braintrust/wrappers/test_google_genai.py +9 -0
- braintrust/wrappers/test_litellm.py +57 -55
- braintrust/wrappers/test_openai.py +253 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
- braintrust/wrappers/test_utils.py +79 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/RECORD +44 -37
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
braintrust/test_trace.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""Tests for Trace functionality."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from braintrust.trace import CachedSpanFetcher, SpanData
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Helper to create mock spans
|
|
8
|
+
def make_span(span_id: str, span_type: str, **extra) -> SpanData:
    """Create a mock ``SpanData`` for the tests in this module.

    Args:
        span_id: Identifier for the span; it is also embedded in the
            generated ``input`` and ``output`` payload text.
        span_type: Value stored under ``span_attributes["type"]``.
        **extra: Additional keyword arguments forwarded to ``SpanData``.

    Returns:
        The constructed ``SpanData`` instance.
    """
    base_fields = {
        "span_id": span_id,
        "input": {"text": f"input-{span_id}"},
        "output": {"text": f"output-{span_id}"},
        "span_attributes": {"type": span_type},
    }
    # Two separate unpackings (rather than merging the dicts) so that a key
    # repeated in ``extra`` still raises TypeError instead of silently winning.
    return SpanData(**base_fields, **extra)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestCachedSpanFetcher:
    """Exercise the caching behaviour of ``CachedSpanFetcher.get_spans``."""

    @staticmethod
    def _fetcher_returning(spans, calls=None):
        """Build a ``CachedSpanFetcher`` whose fetch function always returns ``spans``.

        Args:
            spans: The list handed back (same object) on every fetch.
            calls: Optional list; when given, the ``span_type`` argument of
                each fetch is appended to it so tests can count fetches.

        Returns:
            A ``CachedSpanFetcher`` wired to the stub fetch function.
        """

        async def fetch_fn(span_type):
            if calls is not None:
                calls.append(span_type)
            return spans

        return CachedSpanFetcher(fetch_fn=fetch_fn)

    @pytest.mark.asyncio
    async def test_fetch_all_spans_without_filter(self):
        """An unfiltered request fetches once and returns every span."""
        calls = []
        fetcher = self._fetcher_returning(
            [
                make_span("span-1", "llm"),
                make_span("span-2", "function"),
                make_span("span-3", "llm"),
            ],
            calls,
        )

        spans = await fetcher.get_spans()

        assert len(calls) == 1
        assert len(spans) == 3
        assert {span.span_id for span in spans} == {"span-1", "span-2", "span-3"}

    @pytest.mark.asyncio
    async def test_fetch_specific_span_types(self):
        """A filtered request forwards the requested types to the fetch function."""
        llm_spans = [make_span("span-1", "llm"), make_span("span-2", "llm")]
        calls = []

        async def fetch_fn(span_type):
            calls.append(span_type)
            # Checked inside the fetch function so a wrong filter fails at call time.
            assert span_type == ["llm"]
            return llm_spans

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
        spans = await fetcher.get_spans(span_type=["llm"])

        assert len(calls) == 1
        assert len(spans) == 2

    @pytest.mark.asyncio
    async def test_return_cached_spans_after_fetching_all(self):
        """A second unfiltered request is answered without another fetch."""
        calls = []
        fetcher = self._fetcher_returning(
            [make_span("span-1", "llm"), make_span("span-2", "function")],
            calls,
        )

        # Initial request triggers the fetch.
        await fetcher.get_spans()
        assert len(calls) == 1

        # Repeat request must not fetch again.
        spans = await fetcher.get_spans()
        assert len(calls) == 1
        assert len(spans) == 2

    @pytest.mark.asyncio
    async def test_return_cached_spans_for_previously_fetched_types(self):
        """Requesting an already-fetched type again does not re-fetch."""
        calls = []
        fetcher = self._fetcher_returning(
            [make_span("span-1", "llm"), make_span("span-2", "llm")],
            calls,
        )

        # Initial request for llm spans triggers the fetch.
        await fetcher.get_spans(span_type=["llm"])
        assert len(calls) == 1

        # Same type again: expected to come from the cache.
        spans = await fetcher.get_spans(span_type=["llm"])
        assert len(calls) == 1
        assert len(spans) == 2

    @pytest.mark.asyncio
    async def test_only_fetch_missing_span_types(self):
        """Only the span types not fetched earlier cause a new fetch."""
        llm_spans = [make_span("span-1", "llm")]
        function_spans = [make_span("span-2", "function")]
        calls = []

        async def fetch_fn(span_type):
            calls.append(span_type)
            if span_type == ["function"]:
                return function_spans
            return llm_spans if span_type == ["llm"] else []

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Initial request covers llm spans only.
        await fetcher.get_spans(span_type=["llm"])
        assert len(calls) == 1

        # Asking for both types should add exactly one fetch (for "function").
        spans = await fetcher.get_spans(span_type=["llm", "function"])
        assert len(calls) == 2
        assert len(spans) == 2

    @pytest.mark.asyncio
    async def test_no_refetch_after_fetching_all_spans(self):
        """After an unfiltered fetch, filtered requests are served from cache."""
        calls = []
        fetcher = self._fetcher_returning(
            [
                make_span("span-1", "llm"),
                make_span("span-2", "function"),
                make_span("span-3", "tool"),
            ],
            calls,
        )

        # Unfiltered request fetches everything.
        await fetcher.get_spans()
        assert len(calls) == 1

        # Filtered follow-ups must not fetch again.
        llm_only = await fetcher.get_spans(span_type=["llm"])
        assert len(calls) == 1
        assert [span.span_id for span in llm_only] == ["span-1"]

        function_only = await fetcher.get_spans(span_type=["function"])
        assert len(calls) == 1
        assert [span.span_id for span in function_only] == ["span-2"]

    @pytest.mark.asyncio
    async def test_filter_by_multiple_span_types_from_cache(self):
        """Several span types can be requested together from the cache."""
        fetcher = self._fetcher_returning(
            [
                make_span("span-1", "llm"),
                make_span("span-2", "function"),
                make_span("span-3", "tool"),
                make_span("span-4", "llm"),
            ]
        )

        # Populate the cache with an unfiltered request.
        await fetcher.get_spans()

        # Then ask for the llm and tool spans.
        spans = await fetcher.get_spans(span_type=["llm", "tool"])
        assert len(spans) == 3
        assert {span.span_id for span in spans} == {"span-1", "span-3", "span-4"}

    @pytest.mark.asyncio
    async def test_return_empty_for_nonexistent_span_type(self):
        """A span type with no matching spans yields an empty result."""
        fetcher = self._fetcher_returning([make_span("span-1", "llm")])

        # Populate the cache with an unfiltered request.
        await fetcher.get_spans()

        # Ask for a type that no span carries.
        spans = await fetcher.get_spans(span_type=["nonexistent"])
        assert len(spans) == 0

    @pytest.mark.asyncio
    async def test_handle_spans_with_no_type(self):
        """Spans lacking a type are retrievable via the empty-string type."""
        fetcher = self._fetcher_returning(
            [
                make_span("span-1", "llm"),
                SpanData(span_id="span-2", input={}, span_attributes={}),  # no "type" key
                SpanData(span_id="span-3", input={}),  # no span_attributes at all
            ]
        )

        # Unfiltered request returns all three spans.
        everything = await fetcher.get_spans()
        assert len(everything) == 3

        # The two untyped spans are expected under the "" type.
        untyped = await fetcher.get_spans(span_type=[""])
        assert len(untyped) == 2

    @pytest.mark.asyncio
    async def test_handle_empty_results(self):
        """An empty fetch result is returned as-is and later calls do not crash."""

        async def fetch_fn(span_type):
            # Fresh list on every call, as in a backend with no spans.
            return []

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        spans = await fetcher.get_spans()
        assert len(spans) == 0

        # Smoke check only: a filtered follow-up request must not raise.
        await fetcher.get_spans(span_type=["llm"])

    @pytest.mark.asyncio
    async def test_handle_empty_span_type_array(self):
        """An empty ``span_type`` list behaves like an unfiltered request."""
        calls = []
        fetcher = self._fetcher_returning([make_span("span-1", "llm")], calls)

        spans = await fetcher.get_spans(span_type=[])

        first_arg = calls[0]
        assert first_arg is None or first_arg == []
        assert len(spans) == 1
|
braintrust/test_util.py
CHANGED
|
@@ -1,9 +1,66 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import unittest
|
|
2
3
|
from typing import List
|
|
3
4
|
|
|
4
5
|
import pytest
|
|
5
6
|
|
|
6
|
-
from .util import LazyValue, mask_api_key, merge_dicts_with_paths
|
|
7
|
+
from .util import LazyValue, mask_api_key, merge_dicts_with_paths, parse_env_var_float
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestParseEnvVarFloat:
    """Tests for the ``parse_env_var_float`` helper.

    Environment variables are changed through pytest's ``monkeypatch``
    fixture rather than by writing to ``os.environ`` directly, so any value
    the variable had before the test is restored afterwards (a manual
    ``del os.environ[...]`` would discard it).
    """

    def test_returns_default_when_env_not_set(self, monkeypatch):
        """The default is returned when the variable is absent."""
        # Guarantee the variable is unset even if the host environment defines it.
        monkeypatch.delenv("NONEXISTENT_VAR_12345", raising=False)
        assert parse_env_var_float("NONEXISTENT_VAR_12345", 42.0) == 42.0

    def test_parses_valid_float(self, monkeypatch):
        """A well-formed decimal string is parsed to its float value."""
        monkeypatch.setenv("TEST_FLOAT", "123.45")
        assert parse_env_var_float("TEST_FLOAT", 0.0) == 123.45

    def test_returns_default_for_nan(self, monkeypatch):
        """``nan`` is rejected in favour of the default."""
        monkeypatch.setenv("TEST_FLOAT", "nan")
        assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0

    def test_returns_default_for_inf(self, monkeypatch):
        """``inf`` is rejected in favour of the default."""
        monkeypatch.setenv("TEST_FLOAT", "inf")
        assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0

    def test_returns_default_for_negative_inf(self, monkeypatch):
        """``-inf`` is rejected in favour of the default."""
        monkeypatch.setenv("TEST_FLOAT", "-inf")
        assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0

    def test_returns_default_for_empty_string(self, monkeypatch):
        """An empty value falls back to the default."""
        monkeypatch.setenv("TEST_FLOAT", "")
        assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0

    def test_returns_default_for_invalid_string(self, monkeypatch):
        """A non-numeric value falls back to the default."""
        monkeypatch.setenv("TEST_FLOAT", "not_a_number")
        assert parse_env_var_float("TEST_FLOAT", 99.0) == 99.0

    def test_allows_negative_values(self, monkeypatch):
        """Finite negative numbers are accepted."""
        monkeypatch.setenv("TEST_FLOAT", "-5.5")
        assert parse_env_var_float("TEST_FLOAT", 0.0) == -5.5
|
|
7
64
|
|
|
8
65
|
|
|
9
66
|
class TestLazyValue(unittest.TestCase):
|