braintrust 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +106 -6
- braintrust/framework.py +98 -1
- braintrust/functions/invoke.py +3 -1
- braintrust/functions/test_invoke.py +61 -0
- braintrust/generated_types.py +7 -1
- braintrust/logger.py +78 -32
- braintrust/span_cache.py +337 -0
- braintrust/span_identifier_v3.py +21 -0
- braintrust/test_logger.py +116 -0
- braintrust/test_span_cache.py +344 -0
- braintrust/test_trace.py +267 -0
- braintrust/trace.py +385 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +106 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.0.dist-info}/METADATA +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.0.dist-info}/RECORD +20 -15
- {braintrust-0.4.3.dist-info → braintrust-0.5.0.dist-info}/WHEEL +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.0.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""Tests for SpanCache (disk-based cache)."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from braintrust.span_cache import CachedSpan, SpanCache
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_span_cache_write_and_read():
    """Store two spans under one root span id and read both of them back.

    The cache is started explicitly because it is disabled until ``start()``
    is called. Cleanup lives in ``finally`` so that a failing assertion does
    not leave the started cache behind for later tests.
    """
    cache = SpanCache()
    cache.start()  # Start for testing (cache is disabled by default)
    try:
        root_span_id = "root-123"
        span1 = CachedSpan(
            span_id="span-1",
            input={"text": "hello"},
            output={"response": "world"},
        )
        span2 = CachedSpan(
            span_id="span-2",
            input={"text": "foo"},
            output={"response": "bar"},
        )

        cache.queue_write(root_span_id, span1.span_id, span1)
        cache.queue_write(root_span_id, span2.span_id, span2)

        spans = cache.get_by_root_span_id(root_span_id)
        assert spans is not None
        assert len(spans) == 2

        # Order of the returned spans is not asserted, only membership.
        span_ids = {s.span_id for s in spans}
        assert "span-1" in span_ids
        assert "span-2" in span_ids
    finally:
        cache.stop()
        cache.dispose()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_span_cache_return_none_for_unknown():
    """Looking up a root span id that was never written returns ``None``.

    Cleanup lives in ``finally`` so the started cache is stopped and disposed
    even when the assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        spans = cache.get_by_root_span_id("nonexistent")
        assert spans is None
    finally:
        cache.stop()
        cache.dispose()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_span_cache_merge_on_duplicate_writes():
    """Two writes to the same span id are merged into a single cached span.

    The first write carries only ``input`` and the second only ``output``;
    the span read back must expose both. Cleanup lives in ``finally`` so the
    started cache is released even when an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        root_span_id = "root-123"
        span_id = "span-1"

        cache.queue_write(
            root_span_id,
            span_id,
            CachedSpan(span_id=span_id, input={"text": "hello"}),
        )
        cache.queue_write(
            root_span_id,
            span_id,
            CachedSpan(span_id=span_id, output={"response": "world"}),
        )

        spans = cache.get_by_root_span_id(root_span_id)
        assert spans is not None
        assert len(spans) == 1
        assert spans[0].span_id == span_id
        assert spans[0].input == {"text": "hello"}
        assert spans[0].output == {"response": "world"}
    finally:
        cache.stop()
        cache.dispose()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_span_cache_merge_metadata():
    """Metadata dicts from successive writes to one span id are combined.

    Each write contributes a different key; the cached span must contain
    both. The length check guards the ``spans[0]`` access below, matching the
    duplicate-write test. Cleanup lives in ``finally`` so the started cache
    is released even when an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        root_span_id = "root-123"
        span_id = "span-1"

        cache.queue_write(
            root_span_id,
            span_id,
            CachedSpan(span_id=span_id, metadata={"key1": "value1"}),
        )
        cache.queue_write(
            root_span_id,
            span_id,
            CachedSpan(span_id=span_id, metadata={"key2": "value2"}),
        )

        spans = cache.get_by_root_span_id(root_span_id)
        assert spans is not None
        assert len(spans) == 1
        assert spans[0].metadata == {"key1": "value1", "key2": "value2"}
    finally:
        cache.stop()
        cache.dispose()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_span_cache_has():
    """``has()`` is True for a written root span id and False for an unknown one.

    Cleanup lives in ``finally`` so the started cache is released even when
    an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-123", "span-1", CachedSpan(span_id="span-1"))
        assert cache.has("root-123") is True
        assert cache.has("nonexistent") is False
    finally:
        cache.stop()
        cache.dispose()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def test_span_cache_clear():
    """``clear(root_span_id)`` removes only the spans stored under that root.

    Cleanup lives in ``finally`` so the started cache is released even when
    an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))

        cache.clear("root-1")

        # The other root must survive the targeted clear.
        assert cache.has("root-1") is False
        assert cache.has("root-2") is True
    finally:
        cache.stop()
        cache.dispose()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_span_cache_clear_all():
    """``clear_all()`` drops every cached root, leaving ``size`` at zero.

    Cleanup lives in ``finally`` so the started cache is released even when
    the assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))

        cache.clear_all()

        assert cache.size == 0
    finally:
        cache.stop()
        cache.dispose()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def test_span_cache_size():
    """``size`` counts distinct root span ids, not individual spans.

    Cleanup lives in ``finally`` so the started cache is released even when
    an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        assert cache.size == 0

        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 1

        # A second span under the same root does not grow the count.
        cache.queue_write("root-1", "span-2", CachedSpan(span_id="span-2"))
        assert cache.size == 1

        # A span under a new root does.
        cache.queue_write("root-2", "span-3", CachedSpan(span_id="span-3"))
        assert cache.size == 2
    finally:
        cache.stop()
        cache.dispose()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def test_span_cache_dispose():
    """Verify dispose() empties the cache and that it can be started again."""
    span_cache = SpanCache()
    span_cache.start()

    first_span = CachedSpan(span_id="span-1")
    span_cache.queue_write("root-1", "span-1", first_span)
    assert span_cache.size == 1

    # stop() comes first so the refcount is decremented before dispose() runs.
    span_cache.stop()
    span_cache.dispose()

    assert span_cache.size == 0
    assert span_cache.has("root-1") is False

    # A fresh start() makes the disposed cache writable once more.
    span_cache.start()
    second_span = CachedSpan(span_id="span-2")
    span_cache.queue_write("root-2", "span-2", second_span)
    assert span_cache.size == 1

    span_cache.stop()
    span_cache.dispose()
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def test_span_cache_disable():
    """After ``disable()``, ``queue_write`` no longer adds entries.

    Cleanup lives in ``finally`` so the started cache is released even when
    an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 1

        cache.disable()

        # Writes after disable should be no-ops
        cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
        assert cache.size == 1  # Still 1, not 2
    finally:
        cache.stop()
        cache.dispose()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def test_span_cache_disabled_getter():
    """Check how the ``disabled`` property changes across start() and disable()."""
    span_cache = SpanCache()

    # A new cache reports disabled until start() has been called.
    assert span_cache.disabled is True

    span_cache.start()
    assert span_cache.disabled is False

    span_cache.disable()
    assert span_cache.disabled is True

    span_cache.dispose()
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def test_span_cache_disabled_from_constructor():
    """A cache built with ``disabled=True`` ignores writes."""
    span_cache = SpanCache(disabled=True)
    assert span_cache.disabled is True

    # The write below must leave the cache empty.
    ignored_span = CachedSpan(span_id="span-1")
    span_cache.queue_write("root-1", "span-1", ignored_span)
    assert span_cache.size == 0
    assert span_cache.get_by_root_span_id("root-1") is None

    span_cache.dispose()
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def test_span_cache_start_stop_lifecycle():
    """Run two start/stop/dispose cycles back to back on a single cache."""
    span_cache = SpanCache()

    # Before any start() the cache reports disabled.
    assert span_cache.disabled is True

    # Cycle one: stands in for a first "eval".
    span_cache.start()
    assert span_cache.disabled is False
    span_cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
    assert span_cache.size == 1

    # Ending cycle one returns the cache to the disabled state.
    span_cache.stop()
    span_cache.dispose()
    assert span_cache.disabled is True

    # Cycle two: start() must take effect again after the earlier stop().
    span_cache.start()
    assert span_cache.disabled is False
    span_cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
    assert span_cache.size == 1

    span_cache.stop()
    span_cache.dispose()
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def test_span_cache_disable_prevents_start():
    """Once disable() has been called, start() does not re-enable the cache."""
    span_cache = SpanCache()

    # Disable before the cache was ever started.
    span_cache.disable()
    assert span_cache.disabled is True

    # start() is expected to have no effect now.
    span_cache.start()
    assert span_cache.disabled is True

    # A write therefore still leaves the cache empty.
    ignored_span = CachedSpan(span_id="span-1")
    span_cache.queue_write("root-1", "span-1", ignored_span)
    assert span_cache.size == 0

    span_cache.dispose()
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def test_span_cache_parallel_eval_refcount():
    """Two overlapping start() calls keep the cache alive until both are stopped."""
    span_cache = SpanCache()

    # Two "evals" begin; the cache is enabled after each start().
    span_cache.start()  # Eval 1
    assert span_cache.disabled is False

    span_cache.start()  # Eval 2
    assert span_cache.disabled is False

    # Each eval writes one span under its own root.
    for root_id, span_id in (("root-1", "span-1"), ("root-2", "span-2")):
        span_cache.queue_write(root_id, span_id, CachedSpan(span_id=span_id))
    assert span_cache.size == 2

    # Eval 1 ends while eval 2 is still running.
    span_cache.dispose()  # Should NOT dispose (refcount = 2)
    span_cache.stop()  # Decrements to 1

    # Nothing may have been dropped or disabled yet.
    assert span_cache.disabled is False
    assert span_cache.size == 2
    for root_id in ("root-1", "root-2"):
        assert span_cache.get_by_root_span_id(root_id) is not None

    # Eval 2 ends.
    span_cache.dispose()  # Should NOT dispose yet (refcount = 1)
    span_cache.stop()  # Decrements to 0, disables cache

    # With no evals left the cache reports disabled.
    assert span_cache.disabled is True

    # Only this last dispose() actually clears the data.
    span_cache.dispose()  # NOW it disposes (refcount = 0)
    assert span_cache.size == 0
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def test_span_cache_refcount_underflow():
    """A stray ``stop()`` before any ``start()`` does not break later use.

    Cleanup after the real ``start()`` lives in ``finally`` so the started
    cache is released even when the assertion fails.
    """
    cache = SpanCache()

    # Call stop without start
    cache.stop()

    # Should work normally after
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 1
    finally:
        cache.stop()
        cache.dispose()
|
braintrust/test_trace.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""Tests for Trace functionality."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from braintrust.trace import CachedSpanFetcher, SpanData
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Helper to create mock spans
|
|
8
|
+
def make_span(span_id: str, span_type: str, **extra) -> SpanData:
    """Build a SpanData test fixture whose payloads are derived from ``span_id``.

    ``input`` and ``output`` each hold a single ``"text"`` entry naming the
    span, ``span_attributes`` carries ``span_type`` under ``"type"``, and any
    ``extra`` keyword arguments are forwarded to ``SpanData`` unchanged.
    """
    input_payload = {"text": f"input-{span_id}"}
    output_payload = {"text": f"output-{span_id}"}
    attributes = {"type": span_type}
    return SpanData(
        span_id=span_id,
        input=input_payload,
        output=output_payload,
        span_attributes=attributes,
        **extra,
    )
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestCachedSpanFetcher:
    """Exercise how CachedSpanFetcher.get_spans() fetches and reuses spans."""

    @staticmethod
    def _recording_fetch(respond):
        """Build an async fetch function that logs every ``span_type`` it receives.

        Returns the fetch function together with the list it appends to, so a
        test can count calls with ``len()`` or inspect the arguments passed.
        ``respond`` is called with the same ``span_type`` and supplies the
        spans to return.
        """
        received = []

        async def fetch_fn(span_type):
            received.append(span_type)
            return respond(span_type)

        return fetch_fn, received

    @pytest.mark.asyncio
    async def test_fetch_all_spans_without_filter(self):
        """An unfiltered get_spans() fetches once and returns every span."""
        mock_spans = [
            make_span(span_id, kind)
            for span_id, kind in (
                ("span-1", "llm"),
                ("span-2", "function"),
                ("span-3", "llm"),
            )
        ]
        fetch_fn, received = self._recording_fetch(lambda _span_type: mock_spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
        result = await fetcher.get_spans()

        assert len(received) == 1
        assert len(result) == 3
        assert {s.span_id for s in result} == {"span-1", "span-2", "span-3"}

    @pytest.mark.asyncio
    async def test_fetch_specific_span_types(self):
        """A filtered get_spans() passes the requested types to the fetch function."""
        llm_spans = [make_span("span-1", "llm"), make_span("span-2", "llm")]

        def respond(span_type):
            # Checked at call time, exactly where the fetch happens.
            assert span_type == ["llm"]
            return llm_spans

        fetch_fn, received = self._recording_fetch(respond)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)
        result = await fetcher.get_spans(span_type=["llm"])

        assert len(received) == 1
        assert len(result) == 2

    @pytest.mark.asyncio
    async def test_return_cached_spans_after_fetching_all(self):
        """A second unfiltered get_spans() is answered without fetching again."""
        mock_spans = [
            make_span("span-1", "llm"),
            make_span("span-2", "function"),
        ]
        fetch_fn, received = self._recording_fetch(lambda _span_type: mock_spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Initial request goes to the fetch function.
        await fetcher.get_spans()
        assert len(received) == 1

        # Repeat request must not trigger another fetch.
        result = await fetcher.get_spans()
        assert len(received) == 1  # Still 1
        assert len(result) == 2

    @pytest.mark.asyncio
    async def test_return_cached_spans_for_previously_fetched_types(self):
        """Requesting an already-fetched span type does not fetch again."""
        llm_spans = [make_span("span-1", "llm"), make_span("span-2", "llm")]
        fetch_fn, received = self._recording_fetch(lambda _span_type: llm_spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Initial request for llm spans goes to the fetch function.
        await fetcher.get_spans(span_type=["llm"])
        assert len(received) == 1

        # Same type again: no further fetch expected.
        result = await fetcher.get_spans(span_type=["llm"])
        assert len(received) == 1  # Still 1
        assert len(result) == 2

    @pytest.mark.asyncio
    async def test_only_fetch_missing_span_types(self):
        """A request mixing fetched and unfetched types triggers one more fetch."""
        llm_spans = [make_span("span-1", "llm")]
        function_spans = [make_span("span-2", "function")]

        def respond(span_type):
            # Serve each single-type request from its own list, else nothing.
            for wanted, spans in ((["llm"], llm_spans), (["function"], function_spans)):
                if span_type == wanted:
                    return spans
            return []

        fetch_fn, received = self._recording_fetch(respond)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Initial request for llm spans goes to the fetch function.
        await fetcher.get_spans(span_type=["llm"])
        assert len(received) == 1

        # Asking for both types adds exactly one more fetch call.
        result = await fetcher.get_spans(span_type=["llm", "function"])
        assert len(received) == 2
        assert len(result) == 2

    @pytest.mark.asyncio
    async def test_no_refetch_after_fetching_all_spans(self):
        """After an unfiltered fetch, filtered requests are served without fetching."""
        all_spans = [
            make_span("span-1", "llm"),
            make_span("span-2", "function"),
            make_span("span-3", "tool"),
        ]
        fetch_fn, received = self._recording_fetch(lambda _span_type: all_spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Unfiltered request loads everything.
        await fetcher.get_spans()
        assert len(received) == 1

        # Filtered requests afterwards must not add fetch calls.
        llm_result = await fetcher.get_spans(span_type=["llm"])
        assert len(received) == 1  # Still 1
        assert len(llm_result) == 1
        assert llm_result[0].span_id == "span-1"

        function_result = await fetcher.get_spans(span_type=["function"])
        assert len(received) == 1  # Still 1
        assert len(function_result) == 1
        assert function_result[0].span_id == "span-2"

    @pytest.mark.asyncio
    async def test_filter_by_multiple_span_types_from_cache(self):
        """A multi-type filter returns the spans of every listed type."""
        all_spans = [
            make_span("span-1", "llm"),
            make_span("span-2", "function"),
            make_span("span-3", "tool"),
            make_span("span-4", "llm"),
        ]
        fetch_fn, _received = self._recording_fetch(lambda _span_type: all_spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Unfiltered request first, so the filter below runs afterwards.
        await fetcher.get_spans()

        # Ask for llm and tool spans only.
        result = await fetcher.get_spans(span_type=["llm", "tool"])
        assert len(result) == 3
        assert {s.span_id for s in result} == {"span-1", "span-3", "span-4"}

    @pytest.mark.asyncio
    async def test_return_empty_for_nonexistent_span_type(self):
        """Filtering on a type no span has yields an empty result."""
        all_spans = [make_span("span-1", "llm")]
        fetch_fn, _received = self._recording_fetch(lambda _span_type: all_spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Unfiltered request first.
        await fetcher.get_spans()

        # No span carries this type.
        result = await fetcher.get_spans(span_type=["nonexistent"])
        assert len(result) == 0

    @pytest.mark.asyncio
    async def test_handle_spans_with_no_type(self):
        """Spans lacking a type are returned when filtering on the empty string."""
        spans = [
            make_span("span-1", "llm"),
            SpanData(span_id="span-2", input={}, span_attributes={}),  # No type
            SpanData(span_id="span-3", input={}),  # No span_attributes
        ]
        fetch_fn, _received = self._recording_fetch(lambda _span_type: spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        # Unfiltered request returns all three.
        result = await fetcher.get_spans()
        assert len(result) == 3

        # The two untyped spans are found under the "" type.
        no_type_result = await fetcher.get_spans(span_type=[""])
        assert len(no_type_result) == 2

    @pytest.mark.asyncio
    async def test_handle_empty_results(self):
        """An empty fetch result is returned as-is and a follow-up request works."""
        fetch_fn, _received = self._recording_fetch(lambda _span_type: [])

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        result = await fetcher.get_spans()
        assert len(result) == 0

        # Follow-up filtered request: only checked for not raising.
        await fetcher.get_spans(span_type=["llm"])

    @pytest.mark.asyncio
    async def test_handle_empty_span_type_array(self):
        """An empty span_type list reaches the fetch function as None or []."""
        mock_spans = [make_span("span-1", "llm")]
        fetch_fn, received = self._recording_fetch(lambda _span_type: mock_spans)

        fetcher = CachedSpanFetcher(fetch_fn=fetch_fn)

        result = await fetcher.get_spans(span_type=[])

        assert received[0] is None or received[0] == []
        assert len(result) == 1
|