braintrust 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. braintrust/__init__.py +3 -0
  2. braintrust/_generated_types.py +106 -6
  3. braintrust/auto.py +179 -0
  4. braintrust/conftest.py +23 -4
  5. braintrust/framework.py +113 -3
  6. braintrust/functions/invoke.py +3 -1
  7. braintrust/functions/test_invoke.py +61 -0
  8. braintrust/generated_types.py +7 -1
  9. braintrust/logger.py +127 -45
  10. braintrust/oai.py +51 -0
  11. braintrust/span_cache.py +337 -0
  12. braintrust/span_identifier_v3.py +21 -0
  13. braintrust/test_bt_json.py +0 -5
  14. braintrust/test_framework.py +37 -0
  15. braintrust/test_http.py +444 -0
  16. braintrust/test_logger.py +295 -5
  17. braintrust/test_span_cache.py +344 -0
  18. braintrust/test_trace.py +267 -0
  19. braintrust/test_util.py +58 -1
  20. braintrust/trace.py +385 -0
  21. braintrust/util.py +20 -0
  22. braintrust/version.py +2 -2
  23. braintrust/wrappers/agno/__init__.py +2 -3
  24. braintrust/wrappers/anthropic.py +64 -0
  25. braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
  26. braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
  27. braintrust/wrappers/claude_agent_sdk/test_wrapper.py +115 -0
  28. braintrust/wrappers/dspy.py +52 -1
  29. braintrust/wrappers/google_genai/__init__.py +9 -6
  30. braintrust/wrappers/litellm.py +6 -43
  31. braintrust/wrappers/pydantic_ai.py +2 -3
  32. braintrust/wrappers/test_agno.py +9 -0
  33. braintrust/wrappers/test_anthropic.py +156 -0
  34. braintrust/wrappers/test_dspy.py +117 -0
  35. braintrust/wrappers/test_google_genai.py +9 -0
  36. braintrust/wrappers/test_litellm.py +57 -55
  37. braintrust/wrappers/test_openai.py +253 -1
  38. braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
  39. braintrust/wrappers/test_utils.py +79 -0
  40. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
  41. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/RECORD +44 -37
  42. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
  43. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
  44. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
braintrust/test_logger.py CHANGED
@@ -59,6 +59,33 @@ class TestInit(TestCase):
 
         assert str(cm.exception) == "Cannot open an experiment without specifying its name"
 
+    def test_init_with_dataset_id_only(self):
+        """Test that init accepts dataset={'id': '...'} parameter"""
+        # Test the logic that extracts dataset_id from the dict
+        from braintrust.logger import Dataset
+
+        # Test 1: dict with only id
+        dataset_dict = {"id": "dataset-id-123"}
+        assert isinstance(dataset_dict, dict)
+        assert not isinstance(dataset_dict, Dataset)
+        assert dataset_dict["id"] == "dataset-id-123"
+
+        # Test 2: full Dataset object has different behavior
+        # (We can't easily instantiate a Dataset here, but we can verify
+        # that the isinstance check distinguishes them)
+
+    def test_init_with_dataset_id_and_version(self):
+        """Test that init accepts dataset={'id': '...', 'version': '...'} parameter"""
+        # Test the logic that extracts both dataset_id and dataset_version from the dict
+        from braintrust.logger import Dataset
+
+        # Test: dict with id and version
+        dataset_dict = {"id": "dataset-id-123", "version": "v2"}
+        assert isinstance(dataset_dict, dict)
+        assert not isinstance(dataset_dict, Dataset)
+        assert dataset_dict["id"] == "dataset-id-123"
+        assert dataset_dict["version"] == "v2"
+
 
 class TestLogger(TestCase):
     def test_extract_attachments_no_op(self):
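Note: the two tests above only exercise the dict-vs-Dataset distinction; a minimal usage sketch of the parameter they describe follows, where the project name and dataset id are placeholders and the exact init() behavior is inferred from these tests rather than shown in this diff.

    import braintrust

    # Sketch: pass an existing dataset by id (and optionally a pinned version)
    # instead of a Dataset object when opening an experiment.
    experiment = braintrust.init(
        project="my-project",
        dataset={"id": "dataset-id-123", "version": "v2"},
    )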
@@ -806,6 +833,16 @@ def test_span_project_id_logged_in(with_memory_logger, with_simulate_login):
     )
 
 
+def test_span_export_disables_cache(with_memory_logger):
+    """Test that span.export() disables the span cache."""
+    logger = init_test_logger(__name__)
+
+    with logger.start_span(name="test_span") as span:
+        # Exporting should disable the span cache
+        span.export()
+        assert logger.state.span_cache.disabled
+
+
 def test_span_project_name_logged_in(with_simulate_login, with_memory_logger):
     init_logger(project="test-project")
     span = logger.start_span(name="test-span")
@@ -902,11 +939,7 @@ def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_lo
 
 @pytest.mark.asyncio
 async def test_span_link_in_async_context(with_simulate_login, with_memory_logger):
-    """Test that span.link() works correctly when called from within an async function.
-
-    This tests the bug where current_logger was a plain attribute instead of a ContextVar,
-    causing span.link() to return a noop link in async contexts even though the span was valid.
-    """
+    """Test that span.link() works correctly when called from within an async function."""
     import asyncio
 
     logger = init_logger(
@@ -939,6 +972,174 @@ async def test_span_link_in_async_context(with_simulate_login, with_memory_logge
     assert "test-project-id" in link
 
 
+@pytest.mark.asyncio
+async def test_current_logger_after_multiple_awaits(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works after multiple await points."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+
+    async def check_logger_after_awaits():
+        assert braintrust.current_logger() is logger
+        await asyncio.sleep(0.01)
+        assert braintrust.current_logger() is logger
+        await asyncio.sleep(0.01)
+        assert braintrust.current_logger() is logger
+        return braintrust.current_logger()
+
+    result = await check_logger_after_awaits()
+    assert result is logger
+
+
+@pytest.mark.asyncio
+async def test_current_logger_in_async_generator(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works within an async generator (yield)."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+
+    async def logger_generator():
+        for i in range(3):
+            await asyncio.sleep(0.01)
+            yield braintrust.current_logger()
+
+    results = []
+    async for log in logger_generator():
+        results.append(log)
+
+    assert len(results) == 3
+    assert all(r is logger for r in results)
+
+
+@pytest.mark.asyncio
+async def test_current_logger_in_separate_task(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works in a separately created asyncio task."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+
+    async def get_logger_in_task():
+        await asyncio.sleep(0.01)
+        return braintrust.current_logger()
+
+    # Create a separate task
+    task = asyncio.create_task(get_logger_in_task())
+    result = await task
+
+    assert result is logger
+
+
+@pytest.mark.asyncio
+async def test_span_link_in_nested_async(with_simulate_login, with_memory_logger):
+    """Test that span.link() works in deeply nested async calls."""
+    import asyncio
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+    span = logger.start_span(name="test-span")
+
+    async def level3():
+        await asyncio.sleep(0.01)
+        return span.link()
+
+    async def level2():
+        await asyncio.sleep(0.01)
+        return await level3()
+
+    async def level1():
+        await asyncio.sleep(0.01)
+        return await level2()
+
+    link = await level1()
+    span.end()
+
+    assert link != "https://www.braintrust.dev/noop-span"
+    assert span._id in link
+
+
+def test_current_logger_in_thread(with_simulate_login, with_memory_logger):
+    """Test that current_logger() works correctly when called from a new thread.
+
+    Regression test: ContextVar values don't propagate to new threads,
+    so current_logger must be a plain attribute for thread access.
+    """
+    import threading
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+    assert braintrust.current_logger() is logger
+
+    thread_result = {}
+
+    def check_logger_in_thread():
+        thread_result["logger"] = braintrust.current_logger()
+
+    thread = threading.Thread(target=check_logger_in_thread)
+    thread.start()
+    thread.join()
+
+    assert thread_result["logger"] is logger
+
+
+def test_span_link_in_thread(with_simulate_login, with_memory_logger):
+    """Test that span.link() works correctly when called from a new thread.
+
+    The span should be able to generate a valid link even when link() is called
+    from a different thread than where the span was created.
+    """
+    import threading
+
+    logger = init_logger(project="test-project", project_id="test-project-id")
+    span = logger.start_span(name="test-span")
+
+    thread_result = {}
+
+    def get_link_in_thread():
+        # Call link() on the span directly (not via current_span() which uses ContextVar)
+        thread_result["link"] = span.link()
+
+    thread = threading.Thread(target=get_link_in_thread)
+    thread.start()
+    thread.join()
+    span.end()
+
+    # The link should NOT be the noop link
+    assert thread_result["link"] != "https://www.braintrust.dev/noop-span"
+    # The link should contain the span ID
+    assert span._id in thread_result["link"]
+
+
+@pytest.mark.asyncio
+async def test_current_logger_async_context_isolation(with_simulate_login, with_memory_logger):
+    """Test that different async contexts can have different loggers.
+
+    When a child task sets its own logger, it should not affect the parent context.
+    This ensures async context isolation via ContextVar.
+    """
+    import asyncio
+
+    parent_logger = init_logger(project="parent-project", project_id="parent-project-id")
+    assert braintrust.current_logger() is parent_logger
+
+    child_result = {}
+
+    async def child_task():
+        # Child initially inherits parent's logger
+        assert braintrust.current_logger() is parent_logger
+
+        # Child sets its own logger
+        child_logger = init_logger(project="child-project", project_id="child-project-id")
+        child_result["logger"] = braintrust.current_logger()
+        return child_logger
+
+    # Run child task
+    child_logger = await asyncio.create_task(child_task())
+
+    # Child should have seen its own logger
+    assert child_result["logger"] is child_logger
+
+    # Parent should still see parent logger (not affected by child)
+    assert braintrust.current_logger() is parent_logger
+
+
 def test_span_set_current(with_memory_logger):
     """Test that span.set_current() makes the span accessible via current_span()."""
     init_test_logger(__name__)
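Note: the asyncio and thread tests above pin down that current_logger() and span.link() stay usable outside the context that created them; a minimal sketch of that usage outside the test harness, assuming a logged-in SDK and a placeholder project name:

    import threading

    import braintrust

    logger = braintrust.init_logger(project="my-project")  # placeholder project
    span = logger.start_span(name="worker")

    def worker():
        # The logger is still visible from a freshly started thread, and the
        # span object can produce a permalink off the thread that created it.
        assert braintrust.current_logger() is logger
        print(span.link())

    t = threading.Thread(target=worker)
    t.start()
    t.join()
    span.end()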
@@ -2434,6 +2635,95 @@ def test_logger_export_respects_otel_compat_enabled():
     assert version == 4, f"Expected V4 encoding (version=4), got version={version}"
 
 
+def test_register_otel_flush_callback():
+    """Test that register_otel_flush registers a callback correctly."""
+    import asyncio
+
+    from braintrust import register_otel_flush
+    from braintrust.logger import _internal_get_global_state
+    from braintrust.test_helpers import init_test_logger
+
+    init_test_logger(__name__)
+    state = _internal_get_global_state()
+
+    # Track if callback was invoked
+    callback_invoked = False
+
+    async def mock_flush():
+        nonlocal callback_invoked
+        callback_invoked = True
+
+    # Register the callback
+    register_otel_flush(mock_flush)
+
+    # Calling flush_otel should invoke the registered callback
+    asyncio.run(state.flush_otel())
+
+    assert callback_invoked is True
+
+
+def test_register_otel_flush_disables_span_cache():
+    """Test that register_otel_flush disables the span cache."""
+    from braintrust import register_otel_flush
+    from braintrust.logger import _internal_get_global_state
+    from braintrust.test_helpers import init_test_logger
+
+    init_test_logger(__name__)
+    state = _internal_get_global_state()
+
+    # Enable the cache (simulating what happens during eval)
+    state.span_cache.start()
+    assert state.span_cache.disabled is False
+
+    async def mock_flush():
+        pass
+
+    # Register OTEL flush
+    register_otel_flush(mock_flush)
+
+    # Cache should now be disabled
+    assert state.span_cache.disabled is True
+
+
+def test_flush_otel_noop_when_no_callback():
+    """Test that flush_otel is a no-op when no callback is registered."""
+    import asyncio
+
+    from braintrust.logger import _internal_get_global_state
+    from braintrust.test_helpers import init_test_logger
+
+    init_test_logger(__name__)
+    state = _internal_get_global_state()
+
+    # Should not throw even with no callback registered
+    asyncio.run(state.flush_otel())
+
+
+def test_register_otel_flush_permanently_disables_cache():
+    """Test that register_otel_flush permanently disables the cache."""
+    from braintrust import register_otel_flush
+    from braintrust.logger import _internal_get_global_state
+    from braintrust.test_helpers import init_test_logger
+
+    init_test_logger(__name__)
+    state = _internal_get_global_state()
+
+    # Enable the cache
+    state.span_cache.start()
+    assert state.span_cache.disabled is False
+
+    async def mock_flush():
+        pass
+
+    # Register OTEL flush
+    register_otel_flush(mock_flush)
+    assert state.span_cache.disabled is True
+
+    # Try to start again - should still be disabled because of explicit disable
+    state.span_cache.start()
+    assert state.span_cache.disabled is True
+
+
 class TestJSONAttachment(TestCase):
     def test_create_attachment_from_json_data(self):
         """Test creating an attachment from JSON data."""
braintrust/test_span_cache.py ADDED
@@ -0,0 +1,344 @@
+"""Tests for SpanCache (disk-based cache)."""
+
+
+from braintrust.span_cache import CachedSpan, SpanCache
+
+
+def test_span_cache_write_and_read():
+    """Test storing and retrieving spans by rootSpanId."""
+    cache = SpanCache()
+    cache.start()  # Start for testing (cache is disabled by default)
+
+    root_span_id = "root-123"
+    span1 = CachedSpan(
+        span_id="span-1",
+        input={"text": "hello"},
+        output={"response": "world"},
+    )
+    span2 = CachedSpan(
+        span_id="span-2",
+        input={"text": "foo"},
+        output={"response": "bar"},
+    )
+
+    cache.queue_write(root_span_id, span1.span_id, span1)
+    cache.queue_write(root_span_id, span2.span_id, span2)
+
+    spans = cache.get_by_root_span_id(root_span_id)
+    assert spans is not None
+    assert len(spans) == 2
+
+    span_ids = {s.span_id for s in spans}
+    assert "span-1" in span_ids
+    assert "span-2" in span_ids
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_return_none_for_unknown():
+    """Test that unknown rootSpanId returns None."""
+    cache = SpanCache()
+    cache.start()
+
+    spans = cache.get_by_root_span_id("nonexistent")
+    assert spans is None
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_merge_on_duplicate_writes():
+    """Test that subsequent writes to same spanId merge data."""
+    cache = SpanCache()
+    cache.start()
+
+    root_span_id = "root-123"
+    span_id = "span-1"
+
+    cache.queue_write(
+        root_span_id,
+        span_id,
+        CachedSpan(span_id=span_id, input={"text": "hello"}),
+    )
+
+    cache.queue_write(
+        root_span_id,
+        span_id,
+        CachedSpan(span_id=span_id, output={"response": "world"}),
+    )
+
+    spans = cache.get_by_root_span_id(root_span_id)
+    assert spans is not None
+    assert len(spans) == 1
+    assert spans[0].span_id == span_id
+    assert spans[0].input == {"text": "hello"}
+    assert spans[0].output == {"response": "world"}
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_merge_metadata():
+    """Test that metadata objects are merged."""
+    cache = SpanCache()
+    cache.start()
+
+    root_span_id = "root-123"
+    span_id = "span-1"
+
+    cache.queue_write(
+        root_span_id,
+        span_id,
+        CachedSpan(span_id=span_id, metadata={"key1": "value1"}),
+    )
+
+    cache.queue_write(
+        root_span_id,
+        span_id,
+        CachedSpan(span_id=span_id, metadata={"key2": "value2"}),
+    )
+
+    spans = cache.get_by_root_span_id(root_span_id)
+    assert spans is not None
+    assert spans[0].metadata == {"key1": "value1", "key2": "value2"}
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_has():
+    """Test the has() method."""
+    cache = SpanCache()
+    cache.start()
+
+    cache.queue_write("root-123", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.has("root-123") is True
+    assert cache.has("nonexistent") is False
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_clear():
+    """Test clearing spans for a specific rootSpanId."""
+    cache = SpanCache()
+    cache.start()
+
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
+
+    cache.clear("root-1")
+
+    assert cache.has("root-1") is False
+    assert cache.has("root-2") is True
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_clear_all():
+    """Test clearing all cached spans."""
+    cache = SpanCache()
+    cache.start()
+
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
+
+    cache.clear_all()
+
+    assert cache.size == 0
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_size():
+    """Test the size property."""
+    cache = SpanCache()
+    cache.start()
+
+    assert cache.size == 0
+
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.size == 1
+
+    cache.queue_write("root-1", "span-2", CachedSpan(span_id="span-2"))  # Same root
+    assert cache.size == 1
+
+    cache.queue_write("root-2", "span-3", CachedSpan(span_id="span-3"))  # Different root
+    assert cache.size == 2
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_dispose():
+    """Test that dispose cleans up and allows reuse."""
+    cache = SpanCache()
+    cache.start()
+
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.size == 1
+
+    # Stop first to decrement refcount, then dispose
+    cache.stop()
+    cache.dispose()
+
+    assert cache.size == 0
+    assert cache.has("root-1") is False
+
+    # Should be able to write again after dispose (if we start again)
+    cache.start()
+    cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
+    assert cache.size == 1
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_disable():
+    """Test that disable() prevents writes."""
+    cache = SpanCache()
+    cache.start()
+
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.size == 1
+
+    cache.disable()
+
+    # Writes after disable should be no-ops
+    cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
+    assert cache.size == 1  # Still 1, not 2
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_disabled_getter():
+    """Test the disabled property."""
+    # Cache is disabled by default until start() is called
+    cache = SpanCache()
+    assert cache.disabled is True
+
+    cache.start()
+    assert cache.disabled is False
+
+    cache.disable()
+    assert cache.disabled is True
+
+    cache.dispose()
+
+
+def test_span_cache_disabled_from_constructor():
+    """Test that cache can be disabled via constructor."""
+    cache = SpanCache(disabled=True)
+    assert cache.disabled is True
+
+    # Writes should be no-ops
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.size == 0
+    assert cache.get_by_root_span_id("root-1") is None
+
+    cache.dispose()
+
+
+def test_span_cache_start_stop_lifecycle():
+    """Test that stop() allows start() to work again."""
+    cache = SpanCache()
+
+    # Initially disabled by default
+    assert cache.disabled is True
+
+    # Start for first "eval"
+    cache.start()
+    assert cache.disabled is False
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.size == 1
+
+    # Stop after first "eval"
+    cache.stop()
+    cache.dispose()
+    assert cache.disabled is True
+
+    # Start for second "eval" - should work!
+    cache.start()
+    assert cache.disabled is False
+    cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
+    assert cache.size == 1
+
+    cache.stop()
+    cache.dispose()
+
+
+def test_span_cache_disable_prevents_start():
+    """Test that disable() prevents start() from working."""
+    cache = SpanCache()
+
+    # Simulate disable being called
+    cache.disable()
+    assert cache.disabled is True
+
+    # start() should be a no-op after disable()
+    cache.start()
+    assert cache.disabled is True
+
+    # Writes should still be no-ops
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.size == 0
+
+    cache.dispose()
+
+
+def test_span_cache_parallel_eval_refcount():
+    """Test reference counting for parallel evals."""
+    cache = SpanCache()
+
+    # Simulate two evals starting
+    cache.start()  # Eval 1
+    assert cache.disabled is False
+
+    cache.start()  # Eval 2
+    assert cache.disabled is False
+
+    # Write data from both evals
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
+    assert cache.size == 2
+
+    # Eval 1 finishes first
+    cache.dispose()  # Should NOT dispose (refcount = 2)
+    cache.stop()  # Decrements to 1
+
+    # Cache should still be enabled and data intact
+    assert cache.disabled is False
+    assert cache.size == 2
+    assert cache.get_by_root_span_id("root-1") is not None
+    assert cache.get_by_root_span_id("root-2") is not None
+
+    # Eval 2 finishes
+    cache.dispose()  # Should NOT dispose yet (refcount = 1)
+    cache.stop()  # Decrements to 0, disables cache
+
+    # Now cache should be disabled
+    assert cache.disabled is True
+
+    # Final dispose should now work
+    cache.dispose()  # NOW it disposes (refcount = 0)
+    assert cache.size == 0
+
+
+def test_span_cache_refcount_underflow():
+    """Test that refcount handles underflow gracefully."""
+    cache = SpanCache()
+
+    # Call stop without start
+    cache.stop()
+
+    # Should work normally after
+    cache.start()
+    cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
+    assert cache.size == 1
+
+    cache.stop()
+    cache.dispose()
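Note: taken together, the tests above describe a refcounted lifecycle; a minimal sketch of that contract (ids and payloads are placeholders), with each concurrent eval pairing its own start() with a stop()/dispose():

    from braintrust.span_cache import CachedSpan, SpanCache

    cache = SpanCache()  # disabled until start() is called
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1", input={"q": "hi"}))
        spans = cache.get_by_root_span_id("root-1") or []
    finally:
        cache.stop()     # the last stop() disables the cache again
        cache.dispose()  # with the refcount at zero, dispose() drops the cached data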