braintrust 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/__init__.py +3 -0
- braintrust/_generated_types.py +106 -6
- braintrust/auto.py +179 -0
- braintrust/conftest.py +23 -4
- braintrust/framework.py +113 -3
- braintrust/functions/invoke.py +3 -1
- braintrust/functions/test_invoke.py +61 -0
- braintrust/generated_types.py +7 -1
- braintrust/logger.py +127 -45
- braintrust/oai.py +51 -0
- braintrust/span_cache.py +337 -0
- braintrust/span_identifier_v3.py +21 -0
- braintrust/test_bt_json.py +0 -5
- braintrust/test_framework.py +37 -0
- braintrust/test_http.py +444 -0
- braintrust/test_logger.py +295 -5
- braintrust/test_span_cache.py +344 -0
- braintrust/test_trace.py +267 -0
- braintrust/test_util.py +58 -1
- braintrust/trace.py +385 -0
- braintrust/util.py +20 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/agno/__init__.py +2 -3
- braintrust/wrappers/anthropic.py +64 -0
- braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +115 -0
- braintrust/wrappers/dspy.py +52 -1
- braintrust/wrappers/google_genai/__init__.py +9 -6
- braintrust/wrappers/litellm.py +6 -43
- braintrust/wrappers/pydantic_ai.py +2 -3
- braintrust/wrappers/test_agno.py +9 -0
- braintrust/wrappers/test_anthropic.py +156 -0
- braintrust/wrappers/test_dspy.py +117 -0
- braintrust/wrappers/test_google_genai.py +9 -0
- braintrust/wrappers/test_litellm.py +57 -55
- braintrust/wrappers/test_openai.py +253 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
- braintrust/wrappers/test_utils.py +79 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/RECORD +44 -37
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
braintrust/test_logger.py
CHANGED
|
@@ -59,6 +59,33 @@ class TestInit(TestCase):
|
|
|
59
59
|
|
|
60
60
|
assert str(cm.exception) == "Cannot open an experiment without specifying its name"
|
|
61
61
|
|
|
62
|
+
def test_init_with_dataset_id_only(self):
    """Check the shape of a ``dataset={'id': ...}`` argument.

    Only the plain-dict form is inspected: it must be a ``dict``, must not be
    a ``Dataset`` instance, and must carry the expected id. ``init`` itself is
    not invoked by this test.
    """
    from braintrust.logger import Dataset

    expected_id = "dataset-id-123"
    dataset_ref = {"id": expected_id}

    # A bare dict reference has to be distinguishable from a Dataset object.
    assert isinstance(dataset_ref, dict)
    assert not isinstance(dataset_ref, Dataset)
    assert dataset_ref["id"] == expected_id

    # No real Dataset is instantiated here; the isinstance check above is
    # what separates the dict form from a full Dataset object.
|
|
76
|
+
|
|
77
|
+
def test_init_with_dataset_id_and_version(self):
    """Check the shape of a ``dataset={'id': ..., 'version': ...}`` argument.

    Only the plain-dict form is inspected: it must be a ``dict``, must not be
    a ``Dataset`` instance, and must expose both the id and the version.
    ``init`` itself is not invoked by this test.
    """
    from braintrust.logger import Dataset

    expected_id = "dataset-id-123"
    expected_version = "v2"
    dataset_ref = {"id": expected_id, "version": expected_version}

    # A bare dict reference has to be distinguishable from a Dataset object.
    assert isinstance(dataset_ref, dict)
    assert not isinstance(dataset_ref, Dataset)

    # Both fields must be retrievable under their documented keys.
    assert dataset_ref["id"] == expected_id
    assert dataset_ref["version"] == expected_version
|
|
88
|
+
|
|
62
89
|
|
|
63
90
|
class TestLogger(TestCase):
|
|
64
91
|
def test_extract_attachments_no_op(self):
|
|
@@ -806,6 +833,16 @@ def test_span_project_id_logged_in(with_memory_logger, with_simulate_login):
|
|
|
806
833
|
)
|
|
807
834
|
|
|
808
835
|
|
|
836
|
+
def test_span_export_disables_cache(with_memory_logger):
    """Exporting a live span must leave the logger's span cache disabled."""
    test_logger = init_test_logger(__name__)

    with test_logger.start_span(name="test_span") as active_span:
        active_span.export()
        # export() is expected to switch the span cache off as a side effect.
        assert test_logger.state.span_cache.disabled
|
|
844
|
+
|
|
845
|
+
|
|
809
846
|
def test_span_project_name_logged_in(with_simulate_login, with_memory_logger):
|
|
810
847
|
init_logger(project="test-project")
|
|
811
848
|
span = logger.start_span(name="test-span")
|
|
@@ -902,11 +939,7 @@ def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_lo
|
|
|
902
939
|
|
|
903
940
|
@pytest.mark.asyncio
|
|
904
941
|
async def test_span_link_in_async_context(with_simulate_login, with_memory_logger):
|
|
905
|
-
"""Test that span.link() works correctly when called from within an async function.
|
|
906
|
-
|
|
907
|
-
This tests the bug where current_logger was a plain attribute instead of a ContextVar,
|
|
908
|
-
causing span.link() to return a noop link in async contexts even though the span was valid.
|
|
909
|
-
"""
|
|
942
|
+
"""Test that span.link() works correctly when called from within an async function."""
|
|
910
943
|
import asyncio
|
|
911
944
|
|
|
912
945
|
logger = init_logger(
|
|
@@ -939,6 +972,174 @@ async def test_span_link_in_async_context(with_simulate_login, with_memory_logge
|
|
|
939
972
|
assert "test-project-id" in link
|
|
940
973
|
|
|
941
974
|
|
|
975
|
+
@pytest.mark.asyncio
async def test_current_logger_after_multiple_awaits(with_simulate_login, with_memory_logger):
    """current_logger() keeps returning the same logger across await points."""
    import asyncio

    expected = init_logger(project="test-project", project_id="test-project-id")

    async def probe():
        # Check once before suspending, then again after each of two awaits.
        assert braintrust.current_logger() is expected
        for _ in range(2):
            await asyncio.sleep(0.01)
            assert braintrust.current_logger() is expected
        return braintrust.current_logger()

    observed = await probe()
    assert observed is expected
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
@pytest.mark.asyncio
async def test_current_logger_in_async_generator(with_simulate_login, with_memory_logger):
    """current_logger() resolves correctly from inside an async generator."""
    import asyncio

    expected = init_logger(project="test-project", project_id="test-project-id")

    async def emit_loggers():
        # Suspend before every yield so each lookup happens after an await.
        for _ in range(3):
            await asyncio.sleep(0.01)
            yield braintrust.current_logger()

    seen = [item async for item in emit_loggers()]

    assert len(seen) == 3
    assert all(item is expected for item in seen)
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
@pytest.mark.asyncio
async def test_current_logger_in_separate_task(with_simulate_login, with_memory_logger):
    """current_logger() resolves correctly inside a task made with create_task()."""
    import asyncio

    expected = init_logger(project="test-project", project_id="test-project-id")

    async def lookup_in_task():
        await asyncio.sleep(0.01)
        return braintrust.current_logger()

    # Schedule the lookup as its own task rather than awaiting the coroutine inline.
    pending = asyncio.create_task(lookup_in_task())
    observed = await pending

    assert observed is expected
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
@pytest.mark.asyncio
async def test_span_link_in_nested_async(with_simulate_login, with_memory_logger):
    """span.link() yields a real link when called three coroutine levels deep."""
    import asyncio

    logger = init_logger(project="test-project", project_id="test-project-id")
    span = logger.start_span(name="test-span")

    async def innermost():
        await asyncio.sleep(0.01)
        return span.link()

    async def middle():
        await asyncio.sleep(0.01)
        return await innermost()

    async def outermost():
        await asyncio.sleep(0.01)
        return await middle()

    nested_link = await outermost()
    span.end()

    # The link must not be the noop placeholder and must reference this span.
    assert nested_link != "https://www.braintrust.dev/noop-span"
    assert span._id in nested_link
|
|
1057
|
+
|
|
1058
|
+
|
|
1059
|
+
def test_current_logger_in_thread(with_simulate_login, with_memory_logger):
    """current_logger() returns the initialised logger from a new thread.

    Regression test: ContextVar values don't propagate to new threads, so the
    lookup performed in the worker thread must still find the logger that was
    initialised on the calling thread.
    """
    import threading

    expected = init_logger(project="test-project", project_id="test-project-id")
    assert braintrust.current_logger() is expected

    captured = {}

    def worker():
        # Runs on the new thread; records whatever logger is visible there.
        captured["logger"] = braintrust.current_logger()

    runner = threading.Thread(target=worker)
    runner.start()
    runner.join()

    assert captured["logger"] is expected
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
def test_span_link_in_thread(with_simulate_login, with_memory_logger):
    """span.link() produces a real link when invoked from another thread.

    The span is created on the calling thread and link() is called on a
    worker thread; the resulting link must still identify the span.
    """
    import threading

    logger = init_logger(project="test-project", project_id="test-project-id")
    span = logger.start_span(name="test-span")

    captured = {}

    def worker():
        # Use the span object directly; current_span() is deliberately not
        # used because it relies on a ContextVar.
        captured["link"] = span.link()

    runner = threading.Thread(target=worker)
    runner.start()
    runner.join()
    span.end()

    thread_link = captured["link"]
    # Must not be the noop placeholder link.
    assert thread_link != "https://www.braintrust.dev/noop-span"
    # Must reference the span that produced it.
    assert span._id in thread_link
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
@pytest.mark.asyncio
async def test_current_logger_async_context_isolation(with_simulate_login, with_memory_logger):
    """A logger initialised inside a child task must not leak into the parent.

    The child task starts out seeing the parent's logger, replaces it with
    its own, and the parent context must still see the original afterwards.
    """
    import asyncio

    parent_logger = init_logger(project="parent-project", project_id="parent-project-id")
    assert braintrust.current_logger() is parent_logger

    async def child_task():
        # The child begins with the logger inherited from the parent context.
        assert braintrust.current_logger() is parent_logger

        # Initialise a second logger from inside the child task.
        created = init_logger(project="child-project", project_id="child-project-id")
        return created, braintrust.current_logger()

    child_logger, seen_in_child = await asyncio.create_task(child_task())

    # Inside the child, the current logger was the one the child created.
    assert seen_in_child is child_logger

    # Back in the parent, the current logger is untouched.
    assert braintrust.current_logger() is parent_logger
|
|
1141
|
+
|
|
1142
|
+
|
|
942
1143
|
def test_span_set_current(with_memory_logger):
|
|
943
1144
|
"""Test that span.set_current() makes the span accessible via current_span()."""
|
|
944
1145
|
init_test_logger(__name__)
|
|
@@ -2434,6 +2635,95 @@ def test_logger_export_respects_otel_compat_enabled():
|
|
|
2434
2635
|
assert version == 4, f"Expected V4 encoding (version=4), got version={version}"
|
|
2435
2636
|
|
|
2436
2637
|
|
|
2638
|
+
def test_register_otel_flush_callback():
    """A callback passed to register_otel_flush runs when flush_otel() is awaited."""
    import asyncio

    from braintrust import register_otel_flush
    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    global_state = _internal_get_global_state()

    was_called = False

    async def fake_flush():
        # Flip the flag so the test can observe that the callback ran.
        nonlocal was_called
        was_called = True

    register_otel_flush(fake_flush)

    # Driving flush_otel() to completion should invoke the registered callback.
    asyncio.run(global_state.flush_otel())

    assert was_called is True
|
|
2663
|
+
|
|
2664
|
+
|
|
2665
|
+
def test_register_otel_flush_disables_span_cache():
    """Registering an OTEL flush callback turns an enabled span cache off."""
    from braintrust import register_otel_flush
    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    span_cache = _internal_get_global_state().span_cache

    # Turn the cache on first (simulating what happens during an eval).
    span_cache.start()
    assert span_cache.disabled is False

    async def fake_flush():
        pass

    register_otel_flush(fake_flush)

    # Registration alone must have disabled the cache.
    assert span_cache.disabled is True
|
|
2686
|
+
|
|
2687
|
+
|
|
2688
|
+
def test_flush_otel_noop_when_no_callback():
    """flush_otel() completes without raising when the test registers no callback."""
    import asyncio

    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    global_state = _internal_get_global_state()

    # The only expectation is that this call does not raise.
    asyncio.run(global_state.flush_otel())
|
|
2700
|
+
|
|
2701
|
+
|
|
2702
|
+
def test_register_otel_flush_permanently_disables_cache():
    """After register_otel_flush, a later start() must not re-enable the cache."""
    from braintrust import register_otel_flush
    from braintrust.logger import _internal_get_global_state
    from braintrust.test_helpers import init_test_logger

    init_test_logger(__name__)
    span_cache = _internal_get_global_state().span_cache

    # Begin from an enabled cache.
    span_cache.start()
    assert span_cache.disabled is False

    async def fake_flush():
        pass

    register_otel_flush(fake_flush)
    assert span_cache.disabled is True

    # A second start() must be ignored because the cache was explicitly disabled.
    span_cache.start()
    assert span_cache.disabled is True
|
|
2725
|
+
|
|
2726
|
+
|
|
2437
2727
|
class TestJSONAttachment(TestCase):
|
|
2438
2728
|
def test_create_attachment_from_json_data(self):
|
|
2439
2729
|
"""Test creating an attachment from JSON data."""
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""Tests for SpanCache (disk-based cache)."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from braintrust.span_cache import CachedSpan, SpanCache
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_span_cache_write_and_read():
    """Spans queued under one root span id are all returned for that root.

    Cleanup runs in ``finally`` so a failed assertion cannot leave the
    disk-based cache behind for later tests.
    """
    cache = SpanCache()
    cache.start()  # the cache is disabled until start() is called
    try:
        root_span_id = "root-123"
        span1 = CachedSpan(
            span_id="span-1",
            input={"text": "hello"},
            output={"response": "world"},
        )
        span2 = CachedSpan(
            span_id="span-2",
            input={"text": "foo"},
            output={"response": "bar"},
        )

        for span in (span1, span2):
            cache.queue_write(root_span_id, span.span_id, span)

        spans = cache.get_by_root_span_id(root_span_id)
        assert spans is not None
        assert len(spans) == 2

        span_ids = {s.span_id for s in spans}
        assert "span-1" in span_ids
        assert "span-2" in span_ids
    finally:
        cache.stop()
        cache.dispose()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_span_cache_return_none_for_unknown():
    """Looking up a root span id that was never written returns None.

    Cleanup runs in ``finally`` so it still happens if the assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        spans = cache.get_by_root_span_id("nonexistent")
        assert spans is None
    finally:
        cache.stop()
        cache.dispose()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_span_cache_merge_on_duplicate_writes():
    """Two writes to the same span id are merged into a single cached span.

    The first write supplies ``input`` and the second supplies ``output``;
    the cached span must expose both. Cleanup runs in ``finally`` so it still
    happens if an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        root_span_id = "root-123"
        span_id = "span-1"

        cache.queue_write(
            root_span_id,
            span_id,
            CachedSpan(span_id=span_id, input={"text": "hello"}),
        )
        cache.queue_write(
            root_span_id,
            span_id,
            CachedSpan(span_id=span_id, output={"response": "world"}),
        )

        spans = cache.get_by_root_span_id(root_span_id)
        assert spans is not None
        assert len(spans) == 1

        merged = spans[0]
        assert merged.span_id == span_id
        assert merged.input == {"text": "hello"}
        assert merged.output == {"response": "world"}
    finally:
        cache.stop()
        cache.dispose()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_span_cache_merge_metadata():
    """Metadata dicts from successive writes to one span id are combined.

    Cleanup runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        root_span_id = "root-123"
        span_id = "span-1"

        # Each write contributes a different metadata key.
        for metadata in ({"key1": "value1"}, {"key2": "value2"}):
            cache.queue_write(
                root_span_id,
                span_id,
                CachedSpan(span_id=span_id, metadata=metadata),
            )

        spans = cache.get_by_root_span_id(root_span_id)
        assert spans is not None
        assert spans[0].metadata == {"key1": "value1", "key2": "value2"}
    finally:
        cache.stop()
        cache.dispose()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_span_cache_has():
    """has() is True for a written root span id and False for an unknown one.

    Cleanup runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-123", "span-1", CachedSpan(span_id="span-1"))
        assert cache.has("root-123") is True
        assert cache.has("nonexistent") is False
    finally:
        cache.stop()
        cache.dispose()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def test_span_cache_clear():
    """clear(root) removes only the spans stored under that root span id.

    Cleanup runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))

        cache.clear("root-1")

        # Only the cleared root disappears; the other root is untouched.
        assert cache.has("root-1") is False
        assert cache.has("root-2") is True
    finally:
        cache.stop()
        cache.dispose()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_span_cache_clear_all():
    """clear_all() leaves the cache with a size of zero.

    Cleanup runs in ``finally`` so it still happens if the assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))

        cache.clear_all()

        assert cache.size == 0
    finally:
        cache.stop()
        cache.dispose()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def test_span_cache_size():
    """size counts distinct root span ids, not individual spans.

    Cleanup runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        assert cache.size == 0

        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 1

        # A second span under the same root does not change the size.
        cache.queue_write("root-1", "span-2", CachedSpan(span_id="span-2"))
        assert cache.size == 1

        # A span under a new root increases the size.
        cache.queue_write("root-2", "span-3", CachedSpan(span_id="span-3"))
        assert cache.size == 2
    finally:
        cache.stop()
        cache.dispose()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def test_span_cache_dispose():
    """dispose() empties the cache, and the cache is usable again after start()."""
    cache = SpanCache()

    def write_one(root_id, span_id):
        # Queue a minimal span under the given root span id.
        cache.queue_write(root_id, span_id, CachedSpan(span_id=span_id))

    cache.start()
    write_one("root-1", "span-1")
    assert cache.size == 1

    # stop() comes first so the reference count drops before dispose().
    cache.stop()
    cache.dispose()

    assert cache.size == 0
    assert cache.has("root-1") is False

    # After a fresh start() the disposed cache accepts writes again.
    cache.start()
    write_one("root-2", "span-2")
    assert cache.size == 1

    cache.stop()
    cache.dispose()
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def test_span_cache_disable():
    """Writes queued after disable() are ignored.

    Cleanup runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache()
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 1

        cache.disable()

        # This write must be a no-op, so the size stays at 1 rather than 2.
        cache.queue_write("root-2", "span-2", CachedSpan(span_id="span-2"))
        assert cache.size == 1
    finally:
        cache.stop()
        cache.dispose()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def test_span_cache_disabled_getter():
    """``disabled`` is True before start(), False after it, True after disable().

    The start() issued here is balanced by a stop() during cleanup, and the
    cleanup runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache()
    try:
        # Nothing has called start() yet, so the cache reports disabled.
        assert cache.disabled is True

        cache.start()
        assert cache.disabled is False

        cache.disable()
        assert cache.disabled is True
    finally:
        # NOTE(review): presumably dispose() only releases resources once
        # every start() has been matched by a stop() — confirm against
        # SpanCache. An unmatched stop() is expected to be harmless.
        cache.stop()
        cache.dispose()
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def test_span_cache_disabled_from_constructor():
    """A cache built with ``disabled=True`` reports disabled and ignores writes.

    dispose() runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache(disabled=True)
    try:
        assert cache.disabled is True

        # The write must be dropped: nothing is stored and nothing is readable.
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 0
        assert cache.get_by_root_span_id("root-1") is None
    finally:
        cache.dispose()
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def test_span_cache_start_stop_lifecycle():
    """After stop() and dispose(), a second start() enables the cache again."""
    cache = SpanCache()

    def run_one_eval(root_id, span_id):
        # One simulated "eval": enable, write a single span, then tear down.
        cache.start()
        assert cache.disabled is False
        cache.queue_write(root_id, span_id, CachedSpan(span_id=span_id))
        assert cache.size == 1
        cache.stop()
        cache.dispose()

    # A new cache starts out disabled.
    assert cache.disabled is True

    run_one_eval("root-1", "span-1")
    # Stopping the only active eval disables the cache again.
    assert cache.disabled is True

    # The second eval must be able to re-enable and use the cache.
    run_one_eval("root-2", "span-2")
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def test_span_cache_disable_prevents_start():
    """Once disable() has been called, start() does not re-enable the cache.

    dispose() runs in ``finally`` so it still happens if an assertion fails.
    """
    cache = SpanCache()
    try:
        cache.disable()
        assert cache.disabled is True

        # start() must be ignored after an explicit disable().
        cache.start()
        assert cache.disabled is True

        # Writes are therefore still dropped.
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 0
    finally:
        cache.dispose()
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def test_span_cache_parallel_eval_refcount():
    """The cache stays alive until every start() has been matched by a stop().

    Two overlapping "evals" each call start(). dispose() must do nothing while
    any eval is still active, and must clear the cache once both have stopped.
    """
    cache = SpanCache()

    # Two evals begin; the cache is enabled after each start().
    for _ in range(2):
        cache.start()
        assert cache.disabled is False

    # Each eval writes under its own root span id.
    for root_id, span_id in (("root-1", "span-1"), ("root-2", "span-2")):
        cache.queue_write(root_id, span_id, CachedSpan(span_id=span_id))
    assert cache.size == 2

    # First eval finishes: dispose() is ignored (refcount 2), stop() drops it to 1.
    cache.dispose()
    cache.stop()

    # The cache is still enabled and both roots remain readable.
    assert cache.disabled is False
    assert cache.size == 2
    assert cache.get_by_root_span_id("root-1") is not None
    assert cache.get_by_root_span_id("root-2") is not None

    # Second eval finishes: dispose() is ignored (refcount 1), stop() drops it to 0.
    cache.dispose()
    cache.stop()

    # With no active evals the cache reports disabled.
    assert cache.disabled is True

    # With the refcount at 0, dispose() finally clears the cache.
    cache.dispose()
    assert cache.size == 0
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def test_span_cache_refcount_underflow():
    """A stop() with no preceding start() must not break later use of the cache.

    Cleanup runs in ``finally`` so it still happens if the assertion fails.
    """
    cache = SpanCache()

    # Unbalanced stop(): there was no start() to match it.
    cache.stop()

    # The cache must still start and accept writes normally afterwards.
    cache.start()
    try:
        cache.queue_write("root-1", "span-1", CachedSpan(span_id="span-1"))
        assert cache.size == 1
    finally:
        cache.stop()
        cache.dispose()
|