braintrust 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +737 -672
- braintrust/audit.py +2 -2
- braintrust/bt_json.py +178 -19
- braintrust/cli/eval.py +6 -7
- braintrust/cli/push.py +11 -11
- braintrust/context.py +12 -17
- braintrust/contrib/temporal/__init__.py +16 -27
- braintrust/contrib/temporal/test_temporal.py +8 -3
- braintrust/devserver/auth.py +8 -8
- braintrust/devserver/cache.py +3 -4
- braintrust/devserver/cors.py +8 -7
- braintrust/devserver/dataset.py +3 -5
- braintrust/devserver/eval_hooks.py +7 -6
- braintrust/devserver/schemas.py +22 -19
- braintrust/devserver/server.py +19 -12
- braintrust/devserver/test_cached_login.py +4 -4
- braintrust/framework.py +139 -142
- braintrust/framework2.py +88 -87
- braintrust/functions/invoke.py +66 -59
- braintrust/functions/stream.py +3 -2
- braintrust/generated_types.py +3 -1
- braintrust/git_fields.py +11 -11
- braintrust/gitutil.py +2 -3
- braintrust/graph_util.py +10 -10
- braintrust/id_gen.py +2 -2
- braintrust/logger.py +373 -471
- braintrust/merge_row_batch.py +10 -9
- braintrust/oai.py +21 -20
- braintrust/otel/__init__.py +49 -49
- braintrust/otel/context.py +16 -30
- braintrust/otel/test_distributed_tracing.py +14 -11
- braintrust/otel/test_otel_bt_integration.py +32 -31
- braintrust/parameters.py +8 -8
- braintrust/prompt.py +14 -14
- braintrust/prompt_cache/disk_cache.py +5 -4
- braintrust/prompt_cache/lru_cache.py +3 -2
- braintrust/prompt_cache/prompt_cache.py +13 -14
- braintrust/queue.py +4 -4
- braintrust/score.py +4 -4
- braintrust/serializable_data_class.py +4 -4
- braintrust/span_identifier_v1.py +1 -2
- braintrust/span_identifier_v2.py +3 -4
- braintrust/span_identifier_v3.py +23 -20
- braintrust/span_identifier_v4.py +34 -25
- braintrust/test_bt_json.py +644 -0
- braintrust/test_framework.py +72 -6
- braintrust/test_helpers.py +5 -5
- braintrust/test_id_gen.py +2 -3
- braintrust/test_logger.py +211 -107
- braintrust/test_otel.py +61 -53
- braintrust/test_queue.py +0 -1
- braintrust/test_score.py +1 -3
- braintrust/test_span_components.py +29 -44
- braintrust/util.py +9 -8
- braintrust/version.py +2 -2
- braintrust/wrappers/_anthropic_utils.py +4 -4
- braintrust/wrappers/agno/__init__.py +3 -4
- braintrust/wrappers/agno/agent.py +1 -2
- braintrust/wrappers/agno/function_call.py +1 -2
- braintrust/wrappers/agno/model.py +1 -2
- braintrust/wrappers/agno/team.py +1 -2
- braintrust/wrappers/agno/utils.py +12 -12
- braintrust/wrappers/anthropic.py +7 -8
- braintrust/wrappers/claude_agent_sdk/__init__.py +3 -4
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +29 -27
- braintrust/wrappers/dspy.py +15 -17
- braintrust/wrappers/google_genai/__init__.py +17 -30
- braintrust/wrappers/langchain.py +22 -24
- braintrust/wrappers/litellm.py +4 -3
- braintrust/wrappers/openai.py +15 -15
- braintrust/wrappers/pydantic_ai.py +225 -110
- braintrust/wrappers/test_agno.py +0 -1
- braintrust/wrappers/test_dspy.py +0 -1
- braintrust/wrappers/test_google_genai.py +64 -4
- braintrust/wrappers/test_litellm.py +0 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +819 -22
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/METADATA +3 -2
- braintrust-0.4.1.dist-info/RECORD +121 -0
- braintrust-0.3.15.dist-info/RECORD +0 -120
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/WHEEL +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/entry_points.txt +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/top_level.txt +0 -0
braintrust/test_otel.py
CHANGED
|
@@ -274,12 +274,11 @@ class TestSpanFiltering:
|
|
|
274
274
|
except ImportError:
|
|
275
275
|
pytest.skip("OpenTelemetry SDK not fully installed, skipping AISpanProcessor tests")
|
|
276
276
|
|
|
277
|
+
from braintrust.otel import AISpanProcessor
|
|
277
278
|
from opentelemetry.sdk.trace import TracerProvider
|
|
278
279
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
279
280
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
280
281
|
|
|
281
|
-
from braintrust.otel import AISpanProcessor
|
|
282
|
-
|
|
283
282
|
self.memory_exporter = InMemorySpanExporter()
|
|
284
283
|
self.provider = TracerProvider()
|
|
285
284
|
|
|
@@ -403,12 +402,11 @@ class TestSpanFiltering:
|
|
|
403
402
|
return None # Don't influence decision
|
|
404
403
|
|
|
405
404
|
# Create processor with custom filter
|
|
405
|
+
from braintrust.otel import AISpanProcessor
|
|
406
406
|
from opentelemetry.sdk.trace import TracerProvider
|
|
407
407
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
408
408
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
409
409
|
|
|
410
|
-
from braintrust.otel import AISpanProcessor
|
|
411
|
-
|
|
412
410
|
memory_exporter = InMemorySpanExporter()
|
|
413
411
|
processor = AISpanProcessor(SimpleSpanProcessor(memory_exporter), custom_filter=custom_filter)
|
|
414
412
|
provider = TracerProvider()
|
|
@@ -435,12 +433,11 @@ class TestSpanFiltering:
|
|
|
435
433
|
return None # Don't influence decision
|
|
436
434
|
|
|
437
435
|
# Create processor with custom filter
|
|
436
|
+
from braintrust.otel import AISpanProcessor
|
|
438
437
|
from opentelemetry.sdk.trace import TracerProvider
|
|
439
438
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
440
439
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
441
440
|
|
|
442
|
-
from braintrust.otel import AISpanProcessor
|
|
443
|
-
|
|
444
441
|
memory_exporter = InMemorySpanExporter()
|
|
445
442
|
processor = AISpanProcessor(SimpleSpanProcessor(memory_exporter), custom_filter=custom_filter)
|
|
446
443
|
provider = TracerProvider()
|
|
@@ -465,12 +462,11 @@ class TestSpanFiltering:
|
|
|
465
462
|
return None # Always defer to default logic
|
|
466
463
|
|
|
467
464
|
# Create processor with custom filter
|
|
465
|
+
from braintrust.otel import AISpanProcessor
|
|
468
466
|
from opentelemetry.sdk.trace import TracerProvider
|
|
469
467
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
470
468
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
471
469
|
|
|
472
|
-
from braintrust.otel import AISpanProcessor
|
|
473
|
-
|
|
474
470
|
memory_exporter = InMemorySpanExporter()
|
|
475
471
|
processor = AISpanProcessor(SimpleSpanProcessor(memory_exporter), custom_filter=custom_filter)
|
|
476
472
|
provider = TracerProvider()
|
|
@@ -492,12 +488,11 @@ class TestSpanFiltering:
|
|
|
492
488
|
|
|
493
489
|
def test_filtering_vs_unfiltered_comparison(self):
|
|
494
490
|
# Set up two separate exporters and processors
|
|
491
|
+
from braintrust.otel import AISpanProcessor
|
|
495
492
|
from opentelemetry.sdk.trace import TracerProvider
|
|
496
493
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
497
494
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
498
495
|
|
|
499
|
-
from braintrust.otel import AISpanProcessor
|
|
500
|
-
|
|
501
496
|
all_spans_exporter = InMemorySpanExporter()
|
|
502
497
|
filtered_spans_exporter = InMemorySpanExporter()
|
|
503
498
|
|
|
@@ -569,49 +564,58 @@ def test_parent_from_headers_invalid_inputs():
|
|
|
569
564
|
assert result is None
|
|
570
565
|
|
|
571
566
|
# Test 2: Invalid traceparent (malformed)
|
|
572
|
-
result = parent_from_headers({
|
|
567
|
+
result = parent_from_headers({"traceparent": "invalid"})
|
|
573
568
|
assert result is None
|
|
574
569
|
|
|
575
570
|
# Test 3: Valid traceparent but invalid braintrust.parent format
|
|
576
|
-
result = parent_from_headers(
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
571
|
+
result = parent_from_headers(
|
|
572
|
+
{
|
|
573
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
|
|
574
|
+
"baggage": "braintrust.parent=invalid_format",
|
|
575
|
+
}
|
|
576
|
+
)
|
|
580
577
|
assert result is None
|
|
581
578
|
|
|
582
579
|
# Test 4: Empty project_id
|
|
583
|
-
result = parent_from_headers(
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
580
|
+
result = parent_from_headers(
|
|
581
|
+
{
|
|
582
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
|
|
583
|
+
"baggage": "braintrust.parent=project_id:",
|
|
584
|
+
}
|
|
585
|
+
)
|
|
587
586
|
assert result is None
|
|
588
587
|
|
|
589
588
|
# Test 5: Empty project_name
|
|
590
|
-
result = parent_from_headers(
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
589
|
+
result = parent_from_headers(
|
|
590
|
+
{
|
|
591
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
|
|
592
|
+
"baggage": "braintrust.parent=project_name:",
|
|
593
|
+
}
|
|
594
|
+
)
|
|
594
595
|
assert result is None
|
|
595
596
|
|
|
596
597
|
# Test 6: Empty experiment_id
|
|
597
|
-
result = parent_from_headers(
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
598
|
+
result = parent_from_headers(
|
|
599
|
+
{
|
|
600
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
|
|
601
|
+
"baggage": "braintrust.parent=experiment_id:",
|
|
602
|
+
}
|
|
603
|
+
)
|
|
601
604
|
assert result is None
|
|
602
605
|
|
|
603
606
|
# Test 7: Invalid trace_id length (too short)
|
|
604
|
-
result = parent_from_headers(
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
})
|
|
607
|
+
result = parent_from_headers(
|
|
608
|
+
{"traceparent": "00-4bf92f3577b34da6-00f067aa0ba902b7-01", "baggage": "braintrust.parent=project_name:test"}
|
|
609
|
+
)
|
|
608
610
|
assert result is None
|
|
609
611
|
|
|
610
612
|
# Test 8: Invalid span_id length (too short)
|
|
611
|
-
result = parent_from_headers(
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
613
|
+
result = parent_from_headers(
|
|
614
|
+
{
|
|
615
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa-01",
|
|
616
|
+
"baggage": "braintrust.parent=project_name:test",
|
|
617
|
+
}
|
|
618
|
+
)
|
|
615
619
|
assert result is None
|
|
616
620
|
|
|
617
621
|
|
|
@@ -623,29 +627,35 @@ def test_parent_from_headers_valid_input():
|
|
|
623
627
|
from braintrust.otel import parent_from_headers
|
|
624
628
|
|
|
625
629
|
# Test with valid project_name
|
|
626
|
-
result = parent_from_headers(
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
+
result = parent_from_headers(
|
|
631
|
+
{
|
|
632
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
|
|
633
|
+
"baggage": "braintrust.parent=project_name:test-project",
|
|
634
|
+
}
|
|
635
|
+
)
|
|
630
636
|
assert result is not None
|
|
631
637
|
# Result is base64 encoded, so just check it's a non-empty string
|
|
632
638
|
assert isinstance(result, str)
|
|
633
639
|
assert len(result) > 0
|
|
634
640
|
|
|
635
641
|
# Test with valid project_id
|
|
636
|
-
result = parent_from_headers(
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
642
|
+
result = parent_from_headers(
|
|
643
|
+
{
|
|
644
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
|
|
645
|
+
"baggage": "braintrust.parent=project_id:abc123",
|
|
646
|
+
}
|
|
647
|
+
)
|
|
640
648
|
assert result is not None
|
|
641
649
|
assert isinstance(result, str)
|
|
642
650
|
assert len(result) > 0
|
|
643
651
|
|
|
644
652
|
# Test with valid experiment_id
|
|
645
|
-
result = parent_from_headers(
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
653
|
+
result = parent_from_headers(
|
|
654
|
+
{
|
|
655
|
+
"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
|
|
656
|
+
"baggage": "braintrust.parent=experiment_id:exp-456",
|
|
657
|
+
}
|
|
658
|
+
)
|
|
649
659
|
assert result is not None
|
|
650
660
|
assert isinstance(result, str)
|
|
651
661
|
assert len(result) > 0
|
|
@@ -656,16 +666,15 @@ def test_add_parent_to_baggage():
|
|
|
656
666
|
if not _check_otel_installed():
|
|
657
667
|
pytest.skip("OpenTelemetry SDK not fully installed, skipping test")
|
|
658
668
|
|
|
659
|
-
from opentelemetry import baggage, context
|
|
660
|
-
|
|
661
669
|
from braintrust.otel import add_parent_to_baggage
|
|
670
|
+
from opentelemetry import baggage, context
|
|
662
671
|
|
|
663
672
|
# Test adding parent to baggage
|
|
664
673
|
token = add_parent_to_baggage("project_name:test-project")
|
|
665
674
|
assert token is not None
|
|
666
675
|
|
|
667
676
|
# Verify it's in baggage
|
|
668
|
-
parent_value = baggage.get_baggage(
|
|
677
|
+
parent_value = baggage.get_baggage("braintrust.parent")
|
|
669
678
|
assert parent_value == "project_name:test-project"
|
|
670
679
|
|
|
671
680
|
# Clean up
|
|
@@ -677,11 +686,10 @@ def test_add_span_parent_to_baggage():
|
|
|
677
686
|
if not _check_otel_installed():
|
|
678
687
|
pytest.skip("OpenTelemetry SDK not fully installed, skipping test")
|
|
679
688
|
|
|
689
|
+
from braintrust.otel import add_span_parent_to_baggage
|
|
680
690
|
from opentelemetry import baggage, context, trace
|
|
681
691
|
from opentelemetry.sdk.trace import TracerProvider
|
|
682
692
|
|
|
683
|
-
from braintrust.otel import add_span_parent_to_baggage
|
|
684
|
-
|
|
685
693
|
# Setup tracer
|
|
686
694
|
provider = TracerProvider()
|
|
687
695
|
trace.set_tracer_provider(provider)
|
|
@@ -695,7 +703,7 @@ def test_add_span_parent_to_baggage():
|
|
|
695
703
|
assert token is not None
|
|
696
704
|
|
|
697
705
|
# Verify it's in baggage
|
|
698
|
-
parent_value = baggage.get_baggage(
|
|
706
|
+
parent_value = baggage.get_baggage("braintrust.parent")
|
|
699
707
|
assert parent_value == "project_name:test"
|
|
700
708
|
|
|
701
709
|
context.detach(token)
|
braintrust/test_queue.py
CHANGED
braintrust/test_score.py
CHANGED
|
@@ -65,9 +65,7 @@ class TestScore(unittest.TestCase):
|
|
|
65
65
|
|
|
66
66
|
def test_from_dict_round_trip(self):
|
|
67
67
|
"""Test that Score can be serialized to dict and deserialized back."""
|
|
68
|
-
original = Score(
|
|
69
|
-
name="round_trip_scorer", score=0.95, metadata={"info": "test"}
|
|
70
|
-
)
|
|
68
|
+
original = Score(name="round_trip_scorer", score=0.95, metadata={"info": "test"})
|
|
71
69
|
|
|
72
70
|
# Serialize to dict
|
|
73
71
|
as_dict = original.as_dict()
|
|
@@ -6,7 +6,6 @@ Tests serialization, deserialization, OTEL compatibility, and backward compatibi
|
|
|
6
6
|
from uuid import uuid4
|
|
7
7
|
|
|
8
8
|
import pytest
|
|
9
|
-
|
|
10
9
|
from braintrust.id_gen import OTELIDGenerator
|
|
11
10
|
from braintrust.span_identifier_v3 import SpanComponentsV3, SpanObjectTypeV3
|
|
12
11
|
from braintrust.span_identifier_v4 import SpanComponentsV4
|
|
@@ -22,7 +21,7 @@ class TestSpanComponentsV3:
|
|
|
22
21
|
object_id=str(uuid4()),
|
|
23
22
|
row_id=str(uuid4()),
|
|
24
23
|
span_id=str(uuid4()),
|
|
25
|
-
root_span_id=str(uuid4())
|
|
24
|
+
root_span_id=str(uuid4()),
|
|
26
25
|
)
|
|
27
26
|
|
|
28
27
|
exported = components.to_str()
|
|
@@ -39,7 +38,7 @@ class TestSpanComponentsV3:
|
|
|
39
38
|
components = SpanComponentsV3(
|
|
40
39
|
object_type=SpanObjectTypeV3.EXPERIMENT,
|
|
41
40
|
object_id=str(uuid4()),
|
|
42
|
-
propagated_event={"key": "value", "nested": {"a": 1}}
|
|
41
|
+
propagated_event={"key": "value", "nested": {"a": 1}},
|
|
43
42
|
)
|
|
44
43
|
|
|
45
44
|
exported = components.to_str()
|
|
@@ -53,15 +52,15 @@ class TestSpanComponentsV3:
|
|
|
53
52
|
"""Test that V3 fails to preserve OTEL hex strings for 16-byte IDs (converts to UUID format)."""
|
|
54
53
|
otel_gen = OTELIDGenerator()
|
|
55
54
|
trace_id = otel_gen.get_trace_id() # 32-char hex (16 bytes)
|
|
56
|
-
span_id = otel_gen.get_span_id()
|
|
55
|
+
span_id = otel_gen.get_span_id() # 16-char hex (8 bytes)
|
|
57
56
|
|
|
58
57
|
# Use 16-byte hex strings for object_id and root_span_id to see UUID conversion
|
|
59
58
|
components = SpanComponentsV3(
|
|
60
59
|
object_type=SpanObjectTypeV3.PROJECT_LOGS,
|
|
61
60
|
object_id=trace_id, # 16-byte hex should get converted to UUID format
|
|
62
|
-
row_id=
|
|
63
|
-
span_id=span_id,
|
|
64
|
-
root_span_id=trace_id # 16-byte hex should get converted to UUID format
|
|
61
|
+
row_id="test-row-id",
|
|
62
|
+
span_id=span_id, # 8-byte hex might be preserved
|
|
63
|
+
root_span_id=trace_id, # 16-byte hex should get converted to UUID format
|
|
65
64
|
)
|
|
66
65
|
|
|
67
66
|
exported = components.to_str()
|
|
@@ -79,14 +78,14 @@ class TestSpanComponentsV4:
|
|
|
79
78
|
"""Test that V4 preserves OTEL hex strings exactly."""
|
|
80
79
|
otel_gen = OTELIDGenerator()
|
|
81
80
|
trace_id = otel_gen.get_trace_id() # 32-char hex
|
|
82
|
-
span_id = otel_gen.get_span_id()
|
|
81
|
+
span_id = otel_gen.get_span_id() # 16-char hex
|
|
83
82
|
|
|
84
83
|
components = SpanComponentsV4(
|
|
85
84
|
object_type=SpanObjectTypeV3.PROJECT_LOGS,
|
|
86
|
-
object_id=
|
|
87
|
-
row_id=
|
|
85
|
+
object_id="test-project-id",
|
|
86
|
+
row_id="test-row-id",
|
|
88
87
|
span_id=span_id,
|
|
89
|
-
root_span_id=trace_id
|
|
88
|
+
root_span_id=trace_id,
|
|
90
89
|
)
|
|
91
90
|
|
|
92
91
|
exported = components.to_str()
|
|
@@ -108,9 +107,9 @@ class TestSpanComponentsV4:
|
|
|
108
107
|
components = SpanComponentsV4(
|
|
109
108
|
object_type=SpanObjectTypeV3.PROJECT_LOGS,
|
|
110
109
|
object_id=uuid_object_id,
|
|
111
|
-
row_id=
|
|
110
|
+
row_id="test-row-id",
|
|
112
111
|
span_id=uuid_span_id,
|
|
113
|
-
root_span_id=uuid_root_span_id
|
|
112
|
+
root_span_id=uuid_root_span_id,
|
|
114
113
|
)
|
|
115
114
|
|
|
116
115
|
exported = components.to_str()
|
|
@@ -133,9 +132,9 @@ class TestSpanComponentsV4:
|
|
|
133
132
|
components = SpanComponentsV4(
|
|
134
133
|
object_type=SpanObjectTypeV3.EXPERIMENT,
|
|
135
134
|
object_id=uuid_object_id,
|
|
136
|
-
row_id=
|
|
135
|
+
row_id="test-row-id",
|
|
137
136
|
span_id=hex_span_id,
|
|
138
|
-
root_span_id=hex_trace_id
|
|
137
|
+
root_span_id=hex_trace_id,
|
|
139
138
|
)
|
|
140
139
|
|
|
141
140
|
exported = components.to_str()
|
|
@@ -162,10 +161,10 @@ class TestSpanComponentsV4:
|
|
|
162
161
|
# Create equivalent Python object
|
|
163
162
|
py_components = SpanComponentsV4(
|
|
164
163
|
object_type=SpanObjectTypeV3.EXPERIMENT,
|
|
165
|
-
object_id=
|
|
166
|
-
row_id=
|
|
167
|
-
span_id=
|
|
168
|
-
root_span_id=
|
|
164
|
+
object_id="js-test-experiment-id",
|
|
165
|
+
row_id="js-test-row-id",
|
|
166
|
+
span_id="abcdef1234567890",
|
|
167
|
+
root_span_id="fedcba0987654321fedcba0987654321",
|
|
169
168
|
)
|
|
170
169
|
|
|
171
170
|
# Python should generate the same slug
|
|
@@ -184,8 +183,8 @@ class TestSpanComponentsV4:
|
|
|
184
183
|
"""Test V4 with additional metadata."""
|
|
185
184
|
components = SpanComponentsV4(
|
|
186
185
|
object_type=SpanObjectTypeV3.PLAYGROUND_LOGS,
|
|
187
|
-
object_id=
|
|
188
|
-
propagated_event={"user": "test", "data": [1, 2, 3]}
|
|
186
|
+
object_id="test-session-id",
|
|
187
|
+
propagated_event={"user": "test", "data": [1, 2, 3]},
|
|
189
188
|
)
|
|
190
189
|
|
|
191
190
|
exported = components.to_str()
|
|
@@ -199,14 +198,14 @@ class TestSpanComponentsV4:
|
|
|
199
198
|
"""Test that non-UUID/hex strings are stored in JSON portion."""
|
|
200
199
|
components = SpanComponentsV4(
|
|
201
200
|
object_type=SpanObjectTypeV3.PROJECT_LOGS,
|
|
202
|
-
object_id=
|
|
201
|
+
object_id="not-a-uuid-or-hex", # Will be stored in JSON
|
|
203
202
|
# Don't test row_id alone - if present, span_id and root_span_id must also be present
|
|
204
203
|
)
|
|
205
204
|
|
|
206
205
|
exported = components.to_str()
|
|
207
206
|
imported = SpanComponentsV4.from_str(exported)
|
|
208
207
|
|
|
209
|
-
assert imported.object_id ==
|
|
208
|
+
assert imported.object_id == "not-a-uuid-or-hex"
|
|
210
209
|
|
|
211
210
|
|
|
212
211
|
class TestBackwardCompatibility:
|
|
@@ -221,7 +220,7 @@ class TestBackwardCompatibility:
|
|
|
221
220
|
row_id=str(uuid4()),
|
|
222
221
|
span_id=str(uuid4()),
|
|
223
222
|
root_span_id=str(uuid4()),
|
|
224
|
-
propagated_event={"version": "v3"}
|
|
223
|
+
propagated_event={"version": "v3"},
|
|
225
224
|
)
|
|
226
225
|
|
|
227
226
|
# Serialize with V3
|
|
@@ -238,7 +237,6 @@ class TestBackwardCompatibility:
|
|
|
238
237
|
assert v4_imported.propagated_event == v3_components.propagated_event
|
|
239
238
|
|
|
240
239
|
|
|
241
|
-
|
|
242
240
|
class TestErrorHandling:
|
|
243
241
|
"""Test error handling and edge cases."""
|
|
244
242
|
|
|
@@ -247,7 +245,7 @@ class TestErrorHandling:
|
|
|
247
245
|
with pytest.raises(AssertionError):
|
|
248
246
|
SpanComponentsV4(
|
|
249
247
|
object_type="invalid_type", # Should be SpanObjectTypeV3 enum
|
|
250
|
-
object_id="test-id"
|
|
248
|
+
object_id="test-id",
|
|
251
249
|
)
|
|
252
250
|
|
|
253
251
|
def test_missing_required_fields(self):
|
|
@@ -280,10 +278,7 @@ class TestErrorHandling:
|
|
|
280
278
|
import base64
|
|
281
279
|
|
|
282
280
|
# Create valid data then corrupt it
|
|
283
|
-
components = SpanComponentsV4(
|
|
284
|
-
object_type=SpanObjectTypeV3.PROJECT_LOGS,
|
|
285
|
-
object_id="test-id"
|
|
286
|
-
)
|
|
281
|
+
components = SpanComponentsV4(object_type=SpanObjectTypeV3.PROJECT_LOGS, object_id="test-id")
|
|
287
282
|
valid_exported = components.to_str()
|
|
288
283
|
|
|
289
284
|
# Decode, corrupt, re-encode
|
|
@@ -302,30 +297,21 @@ class TestObjectIdFields:
|
|
|
302
297
|
|
|
303
298
|
def test_experiment_object_id_fields(self):
|
|
304
299
|
"""Test object_id_fields for experiment type."""
|
|
305
|
-
components = SpanComponentsV4(
|
|
306
|
-
object_type=SpanObjectTypeV3.EXPERIMENT,
|
|
307
|
-
object_id="test-experiment-id"
|
|
308
|
-
)
|
|
300
|
+
components = SpanComponentsV4(object_type=SpanObjectTypeV3.EXPERIMENT, object_id="test-experiment-id")
|
|
309
301
|
|
|
310
302
|
fields = components.object_id_fields()
|
|
311
303
|
assert fields == {"experiment_id": "test-experiment-id"}
|
|
312
304
|
|
|
313
305
|
def test_project_logs_object_id_fields(self):
|
|
314
306
|
"""Test object_id_fields for project_logs type."""
|
|
315
|
-
components = SpanComponentsV4(
|
|
316
|
-
object_type=SpanObjectTypeV3.PROJECT_LOGS,
|
|
317
|
-
object_id="test-project-id"
|
|
318
|
-
)
|
|
307
|
+
components = SpanComponentsV4(object_type=SpanObjectTypeV3.PROJECT_LOGS, object_id="test-project-id")
|
|
319
308
|
|
|
320
309
|
fields = components.object_id_fields()
|
|
321
310
|
assert fields == {"project_id": "test-project-id", "log_id": "g"}
|
|
322
311
|
|
|
323
312
|
def test_playground_logs_object_id_fields(self):
|
|
324
313
|
"""Test object_id_fields for playground_logs type."""
|
|
325
|
-
components = SpanComponentsV4(
|
|
326
|
-
object_type=SpanObjectTypeV3.PLAYGROUND_LOGS,
|
|
327
|
-
object_id="test-session-id"
|
|
328
|
-
)
|
|
314
|
+
components = SpanComponentsV4(object_type=SpanObjectTypeV3.PLAYGROUND_LOGS, object_id="test-session-id")
|
|
329
315
|
|
|
330
316
|
fields = components.object_id_fields()
|
|
331
317
|
assert fields == {"prompt_session_id": "test-session-id", "log_id": "x"}
|
|
@@ -333,8 +319,7 @@ class TestObjectIdFields:
|
|
|
333
319
|
def test_object_id_fields_without_object_id(self):
|
|
334
320
|
"""Test that object_id_fields raises error without object_id."""
|
|
335
321
|
components = SpanComponentsV4(
|
|
336
|
-
object_type=SpanObjectTypeV3.PROJECT_LOGS,
|
|
337
|
-
compute_object_metadata_args={"key": "value"}
|
|
322
|
+
object_type=SpanObjectTypeV3.PROJECT_LOGS, compute_object_metadata_args={"key": "value"}
|
|
338
323
|
)
|
|
339
324
|
|
|
340
325
|
with pytest.raises(Exception) as exc_info:
|
braintrust/util.py
CHANGED
|
@@ -2,8 +2,9 @@ import inspect
|
|
|
2
2
|
import sys
|
|
3
3
|
import threading
|
|
4
4
|
import urllib.parse
|
|
5
|
+
from collections.abc import Callable, Mapping
|
|
5
6
|
from dataclasses import dataclass
|
|
6
|
-
from typing import Any,
|
|
7
|
+
from typing import Any, Generic, Literal, TypedDict, TypeVar, Union
|
|
7
8
|
|
|
8
9
|
from requests import HTTPError, Response
|
|
9
10
|
|
|
@@ -29,8 +30,8 @@ def coalesce(*args):
|
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
def merge_dicts_with_paths(
|
|
32
|
-
merge_into:
|
|
33
|
-
) ->
|
|
33
|
+
merge_into: dict[str, Any], merge_from: Mapping[str, Any], path: tuple[str, ...], merge_paths: set[tuple[str]]
|
|
34
|
+
) -> dict[str, Any]:
|
|
34
35
|
"""Merges merge_from into merge_into, destructively updating merge_into. Does not merge any further than
|
|
35
36
|
merge_paths."""
|
|
36
37
|
|
|
@@ -50,7 +51,7 @@ def merge_dicts_with_paths(
|
|
|
50
51
|
return merge_into
|
|
51
52
|
|
|
52
53
|
|
|
53
|
-
def merge_dicts(merge_into:
|
|
54
|
+
def merge_dicts(merge_into: dict[str, Any], merge_from: Mapping[str, Any]) -> dict[str, Any]:
|
|
54
55
|
"""Merges merge_from into merge_into, destructively updating merge_into."""
|
|
55
56
|
|
|
56
57
|
return merge_dicts_with_paths(merge_into, merge_from, (), set())
|
|
@@ -92,7 +93,7 @@ class CallerLocation(TypedDict):
|
|
|
92
93
|
caller_lineno: int
|
|
93
94
|
|
|
94
95
|
|
|
95
|
-
def get_caller_location() ->
|
|
96
|
+
def get_caller_location() -> CallerLocation | None:
|
|
96
97
|
frame = inspect.currentframe()
|
|
97
98
|
while frame:
|
|
98
99
|
frame = frame.f_back
|
|
@@ -145,7 +146,7 @@ class LazyValue(Generic[T]):
|
|
|
145
146
|
return self._state.has_succeeded
|
|
146
147
|
|
|
147
148
|
@property
|
|
148
|
-
def value(self) ->
|
|
149
|
+
def value(self) -> T | None:
|
|
149
150
|
return self._state.value if self._state.has_succeeded == True else None
|
|
150
151
|
|
|
151
152
|
def get(self) -> T:
|
|
@@ -167,7 +168,7 @@ class LazyValue(Generic[T]):
|
|
|
167
168
|
if self.mutex:
|
|
168
169
|
self.mutex.release()
|
|
169
170
|
|
|
170
|
-
def get_sync(self) ->
|
|
171
|
+
def get_sync(self) -> tuple[bool, T | None]:
|
|
171
172
|
"""Returns a tuple of (has_succeeded, value) without triggering evaluation."""
|
|
172
173
|
if self._state.has_succeeded:
|
|
173
174
|
# should be fine without the mutex check
|
|
@@ -206,7 +207,7 @@ def bt_iscoroutinefunction(f):
|
|
|
206
207
|
return inspect.iscoroutinefunction(f) or inspect.isasyncgenfunction(f) or getattr(f, BT_IS_ASYNC_ATTRIBUTE, False)
|
|
207
208
|
|
|
208
209
|
|
|
209
|
-
def add_azure_blob_headers(headers:
|
|
210
|
+
def add_azure_blob_headers(headers: dict[str, str], url: str) -> None:
|
|
210
211
|
# According to https://stackoverflow.com/questions/37824136/put-on-sas-blob-url-without-specifying-x-ms-blob-type-header,
|
|
211
212
|
# there is no way to avoid including this.
|
|
212
213
|
if "blob.core.windows.net" in url:
|
braintrust/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Shared utilities for Anthropic API wrappers."""
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Wrapper:
|
|
@@ -13,7 +13,7 @@ class Wrapper:
|
|
|
13
13
|
return getattr(self.__wrapped, name)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def extract_anthropic_usage(usage: Any) ->
|
|
16
|
+
def extract_anthropic_usage(usage: Any) -> dict[str, float]:
|
|
17
17
|
"""Extract and normalize usage metrics from Anthropic usage object or dict.
|
|
18
18
|
|
|
19
19
|
Converts Anthropic's usage format to Braintrust's standard token metric names.
|
|
@@ -29,7 +29,7 @@ def extract_anthropic_usage(usage: Any) -> Dict[str, float]:
|
|
|
29
29
|
- prompt_cached_tokens (from cache_read_input_tokens)
|
|
30
30
|
- prompt_cache_creation_tokens (from cache_creation_input_tokens)
|
|
31
31
|
"""
|
|
32
|
-
metrics:
|
|
32
|
+
metrics: dict[str, float] = {}
|
|
33
33
|
|
|
34
34
|
if not usage:
|
|
35
35
|
return metrics
|
|
@@ -73,7 +73,7 @@ def extract_anthropic_usage(usage: Any) -> Dict[str, float]:
|
|
|
73
73
|
return metrics
|
|
74
74
|
|
|
75
75
|
|
|
76
|
-
def finalize_anthropic_tokens(metrics:
|
|
76
|
+
def finalize_anthropic_tokens(metrics: dict[str, float]) -> dict[str, float]:
|
|
77
77
|
"""Finalize Anthropic token calculations.
|
|
78
78
|
|
|
79
79
|
Anthropic doesn't include cache tokens in the total, so we need to sum them.
|
|
@@ -21,7 +21,6 @@ Usage:
|
|
|
21
21
|
__all__ = ["setup_agno", "wrap_agent", "wrap_function_call", "wrap_model", "wrap_team"]
|
|
22
22
|
|
|
23
23
|
import logging
|
|
24
|
-
from typing import Optional
|
|
25
24
|
|
|
26
25
|
from braintrust.logger import NOOP_SPAN, current_span, init_logger
|
|
27
26
|
|
|
@@ -34,9 +33,9 @@ logger = logging.getLogger(__name__)
|
|
|
34
33
|
|
|
35
34
|
|
|
36
35
|
def setup_agno(
|
|
37
|
-
api_key:
|
|
38
|
-
project_id:
|
|
39
|
-
project_name:
|
|
36
|
+
api_key: str | None = None,
|
|
37
|
+
project_id: str | None = None,
|
|
38
|
+
project_name: str | None = None,
|
|
40
39
|
) -> bool:
|
|
41
40
|
"""
|
|
42
41
|
Setup Braintrust integration with Agno. Will automatically patch Agno agents, models, and function calls for tracing.
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from typing import Any
|
|
3
3
|
|
|
4
|
-
from wrapt import wrap_function_wrapper
|
|
5
|
-
|
|
6
4
|
from braintrust.logger import start_span
|
|
7
5
|
from braintrust.span_types import SpanTypeAttribute
|
|
6
|
+
from wrapt import wrap_function_wrapper
|
|
8
7
|
|
|
9
8
|
from .utils import (
|
|
10
9
|
_aggregate_agent_chunks,
|
|
@@ -5,10 +5,9 @@ ModelWrapper class for Braintrust-Agno model observability.
|
|
|
5
5
|
import time
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
-
from wrapt import wrap_function_wrapper
|
|
9
|
-
|
|
10
8
|
from braintrust.logger import start_span
|
|
11
9
|
from braintrust.span_types import SpanTypeAttribute
|
|
10
|
+
from wrapt import wrap_function_wrapper
|
|
12
11
|
|
|
13
12
|
from .utils import (
|
|
14
13
|
_aggregate_model_chunks,
|
braintrust/wrappers/agno/team.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from typing import Any
|
|
3
3
|
|
|
4
|
-
from wrapt import wrap_function_wrapper
|
|
5
|
-
|
|
6
4
|
from braintrust.logger import start_span
|
|
7
5
|
from braintrust.span_types import SpanTypeAttribute
|
|
6
|
+
from wrapt import wrap_function_wrapper
|
|
8
7
|
|
|
9
8
|
from .utils import (
|
|
10
9
|
_aggregate_agent_chunks,
|