braintrust 0.3.14__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/__init__.py +4 -0
- braintrust/_generated_types.py +1200 -611
- braintrust/audit.py +2 -2
- braintrust/cli/eval.py +6 -7
- braintrust/cli/push.py +11 -11
- braintrust/conftest.py +1 -0
- braintrust/context.py +12 -17
- braintrust/contrib/temporal/__init__.py +16 -27
- braintrust/contrib/temporal/test_temporal.py +8 -3
- braintrust/devserver/auth.py +8 -8
- braintrust/devserver/cache.py +3 -4
- braintrust/devserver/cors.py +8 -7
- braintrust/devserver/dataset.py +3 -5
- braintrust/devserver/eval_hooks.py +7 -6
- braintrust/devserver/schemas.py +22 -19
- braintrust/devserver/server.py +19 -12
- braintrust/devserver/test_cached_login.py +4 -4
- braintrust/framework.py +128 -140
- braintrust/framework2.py +88 -87
- braintrust/functions/invoke.py +93 -53
- braintrust/functions/stream.py +3 -2
- braintrust/generated_types.py +17 -1
- braintrust/git_fields.py +11 -11
- braintrust/gitutil.py +2 -3
- braintrust/graph_util.py +10 -10
- braintrust/id_gen.py +2 -2
- braintrust/logger.py +346 -357
- braintrust/merge_row_batch.py +10 -9
- braintrust/oai.py +107 -24
- braintrust/otel/__init__.py +49 -49
- braintrust/otel/context.py +16 -30
- braintrust/otel/test_distributed_tracing.py +14 -11
- braintrust/otel/test_otel_bt_integration.py +32 -31
- braintrust/parameters.py +8 -8
- braintrust/prompt.py +14 -14
- braintrust/prompt_cache/disk_cache.py +5 -4
- braintrust/prompt_cache/lru_cache.py +3 -2
- braintrust/prompt_cache/prompt_cache.py +13 -14
- braintrust/queue.py +4 -4
- braintrust/score.py +4 -4
- braintrust/serializable_data_class.py +4 -4
- braintrust/span_identifier_v1.py +1 -2
- braintrust/span_identifier_v2.py +3 -4
- braintrust/span_identifier_v3.py +23 -20
- braintrust/span_identifier_v4.py +34 -25
- braintrust/test_framework.py +16 -6
- braintrust/test_helpers.py +5 -5
- braintrust/test_id_gen.py +2 -3
- braintrust/test_otel.py +61 -53
- braintrust/test_queue.py +0 -1
- braintrust/test_score.py +1 -3
- braintrust/test_span_components.py +29 -44
- braintrust/util.py +9 -8
- braintrust/version.py +2 -2
- braintrust/wrappers/_anthropic_utils.py +4 -4
- braintrust/wrappers/agno/__init__.py +3 -4
- braintrust/wrappers/agno/agent.py +1 -2
- braintrust/wrappers/agno/function_call.py +1 -2
- braintrust/wrappers/agno/model.py +1 -2
- braintrust/wrappers/agno/team.py +1 -2
- braintrust/wrappers/agno/utils.py +12 -12
- braintrust/wrappers/anthropic.py +7 -8
- braintrust/wrappers/claude_agent_sdk/__init__.py +3 -4
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +29 -27
- braintrust/wrappers/dspy.py +15 -17
- braintrust/wrappers/google_genai/__init__.py +16 -16
- braintrust/wrappers/langchain.py +22 -24
- braintrust/wrappers/litellm.py +4 -3
- braintrust/wrappers/openai.py +15 -15
- braintrust/wrappers/pydantic_ai.py +1204 -0
- braintrust/wrappers/test_agno.py +0 -1
- braintrust/wrappers/test_dspy.py +0 -1
- braintrust/wrappers/test_google_genai.py +2 -3
- braintrust/wrappers/test_litellm.py +0 -1
- braintrust/wrappers/test_oai_attachments.py +322 -0
- braintrust/wrappers/test_pydantic_ai_integration.py +1788 -0
- braintrust/wrappers/{test_pydantic_ai.py → test_pydantic_ai_wrap_openai.py} +1 -2
- {braintrust-0.3.14.dist-info → braintrust-0.4.0.dist-info}/METADATA +3 -2
- braintrust-0.4.0.dist-info/RECORD +120 -0
- braintrust-0.3.14.dist-info/RECORD +0 -117
- {braintrust-0.3.14.dist-info → braintrust-0.4.0.dist-info}/WHEEL +0 -0
- {braintrust-0.3.14.dist-info → braintrust-0.4.0.dist-info}/entry_points.txt +0 -0
- {braintrust-0.3.14.dist-info → braintrust-0.4.0.dist-info}/top_level.txt +0 -0
braintrust/otel/context.py
CHANGED
|
@@ -3,23 +3,21 @@
|
|
|
3
3
|
import logging
|
|
4
4
|
from typing import Any, Optional
|
|
5
5
|
|
|
6
|
-
from opentelemetry import context, trace
|
|
7
|
-
from opentelemetry.trace import SpanContext, TraceFlags
|
|
8
|
-
|
|
9
6
|
from braintrust.context import ParentSpanIds, SpanInfo
|
|
10
7
|
from braintrust.logger import Span
|
|
8
|
+
from opentelemetry import context, trace
|
|
9
|
+
from opentelemetry.trace import SpanContext, TraceFlags
|
|
11
10
|
|
|
12
11
|
log = logging.getLogger(__name__)
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
|
|
16
14
|
class ContextManager:
|
|
17
15
|
"""Context manager that uses OTEL's built-in context as single storage."""
|
|
18
16
|
|
|
19
17
|
def __init__(self):
|
|
20
18
|
pass
|
|
21
19
|
|
|
22
|
-
def get_current_span_info(self) -> Optional[
|
|
20
|
+
def get_current_span_info(self) -> Optional["SpanInfo"]:
|
|
23
21
|
"""Get information about the currently active span from OTEL context."""
|
|
24
22
|
|
|
25
23
|
# Get the current span from OTEL context
|
|
@@ -35,25 +33,17 @@ class ContextManager:
|
|
|
35
33
|
if span_context and span_context.span_id != 0:
|
|
36
34
|
# Always prioritize the actual current OTEL span over stored BT span
|
|
37
35
|
# Only use stored BT span if the current OTEL span IS the BT span wrapper
|
|
38
|
-
bt_span = context.get_value(
|
|
36
|
+
bt_span = context.get_value("braintrust_span")
|
|
39
37
|
|
|
40
38
|
# If there's a BT span stored AND the current OTEL span is a NonRecordingSpan
|
|
41
39
|
# (which means it's our BT->OTEL wrapper), then return BT span info
|
|
42
|
-
if
|
|
43
|
-
return SpanInfo(
|
|
44
|
-
trace_id=bt_span.root_span_id,
|
|
45
|
-
span_id=bt_span.span_id,
|
|
46
|
-
span_object=bt_span
|
|
47
|
-
)
|
|
40
|
+
if bt_span and isinstance(current_span, trace.NonRecordingSpan):
|
|
41
|
+
return SpanInfo(trace_id=bt_span.root_span_id, span_id=bt_span.span_id, span_object=bt_span)
|
|
48
42
|
else:
|
|
49
43
|
# Return OTEL span info - this is a real OTEL span, not our wrapper
|
|
50
|
-
otel_trace_id = format(span_context.trace_id,
|
|
51
|
-
otel_span_id = format(span_context.span_id,
|
|
52
|
-
return SpanInfo(
|
|
53
|
-
trace_id=otel_trace_id,
|
|
54
|
-
span_id=otel_span_id,
|
|
55
|
-
span_object=current_span
|
|
56
|
-
)
|
|
44
|
+
otel_trace_id = format(span_context.trace_id, "032x")
|
|
45
|
+
otel_span_id = format(span_context.span_id, "016x")
|
|
46
|
+
return SpanInfo(trace_id=otel_trace_id, span_id=otel_span_id, span_object=current_span)
|
|
57
47
|
|
|
58
48
|
return None
|
|
59
49
|
|
|
@@ -61,11 +51,10 @@ class ContextManager:
|
|
|
61
51
|
"""Set the current active span in OTEL context."""
|
|
62
52
|
from opentelemetry import context, trace
|
|
63
53
|
|
|
64
|
-
if hasattr(span,
|
|
54
|
+
if hasattr(span, "get_span_context"):
|
|
65
55
|
# This is an OTEL span - it will manage its own context
|
|
66
56
|
return None
|
|
67
57
|
else:
|
|
68
|
-
|
|
69
58
|
try:
|
|
70
59
|
trace_id_int = int(span.root_span_id, 16)
|
|
71
60
|
except ValueError:
|
|
@@ -80,15 +69,12 @@ class ContextManager:
|
|
|
80
69
|
|
|
81
70
|
# This is a BT span - store it in OTEL context AND set as current OTEL span
|
|
82
71
|
# First store the BT span
|
|
83
|
-
ctx = context.set_value(
|
|
72
|
+
ctx = context.set_value("braintrust_span", span)
|
|
84
73
|
parent_value = span._get_otel_parent()
|
|
85
|
-
ctx = context.set_value(
|
|
74
|
+
ctx = context.set_value("braintrust.parent", parent_value, ctx)
|
|
86
75
|
|
|
87
76
|
otel_span_context = SpanContext(
|
|
88
|
-
trace_id=trace_id_int,
|
|
89
|
-
span_id=span_id_int,
|
|
90
|
-
is_remote=False,
|
|
91
|
-
trace_flags=TraceFlags(TraceFlags.SAMPLED)
|
|
77
|
+
trace_id=trace_id_int, span_id=span_id_int, is_remote=False, trace_flags=TraceFlags(TraceFlags.SAMPLED)
|
|
92
78
|
)
|
|
93
79
|
|
|
94
80
|
# Create a non-recording span to represent the BT span in OTEL context
|
|
@@ -110,9 +96,9 @@ class ContextManager:
|
|
|
110
96
|
else:
|
|
111
97
|
# No token means we need to explicitly clear the span
|
|
112
98
|
# This shouldn't normally happen, but handle it gracefully
|
|
113
|
-
context.attach(context.set_value(
|
|
99
|
+
context.attach(context.set_value("braintrust_span", None))
|
|
114
100
|
|
|
115
|
-
def get_parent_span_ids(self) ->
|
|
101
|
+
def get_parent_span_ids(self) -> ParentSpanIds | None:
|
|
116
102
|
"""Get parent information for creating a new BT span."""
|
|
117
103
|
span_info = self.get_current_span_info()
|
|
118
104
|
if not span_info:
|
|
@@ -125,4 +111,4 @@ class ContextManager:
|
|
|
125
111
|
|
|
126
112
|
def _is_otel_span(span: Any) -> bool:
|
|
127
113
|
"""Check if the span object is an OTEL span."""
|
|
128
|
-
return hasattr(span,
|
|
114
|
+
return hasattr(span, "get_span_context")
|
|
@@ -8,7 +8,6 @@ is exported from one service and imported in another service.
|
|
|
8
8
|
import os
|
|
9
9
|
|
|
10
10
|
import pytest
|
|
11
|
-
|
|
12
11
|
from braintrust.logger import _internal_with_memory_background_logger
|
|
13
12
|
from braintrust.otel import BraintrustSpanProcessor, context_from_span_export
|
|
14
13
|
from braintrust.test_helpers import init_test_logger, preserve_env_vars
|
|
@@ -19,6 +18,7 @@ try:
|
|
|
19
18
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
20
19
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
21
20
|
except ImportError:
|
|
21
|
+
|
|
22
22
|
class InMemorySpanExporter:
|
|
23
23
|
def __init__(self):
|
|
24
24
|
pass
|
|
@@ -47,10 +47,10 @@ def otel_fixture():
|
|
|
47
47
|
if not OTEL_AVAILABLE:
|
|
48
48
|
pytest.skip("OpenTelemetry not installed")
|
|
49
49
|
|
|
50
|
-
with preserve_env_vars(
|
|
50
|
+
with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_API_KEY"):
|
|
51
51
|
# Enable OTEL compatibility mode
|
|
52
|
-
os.environ[
|
|
53
|
-
os.environ[
|
|
52
|
+
os.environ["BRAINTRUST_OTEL_COMPAT"] = "true"
|
|
53
|
+
os.environ["BRAINTRUST_API_KEY"] = "test-api-key-for-fixture"
|
|
54
54
|
|
|
55
55
|
# Set up memory logger for BT spans
|
|
56
56
|
with _internal_with_memory_background_logger() as memory_logger:
|
|
@@ -103,6 +103,7 @@ def test_bt_to_otel_simple_distributed_trace(otel_fixture):
|
|
|
103
103
|
# ===== Service B: Import context and create OTEL child span =====
|
|
104
104
|
# Simulate receiving exported_context over network (e.g., in HTTP header)
|
|
105
105
|
from opentelemetry import context as otel_context
|
|
106
|
+
|
|
106
107
|
ctx = context_from_span_export(exported_context)
|
|
107
108
|
|
|
108
109
|
# Attach the context to make it current, then create the span
|
|
@@ -125,22 +126,24 @@ def test_bt_to_otel_simple_distributed_trace(otel_fixture):
|
|
|
125
126
|
service_b_exported = otel_spans[0]
|
|
126
127
|
|
|
127
128
|
# Convert OTEL IDs to hex for comparison
|
|
128
|
-
service_b_trace_id = format(service_b_exported.context.trace_id,
|
|
129
|
-
service_b_parent_span_id = format(service_b_exported.parent.span_id,
|
|
129
|
+
service_b_trace_id = format(service_b_exported.context.trace_id, "032x")
|
|
130
|
+
service_b_parent_span_id = format(service_b_exported.parent.span_id, "016x") if service_b_exported.parent else None
|
|
130
131
|
|
|
131
132
|
# Assert unified trace ID
|
|
132
|
-
assert service_a_trace_id == service_b_trace_id,
|
|
133
|
+
assert service_a_trace_id == service_b_trace_id, (
|
|
133
134
|
f"Trace IDs should match: {service_a_trace_id} != {service_b_trace_id}"
|
|
135
|
+
)
|
|
134
136
|
|
|
135
137
|
# Assert Service B span has Service A span as parent
|
|
136
|
-
assert service_b_parent_span_id == service_a_span_id,
|
|
138
|
+
assert service_b_parent_span_id == service_a_span_id, (
|
|
137
139
|
f"Service B parent should be Service A span: {service_b_parent_span_id} != {service_a_span_id}"
|
|
140
|
+
)
|
|
138
141
|
|
|
139
142
|
# Assert braintrust.parent attribute is set on OTEL span
|
|
140
|
-
assert "braintrust.parent" in service_b_exported.attributes,
|
|
141
|
-
|
|
142
|
-
assert service_b_exported.attributes["braintrust.parent"] == f"project_name:{project_name}", \
|
|
143
|
+
assert "braintrust.parent" in service_b_exported.attributes, "OTEL span should have braintrust.parent attribute"
|
|
144
|
+
assert service_b_exported.attributes["braintrust.parent"] == f"project_name:{project_name}", (
|
|
143
145
|
f"braintrust.parent should be 'project_name:{project_name}'"
|
|
146
|
+
)
|
|
144
147
|
|
|
145
148
|
|
|
146
149
|
if __name__ == "__main__":
|
|
@@ -8,7 +8,6 @@ when created in mixed contexts.
|
|
|
8
8
|
import os
|
|
9
9
|
|
|
10
10
|
import pytest
|
|
11
|
-
|
|
12
11
|
from braintrust import current_span
|
|
13
12
|
from braintrust.logger import _internal_with_memory_background_logger
|
|
14
13
|
from braintrust.otel import BraintrustSpanProcessor
|
|
@@ -20,6 +19,7 @@ try:
|
|
|
20
19
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
21
20
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
|
22
21
|
except ImportError:
|
|
22
|
+
|
|
23
23
|
class InMemorySpanExporter:
|
|
24
24
|
def __init__(self):
|
|
25
25
|
pass
|
|
@@ -44,17 +44,17 @@ class OtelFixture:
|
|
|
44
44
|
|
|
45
45
|
@pytest.fixture
|
|
46
46
|
def otel_fixture():
|
|
47
|
-
"""
|
|
48
|
-
|
|
47
|
+
"""otel fixture configures everything we need to run mixed otel/bt tracing tests
|
|
48
|
+
that export to memory.
|
|
49
49
|
"""
|
|
50
50
|
if not OTEL_AVAILABLE:
|
|
51
51
|
pytest.skip("OpenTelemetry not installed")
|
|
52
52
|
|
|
53
|
-
with preserve_env_vars(
|
|
53
|
+
with preserve_env_vars("BRAINTRUST_OTEL_COMPAT", "BRAINTRUST_API_KEY"):
|
|
54
54
|
# 1. Set environment variable first
|
|
55
|
-
os.environ[
|
|
55
|
+
os.environ["BRAINTRUST_OTEL_COMPAT"] = "true"
|
|
56
56
|
# Set dummy API key for tests
|
|
57
|
-
os.environ[
|
|
57
|
+
os.environ["BRAINTRUST_API_KEY"] = "test-api-key-for-fixture"
|
|
58
58
|
|
|
59
59
|
# 2. Set up memory logger with proper context manager
|
|
60
60
|
with _internal_with_memory_background_logger() as memory_logger:
|
|
@@ -109,8 +109,8 @@ def test_mixed_otel_bt_tracing_with_bt_logger_first(otel_fixture):
|
|
|
109
109
|
s2 = otel_spans_by_name["2"]
|
|
110
110
|
|
|
111
111
|
# Verify unified trace IDs - convert OTEL trace to hex string for comparison
|
|
112
|
-
s2_trace_id = format(s2.context.trace_id,
|
|
113
|
-
s2_span_id = format(s2.context.span_id,
|
|
112
|
+
s2_trace_id = format(s2.context.trace_id, "032x")
|
|
113
|
+
s2_span_id = format(s2.context.span_id, "016x")
|
|
114
114
|
|
|
115
115
|
assert s1["root_span_id"] == s2_trace_id
|
|
116
116
|
assert s1["root_span_id"] == s3["root_span_id"]
|
|
@@ -151,8 +151,8 @@ def test_mixed_otel_bt_tracing_with_experiment_parent(otel_fixture):
|
|
|
151
151
|
s1, s2, s3 = spans_by_name["1"], spans_by_name["2"], spans_by_name["3"]
|
|
152
152
|
|
|
153
153
|
# Verify unified trace IDs - convert OTEL trace to hex string for comparison
|
|
154
|
-
s2_trace_id = format(s2.context.trace_id,
|
|
155
|
-
s2_span_id = format(s2.context.span_id,
|
|
154
|
+
s2_trace_id = format(s2.context.trace_id, "032x")
|
|
155
|
+
s2_span_id = format(s2.context.span_id, "016x")
|
|
156
156
|
|
|
157
157
|
assert s1["root_span_id"] == s2_trace_id
|
|
158
158
|
assert s1["root_span_id"] == s3["root_span_id"]
|
|
@@ -193,10 +193,10 @@ def test_mixed_otel_bt_tracing_with_otel_first(otel_fixture):
|
|
|
193
193
|
s1, s2, s3 = spans_by_name["1"], spans_by_name["2"], spans_by_name["3"]
|
|
194
194
|
|
|
195
195
|
# Verify unified trace IDs - convert OTEL traces to hex string for comparison
|
|
196
|
-
s1_trace_id = format(s1.context.trace_id,
|
|
197
|
-
s1_span_id = format(s1.context.span_id,
|
|
198
|
-
s3_trace_id = format(s3.context.trace_id,
|
|
199
|
-
s3_span_id = format(s3.context.span_id,
|
|
196
|
+
s1_trace_id = format(s1.context.trace_id, "032x")
|
|
197
|
+
s1_span_id = format(s1.context.span_id, "016x")
|
|
198
|
+
s3_trace_id = format(s3.context.trace_id, "032x")
|
|
199
|
+
s3_span_id = format(s3.context.span_id, "016x")
|
|
200
200
|
|
|
201
201
|
assert s1_trace_id == s2["root_span_id"]
|
|
202
202
|
assert s1_trace_id == s3_trace_id
|
|
@@ -222,14 +222,14 @@ def test_separate_traces_should_not_be_unified(otel_fixture):
|
|
|
222
222
|
# Second trace: OTEL only
|
|
223
223
|
trace2_spans = []
|
|
224
224
|
with tracer.start_as_current_span("otel_trace2") as otel_span2:
|
|
225
|
-
trace2_id = format(otel_span2.context.trace_id,
|
|
225
|
+
trace2_id = format(otel_span2.context.trace_id, "032x")
|
|
226
226
|
trace2_spans.append(trace2_id)
|
|
227
227
|
otel_span2.set_attribute("test", "second_trace")
|
|
228
228
|
|
|
229
229
|
# Third trace: OTEL root with BT child
|
|
230
230
|
trace3_spans = []
|
|
231
231
|
with tracer.start_as_current_span("otel_trace3_root") as otel_span3:
|
|
232
|
-
otel3_trace_id = format(otel_span3.context.trace_id,
|
|
232
|
+
otel3_trace_id = format(otel_span3.context.trace_id, "032x")
|
|
233
233
|
trace3_spans.append(otel3_trace_id)
|
|
234
234
|
|
|
235
235
|
# BT span inside OTEL - should inherit OTEL trace ID, not previous BT trace
|
|
@@ -286,28 +286,28 @@ def test_otel_spans_inherit_parent_attribute(otel_fixture):
|
|
|
286
286
|
assert len(bt_spans) == 1
|
|
287
287
|
|
|
288
288
|
|
|
289
|
-
|
|
290
289
|
def test_uses_braintrust_context_manager_when_otel_disabled():
|
|
291
290
|
"""Test that BraintrustContextManager is used when OTEL is not enabled."""
|
|
292
291
|
# Ensure OTEL is disabled
|
|
293
|
-
os.environ.pop(
|
|
292
|
+
os.environ.pop("BRAINTRUST_OTEL_COMPAT", None)
|
|
294
293
|
|
|
295
294
|
try:
|
|
296
295
|
from braintrust.context import get_context_manager
|
|
296
|
+
|
|
297
297
|
cm = get_context_manager()
|
|
298
298
|
|
|
299
299
|
# Should be BraintrustContextManager, not OTEL ContextManager
|
|
300
300
|
assert type(cm).__name__ == "BraintrustContextManager"
|
|
301
301
|
|
|
302
302
|
# Verify it has the expected interface
|
|
303
|
-
assert hasattr(cm,
|
|
304
|
-
assert hasattr(cm,
|
|
305
|
-
assert hasattr(cm,
|
|
306
|
-
assert hasattr(cm,
|
|
303
|
+
assert hasattr(cm, "get_current_span_info")
|
|
304
|
+
assert hasattr(cm, "get_parent_span_ids")
|
|
305
|
+
assert hasattr(cm, "set_current_span")
|
|
306
|
+
assert hasattr(cm, "unset_current_span")
|
|
307
307
|
|
|
308
308
|
finally:
|
|
309
309
|
# Clean up - remove any environment variable we might have set
|
|
310
|
-
os.environ.pop(
|
|
310
|
+
os.environ.pop("BRAINTRUST_OTEL_COMPAT", None)
|
|
311
311
|
|
|
312
312
|
|
|
313
313
|
def test_uses_otel_context_manager_when_enabled():
|
|
@@ -315,21 +315,22 @@ def test_uses_otel_context_manager_when_enabled():
|
|
|
315
315
|
if not OTEL_AVAILABLE:
|
|
316
316
|
pytest.skip("OpenTelemetry not installed")
|
|
317
317
|
|
|
318
|
-
with preserve_env_vars(
|
|
318
|
+
with preserve_env_vars("BRAINTRUST_OTEL_COMPAT"):
|
|
319
319
|
# Enable OTEL
|
|
320
|
-
os.environ[
|
|
320
|
+
os.environ["BRAINTRUST_OTEL_COMPAT"] = "true"
|
|
321
321
|
|
|
322
322
|
from braintrust.context import get_context_manager
|
|
323
|
+
|
|
323
324
|
cm = get_context_manager()
|
|
324
325
|
|
|
325
326
|
# Should be OTEL ContextManager, not BraintrustContextManager
|
|
326
327
|
assert type(cm).__name__ == "ContextManager"
|
|
327
328
|
|
|
328
329
|
# Verify it has the expected interface
|
|
329
|
-
assert hasattr(cm,
|
|
330
|
-
assert hasattr(cm,
|
|
331
|
-
assert hasattr(cm,
|
|
332
|
-
assert hasattr(cm,
|
|
330
|
+
assert hasattr(cm, "get_current_span_info")
|
|
331
|
+
assert hasattr(cm, "get_parent_span_ids")
|
|
332
|
+
assert hasattr(cm, "set_current_span")
|
|
333
|
+
assert hasattr(cm, "unset_current_span")
|
|
333
334
|
|
|
334
335
|
|
|
335
336
|
def test_bt_span_without_explicit_parent_inherits_from_otel(otel_fixture):
|
|
@@ -359,8 +360,8 @@ def test_bt_span_without_explicit_parent_inherits_from_otel(otel_fixture):
|
|
|
359
360
|
otel_parent = otel_spans[0]
|
|
360
361
|
|
|
361
362
|
# Convert OTEL IDs to hex for comparison
|
|
362
|
-
otel_trace_id = format(otel_parent.context.trace_id,
|
|
363
|
-
otel_span_id = format(otel_parent.context.span_id,
|
|
363
|
+
otel_trace_id = format(otel_parent.context.trace_id, "032x")
|
|
364
|
+
otel_span_id = format(otel_parent.context.span_id, "016x")
|
|
364
365
|
|
|
365
366
|
# BT span should have inherited OTEL parent's trace ID as root_span_id
|
|
366
367
|
assert bt_child["root_span_id"] == otel_trace_id
|
braintrust/parameters.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Evaluation parameters support for Python SDK."""
|
|
2
2
|
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, TypedDict
|
|
4
4
|
|
|
5
5
|
from typing_extensions import NotRequired
|
|
6
6
|
|
|
@@ -12,17 +12,17 @@ class PromptParameter(TypedDict):
|
|
|
12
12
|
"""A prompt parameter specification."""
|
|
13
13
|
|
|
14
14
|
type: str # Literal["prompt"] but using str for flexibility
|
|
15
|
-
default: NotRequired[
|
|
16
|
-
description: NotRequired[
|
|
15
|
+
default: NotRequired[PromptData | None]
|
|
16
|
+
description: NotRequired[str | None]
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
# EvalParameters is a dict where values can be either:
|
|
20
20
|
# - A PromptParameter (dict with type="prompt")
|
|
21
21
|
# - A pydantic model class (typed as Any for now)
|
|
22
|
-
EvalParameters =
|
|
22
|
+
EvalParameters = dict[str, PromptParameter | Any]
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
def _pydantic_to_json_schema(model: Any) ->
|
|
25
|
+
def _pydantic_to_json_schema(model: Any) -> dict[str, Any]:
|
|
26
26
|
"""Convert a pydantic model to JSON schema."""
|
|
27
27
|
if hasattr(model, "model_json_schema"):
|
|
28
28
|
# pydantic 2
|
|
@@ -35,9 +35,9 @@ def _pydantic_to_json_schema(model: Any) -> Dict[str, Any]:
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
def validate_parameters(
|
|
38
|
-
parameters:
|
|
38
|
+
parameters: dict[str, Any],
|
|
39
39
|
parameter_schema: EvalParameters,
|
|
40
|
-
) ->
|
|
40
|
+
) -> dict[str, Any]:
|
|
41
41
|
"""
|
|
42
42
|
Validate parameters against the schema.
|
|
43
43
|
|
|
@@ -120,7 +120,7 @@ def validate_parameters(
|
|
|
120
120
|
return result
|
|
121
121
|
|
|
122
122
|
|
|
123
|
-
def parameters_to_json_schema(parameters: EvalParameters) ->
|
|
123
|
+
def parameters_to_json_schema(parameters: EvalParameters) -> dict[str, Any]:
|
|
124
124
|
"""
|
|
125
125
|
Convert EvalParameters to JSON schema format for serialization.
|
|
126
126
|
|
braintrust/prompt.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Literal, Union
|
|
3
3
|
|
|
4
4
|
from .generated_types import PromptOptions
|
|
5
5
|
from .serializable_data_class import SerializableDataClass
|
|
@@ -46,17 +46,17 @@ class ImagePart(SerializableDataClass):
|
|
|
46
46
|
|
|
47
47
|
@dataclass
|
|
48
48
|
class PromptMessage(SerializableDataClass):
|
|
49
|
-
content:
|
|
49
|
+
content: str | list[TextPart | ImagePart]
|
|
50
50
|
role: Literal["system", "user", "assistant", "function", "tool", "model"]
|
|
51
|
-
name:
|
|
52
|
-
function_call:
|
|
53
|
-
tool_calls:
|
|
51
|
+
name: str | None = None
|
|
52
|
+
function_call: str | FunctionCall | None = None
|
|
53
|
+
tool_calls: list[ToolCall] | None = None
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
@dataclass
|
|
57
57
|
class PromptChatBlock(SerializableDataClass):
|
|
58
|
-
messages:
|
|
59
|
-
tools:
|
|
58
|
+
messages: list[PromptMessage]
|
|
59
|
+
tools: str | None = None
|
|
60
60
|
type: Literal["chat"] = "chat"
|
|
61
61
|
|
|
62
62
|
|
|
@@ -65,20 +65,20 @@ PromptBlockData = Union[PromptCompletionBlock, PromptChatBlock]
|
|
|
65
65
|
|
|
66
66
|
@dataclass
|
|
67
67
|
class PromptData(SerializableDataClass):
|
|
68
|
-
prompt:
|
|
69
|
-
options:
|
|
68
|
+
prompt: PromptBlockData | None = None
|
|
69
|
+
options: PromptOptions | None = None
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
@dataclass
|
|
73
73
|
class PromptSchema(SerializableDataClass):
|
|
74
|
-
id:
|
|
75
|
-
project_id:
|
|
76
|
-
_xact_id:
|
|
74
|
+
id: str | None
|
|
75
|
+
project_id: str | None
|
|
76
|
+
_xact_id: str | None
|
|
77
77
|
name: str
|
|
78
78
|
slug: str
|
|
79
|
-
description:
|
|
79
|
+
description: str | None
|
|
80
80
|
prompt_data: PromptData
|
|
81
|
-
tags:
|
|
81
|
+
tags: list[str] | None
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
BRAINTRUST_PARAMS = ["use_cache"]
|
|
@@ -12,7 +12,8 @@ import hashlib
|
|
|
12
12
|
import json
|
|
13
13
|
import logging
|
|
14
14
|
import os
|
|
15
|
-
from
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
from typing import Any, Generic, TypeVar
|
|
16
17
|
|
|
17
18
|
T = TypeVar("T")
|
|
18
19
|
|
|
@@ -36,9 +37,9 @@ class DiskCache(Generic[T]):
|
|
|
36
37
|
def __init__(
|
|
37
38
|
self,
|
|
38
39
|
cache_dir: str,
|
|
39
|
-
max_size:
|
|
40
|
-
serializer:
|
|
41
|
-
deserializer:
|
|
40
|
+
max_size: int | None = None,
|
|
41
|
+
serializer: Callable[[T], Any] | None = None,
|
|
42
|
+
deserializer: Callable[[Any], T] | None = None,
|
|
42
43
|
log_warnings: bool = True,
|
|
43
44
|
mkdirs: bool = True,
|
|
44
45
|
):
|
|
@@ -7,7 +7,8 @@ used items when it reaches a maximum size. The implementation uses an OrderedDic
|
|
|
7
7
|
for O(1) access and update operations.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from
|
|
10
|
+
from collections import OrderedDict
|
|
11
|
+
from typing import Generic, TypeVar
|
|
11
12
|
|
|
12
13
|
K = TypeVar("K")
|
|
13
14
|
V = TypeVar("V")
|
|
@@ -28,7 +29,7 @@ class LRUCache(Generic[K, V]):
|
|
|
28
29
|
If not specified, the cache will grow unbounded.
|
|
29
30
|
"""
|
|
30
31
|
|
|
31
|
-
def __init__(self, max_size:
|
|
32
|
+
def __init__(self, max_size: int | None = None):
|
|
32
33
|
self._cache: OrderedDict[K, V] = OrderedDict()
|
|
33
34
|
self._max_size = max_size
|
|
34
35
|
|
|
@@ -9,18 +9,17 @@ This allows for efficient prompt retrieval while maintaining persistence across
|
|
|
9
9
|
The cache is keyed by project identifier (ID or name), prompt slug, and version.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
from typing import Optional
|
|
13
12
|
|
|
14
13
|
from braintrust import prompt
|
|
15
14
|
from braintrust.prompt_cache import disk_cache, lru_cache
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
def _create_cache_key(
|
|
19
|
-
project_id:
|
|
20
|
-
project_name:
|
|
21
|
-
slug:
|
|
18
|
+
project_id: str | None,
|
|
19
|
+
project_name: str | None,
|
|
20
|
+
slug: str | None,
|
|
22
21
|
version: str = "latest",
|
|
23
|
-
id:
|
|
22
|
+
id: str | None = None,
|
|
24
23
|
) -> str:
|
|
25
24
|
"""Creates a unique cache key from project identifier, slug and version, or from ID."""
|
|
26
25
|
if id:
|
|
@@ -47,7 +46,7 @@ class PromptCache:
|
|
|
47
46
|
def __init__(
|
|
48
47
|
self,
|
|
49
48
|
memory_cache: lru_cache.LRUCache[str, prompt.PromptSchema],
|
|
50
|
-
disk_cache:
|
|
49
|
+
disk_cache: disk_cache.DiskCache[prompt.PromptSchema] | None = None,
|
|
51
50
|
):
|
|
52
51
|
"""
|
|
53
52
|
Initialize the prompt cache.
|
|
@@ -61,11 +60,11 @@ class PromptCache:
|
|
|
61
60
|
|
|
62
61
|
def get(
|
|
63
62
|
self,
|
|
64
|
-
slug:
|
|
63
|
+
slug: str | None = None,
|
|
65
64
|
version: str = "latest",
|
|
66
|
-
project_id:
|
|
67
|
-
project_name:
|
|
68
|
-
id:
|
|
65
|
+
project_id: str | None = None,
|
|
66
|
+
project_name: str | None = None,
|
|
67
|
+
id: str | None = None,
|
|
69
68
|
) -> prompt.PromptSchema:
|
|
70
69
|
"""
|
|
71
70
|
Retrieve a prompt from the cache.
|
|
@@ -107,11 +106,11 @@ class PromptCache:
|
|
|
107
106
|
def set(
|
|
108
107
|
self,
|
|
109
108
|
value: prompt.PromptSchema,
|
|
110
|
-
slug:
|
|
109
|
+
slug: str | None = None,
|
|
111
110
|
version: str = "latest",
|
|
112
|
-
project_id:
|
|
113
|
-
project_name:
|
|
114
|
-
id:
|
|
111
|
+
project_id: str | None = None,
|
|
112
|
+
project_name: str | None = None,
|
|
113
|
+
id: str | None = None,
|
|
115
114
|
) -> None:
|
|
116
115
|
"""
|
|
117
116
|
Store a prompt in the cache.
|
braintrust/queue.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import threading
|
|
2
2
|
from collections import deque
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import TypeVar
|
|
4
4
|
|
|
5
5
|
from .util import eprint
|
|
6
6
|
|
|
@@ -46,7 +46,7 @@ class LogQueue:
|
|
|
46
46
|
with self._mutex:
|
|
47
47
|
self._enforce_size_limit = enforce
|
|
48
48
|
|
|
49
|
-
def put(self, item: T) ->
|
|
49
|
+
def put(self, item: T) -> list[T]:
|
|
50
50
|
"""
|
|
51
51
|
Put an item in the queue.
|
|
52
52
|
|
|
@@ -76,7 +76,7 @@ class LogQueue:
|
|
|
76
76
|
|
|
77
77
|
return dropped
|
|
78
78
|
|
|
79
|
-
def drain_all(self) ->
|
|
79
|
+
def drain_all(self) -> list[T]:
|
|
80
80
|
"""
|
|
81
81
|
Drain all items from the queue.
|
|
82
82
|
|
|
@@ -105,7 +105,7 @@ class LogQueue:
|
|
|
105
105
|
"""
|
|
106
106
|
return len(self._queue)
|
|
107
107
|
|
|
108
|
-
def wait_for_items(self, timeout:
|
|
108
|
+
def wait_for_items(self, timeout: float | None = None) -> bool:
|
|
109
109
|
"""
|
|
110
110
|
Will block until the queue has at least one item in it. Might be empty by the time
|
|
111
111
|
you read though.
|
braintrust/score.py
CHANGED
|
@@ -2,7 +2,7 @@ import dataclasses
|
|
|
2
2
|
import inspect
|
|
3
3
|
import warnings
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any
|
|
6
6
|
|
|
7
7
|
from .serializable_data_class import SerializableDataClass
|
|
8
8
|
|
|
@@ -22,14 +22,14 @@ class Score(SerializableDataClass):
|
|
|
22
22
|
name: str
|
|
23
23
|
"""The name of the score. This should be a unique name for the scorer."""
|
|
24
24
|
|
|
25
|
-
score:
|
|
25
|
+
score: float | None = None
|
|
26
26
|
"""The score for the evaluation. This should be a float between 0 and 1. If the score is None, the evaluation is considered to be skipped."""
|
|
27
27
|
|
|
28
|
-
metadata:
|
|
28
|
+
metadata: dict[str, Any] = dataclasses.field(default_factory=dict)
|
|
29
29
|
"""Metadata for the score. This can be used to store additional information about the score."""
|
|
30
30
|
|
|
31
31
|
# DEPRECATION_NOTICE: this field is deprecated, as errors are propagated up to the caller.
|
|
32
|
-
error:
|
|
32
|
+
error: Exception | None = None
|
|
33
33
|
"""Deprecated: The error field is deprecated, as errors are now propagated to the caller. The field will be removed in a future version of the library."""
|
|
34
34
|
|
|
35
35
|
def as_dict(self):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import json
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Union, get_origin
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class SerializableDataClass:
|
|
@@ -16,15 +16,15 @@ class SerializableDataClass:
|
|
|
16
16
|
return getattr(self, item)
|
|
17
17
|
|
|
18
18
|
@classmethod
|
|
19
|
-
def from_dict(cls, d:
|
|
19
|
+
def from_dict(cls, d: dict):
|
|
20
20
|
"""Deserialize the object from a dictionary. This method
|
|
21
21
|
is shallow and will not call from_dict() on nested objects."""
|
|
22
|
-
fields =
|
|
22
|
+
fields = {f.name for f in dataclasses.fields(cls)}
|
|
23
23
|
filtered = {k: v for k, v in d.items() if k in fields}
|
|
24
24
|
return cls(**filtered)
|
|
25
25
|
|
|
26
26
|
@classmethod
|
|
27
|
-
def from_dict_deep(cls, d:
|
|
27
|
+
def from_dict_deep(cls, d: dict):
|
|
28
28
|
"""Deserialize the object from a dictionary. This method
|
|
29
29
|
is deep and will call from_dict_deep() on nested objects."""
|
|
30
30
|
fields = {f.name: f for f in dataclasses.fields(cls)}
|