braintrust 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/__init__.py +3 -0
- braintrust/_generated_types.py +106 -6
- braintrust/auto.py +179 -0
- braintrust/conftest.py +23 -4
- braintrust/framework.py +113 -3
- braintrust/functions/invoke.py +3 -1
- braintrust/functions/test_invoke.py +61 -0
- braintrust/generated_types.py +7 -1
- braintrust/logger.py +127 -45
- braintrust/oai.py +51 -0
- braintrust/span_cache.py +337 -0
- braintrust/span_identifier_v3.py +21 -0
- braintrust/test_bt_json.py +0 -5
- braintrust/test_framework.py +37 -0
- braintrust/test_http.py +444 -0
- braintrust/test_logger.py +295 -5
- braintrust/test_span_cache.py +344 -0
- braintrust/test_trace.py +267 -0
- braintrust/test_util.py +58 -1
- braintrust/trace.py +385 -0
- braintrust/util.py +20 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/agno/__init__.py +2 -3
- braintrust/wrappers/anthropic.py +64 -0
- braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +115 -0
- braintrust/wrappers/dspy.py +52 -1
- braintrust/wrappers/google_genai/__init__.py +9 -6
- braintrust/wrappers/litellm.py +6 -43
- braintrust/wrappers/pydantic_ai.py +2 -3
- braintrust/wrappers/test_agno.py +9 -0
- braintrust/wrappers/test_anthropic.py +156 -0
- braintrust/wrappers/test_dspy.py +117 -0
- braintrust/wrappers/test_google_genai.py +9 -0
- braintrust/wrappers/test_litellm.py +57 -55
- braintrust/wrappers/test_openai.py +253 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
- braintrust/wrappers/test_utils.py +79 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/RECORD +44 -37
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0
|
@@ -23,6 +23,7 @@ from braintrust.wrappers.claude_agent_sdk._wrapper import (
|
|
|
23
23
|
_create_client_wrapper_class,
|
|
24
24
|
_create_tool_wrapper_class,
|
|
25
25
|
)
|
|
26
|
+
from braintrust.wrappers.test_utils import verify_autoinstrument_script
|
|
26
27
|
|
|
27
28
|
PROJECT_NAME = "test-claude-agent-sdk"
|
|
28
29
|
TEST_MODEL = "claude-haiku-4-5-20251001"
|
|
@@ -177,3 +178,117 @@ async def test_calculator_with_multiple_operations(memory_logger):
|
|
|
177
178
|
if span["span_id"] != root_span_id:
|
|
178
179
|
assert span["root_span_id"] == root_span_id
|
|
179
180
|
assert root_span_id in span["span_parents"]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _make_message(content: str) -> dict:
|
|
184
|
+
"""Create a streaming format message dict."""
|
|
185
|
+
return {"type": "user", "message": {"role": "user", "content": content}}
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _assert_structured_input(task_span: dict, expected_contents: list[str]) -> None:
|
|
189
|
+
"""Assert that task span input is a structured list with expected content."""
|
|
190
|
+
inp = task_span.get("input")
|
|
191
|
+
assert isinstance(inp, list), f"Expected list input, got {type(inp).__name__}: {inp}"
|
|
192
|
+
assert [x["message"]["content"] for x in inp] == expected_contents
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class CustomAsyncIterable:
|
|
196
|
+
"""Custom AsyncIterable class (not a generator) for testing."""
|
|
197
|
+
|
|
198
|
+
def __init__(self, messages: list[dict]):
|
|
199
|
+
self._messages = messages
|
|
200
|
+
|
|
201
|
+
def __aiter__(self):
|
|
202
|
+
return CustomAsyncIterator(self._messages)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class CustomAsyncIterator:
|
|
206
|
+
"""Iterator for CustomAsyncIterable."""
|
|
207
|
+
|
|
208
|
+
def __init__(self, messages: list[dict]):
|
|
209
|
+
self._messages = messages
|
|
210
|
+
self._index = 0
|
|
211
|
+
|
|
212
|
+
async def __anext__(self):
|
|
213
|
+
if self._index >= len(self._messages):
|
|
214
|
+
raise StopAsyncIteration
|
|
215
|
+
msg = self._messages[self._index]
|
|
216
|
+
self._index += 1
|
|
217
|
+
return msg
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed")
|
|
221
|
+
@pytest.mark.asyncio
|
|
222
|
+
@pytest.mark.parametrize(
|
|
223
|
+
"input_factory,expected_contents",
|
|
224
|
+
[
|
|
225
|
+
pytest.param(
|
|
226
|
+
lambda: (msg async for msg in _single_message_generator()),
|
|
227
|
+
["What is 2 + 2?"],
|
|
228
|
+
id="asyncgen_single",
|
|
229
|
+
),
|
|
230
|
+
pytest.param(
|
|
231
|
+
lambda: (msg async for msg in _multi_message_generator()),
|
|
232
|
+
["Part 1", "Part 2"],
|
|
233
|
+
id="asyncgen_multi",
|
|
234
|
+
),
|
|
235
|
+
pytest.param(
|
|
236
|
+
lambda: CustomAsyncIterable([_make_message("Custom 1"), _make_message("Custom 2")]),
|
|
237
|
+
["Custom 1", "Custom 2"],
|
|
238
|
+
id="custom_async_iterable",
|
|
239
|
+
),
|
|
240
|
+
],
|
|
241
|
+
)
|
|
242
|
+
async def test_query_async_iterable(memory_logger, input_factory, expected_contents):
|
|
243
|
+
"""Test that async iterable inputs are captured as structured lists.
|
|
244
|
+
|
|
245
|
+
Verifies that passing AsyncIterable[dict] to query() results in the span
|
|
246
|
+
input showing the structured message list, not a flattened string or repr.
|
|
247
|
+
"""
|
|
248
|
+
assert not memory_logger.pop()
|
|
249
|
+
|
|
250
|
+
original_client = claude_agent_sdk.ClaudeSDKClient
|
|
251
|
+
claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client)
|
|
252
|
+
|
|
253
|
+
try:
|
|
254
|
+
options = claude_agent_sdk.ClaudeAgentOptions(model=TEST_MODEL)
|
|
255
|
+
|
|
256
|
+
async with claude_agent_sdk.ClaudeSDKClient(options=options) as client:
|
|
257
|
+
await client.query(input_factory())
|
|
258
|
+
async for message in client.receive_response():
|
|
259
|
+
if type(message).__name__ == "ResultMessage":
|
|
260
|
+
break
|
|
261
|
+
|
|
262
|
+
spans = memory_logger.pop()
|
|
263
|
+
|
|
264
|
+
task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK]
|
|
265
|
+
assert len(task_spans) >= 1, f"Should have at least one task span, got {len(task_spans)}"
|
|
266
|
+
|
|
267
|
+
task_span = next(
|
|
268
|
+
(s for s in task_spans if s["span_attributes"]["name"] == "Claude Agent"),
|
|
269
|
+
task_spans[0],
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
_assert_structured_input(task_span, expected_contents)
|
|
273
|
+
|
|
274
|
+
finally:
|
|
275
|
+
claude_agent_sdk.ClaudeSDKClient = original_client
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
async def _single_message_generator():
|
|
279
|
+
"""Generator yielding a single message."""
|
|
280
|
+
yield _make_message("What is 2 + 2?")
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
async def _multi_message_generator():
|
|
284
|
+
"""Generator yielding multiple messages."""
|
|
285
|
+
yield _make_message("Part 1")
|
|
286
|
+
yield _make_message("Part 2")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class TestAutoInstrumentClaudeAgentSDK:
|
|
290
|
+
"""Tests for auto_instrument() with Claude Agent SDK."""
|
|
291
|
+
|
|
292
|
+
def test_auto_instrument_claude_agent_sdk(self):
|
|
293
|
+
"""Test auto_instrument patches Claude Agent SDK and creates spans."""
|
|
294
|
+
verify_autoinstrument_script("test_auto_claude_agent_sdk.py")
|
braintrust/wrappers/dspy.py
CHANGED
|
@@ -51,6 +51,7 @@ from typing import Any
|
|
|
51
51
|
|
|
52
52
|
from braintrust.logger import current_span, start_span
|
|
53
53
|
from braintrust.span_types import SpanTypeAttribute
|
|
54
|
+
from wrapt import wrap_function_wrapper
|
|
54
55
|
|
|
55
56
|
# Note: For detailed token and cost metrics, use patch_litellm() before importing DSPy.
|
|
56
57
|
# The DSPy callback focuses on execution flow and span hierarchy.
|
|
@@ -60,6 +61,8 @@ try:
|
|
|
60
61
|
except ImportError:
|
|
61
62
|
raise ImportError("DSPy is not installed. Please install it with: pip install dspy")
|
|
62
63
|
|
|
64
|
+
__all__ = ["BraintrustDSpyCallback", "patch_dspy"]
|
|
65
|
+
|
|
63
66
|
|
|
64
67
|
class BraintrustDSpyCallback(BaseCallback):
|
|
65
68
|
"""Callback handler that logs DSPy execution traces to Braintrust.
|
|
@@ -412,4 +415,52 @@ class BraintrustDSpyCallback(BaseCallback):
|
|
|
412
415
|
span.end()
|
|
413
416
|
|
|
414
417
|
|
|
415
|
-
|
|
418
|
+
def _configure_wrapper(wrapped, instance, args, kwargs):
|
|
419
|
+
"""Wrapper for dspy.configure that auto-adds BraintrustDSpyCallback."""
|
|
420
|
+
callbacks = kwargs.get("callbacks")
|
|
421
|
+
if callbacks is None:
|
|
422
|
+
callbacks = []
|
|
423
|
+
else:
|
|
424
|
+
callbacks = list(callbacks)
|
|
425
|
+
|
|
426
|
+
# Check if already has Braintrust callback
|
|
427
|
+
has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
|
|
428
|
+
if not has_bt_callback:
|
|
429
|
+
callbacks.append(BraintrustDSpyCallback())
|
|
430
|
+
|
|
431
|
+
kwargs["callbacks"] = callbacks
|
|
432
|
+
return wrapped(*args, **kwargs)
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def patch_dspy() -> bool:
|
|
436
|
+
"""
|
|
437
|
+
Patch DSPy to automatically add Braintrust tracing callback.
|
|
438
|
+
|
|
439
|
+
After calling this, all calls to dspy.configure() will automatically
|
|
440
|
+
include the BraintrustDSpyCallback.
|
|
441
|
+
|
|
442
|
+
Returns:
|
|
443
|
+
True if DSPy was patched (or already patched), False if DSPy is not installed.
|
|
444
|
+
|
|
445
|
+
Example:
|
|
446
|
+
```python
|
|
447
|
+
import braintrust
|
|
448
|
+
braintrust.patch_dspy()
|
|
449
|
+
|
|
450
|
+
import dspy
|
|
451
|
+
lm = dspy.LM("openai/gpt-4o-mini")
|
|
452
|
+
dspy.configure(lm=lm) # BraintrustDSpyCallback auto-added!
|
|
453
|
+
```
|
|
454
|
+
"""
|
|
455
|
+
try:
|
|
456
|
+
import dspy
|
|
457
|
+
|
|
458
|
+
if getattr(dspy, "__braintrust_wrapped__", False):
|
|
459
|
+
return True # Already patched
|
|
460
|
+
|
|
461
|
+
wrap_function_wrapper("dspy", "configure", _configure_wrapper)
|
|
462
|
+
dspy.__braintrust_wrapped__ = True
|
|
463
|
+
return True
|
|
464
|
+
|
|
465
|
+
except ImportError:
|
|
466
|
+
return False
|
|
@@ -15,7 +15,13 @@ def setup_genai(
|
|
|
15
15
|
api_key: str | None = None,
|
|
16
16
|
project_id: str | None = None,
|
|
17
17
|
project_name: str | None = None,
|
|
18
|
-
):
|
|
18
|
+
) -> bool:
|
|
19
|
+
"""
|
|
20
|
+
Setup Braintrust integration with Google GenAI.
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
True if setup was successful, False if google-genai is not installed.
|
|
24
|
+
"""
|
|
19
25
|
span = current_span()
|
|
20
26
|
if span == NOOP_SPAN:
|
|
21
27
|
init_logger(project=project_name, api_key=api_key, project_id=project_id)
|
|
@@ -27,11 +33,8 @@ def setup_genai(
|
|
|
27
33
|
genai.Client = wrap_client(genai.Client)
|
|
28
34
|
models.Models = wrap_models(models.Models)
|
|
29
35
|
models.AsyncModels = wrap_async_models(models.AsyncModels)
|
|
30
|
-
|
|
31
|
-
except ImportError
|
|
32
|
-
logger.error(
|
|
33
|
-
f"Failed to import Google ADK agents: {e}. Google ADK is not installed. Please install it with: pip install google-adk"
|
|
34
|
-
)
|
|
36
|
+
return True
|
|
37
|
+
except ImportError:
|
|
35
38
|
return False
|
|
36
39
|
|
|
37
40
|
|
braintrust/wrappers/litellm.py
CHANGED
|
@@ -631,13 +631,16 @@ def serialize_response_format(response_format: Any) -> Any:
|
|
|
631
631
|
return response_format
|
|
632
632
|
|
|
633
633
|
|
|
634
|
-
def patch_litellm():
|
|
634
|
+
def patch_litellm() -> bool:
|
|
635
635
|
"""
|
|
636
636
|
Patch LiteLLM to add Braintrust tracing.
|
|
637
637
|
|
|
638
638
|
This wraps litellm.completion and litellm.acompletion to automatically
|
|
639
639
|
create Braintrust spans with detailed token metrics, timing, and costs.
|
|
640
640
|
|
|
641
|
+
Returns:
|
|
642
|
+
True if LiteLLM was patched (or already patched), False if LiteLLM is not installed.
|
|
643
|
+
|
|
641
644
|
Example:
|
|
642
645
|
```python
|
|
643
646
|
import braintrust
|
|
@@ -657,52 +660,12 @@ def patch_litellm():
|
|
|
657
660
|
import litellm
|
|
658
661
|
|
|
659
662
|
if not hasattr(litellm, "_braintrust_wrapped"):
|
|
660
|
-
# Store originals for unpatch_litellm()
|
|
661
|
-
litellm._braintrust_original_completion = litellm.completion
|
|
662
|
-
litellm._braintrust_original_acompletion = litellm.acompletion
|
|
663
|
-
litellm._braintrust_original_responses = litellm.responses
|
|
664
|
-
litellm._braintrust_original_aresponses = litellm.aresponses
|
|
665
|
-
|
|
666
663
|
wrapped = wrap_litellm(litellm)
|
|
667
664
|
litellm.completion = wrapped.completion
|
|
668
665
|
litellm.acompletion = wrapped.acompletion
|
|
669
666
|
litellm.responses = wrapped.responses
|
|
670
667
|
litellm.aresponses = wrapped.aresponses
|
|
671
668
|
litellm._braintrust_wrapped = True
|
|
669
|
+
return True
|
|
672
670
|
except ImportError:
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
def unpatch_litellm():
|
|
677
|
-
"""
|
|
678
|
-
Restore LiteLLM to its original state, removing Braintrust tracing.
|
|
679
|
-
|
|
680
|
-
This undoes the patching done by patch_litellm(), restoring the original
|
|
681
|
-
completion, acompletion, responses, and aresponses functions.
|
|
682
|
-
|
|
683
|
-
Example:
|
|
684
|
-
```python
|
|
685
|
-
import braintrust
|
|
686
|
-
braintrust.patch_litellm()
|
|
687
|
-
|
|
688
|
-
# ... use litellm with tracing ...
|
|
689
|
-
|
|
690
|
-
braintrust.unpatch_litellm() # restore original behavior
|
|
691
|
-
```
|
|
692
|
-
"""
|
|
693
|
-
try:
|
|
694
|
-
import litellm
|
|
695
|
-
|
|
696
|
-
if hasattr(litellm, "_braintrust_wrapped"):
|
|
697
|
-
litellm.completion = litellm._braintrust_original_completion
|
|
698
|
-
litellm.acompletion = litellm._braintrust_original_acompletion
|
|
699
|
-
litellm.responses = litellm._braintrust_original_responses
|
|
700
|
-
litellm.aresponses = litellm._braintrust_original_aresponses
|
|
701
|
-
|
|
702
|
-
delattr(litellm, "_braintrust_wrapped")
|
|
703
|
-
delattr(litellm, "_braintrust_original_completion")
|
|
704
|
-
delattr(litellm, "_braintrust_original_acompletion")
|
|
705
|
-
delattr(litellm, "_braintrust_original_responses")
|
|
706
|
-
delattr(litellm, "_braintrust_original_aresponses")
|
|
707
|
-
except ImportError:
|
|
708
|
-
pass # litellm not available
|
|
671
|
+
return False
|
|
@@ -51,9 +51,8 @@ def setup_pydantic_ai(
|
|
|
51
51
|
wrap_model_classes()
|
|
52
52
|
|
|
53
53
|
return True
|
|
54
|
-
except ImportError
|
|
55
|
-
|
|
56
|
-
logger.error("Pydantic AI is not installed. Please install it with: pip install pydantic-ai-slim")
|
|
54
|
+
except ImportError:
|
|
55
|
+
# Not installed - this is expected when using auto_instrument()
|
|
57
56
|
return False
|
|
58
57
|
|
|
59
58
|
|
braintrust/wrappers/test_agno.py
CHANGED
|
@@ -8,6 +8,7 @@ import pytest
|
|
|
8
8
|
from braintrust import logger
|
|
9
9
|
from braintrust.test_helpers import init_test_logger
|
|
10
10
|
from braintrust.wrappers.agno import setup_agno
|
|
11
|
+
from braintrust.wrappers.test_utils import verify_autoinstrument_script
|
|
11
12
|
|
|
12
13
|
TEST_ORG_ID = "test-org-123"
|
|
13
14
|
PROJECT_NAME = "test-agno-app"
|
|
@@ -94,3 +95,11 @@ def test_agno_simple_agent_execution(memory_logger):
|
|
|
94
95
|
assert llm_span["metrics"]["prompt_tokens"] == 38
|
|
95
96
|
assert llm_span["metrics"]["completion_tokens"] == 4
|
|
96
97
|
assert llm_span["metrics"]["tokens"] == 42
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class TestAutoInstrumentAgno:
|
|
101
|
+
"""Tests for auto_instrument() with Agno."""
|
|
102
|
+
|
|
103
|
+
def test_auto_instrument_agno(self):
|
|
104
|
+
"""Test auto_instrument patches Agno and creates spans."""
|
|
105
|
+
verify_autoinstrument_script("test_auto_agno.py")
|
|
@@ -9,6 +9,7 @@ import pytest
|
|
|
9
9
|
from braintrust import logger
|
|
10
10
|
from braintrust.test_helpers import init_test_logger
|
|
11
11
|
from braintrust.wrappers.anthropic import wrap_anthropic
|
|
12
|
+
from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
|
|
12
13
|
|
|
13
14
|
TEST_ORG_ID = "test-org-123"
|
|
14
15
|
PROJECT_NAME = "test-anthropic-app"
|
|
@@ -481,3 +482,158 @@ async def test_anthropic_beta_messages_streaming_async(memory_logger):
|
|
|
481
482
|
assert metrics["prompt_tokens"] == usage.input_tokens
|
|
482
483
|
assert metrics["completion_tokens"] == usage.output_tokens
|
|
483
484
|
assert metrics["tokens"] == usage.input_tokens + usage.output_tokens
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
class TestPatchAnthropic:
|
|
488
|
+
"""Tests for patch_anthropic() / unpatch_anthropic()."""
|
|
489
|
+
|
|
490
|
+
def test_patch_anthropic_sets_wrapped_flag(self):
|
|
491
|
+
"""patch_anthropic() should set __braintrust_wrapped__ on anthropic module."""
|
|
492
|
+
result = run_in_subprocess("""
|
|
493
|
+
from braintrust.wrappers.anthropic import patch_anthropic
|
|
494
|
+
import anthropic
|
|
495
|
+
|
|
496
|
+
assert not hasattr(anthropic, "__braintrust_wrapped__")
|
|
497
|
+
patch_anthropic()
|
|
498
|
+
assert hasattr(anthropic, "__braintrust_wrapped__")
|
|
499
|
+
print("SUCCESS")
|
|
500
|
+
""")
|
|
501
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
502
|
+
assert "SUCCESS" in result.stdout
|
|
503
|
+
|
|
504
|
+
def test_patch_anthropic_wraps_new_clients(self):
|
|
505
|
+
"""After patch_anthropic(), new Anthropic() clients should be wrapped."""
|
|
506
|
+
result = run_in_subprocess("""
|
|
507
|
+
from braintrust.wrappers.anthropic import patch_anthropic
|
|
508
|
+
patch_anthropic()
|
|
509
|
+
|
|
510
|
+
import anthropic
|
|
511
|
+
client = anthropic.Anthropic(api_key="test-key")
|
|
512
|
+
|
|
513
|
+
# Check that messages is wrapped
|
|
514
|
+
messages_type = type(client.messages).__name__
|
|
515
|
+
print(f"messages_type={messages_type}")
|
|
516
|
+
print("SUCCESS")
|
|
517
|
+
""")
|
|
518
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
519
|
+
assert "SUCCESS" in result.stdout
|
|
520
|
+
|
|
521
|
+
def test_patch_anthropic_idempotent(self):
|
|
522
|
+
"""Multiple patch_anthropic() calls should be safe."""
|
|
523
|
+
result = run_in_subprocess("""
|
|
524
|
+
from braintrust.wrappers.anthropic import patch_anthropic
|
|
525
|
+
import anthropic
|
|
526
|
+
|
|
527
|
+
patch_anthropic()
|
|
528
|
+
first_class = anthropic.Anthropic
|
|
529
|
+
|
|
530
|
+
patch_anthropic() # Second call
|
|
531
|
+
second_class = anthropic.Anthropic
|
|
532
|
+
|
|
533
|
+
assert first_class is second_class
|
|
534
|
+
print("SUCCESS")
|
|
535
|
+
""")
|
|
536
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
537
|
+
assert "SUCCESS" in result.stdout
|
|
538
|
+
|
|
539
|
+
def test_patch_anthropic_creates_spans(self):
|
|
540
|
+
"""patch_anthropic() should create spans when making API calls."""
|
|
541
|
+
result = run_in_subprocess("""
|
|
542
|
+
from braintrust.wrappers.anthropic import patch_anthropic
|
|
543
|
+
from braintrust.test_helpers import init_test_logger
|
|
544
|
+
from braintrust import logger
|
|
545
|
+
|
|
546
|
+
# Set up memory logger
|
|
547
|
+
init_test_logger("test-auto")
|
|
548
|
+
with logger._internal_with_memory_background_logger() as memory_logger:
|
|
549
|
+
patch_anthropic()
|
|
550
|
+
|
|
551
|
+
import anthropic
|
|
552
|
+
client = anthropic.Anthropic()
|
|
553
|
+
|
|
554
|
+
# Make a call within a span context
|
|
555
|
+
import braintrust
|
|
556
|
+
with braintrust.start_span(name="test") as span:
|
|
557
|
+
try:
|
|
558
|
+
# This will fail without API key, but span should still be created
|
|
559
|
+
client.messages.create(
|
|
560
|
+
model="claude-3-5-haiku-latest",
|
|
561
|
+
max_tokens=100,
|
|
562
|
+
messages=[{"role": "user", "content": "hi"}],
|
|
563
|
+
)
|
|
564
|
+
except Exception:
|
|
565
|
+
pass # Expected without API key
|
|
566
|
+
|
|
567
|
+
# Check that spans were logged
|
|
568
|
+
spans = memory_logger.pop()
|
|
569
|
+
# Should have at least the parent span
|
|
570
|
+
assert len(spans) >= 1, f"Expected spans, got {spans}"
|
|
571
|
+
print("SUCCESS")
|
|
572
|
+
""")
|
|
573
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
574
|
+
assert "SUCCESS" in result.stdout
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
class TestPatchAnthropicSpans:
|
|
578
|
+
"""VCR-based tests verifying that patch_anthropic() produces spans."""
|
|
579
|
+
|
|
580
|
+
@pytest.mark.vcr
|
|
581
|
+
def test_patch_anthropic_creates_spans(self, memory_logger):
|
|
582
|
+
"""patch_anthropic() should create spans when making API calls."""
|
|
583
|
+
from braintrust.wrappers.anthropic import patch_anthropic
|
|
584
|
+
|
|
585
|
+
assert not memory_logger.pop()
|
|
586
|
+
|
|
587
|
+
patch_anthropic()
|
|
588
|
+
client = anthropic.Anthropic()
|
|
589
|
+
response = client.messages.create(
|
|
590
|
+
model="claude-3-5-haiku-latest",
|
|
591
|
+
max_tokens=100,
|
|
592
|
+
messages=[{"role": "user", "content": "Say hi"}],
|
|
593
|
+
)
|
|
594
|
+
assert response.content[0].text
|
|
595
|
+
|
|
596
|
+
# Verify span was created
|
|
597
|
+
spans = memory_logger.pop()
|
|
598
|
+
assert len(spans) == 1
|
|
599
|
+
span = spans[0]
|
|
600
|
+
assert span["metadata"]["provider"] == "anthropic"
|
|
601
|
+
assert "claude" in span["metadata"]["model"]
|
|
602
|
+
assert span["input"]
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
class TestPatchAnthropicAsyncSpans:
|
|
606
|
+
"""VCR-based tests verifying that patch_anthropic() produces spans for async clients."""
|
|
607
|
+
|
|
608
|
+
@pytest.mark.vcr
|
|
609
|
+
@pytest.mark.asyncio
|
|
610
|
+
async def test_patch_anthropic_async_creates_spans(self, memory_logger):
|
|
611
|
+
"""patch_anthropic() should create spans for async API calls."""
|
|
612
|
+
from braintrust.wrappers.anthropic import patch_anthropic
|
|
613
|
+
|
|
614
|
+
assert not memory_logger.pop()
|
|
615
|
+
|
|
616
|
+
patch_anthropic()
|
|
617
|
+
client = anthropic.AsyncAnthropic()
|
|
618
|
+
response = await client.messages.create(
|
|
619
|
+
model="claude-3-5-haiku-latest",
|
|
620
|
+
max_tokens=100,
|
|
621
|
+
messages=[{"role": "user", "content": "Say hi async"}],
|
|
622
|
+
)
|
|
623
|
+
assert response.content[0].text
|
|
624
|
+
|
|
625
|
+
# Verify span was created
|
|
626
|
+
spans = memory_logger.pop()
|
|
627
|
+
assert len(spans) == 1
|
|
628
|
+
span = spans[0]
|
|
629
|
+
assert span["metadata"]["provider"] == "anthropic"
|
|
630
|
+
assert "claude" in span["metadata"]["model"]
|
|
631
|
+
assert span["input"]
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
class TestAutoInstrumentAnthropic:
|
|
635
|
+
"""Tests for auto_instrument() with Anthropic."""
|
|
636
|
+
|
|
637
|
+
def test_auto_instrument_anthropic(self):
|
|
638
|
+
"""Test auto_instrument patches Anthropic, creates spans, and uninstrument works."""
|
|
639
|
+
verify_autoinstrument_script("test_auto_anthropic.py")
|
braintrust/wrappers/test_dspy.py
CHANGED
|
@@ -7,6 +7,7 @@ import pytest
|
|
|
7
7
|
from braintrust import logger
|
|
8
8
|
from braintrust.test_helpers import init_test_logger
|
|
9
9
|
from braintrust.wrappers.dspy import BraintrustDSpyCallback
|
|
10
|
+
from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
|
|
10
11
|
|
|
11
12
|
PROJECT_NAME = "test-dspy-app"
|
|
12
13
|
MODEL = "openai/gpt-4o-mini"
|
|
@@ -58,3 +59,119 @@ def test_dspy_callback(memory_logger):
|
|
|
58
59
|
|
|
59
60
|
# Verify span parenting (LM span should have parent)
|
|
60
61
|
assert lm_span.get("span_parents") # LM span should have parent
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class TestPatchDSPy:
|
|
65
|
+
"""Tests for patch_dspy() / unpatch_dspy()."""
|
|
66
|
+
|
|
67
|
+
def test_patch_dspy_sets_wrapped_flag(self):
|
|
68
|
+
"""patch_dspy() should set __braintrust_wrapped__ on dspy module."""
|
|
69
|
+
result = run_in_subprocess("""
|
|
70
|
+
dspy = __import__("dspy")
|
|
71
|
+
from braintrust.wrappers.dspy import patch_dspy
|
|
72
|
+
|
|
73
|
+
assert not hasattr(dspy, "__braintrust_wrapped__")
|
|
74
|
+
patch_dspy()
|
|
75
|
+
assert hasattr(dspy, "__braintrust_wrapped__")
|
|
76
|
+
print("SUCCESS")
|
|
77
|
+
""")
|
|
78
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
79
|
+
assert "SUCCESS" in result.stdout
|
|
80
|
+
|
|
81
|
+
def test_patch_dspy_wraps_configure(self):
|
|
82
|
+
"""After patch_dspy(), dspy.configure() should auto-add BraintrustDSpyCallback."""
|
|
83
|
+
result = run_in_subprocess("""
|
|
84
|
+
from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
|
|
85
|
+
patch_dspy()
|
|
86
|
+
|
|
87
|
+
import dspy
|
|
88
|
+
|
|
89
|
+
# Configure without explicitly adding callback
|
|
90
|
+
dspy.configure(lm=None)
|
|
91
|
+
|
|
92
|
+
# Check that BraintrustDSpyCallback was auto-added
|
|
93
|
+
from dspy.dsp.utils.settings import settings
|
|
94
|
+
callbacks = settings.callbacks
|
|
95
|
+
has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
|
|
96
|
+
assert has_bt_callback, f"Expected BraintrustDSpyCallback in {callbacks}"
|
|
97
|
+
print("SUCCESS")
|
|
98
|
+
""")
|
|
99
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
100
|
+
assert "SUCCESS" in result.stdout
|
|
101
|
+
|
|
102
|
+
def test_patch_dspy_preserves_existing_callbacks(self):
|
|
103
|
+
"""patch_dspy() should preserve user-provided callbacks."""
|
|
104
|
+
result = run_in_subprocess("""
|
|
105
|
+
from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
|
|
106
|
+
patch_dspy()
|
|
107
|
+
|
|
108
|
+
import dspy
|
|
109
|
+
from dspy.utils.callback import BaseCallback
|
|
110
|
+
|
|
111
|
+
class MyCallback(BaseCallback):
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
my_callback = MyCallback()
|
|
115
|
+
dspy.configure(lm=None, callbacks=[my_callback])
|
|
116
|
+
|
|
117
|
+
from dspy.dsp.utils.settings import settings
|
|
118
|
+
callbacks = settings.callbacks
|
|
119
|
+
|
|
120
|
+
# Should have both callbacks
|
|
121
|
+
has_my_callback = any(cb is my_callback for cb in callbacks)
|
|
122
|
+
has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
|
|
123
|
+
|
|
124
|
+
assert has_my_callback, "User callback should be preserved"
|
|
125
|
+
assert has_bt_callback, "BraintrustDSpyCallback should be added"
|
|
126
|
+
print("SUCCESS")
|
|
127
|
+
""")
|
|
128
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
129
|
+
assert "SUCCESS" in result.stdout
|
|
130
|
+
|
|
131
|
+
def test_patch_dspy_does_not_duplicate_callback(self):
|
|
132
|
+
"""patch_dspy() should not add duplicate BraintrustDSpyCallback."""
|
|
133
|
+
result = run_in_subprocess("""
|
|
134
|
+
from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
|
|
135
|
+
patch_dspy()
|
|
136
|
+
|
|
137
|
+
import dspy
|
|
138
|
+
|
|
139
|
+
# User explicitly adds BraintrustDSpyCallback
|
|
140
|
+
bt_callback = BraintrustDSpyCallback()
|
|
141
|
+
dspy.configure(lm=None, callbacks=[bt_callback])
|
|
142
|
+
|
|
143
|
+
from dspy.dsp.utils.settings import settings
|
|
144
|
+
callbacks = settings.callbacks
|
|
145
|
+
|
|
146
|
+
# Should only have one BraintrustDSpyCallback
|
|
147
|
+
bt_callbacks = [cb for cb in callbacks if isinstance(cb, BraintrustDSpyCallback)]
|
|
148
|
+
assert len(bt_callbacks) == 1, f"Expected 1 BraintrustDSpyCallback, got {len(bt_callbacks)}"
|
|
149
|
+
print("SUCCESS")
|
|
150
|
+
""")
|
|
151
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
152
|
+
assert "SUCCESS" in result.stdout
|
|
153
|
+
|
|
154
|
+
def test_patch_dspy_idempotent(self):
|
|
155
|
+
"""Multiple patch_dspy() calls should be safe."""
|
|
156
|
+
result = run_in_subprocess("""
|
|
157
|
+
from braintrust.wrappers.dspy import patch_dspy
|
|
158
|
+
import dspy
|
|
159
|
+
|
|
160
|
+
patch_dspy()
|
|
161
|
+
patch_dspy() # Second call - should be no-op, not double-wrap
|
|
162
|
+
|
|
163
|
+
# Verify configure still works
|
|
164
|
+
lm = dspy.LM("openai/gpt-4o-mini")
|
|
165
|
+
dspy.configure(lm=lm)
|
|
166
|
+
print("SUCCESS")
|
|
167
|
+
""")
|
|
168
|
+
assert result.returncode == 0, f"Failed: {result.stderr}"
|
|
169
|
+
assert "SUCCESS" in result.stdout
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class TestAutoInstrumentDSPy:
|
|
173
|
+
"""Tests for auto_instrument() with DSPy."""
|
|
174
|
+
|
|
175
|
+
def test_auto_instrument_dspy(self):
|
|
176
|
+
"""Test auto_instrument patches DSPy, creates spans, and uninstrument works."""
|
|
177
|
+
verify_autoinstrument_script("test_auto_dspy.py")
|
|
@@ -6,6 +6,7 @@ import pytest
|
|
|
6
6
|
from braintrust import logger
|
|
7
7
|
from braintrust.test_helpers import init_test_logger
|
|
8
8
|
from braintrust.wrappers.google_genai import setup_genai
|
|
9
|
+
from braintrust.wrappers.test_utils import verify_autoinstrument_script
|
|
9
10
|
from google.genai import types
|
|
10
11
|
from google.genai.client import Client
|
|
11
12
|
|
|
@@ -637,3 +638,11 @@ def test_attachment_with_pydantic_model(memory_logger):
|
|
|
637
638
|
|
|
638
639
|
# Attachment should be preserved
|
|
639
640
|
assert copied["context_file"] is attachment
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
class TestAutoInstrumentGoogleGenAI:
|
|
644
|
+
"""Tests for auto_instrument() with Google GenAI."""
|
|
645
|
+
|
|
646
|
+
def test_auto_instrument_google_genai(self):
|
|
647
|
+
"""Test auto_instrument patches Google GenAI and creates spans."""
|
|
648
|
+
verify_autoinstrument_script("test_auto_google_genai.py")
|