braintrust 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl

Files changed (44)
  1. braintrust/__init__.py +3 -0
  2. braintrust/_generated_types.py +106 -6
  3. braintrust/auto.py +179 -0
  4. braintrust/conftest.py +23 -4
  5. braintrust/framework.py +113 -3
  6. braintrust/functions/invoke.py +3 -1
  7. braintrust/functions/test_invoke.py +61 -0
  8. braintrust/generated_types.py +7 -1
  9. braintrust/logger.py +127 -45
  10. braintrust/oai.py +51 -0
  11. braintrust/span_cache.py +337 -0
  12. braintrust/span_identifier_v3.py +21 -0
  13. braintrust/test_bt_json.py +0 -5
  14. braintrust/test_framework.py +37 -0
  15. braintrust/test_http.py +444 -0
  16. braintrust/test_logger.py +295 -5
  17. braintrust/test_span_cache.py +344 -0
  18. braintrust/test_trace.py +267 -0
  19. braintrust/test_util.py +58 -1
  20. braintrust/trace.py +385 -0
  21. braintrust/util.py +20 -0
  22. braintrust/version.py +2 -2
  23. braintrust/wrappers/agno/__init__.py +2 -3
  24. braintrust/wrappers/anthropic.py +64 -0
  25. braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
  26. braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
  27. braintrust/wrappers/claude_agent_sdk/test_wrapper.py +115 -0
  28. braintrust/wrappers/dspy.py +52 -1
  29. braintrust/wrappers/google_genai/__init__.py +9 -6
  30. braintrust/wrappers/litellm.py +6 -43
  31. braintrust/wrappers/pydantic_ai.py +2 -3
  32. braintrust/wrappers/test_agno.py +9 -0
  33. braintrust/wrappers/test_anthropic.py +156 -0
  34. braintrust/wrappers/test_dspy.py +117 -0
  35. braintrust/wrappers/test_google_genai.py +9 -0
  36. braintrust/wrappers/test_litellm.py +57 -55
  37. braintrust/wrappers/test_openai.py +253 -1
  38. braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
  39. braintrust/wrappers/test_utils.py +79 -0
  40. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/METADATA +1 -1
  41. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/RECORD +44 -37
  42. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/WHEEL +1 -1
  43. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/entry_points.txt +0 -0
  44. {braintrust-0.4.3.dist-info → braintrust-0.5.2.dist-info}/top_level.txt +0 -0

braintrust/wrappers/claude_agent_sdk/test_wrapper.py
@@ -23,6 +23,7 @@ from braintrust.wrappers.claude_agent_sdk._wrapper import (
     _create_client_wrapper_class,
     _create_tool_wrapper_class,
 )
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
 
 PROJECT_NAME = "test-claude-agent-sdk"
 TEST_MODEL = "claude-haiku-4-5-20251001"
@@ -177,3 +178,117 @@ async def test_calculator_with_multiple_operations(memory_logger):
         if span["span_id"] != root_span_id:
             assert span["root_span_id"] == root_span_id
             assert root_span_id in span["span_parents"]
+
+
+def _make_message(content: str) -> dict:
+    """Create a streaming format message dict."""
+    return {"type": "user", "message": {"role": "user", "content": content}}
+
+
+def _assert_structured_input(task_span: dict, expected_contents: list[str]) -> None:
+    """Assert that task span input is a structured list with expected content."""
+    inp = task_span.get("input")
+    assert isinstance(inp, list), f"Expected list input, got {type(inp).__name__}: {inp}"
+    assert [x["message"]["content"] for x in inp] == expected_contents
+
+
+class CustomAsyncIterable:
+    """Custom AsyncIterable class (not a generator) for testing."""
+
+    def __init__(self, messages: list[dict]):
+        self._messages = messages
+
+    def __aiter__(self):
+        return CustomAsyncIterator(self._messages)
+
+
+class CustomAsyncIterator:
+    """Iterator for CustomAsyncIterable."""
+
+    def __init__(self, messages: list[dict]):
+        self._messages = messages
+        self._index = 0
+
+    async def __anext__(self):
+        if self._index >= len(self._messages):
+            raise StopAsyncIteration
+        msg = self._messages[self._index]
+        self._index += 1
+        return msg
+
+
+@pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed")
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_factory,expected_contents",
+    [
+        pytest.param(
+            lambda: (msg async for msg in _single_message_generator()),
+            ["What is 2 + 2?"],
+            id="asyncgen_single",
+        ),
+        pytest.param(
+            lambda: (msg async for msg in _multi_message_generator()),
+            ["Part 1", "Part 2"],
+            id="asyncgen_multi",
+        ),
+        pytest.param(
+            lambda: CustomAsyncIterable([_make_message("Custom 1"), _make_message("Custom 2")]),
+            ["Custom 1", "Custom 2"],
+            id="custom_async_iterable",
+        ),
+    ],
+)
+async def test_query_async_iterable(memory_logger, input_factory, expected_contents):
+    """Test that async iterable inputs are captured as structured lists.
+
+    Verifies that passing AsyncIterable[dict] to query() results in the span
+    input showing the structured message list, not a flattened string or repr.
+    """
+    assert not memory_logger.pop()
+
+    original_client = claude_agent_sdk.ClaudeSDKClient
+    claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client)
+
+    try:
+        options = claude_agent_sdk.ClaudeAgentOptions(model=TEST_MODEL)
+
+        async with claude_agent_sdk.ClaudeSDKClient(options=options) as client:
+            await client.query(input_factory())
+            async for message in client.receive_response():
+                if type(message).__name__ == "ResultMessage":
+                    break
+
+        spans = memory_logger.pop()
+
+        task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK]
+        assert len(task_spans) >= 1, f"Should have at least one task span, got {len(task_spans)}"
+
+        task_span = next(
+            (s for s in task_spans if s["span_attributes"]["name"] == "Claude Agent"),
+            task_spans[0],
+        )
+
+        _assert_structured_input(task_span, expected_contents)
+
+    finally:
+        claude_agent_sdk.ClaudeSDKClient = original_client
+
+
+async def _single_message_generator():
+    """Generator yielding a single message."""
+    yield _make_message("What is 2 + 2?")
+
+
+async def _multi_message_generator():
+    """Generator yielding multiple messages."""
+    yield _make_message("Part 1")
+    yield _make_message("Part 2")
+
+
+class TestAutoInstrumentClaudeAgentSDK:
+    """Tests for auto_instrument() with Claude Agent SDK."""
+
+    def test_auto_instrument_claude_agent_sdk(self):
+        """Test auto_instrument patches Claude Agent SDK and creates spans."""
+        verify_autoinstrument_script("test_auto_claude_agent_sdk.py")
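
The parametrized test above exercises two input shapes for `query()`: async generators and plain objects that only implement `__aiter__`. As a rough standalone illustration of why both need the same handling (this sketch is not part of the package; every name in it is made up):

```python
import asyncio
from typing import AsyncIterable


def make_message(content: str) -> dict:
    # Same overall shape as the test's message dicts.
    return {"type": "user", "message": {"role": "user", "content": content}}


async def gen_messages():
    # Shape 1: an async generator, created lazily and consumed once.
    yield make_message("Part 1")
    yield make_message("Part 2")


class ListMessages:
    # Shape 2: a plain AsyncIterable; it is not an async generator, so any
    # generator-specific type check would miss it.
    def __init__(self, messages: list[dict]):
        self._messages = messages

    def __aiter__(self):
        return _aiter(self._messages)


async def _aiter(items: list[dict]):
    for item in items:
        yield item


async def collect(source: AsyncIterable[dict]) -> list[dict]:
    # `async for` consumes both shapes uniformly, which is what lets a wrapper
    # record a structured message list instead of a repr string.
    return [msg async for msg in source]


async def main() -> None:
    print(await collect(gen_messages()))
    print(await collect(ListMessages([make_message("Custom 1"), make_message("Custom 2")])))


asyncio.run(main())
```

Both shapes can only be consumed with `async for`, so the wrapper has to iterate the messages itself rather than stringify the object, which is exactly what `_assert_structured_input` checks.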

braintrust/wrappers/dspy.py
@@ -51,6 +51,7 @@ from typing import Any
 
 from braintrust.logger import current_span, start_span
 from braintrust.span_types import SpanTypeAttribute
+from wrapt import wrap_function_wrapper
 
 # Note: For detailed token and cost metrics, use patch_litellm() before importing DSPy.
 # The DSPy callback focuses on execution flow and span hierarchy.
@@ -60,6 +61,8 @@ try:
 except ImportError:
     raise ImportError("DSPy is not installed. Please install it with: pip install dspy")
 
+__all__ = ["BraintrustDSpyCallback", "patch_dspy"]
+
 
 class BraintrustDSpyCallback(BaseCallback):
     """Callback handler that logs DSPy execution traces to Braintrust.
@@ -412,4 +415,52 @@ class BraintrustDSpyCallback(BaseCallback):
         span.end()
 
 
-__all__ = ["BraintrustDSpyCallback"]
+def _configure_wrapper(wrapped, instance, args, kwargs):
+    """Wrapper for dspy.configure that auto-adds BraintrustDSpyCallback."""
+    callbacks = kwargs.get("callbacks")
+    if callbacks is None:
+        callbacks = []
+    else:
+        callbacks = list(callbacks)
+
+    # Check if already has Braintrust callback
+    has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
+    if not has_bt_callback:
+        callbacks.append(BraintrustDSpyCallback())
+
+    kwargs["callbacks"] = callbacks
+    return wrapped(*args, **kwargs)
+
+
+def patch_dspy() -> bool:
+    """
+    Patch DSPy to automatically add Braintrust tracing callback.
+
+    After calling this, all calls to dspy.configure() will automatically
+    include the BraintrustDSpyCallback.
+
+    Returns:
+        True if DSPy was patched (or already patched), False if DSPy is not installed.
+
+    Example:
+        ```python
+        import braintrust
+        braintrust.patch_dspy()
+
+        import dspy
+        lm = dspy.LM("openai/gpt-4o-mini")
+        dspy.configure(lm=lm)  # BraintrustDSpyCallback auto-added!
+        ```
+    """
+    try:
+        import dspy
+
+        if getattr(dspy, "__braintrust_wrapped__", False):
+            return True  # Already patched
+
+        wrap_function_wrapper("dspy", "configure", _configure_wrapper)
+        dspy.__braintrust_wrapped__ = True
+        return True
+
+    except ImportError:
+        return False
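
`patch_dspy()` relies on wrapt's `wrap_function_wrapper`, whose wrapper callbacks receive `(wrapped, instance, args, kwargs)` and must call `wrapped` themselves, which is the convention `_configure_wrapper` follows. A self-contained sketch of that convention against a throwaway stand-in module (`demo_module` and its `configure` function are invented for illustration and are not DSPy):

```python
import sys
import types

from wrapt import wrap_function_wrapper

# Throwaway stand-in module with a configure() function to patch.
demo_module = types.ModuleType("demo_module")
demo_module.configure = lambda **kwargs: kwargs
sys.modules["demo_module"] = demo_module


def add_default_callback(wrapped, instance, args, kwargs):
    # wrapt hands the wrapper (wrapped, instance, args, kwargs);
    # adjust kwargs, then delegate to the original callable.
    callbacks = list(kwargs.get("callbacks") or [])
    if "braintrust" not in callbacks:
        callbacks.append("braintrust")
    kwargs["callbacks"] = callbacks
    return wrapped(*args, **kwargs)


wrap_function_wrapper("demo_module", "configure", add_default_callback)

print(demo_module.configure(lm=None))
# -> {'lm': None, 'callbacks': ['braintrust']}
```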

braintrust/wrappers/google_genai/__init__.py
@@ -15,7 +15,13 @@ def setup_genai(
     api_key: str | None = None,
     project_id: str | None = None,
     project_name: str | None = None,
-):
+) -> bool:
+    """
+    Setup Braintrust integration with Google GenAI.
+
+    Returns:
+        True if setup was successful, False if google-genai is not installed.
+    """
     span = current_span()
     if span == NOOP_SPAN:
         init_logger(project=project_name, api_key=api_key, project_id=project_id)
@@ -27,11 +33,8 @@ def setup_genai(
         genai.Client = wrap_client(genai.Client)
         models.Models = wrap_models(models.Models)
         models.AsyncModels = wrap_async_models(models.AsyncModels)
-        pass
-    except ImportError as e:
-        logger.error(
-            f"Failed to import Google ADK agents: {e}. Google ADK is not installed. Please install it with: pip install google-adk"
-        )
+        return True
+    except ImportError:
         return False
 
 

braintrust/wrappers/litellm.py
@@ -631,13 +631,16 @@ def serialize_response_format(response_format: Any) -> Any:
     return response_format
 
 
-def patch_litellm():
+def patch_litellm() -> bool:
     """
     Patch LiteLLM to add Braintrust tracing.
 
     This wraps litellm.completion and litellm.acompletion to automatically
     create Braintrust spans with detailed token metrics, timing, and costs.
 
+    Returns:
+        True if LiteLLM was patched (or already patched), False if LiteLLM is not installed.
+
     Example:
     ```python
     import braintrust
@@ -657,52 +660,12 @@ def patch_litellm():
         import litellm
 
         if not hasattr(litellm, "_braintrust_wrapped"):
-            # Store originals for unpatch_litellm()
-            litellm._braintrust_original_completion = litellm.completion
-            litellm._braintrust_original_acompletion = litellm.acompletion
-            litellm._braintrust_original_responses = litellm.responses
-            litellm._braintrust_original_aresponses = litellm.aresponses
-
            wrapped = wrap_litellm(litellm)
            litellm.completion = wrapped.completion
            litellm.acompletion = wrapped.acompletion
            litellm.responses = wrapped.responses
            litellm.aresponses = wrapped.aresponses
            litellm._braintrust_wrapped = True
+        return True
     except ImportError:
-        pass  # litellm not available
-
-
-def unpatch_litellm():
-    """
-    Restore LiteLLM to its original state, removing Braintrust tracing.
-
-    This undoes the patching done by patch_litellm(), restoring the original
-    completion, acompletion, responses, and aresponses functions.
-
-    Example:
-    ```python
-    import braintrust
-    braintrust.patch_litellm()
-
-    # ... use litellm with tracing ...
-
-    braintrust.unpatch_litellm()  # restore original behavior
-    ```
-    """
-    try:
-        import litellm
-
-        if hasattr(litellm, "_braintrust_wrapped"):
-            litellm.completion = litellm._braintrust_original_completion
-            litellm.acompletion = litellm._braintrust_original_acompletion
-            litellm.responses = litellm._braintrust_original_responses
-            litellm.aresponses = litellm._braintrust_original_aresponses
-
-            delattr(litellm, "_braintrust_wrapped")
-            delattr(litellm, "_braintrust_original_completion")
-            delattr(litellm, "_braintrust_original_acompletion")
-            delattr(litellm, "_braintrust_original_responses")
-            delattr(litellm, "_braintrust_original_aresponses")
-    except ImportError:
-        pass  # litellm not available
+        return False
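
With this change `patch_litellm()` reports success through its return value instead of failing silently, and `unpatch_litellm()` is gone, so there is no longer a built-in way to restore the original functions. A minimal usage sketch based on the docstring above (the model name is illustrative and an API key is required to actually run it):

```python
import braintrust

if braintrust.patch_litellm():
    import litellm

    # Traced call: the wrapper records token metrics, timing, and cost.
    response = litellm.completion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)
else:
    print("litellm is not installed; Braintrust tracing was not enabled")
```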

braintrust/wrappers/pydantic_ai.py
@@ -51,9 +51,8 @@ def setup_pydantic_ai(
         wrap_model_classes()
 
         return True
-    except ImportError as e:
-        logger.error(f"Failed to import Pydantic AI: {e}")
-        logger.error("Pydantic AI is not installed. Please install it with: pip install pydantic-ai-slim")
+    except ImportError:
+        # Not installed - this is expected when using auto_instrument()
         return False
 
 

braintrust/wrappers/test_agno.py
@@ -8,6 +8,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.agno import setup_agno
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
 
 TEST_ORG_ID = "test-org-123"
 PROJECT_NAME = "test-agno-app"
@@ -94,3 +95,11 @@ def test_agno_simple_agent_execution(memory_logger):
     assert llm_span["metrics"]["prompt_tokens"] == 38
     assert llm_span["metrics"]["completion_tokens"] == 4
     assert llm_span["metrics"]["tokens"] == 42
+
+
+class TestAutoInstrumentAgno:
+    """Tests for auto_instrument() with Agno."""
+
+    def test_auto_instrument_agno(self):
+        """Test auto_instrument patches Agno and creates spans."""
+        verify_autoinstrument_script("test_auto_agno.py")

braintrust/wrappers/test_anthropic.py
@@ -9,6 +9,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.anthropic import wrap_anthropic
+from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
 
 TEST_ORG_ID = "test-org-123"
 PROJECT_NAME = "test-anthropic-app"
@@ -481,3 +482,158 @@ async def test_anthropic_beta_messages_streaming_async(memory_logger):
     assert metrics["prompt_tokens"] == usage.input_tokens
     assert metrics["completion_tokens"] == usage.output_tokens
     assert metrics["tokens"] == usage.input_tokens + usage.output_tokens
+
+
+class TestPatchAnthropic:
+    """Tests for patch_anthropic() / unpatch_anthropic()."""
+
+    def test_patch_anthropic_sets_wrapped_flag(self):
+        """patch_anthropic() should set __braintrust_wrapped__ on anthropic module."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            import anthropic
+
+            assert not hasattr(anthropic, "__braintrust_wrapped__")
+            patch_anthropic()
+            assert hasattr(anthropic, "__braintrust_wrapped__")
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+    def test_patch_anthropic_wraps_new_clients(self):
+        """After patch_anthropic(), new Anthropic() clients should be wrapped."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            patch_anthropic()
+
+            import anthropic
+            client = anthropic.Anthropic(api_key="test-key")
+
+            # Check that messages is wrapped
+            messages_type = type(client.messages).__name__
+            print(f"messages_type={messages_type}")
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+    def test_patch_anthropic_idempotent(self):
+        """Multiple patch_anthropic() calls should be safe."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            import anthropic
+
+            patch_anthropic()
+            first_class = anthropic.Anthropic
+
+            patch_anthropic()  # Second call
+            second_class = anthropic.Anthropic
+
+            assert first_class is second_class
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+    def test_patch_anthropic_creates_spans(self):
+        """patch_anthropic() should create spans when making API calls."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.anthropic import patch_anthropic
+            from braintrust.test_helpers import init_test_logger
+            from braintrust import logger
+
+            # Set up memory logger
+            init_test_logger("test-auto")
+            with logger._internal_with_memory_background_logger() as memory_logger:
+                patch_anthropic()
+
+                import anthropic
+                client = anthropic.Anthropic()
+
+                # Make a call within a span context
+                import braintrust
+                with braintrust.start_span(name="test") as span:
+                    try:
+                        # This will fail without API key, but span should still be created
+                        client.messages.create(
+                            model="claude-3-5-haiku-latest",
+                            max_tokens=100,
+                            messages=[{"role": "user", "content": "hi"}],
+                        )
+                    except Exception:
+                        pass  # Expected without API key
+
+                # Check that spans were logged
+                spans = memory_logger.pop()
+                # Should have at least the parent span
+                assert len(spans) >= 1, f"Expected spans, got {spans}"
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+
+class TestPatchAnthropicSpans:
+    """VCR-based tests verifying that patch_anthropic() produces spans."""
+
+    @pytest.mark.vcr
+    def test_patch_anthropic_creates_spans(self, memory_logger):
+        """patch_anthropic() should create spans when making API calls."""
+        from braintrust.wrappers.anthropic import patch_anthropic
+
+        assert not memory_logger.pop()
+
+        patch_anthropic()
+        client = anthropic.Anthropic()
+        response = client.messages.create(
+            model="claude-3-5-haiku-latest",
+            max_tokens=100,
+            messages=[{"role": "user", "content": "Say hi"}],
+        )
+        assert response.content[0].text
+
+        # Verify span was created
+        spans = memory_logger.pop()
+        assert len(spans) == 1
+        span = spans[0]
+        assert span["metadata"]["provider"] == "anthropic"
+        assert "claude" in span["metadata"]["model"]
+        assert span["input"]
+
+
+class TestPatchAnthropicAsyncSpans:
+    """VCR-based tests verifying that patch_anthropic() produces spans for async clients."""
+
+    @pytest.mark.vcr
+    @pytest.mark.asyncio
+    async def test_patch_anthropic_async_creates_spans(self, memory_logger):
+        """patch_anthropic() should create spans for async API calls."""
+        from braintrust.wrappers.anthropic import patch_anthropic
+
+        assert not memory_logger.pop()
+
+        patch_anthropic()
+        client = anthropic.AsyncAnthropic()
+        response = await client.messages.create(
+            model="claude-3-5-haiku-latest",
+            max_tokens=100,
+            messages=[{"role": "user", "content": "Say hi async"}],
+        )
+        assert response.content[0].text
+
+        # Verify span was created
+        spans = memory_logger.pop()
+        assert len(spans) == 1
+        span = spans[0]
+        assert span["metadata"]["provider"] == "anthropic"
+        assert "claude" in span["metadata"]["model"]
+        assert span["input"]
+
+
+class TestAutoInstrumentAnthropic:
+    """Tests for auto_instrument() with Anthropic."""
+
+    def test_auto_instrument_anthropic(self):
+        """Test auto_instrument patches Anthropic, creates spans, and uninstrument works."""
+        verify_autoinstrument_script("test_auto_anthropic.py")
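
These subprocess-based tests assert on `result.returncode`, `result.stdout`, and `result.stderr`, which suggests `run_in_subprocess` returns a `subprocess.CompletedProcess`. The helper itself lives in the new `braintrust/wrappers/test_utils.py`, which is not shown in this view; the following is only a guess at its shape, assuming `textwrap.dedent` plus `subprocess.run`:

```python
import subprocess
import sys
import textwrap


def run_in_subprocess(code: str) -> subprocess.CompletedProcess:
    """Run a Python snippet in a fresh interpreter and capture its output."""
    # A fresh interpreter guarantees the target library is unpatched at start,
    # which is what the __braintrust_wrapped__ assertions above rely on.
    return subprocess.run(
        [sys.executable, "-c", textwrap.dedent(code)],
        capture_output=True,
        text=True,
    )
```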

braintrust/wrappers/test_dspy.py
@@ -7,6 +7,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.dspy import BraintrustDSpyCallback
+from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script
 
 PROJECT_NAME = "test-dspy-app"
 MODEL = "openai/gpt-4o-mini"
@@ -58,3 +59,119 @@ def test_dspy_callback(memory_logger):
 
     # Verify span parenting (LM span should have parent)
     assert lm_span.get("span_parents")  # LM span should have parent
+
+
+class TestPatchDSPy:
+    """Tests for patch_dspy() / unpatch_dspy()."""
+
+    def test_patch_dspy_sets_wrapped_flag(self):
+        """patch_dspy() should set __braintrust_wrapped__ on dspy module."""
+        result = run_in_subprocess("""
+            dspy = __import__("dspy")
+            from braintrust.wrappers.dspy import patch_dspy
+
+            assert not hasattr(dspy, "__braintrust_wrapped__")
+            patch_dspy()
+            assert hasattr(dspy, "__braintrust_wrapped__")
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+    def test_patch_dspy_wraps_configure(self):
+        """After patch_dspy(), dspy.configure() should auto-add BraintrustDSpyCallback."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
+            patch_dspy()
+
+            import dspy
+
+            # Configure without explicitly adding callback
+            dspy.configure(lm=None)
+
+            # Check that BraintrustDSpyCallback was auto-added
+            from dspy.dsp.utils.settings import settings
+            callbacks = settings.callbacks
+            has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
+            assert has_bt_callback, f"Expected BraintrustDSpyCallback in {callbacks}"
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+    def test_patch_dspy_preserves_existing_callbacks(self):
+        """patch_dspy() should preserve user-provided callbacks."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
+            patch_dspy()
+
+            import dspy
+            from dspy.utils.callback import BaseCallback
+
+            class MyCallback(BaseCallback):
+                pass
+
+            my_callback = MyCallback()
+            dspy.configure(lm=None, callbacks=[my_callback])
+
+            from dspy.dsp.utils.settings import settings
+            callbacks = settings.callbacks
+
+            # Should have both callbacks
+            has_my_callback = any(cb is my_callback for cb in callbacks)
+            has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks)
+
+            assert has_my_callback, "User callback should be preserved"
+            assert has_bt_callback, "BraintrustDSpyCallback should be added"
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+    def test_patch_dspy_does_not_duplicate_callback(self):
+        """patch_dspy() should not add duplicate BraintrustDSpyCallback."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback
+            patch_dspy()
+
+            import dspy
+
+            # User explicitly adds BraintrustDSpyCallback
+            bt_callback = BraintrustDSpyCallback()
+            dspy.configure(lm=None, callbacks=[bt_callback])
+
+            from dspy.dsp.utils.settings import settings
+            callbacks = settings.callbacks
+
+            # Should only have one BraintrustDSpyCallback
+            bt_callbacks = [cb for cb in callbacks if isinstance(cb, BraintrustDSpyCallback)]
+            assert len(bt_callbacks) == 1, f"Expected 1 BraintrustDSpyCallback, got {len(bt_callbacks)}"
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+    def test_patch_dspy_idempotent(self):
+        """Multiple patch_dspy() calls should be safe."""
+        result = run_in_subprocess("""
+            from braintrust.wrappers.dspy import patch_dspy
+            import dspy
+
+            patch_dspy()
+            patch_dspy()  # Second call - should be no-op, not double-wrap
+
+            # Verify configure still works
+            lm = dspy.LM("openai/gpt-4o-mini")
+            dspy.configure(lm=lm)
+            print("SUCCESS")
+        """)
+        assert result.returncode == 0, f"Failed: {result.stderr}"
+        assert "SUCCESS" in result.stdout
+
+
+class TestAutoInstrumentDSPy:
+    """Tests for auto_instrument() with DSPy."""
+
+    def test_auto_instrument_dspy(self):
+        """Test auto_instrument patches DSPy, creates spans, and uninstrument works."""
+        verify_autoinstrument_script("test_auto_dspy.py")

braintrust/wrappers/test_google_genai.py
@@ -6,6 +6,7 @@ import pytest
 from braintrust import logger
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.google_genai import setup_genai
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
 from google.genai import types
 from google.genai.client import Client
 
@@ -637,3 +638,11 @@ def test_attachment_with_pydantic_model(memory_logger):
 
     # Attachment should be preserved
     assert copied["context_file"] is attachment
+
+
+class TestAutoInstrumentGoogleGenAI:
+    """Tests for auto_instrument() with Google GenAI."""
+
+    def test_auto_instrument_google_genai(self):
+        """Test auto_instrument patches Google GenAI and creates spans."""
+        verify_autoinstrument_script("test_auto_google_genai.py")
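
Like `setup_pydantic_ai()` and `patch_litellm()`, `setup_genai()` now signals a missing dependency through its boolean return value rather than an error log. A minimal usage sketch (the project name is made up):

```python
from braintrust.wrappers.google_genai import setup_genai

# setup_genai() returns False instead of logging an error when google-genai
# is not installed, so callers can branch on the result.
if not setup_genai(project_name="my-project"):
    print("google-genai is not installed; skipping GenAI instrumentation")
```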