braintrust 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -241,6 +241,62 @@ async def test_hooks_trial_index_multiple_inputs():
241
241
  assert sorted(input_2_trials) == [0, 1]
242
242
 
243
243
 
244
@pytest.mark.asyncio
async def test_scorer_spans_have_purpose_attribute(with_memory_logger, with_simulate_login):
    """Scorer spans are logged with purpose='scorer'; every other span is not.

    Runs a one-case evaluation with a single scorer against a test experiment,
    then inspects the records returned by ``with_memory_logger.pop()``.
    """

    def simple_task(input_value):
        return input_value

    def purpose_scorer(input_value, output, expected):
        return 1.0 if output == expected else 0.0

    def span_attr(log, key):
        # A record without "span_attributes" is treated as having none set.
        return log.get("span_attributes", {}).get(key)

    evaluator = Evaluator(
        project_name="test-project",
        eval_name="test-scorer-purpose",
        data=[EvalCase(input="hello", expected="hello")],
        task=simple_task,
        scores=[purpose_scorer],
        experiment_name="test-scorer-purpose",
        metadata=None,
    )

    # An experiment is created so that spans get logged.
    experiment = init_test_exp("test-scorer-purpose", "test-project")
    outcome = await run_evaluator(experiment=experiment, evaluator=evaluator, position=None, filters=[])

    assert len(outcome.results) == 1
    assert outcome.results[0].scores.get("purpose_scorer") == 1.0

    logs = with_memory_logger.pop()

    # The scorer span is the one whose span type is "score".
    scorer_spans = [log for log in logs if span_attr(log, "type") == "score"]
    assert len(scorer_spans) == 1, f"Expected 1 scorer span, found {len(scorer_spans)}"

    scorer_span = scorer_spans[0]
    assert scorer_span["span_attributes"].get("purpose") == "scorer", (
        f"Scorer span should have purpose='scorer', got: {scorer_span['span_attributes']}"
    )

    # Everything else (task, eval) must not carry the scorer purpose.
    non_scorer_spans = [log for log in logs if span_attr(log, "type") != "score"]
    assert len(non_scorer_spans) > 0, "Expected at least one non-scorer span"
    for span in non_scorer_spans:
        assert span_attr(span, "purpose") != "scorer", (
            f"Non-scorer span should NOT have purpose='scorer', got: {span['span_attributes']}"
        )
298
+
299
+
244
300
  @pytest.fixture
245
301
  def simple_scorer():
246
302
  def simple_scorer_function(input, output, expected):
braintrust/test_logger.py CHANGED
@@ -1,3 +1,5 @@
1
+ # pyright: reportUnknownVariableType=false
2
+ # pyright: reportPrivateUsage=false
1
3
  import asyncio
2
4
  import json
3
5
  import logging
@@ -20,7 +22,12 @@ from braintrust import (
20
22
  logger,
21
23
  )
22
24
  from braintrust.id_gen import OTELIDGenerator, get_id_generator
23
- from braintrust.logger import _deep_copy_event, _extract_attachments, parent_context, render_message, render_mustache
25
+ from braintrust.logger import (
26
+ _extract_attachments,
27
+ parent_context,
28
+ render_message,
29
+ render_mustache,
30
+ )
24
31
  from braintrust.prompt import PromptChatBlock, PromptData, PromptMessage, PromptSchema
25
32
  from braintrust.test_helpers import (
26
33
  assert_dict_matches,
@@ -170,113 +177,7 @@ class TestLogger(TestCase):
170
177
  },
171
178
  )
172
179
 
173
def test_deep_copy_event_basic(self):
    """_deep_copy_event returns an equal event whose containers are new objects."""
    original = {
        "input": {"foo": "bar", "null": None, "empty": {}},
        "output": [1, 2, "3", None, {}],
    }

    duplicate = _deep_copy_event(original)

    # Equal by value ...
    self.assertEqual(duplicate, original)
    # ... but neither the top-level dict nor its nested containers are shared.
    self.assertIsNot(duplicate, original)
    for field in ("input", "output"):
        self.assertIsNot(duplicate[field], original[field])
183
-
184
def test_deep_copy_event_with_attachments(self):
    """_deep_copy_event copies the event but keeps attachment objects by identity."""
    attachment1 = Attachment(data=b"data", filename="filename", content_type="text/plain")
    attachment2 = Attachment(data=b"data2", filename="filename2", content_type="text/plain")
    attachment3 = ExternalAttachment(
        url="s3://bucket/path/to/key.pdf",
        filename="filename3",
        content_type="application/pdf",
    )
    date = "2024-10-23T05:02:48.796Z"

    def build_event():
        # Fresh containers on every call; the attachment objects are shared.
        return {
            "input": "Testing",
            "output": {
                "span": "<span>",
                "myIllegalObjects": ["<experiment>", "<dataset>", "<logger>"],
                "myOtherWeirdObjects": [None, date, None, None],
                "attachment": attachment1,
                "another_attachment": attachment3,
                "attachmentList": [attachment1, attachment2, "string", attachment3],
                "nestedAttachment": {
                    "attachment": attachment2,
                    "another_attachment": attachment3,
                },
                "fake": {
                    "_bt_internal_saved_attachment": "not a number",
                },
            },
        }

    original = build_event()
    copied = _deep_copy_event(original)

    # The copy matches an independently built event of the same shape.
    self.assertEqual(copied, build_event())
    self.assertIsNot(copied, original)

    # Every attachment slot must hold the very same object that was passed in.
    output = copied["output"]
    same_object_pairs = [
        (output["attachment"], attachment1),
        (output["another_attachment"], attachment3),
        (output["nestedAttachment"]["attachment"], attachment2),
        (output["nestedAttachment"]["another_attachment"], attachment3),
        (output["attachmentList"][0], attachment1),
        (output["attachmentList"][1], attachment2),
        (output["attachmentList"][3], attachment3),
    ]
    for actual, expected in same_object_pairs:
        self.assertIs(actual, expected)
254
-
255
def test_check_json_serializable_catches_circular_references(self):
    """_check_json_serializable copes with a self-referencing dict.

    Two outcomes are accepted:

    * the call succeeds and returns a ``str``, or
    * the call raises, and the exception message mentions "json-serializable"
      or "circular" (case-insensitive).
    """
    from braintrust.logger import _check_json_serializable

    # Build a dict that contains itself.
    data = {"a": "b"}
    data["self"] = data

    # Only the call under test lives in the ``try`` body. The success-path
    # assertion sits in ``else`` so that its AssertionError is not swallowed by
    # the broad ``except`` and misreported as a serialization-message failure.
    try:
        result = _check_json_serializable(data)
    except Exception as e:
        error_msg = str(e).lower()
        self.assertTrue(
            "json-serializable" in error_msg or "circular" in error_msg,
            f"Expected error message to mention serialization issue, got: {e}",
        )
    else:
        self.assertIsInstance(result, str)
280
181
 
281
182
  def test_prompt_build_with_structured_output_templating(self):
282
183
  self.maxDiff = None
@@ -756,6 +657,25 @@ def test_span_log_handles_infinity_gracefully(with_memory_logger):
756
657
  assert logs[0]["output"]["neg"] == "-Infinity"
757
658
 
758
659
 
660
def test_span_log_with_binary_data(with_memory_logger):
    """Characterize what span.log() currently does with a bytes value.

    The test only checks that one record is logged, that the plain string
    input is unchanged, and that the "embedding" key is still present in the
    output. It does not pin the form the bytes are converted to.
    """
    payload = b"\x00\x01\x02\x03" * 100
    span_logger = init_test_logger(__name__)

    with span_logger.start_span(name="test_span") as span:
        span.log(input={"file": "image.png"}, output={"embedding": payload})

    records = with_memory_logger.pop()
    assert len(records) == 1

    record = records[0]
    assert record["input"]["file"] == "image.png"
    assert "embedding" in record["output"]
677
+
678
+
759
679
  def test_span_log_handles_unstringifiable_object_gracefully(with_memory_logger):
760
680
  """Test that span.log() should handle objects with bad __str__ gracefully without raising.
761
681
 
@@ -2746,3 +2666,187 @@ class TestDatasetInternalBtql(TestCase):
2746
2666
 
2747
2667
  # Verify that the custom batch_size is used
2748
2668
  self.assertEqual(query_json["limit"], custom_batch_size)
2669
+
2670
+
2671
def test_attachment_identity_preserved_through_bt_safe_deep_copy():
    """bt_safe_deep_copy hands back the same Attachment object, not a clone."""
    from braintrust.bt_json import bt_safe_deep_copy

    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
    identity_before = id(attachment)

    # Same nesting as a record that logs the attachment under input.file.
    copied = bt_safe_deep_copy({"input": {"file": attachment}})

    copied_file = copied["input"]["file"]
    assert copied_file is attachment
    assert id(copied_file) == identity_before
2685
+
2686
+
2687
def test_extract_attachments_collects_and_replaces():
    """_extract_attachments gathers each attachment occurrence and swaps it for its reference.

    The event is mutated in place; the second argument receives one entry per
    occurrence, so an attachment used twice is collected twice.
    """
    from braintrust.logger import _extract_attachments

    text_a = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
    text_b = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")
    external = ExternalAttachment(url="s3://bucket/key", filename="file3.pdf", content_type="application/pdf")

    event = {
        "input": {"file": text_a},
        "output": {"file": text_b},
        "metadata": {"files": [text_a, external]},
    }

    collected = []
    _extract_attachments(event, collected)

    # Four occurrences in total: text_a appears under both input and metadata.
    expected_order = [text_a, text_b, text_a, external]
    assert len(collected) == 4
    for position, wanted in enumerate(expected_order):
        assert collected[position] is wanted

    # The event now holds references where the attachment objects used to be.
    assert event["input"]["file"] == text_a.reference
    assert event["output"]["file"] == text_b.reference
    metadata_files = event["metadata"]["files"]
    assert metadata_files[0] == text_a.reference
    assert metadata_files[1] == external.reference
2716
+
2717
+
2718
def test_extract_attachments_preserves_identity():
    """One attachment placed in three spots is collected three times, always as the same object."""
    from braintrust.logger import _extract_attachments

    shared = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
    shared_id = id(shared)

    # The same instance is used at the top level twice and once nested.
    event = {
        "input": shared,
        "output": shared,
        "metadata": {"file": shared},
    }

    collected = []
    _extract_attachments(event, collected)

    assert len(collected) == 3
    for item in collected:
        assert item is shared
    for item in collected:
        assert id(item) == shared_id
2738
+
2739
+
2740
def test_attachment_upload_tracked_on_flush(with_memory_logger, with_simulate_login):
    """An attachment logged on a span shows up in upload_attempts only after flush()."""
    attachment = Attachment(data=b"test data", filename="test.txt", content_type="text/plain")
    test_logger = init_test_logger(__name__)

    span = test_logger.start_span(name="test_span")
    span.log(input={"file": attachment})
    span.end()

    # Logging and ending the span must not record an upload attempt yet.
    assert len(with_memory_logger.upload_attempts) == 0

    test_logger.flush()

    # Flushing records exactly one attempt, for the object that was logged.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 1
    assert attempts[0] is attachment
2758
+
2759
+
2760
def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_login):
    """Two attachments logged in one span.log() call are both recorded as upload attempts."""
    first = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
    second = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(input={"file1": first}, output={"file2": second})
    span.end()
    test_logger.flush()

    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 2
    for logged in (first, second):
        assert logged in attempts
2778
+
2779
+
2780
def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_simulate_login):
    """Logging one attachment in two span.log() calls records two upload attempts."""
    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(input={"file": attachment})
    span.log(metadata={"same_file": attachment})
    span.end()
    test_logger.flush()

    # One attempt per log call, each pointing at the same object.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 2
    for index in (0, 1):
        assert attempts[index] is attachment
2795
+
2796
+
2797
def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login):
    """An ExternalAttachment logged on a span is recorded as an upload attempt after flush()."""
    external = ExternalAttachment(url="s3://bucket/key.pdf", filename="external.pdf", content_type="application/pdf")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(input={"file": external})
    span.end()
    test_logger.flush()

    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 1
    assert attempts[0] is external
2814
+
2815
+
2816
def test_json_attachment_upload_tracked(with_memory_logger, with_simulate_login):
    """A JSONAttachment logged as span output is recorded as an upload attempt after flush()."""
    payload = {"key": "value", "nested": {"array": [1, 2, 3]}}
    as_json = JSONAttachment(payload, filename="data.json")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(output={"data": as_json})
    span.end()
    test_logger.flush()

    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 1
    assert attempts[0] is as_json
2830
+
2831
+
2832
def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_login):
    """Attachment, JSONAttachment and ExternalAttachment logged together are all recorded."""
    binary = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
    as_json = JSONAttachment({"key": "value"}, filename="data.json")
    external = ExternalAttachment(url="s3://bucket/key", filename="file.pdf", content_type="application/pdf")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    # Two attachments sit directly in a field; the third is nested in a dict.
    span.log(input=binary, output=as_json, metadata={"file": external})
    span.end()
    test_logger.flush()

    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 3
    for logged in (binary, as_json, external):
        assert logged in attempts
braintrust/version.py CHANGED
@@ -1,4 +1,4 @@
1
- VERSION = "0.4.0"
1
+ VERSION = "0.4.1"
2
2
 
3
3
  # this will be templated during the build
4
- GIT_COMMIT = "8ab13f3f48af6a4d3c0b053e4bbabfd4f24f23ec"
4
+ GIT_COMMIT = "d9c624ea93ca6bf62c2412abce1b3a2ef1a2be67"
@@ -3,6 +3,7 @@ import time
3
3
  from collections.abc import Iterable
4
4
  from typing import Any
5
5
 
6
+ from braintrust.bt_json import bt_safe_deep_copy
6
7
  from braintrust.logger import NOOP_SPAN, Attachment, current_span, init_logger, start_span
7
8
  from braintrust.span_types import SpanTypeAttribute
8
9
  from wrapt import wrap_function_wrapper
@@ -149,7 +150,7 @@ def wrap_async_models(AsyncModels: Any):
149
150
 
150
151
 
151
152
  def _serialize_input(api_client: Any, input: dict[str, Any]):
152
- config = _try_dict(input.get("config"))
153
+ config = bt_safe_deep_copy(input.get("config"))
153
154
 
154
155
  if config is not None:
155
156
  tools = _serialize_tools(api_client, input)
@@ -424,17 +425,3 @@ def get_path(obj: dict[str, Any], path: str, default: Any = None) -> Any | None:
424
425
  current = current[key]
425
426
 
426
427
  return current
427
-
428
-
429
- def _try_dict(obj: Any) -> dict[str, Any] | None:
430
- try:
431
- return obj.model_dump()
432
- except AttributeError:
433
- pass
434
-
435
- try:
436
- return obj.dump()
437
- except AttributeError:
438
- pass
439
-
440
- return obj