braintrust 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -241,6 +241,62 @@ async def test_hooks_trial_index_multiple_inputs():
241
241
  assert sorted(input_2_trials) == [0, 1]
242
242
 
243
243
 
244
@pytest.mark.asyncio
async def test_scorer_spans_have_purpose_attribute(with_memory_logger, with_simulate_login):
    """Scorer spans carry span_attributes.purpose == 'scorer'; all other spans do not.

    Runs a single-case evaluation against a test experiment and inspects the
    spans captured by the in-memory logger.
    """

    def simple_task(input_value):
        return input_value

    def purpose_scorer(input_value, output, expected):
        return 1.0 if output == expected else 0.0

    evaluator = Evaluator(
        project_name="test-project",
        eval_name="test-scorer-purpose",
        data=[EvalCase(input="hello", expected="hello")],
        task=simple_task,
        scores=[purpose_scorer],
        experiment_name="test-scorer-purpose",
        metadata=None,
    )

    # The evaluator is run against an explicit experiment so spans get logged.
    exp = init_test_exp("test-scorer-purpose", "test-project")
    result = await run_evaluator(experiment=exp, evaluator=evaluator, position=None, filters=[])

    assert len(result.results) == 1
    assert result.results[0].scores.get("purpose_scorer") == 1.0

    # Split the captured spans into scorer spans (type == "score") and the rest.
    scorer_spans = []
    non_scorer_spans = []
    for log in with_memory_logger.pop():
        if log.get("span_attributes", {}).get("type") == "score":
            scorer_spans.append(log)
        else:
            non_scorer_spans.append(log)

    assert len(scorer_spans) == 1, f"Expected 1 scorer span, found {len(scorer_spans)}"

    # The single scorer span must be tagged with purpose='scorer'.
    scorer_span = scorer_spans[0]
    assert scorer_span["span_attributes"].get("purpose") == "scorer", (
        f"Scorer span should have purpose='scorer', got: {scorer_span['span_attributes']}"
    )

    # Every remaining span (task, eval, ...) must NOT be tagged as a scorer.
    assert len(non_scorer_spans) > 0, "Expected at least one non-scorer span"
    for span in non_scorer_spans:
        purpose = span.get("span_attributes", {}).get("purpose")
        assert purpose != "scorer", (
            f"Non-scorer span should NOT have purpose='scorer', got: {span['span_attributes']}"
        )
298
+
299
+
244
300
  @pytest.fixture
245
301
  def simple_scorer():
246
302
  def simple_scorer_function(input, output, expected):
@@ -287,6 +343,31 @@ async def test_eval_no_send_logs_true(with_memory_logger, simple_scorer):
287
343
  assert len(logs) == 0
288
344
 
289
345
 
346
@pytest.mark.asyncio
async def test_eval_no_send_logs_with_none_score(with_memory_logger):
    """A scorer that yields a None score must not break Eval when no_send_logs=True."""

    def sometimes_none_scorer(input, output, expected):
        # The "hello" case gets no score; every other input scores 1.0.
        score = None if input == "hello" else 1.0
        return {"name": "conditional", "score": score}

    cases = [
        {"input": "hello", "expected": "hello world"},
        {"input": "test", "expected": "test world"},
    ]

    result = await Eval(
        "test-none-score",
        data=cases,
        task=lambda input_val: input_val + " world",
        scores=[sometimes_none_scorer],
        no_send_logs=True,
    )

    # The run completes, and the summary averages only the non-None score.
    conditional_summary = result.summary.scores["conditional"]
    assert conditional_summary.score == 1.0
369
+
370
+
290
371
  @pytest.mark.asyncio
291
372
  async def test_hooks_tags_append(with_memory_logger, with_simulate_login, simple_scorer):
292
373
  """Test that hooks.tags can be appended to and logged."""
braintrust/test_logger.py CHANGED
@@ -1,3 +1,5 @@
1
+ # pyright: reportUnknownVariableType=false
2
+ # pyright: reportPrivateUsage=false
1
3
  import asyncio
2
4
  import json
3
5
  import logging
@@ -20,7 +22,12 @@ from braintrust import (
20
22
  logger,
21
23
  )
22
24
  from braintrust.id_gen import OTELIDGenerator, get_id_generator
23
- from braintrust.logger import _deep_copy_event, _extract_attachments, parent_context, render_message, render_mustache
25
+ from braintrust.logger import (
26
+ _extract_attachments,
27
+ parent_context,
28
+ render_message,
29
+ render_mustache,
30
+ )
24
31
  from braintrust.prompt import PromptChatBlock, PromptData, PromptMessage, PromptSchema
25
32
  from braintrust.test_helpers import (
26
33
  assert_dict_matches,
@@ -170,113 +177,7 @@ class TestLogger(TestCase):
170
177
  },
171
178
  )
172
179
 
173
- def test_deep_copy_event_basic(self):
174
- original = {
175
- "input": {"foo": "bar", "null": None, "empty": {}},
176
- "output": [1, 2, "3", None, {}],
177
- }
178
- copy = _deep_copy_event(original)
179
- self.assertEqual(copy, original)
180
- self.assertIsNot(copy, original)
181
- self.assertIsNot(copy["input"], original["input"])
182
- self.assertIsNot(copy["output"], original["output"])
183
-
184
- def test_deep_copy_event_with_attachments(self):
185
- attachment1 = Attachment(
186
- data=b"data",
187
- filename="filename",
188
- content_type="text/plain",
189
- )
190
- attachment2 = Attachment(
191
- data=b"data2",
192
- filename="filename2",
193
- content_type="text/plain",
194
- )
195
- attachment3 = ExternalAttachment(
196
- url="s3://bucket/path/to/key.pdf",
197
- filename="filename3",
198
- content_type="application/pdf",
199
- )
200
- date = "2024-10-23T05:02:48.796Z"
201
-
202
- original = {
203
- "input": "Testing",
204
- "output": {
205
- "span": "<span>",
206
- "myIllegalObjects": ["<experiment>", "<dataset>", "<logger>"],
207
- "myOtherWeirdObjects": [None, date, None, None],
208
- "attachment": attachment1,
209
- "another_attachment": attachment3,
210
- "attachmentList": [attachment1, attachment2, "string", attachment3],
211
- "nestedAttachment": {
212
- "attachment": attachment2,
213
- "another_attachment": attachment3,
214
- },
215
- "fake": {
216
- "_bt_internal_saved_attachment": "not a number",
217
- },
218
- },
219
- }
220
-
221
- copy = _deep_copy_event(original)
222
-
223
- self.assertEqual(
224
- copy,
225
- {
226
- "input": "Testing",
227
- "output": {
228
- "span": "<span>",
229
- "myIllegalObjects": ["<experiment>", "<dataset>", "<logger>"],
230
- "myOtherWeirdObjects": [None, date, None, None],
231
- "attachment": attachment1,
232
- "another_attachment": attachment3,
233
- "attachmentList": [attachment1, attachment2, "string", attachment3],
234
- "nestedAttachment": {
235
- "attachment": attachment2,
236
- "another_attachment": attachment3,
237
- },
238
- "fake": {
239
- "_bt_internal_saved_attachment": "not a number",
240
- },
241
- },
242
- },
243
- )
244
180
 
245
- self.assertIsNot(copy, original)
246
-
247
- self.assertIs(copy["output"]["attachment"], attachment1)
248
- self.assertIs(copy["output"]["another_attachment"], attachment3)
249
- self.assertIs(copy["output"]["nestedAttachment"]["attachment"], attachment2)
250
- self.assertIs(copy["output"]["nestedAttachment"]["another_attachment"], attachment3)
251
- self.assertIs(copy["output"]["attachmentList"][0], attachment1)
252
- self.assertIs(copy["output"]["attachmentList"][1], attachment2)
253
- self.assertIs(copy["output"]["attachmentList"][3], attachment3)
254
-
255
- def test_check_json_serializable_catches_circular_references(self):
256
- """Test that _check_json_serializable properly handles circular references.
257
-
258
- After fix, _check_json_serializable should catch ValueError from circular
259
- references and convert them to a more appropriate exception or handle them.
260
- """
261
- from braintrust.logger import _check_json_serializable
262
-
263
- # Create data with circular reference
264
- data = {"a": "b"}
265
- data["self"] = data
266
-
267
- # Should either succeed (by handling circular refs) or raise a clear exception
268
- # The error message should indicate the data is not serializable
269
- try:
270
- result = _check_json_serializable(data)
271
- # If it succeeds, it should return a serialized string
272
- self.assertIsInstance(result, str)
273
- except Exception as e:
274
- # If it raises an exception, it should mention serialization issue
275
- error_msg = str(e).lower()
276
- self.assertTrue(
277
- "json-serializable" in error_msg or "circular" in error_msg,
278
- f"Expected error message to mention serialization issue, got: {e}",
279
- )
280
181
 
281
182
  def test_prompt_build_with_structured_output_templating(self):
282
183
  self.maxDiff = None
@@ -756,6 +657,25 @@ def test_span_log_handles_infinity_gracefully(with_memory_logger):
756
657
  assert logs[0]["output"]["neg"] == "-Infinity"
757
658
 
758
659
 
660
def test_span_log_with_binary_data(with_memory_logger):
    """span.log() accepts a bytes value in the output and still emits one log row."""
    test_logger = init_test_logger(__name__)
    binary_payload = b"\x00\x01\x02\x03" * 100

    with test_logger.start_span(name="test_span") as span:
        span.log(
            input={"file": "image.png"},
            output={"embedding": binary_payload},
        )

    logs = with_memory_logger.pop()
    assert len(logs) == 1

    record = logs[0]
    assert record["input"]["file"] == "image.png"
    # Only presence is asserted: the serialized form of the bytes value is
    # deliberately not pinned by this test.
    assert "embedding" in record["output"]
677
+
678
+
759
679
  def test_span_log_handles_unstringifiable_object_gracefully(with_memory_logger):
760
680
  """Test that span.log() should handle objects with bad __str__ gracefully without raising.
761
681
 
@@ -929,6 +849,40 @@ def test_span_link_with_unresolved_experiment(with_simulate_login, with_memory_l
929
849
  assert link == "https://www.braintrust.dev/error-generating-link?msg=resolve-experiment-id"
930
850
 
931
851
 
852
def test_experiment_span_link_uses_env_vars_when_logged_out(with_memory_logger):
    """Verify EXPERIMENT span links use BRAINTRUST_ORG_NAME / BRAINTRUST_APP_URL when logged out.

    The two environment variables are overridden for the duration of the test
    and restored to their exact previous state afterwards.
    """
    simulate_logout()
    assert_logged_out()

    keys = ["BRAINTRUST_APP_URL", "BRAINTRUST_ORG_NAME"]
    # None marks "was not set"; any string (including "") is a value to restore.
    originals = {k: os.environ.get(k) for k in keys}
    try:
        os.environ["BRAINTRUST_APP_URL"] = "https://test-app.example.com"
        os.environ["BRAINTRUST_ORG_NAME"] = "env-org-name"

        experiment = braintrust.init(
            project="test-project",
            experiment="test-experiment",
        )

        # Create a span and force its parent experiment ID to a known value.
        span = experiment.start_span(name="test-span")
        span.parent_object_id = LazyValue(lambda: "test-exp-id", use_mutex=False)
        span.end()

        link = span.link()

        # The link should be built from the env-provided org name and app URL.
        assert "env-org-name" in link
        assert "test-app.example.com" in link
        assert "test-exp-id" in link
    finally:
        for k, v in originals.items():
            if v is None:
                # Variable did not exist before the test: remove our override.
                os.environ.pop(k, None)
            else:
                # Restore the previous value, even if it was the empty string
                # (a truthiness check would wrongly drop it).
                os.environ[k] = v
884
+
885
+
932
886
  def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_logger):
933
887
  logger = init_logger(
934
888
  project="test-project",
@@ -2746,3 +2700,187 @@ class TestDatasetInternalBtql(TestCase):
2746
2700
 
2747
2701
  # Verify that the custom batch_size is used
2748
2702
  self.assertEqual(query_json["limit"], custom_batch_size)
2703
+
2704
+
2705
def test_attachment_identity_preserved_through_bt_safe_deep_copy():
    """bt_safe_deep_copy must hand back the very same Attachment object, not a clone."""
    from braintrust.bt_json import bt_safe_deep_copy

    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
    original_id = id(attachment)

    # Same record shape as a span log call with an attachment nested under "input".
    copied = bt_safe_deep_copy({"input": {"file": attachment}})
    copied_file = copied["input"]["file"]

    # The nested value is the original instance.
    assert copied_file is attachment
    assert id(copied_file) == original_id
2719
+
2720
+
2721
def test_extract_attachments_collects_and_replaces():
    """_extract_attachments gathers every attachment occurrence and swaps in its reference."""
    from braintrust.logger import _extract_attachments

    attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
    attachment2 = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")
    ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="file3.pdf", content_type="application/pdf")

    event = {
        "input": {"file": attachment1},
        "output": {"file": attachment2},
        "metadata": {"files": [attachment1, ext_attachment]},
    }

    collected = []
    _extract_attachments(event, collected)

    # Four occurrences are collected in traversal order; attachment1 shows up twice.
    expected_order = [attachment1, attachment2, attachment1, ext_attachment]
    assert len(collected) == 4
    for position, expected_attachment in enumerate(expected_order):
        assert collected[position] is expected_attachment

    # The event is mutated in place: each attachment is replaced by its reference.
    assert event["input"]["file"] == attachment1.reference
    assert event["output"]["file"] == attachment2.reference
    metadata_files = event["metadata"]["files"]
    assert metadata_files[0] == attachment1.reference
    assert metadata_files[1] == ext_attachment.reference
2750
+
2751
+
2752
def test_extract_attachments_preserves_identity():
    """One attachment instance used in three places is collected three times, unchanged."""
    from braintrust.logger import _extract_attachments

    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
    original_id = id(attachment)

    # The same instance is referenced from every field.
    event = {
        "input": attachment,
        "output": attachment,
        "metadata": {"file": attachment},
    }

    collected = []
    _extract_attachments(event, collected)

    # One entry per occurrence, each being the original object.
    assert len(collected) == 3
    for att in collected:
        assert att is attachment
    for att in collected:
        assert id(att) == original_id
2772
+
2773
+
2774
def test_attachment_upload_tracked_on_flush(with_memory_logger, with_simulate_login):
    """An attachment's upload attempt is recorded at flush time, not when it is logged."""
    attachment = Attachment(data=b"test data", filename="test.txt", content_type="text/plain")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(input={"file": attachment})
    span.end()

    # Logging and ending the span alone records nothing.
    assert len(with_memory_logger.upload_attempts) == 0

    test_logger.flush()

    # After the flush exactly one attempt exists, for the very same object.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 1
    assert attempts[0] is attachment
2792
+
2793
+
2794
def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_login):
    """Two attachments logged in one span.log call each produce an upload attempt."""
    attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
    attachment2 = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(
        input={"file1": attachment1},
        output={"file2": attachment2},
    )
    span.end()
    test_logger.flush()

    # Both attachments are present among the recorded attempts.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 2
    assert attachment1 in attempts
    assert attachment2 in attempts
2812
+
2813
+
2814
def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_simulate_login):
    """Logging one attachment in two separate span.log calls yields two upload attempts."""
    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(input={"file": attachment})
    span.log(metadata={"same_file": attachment})
    span.end()
    test_logger.flush()

    # One attempt per log call, both pointing at the same instance.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 2
    first_attempt, second_attempt = attempts[0], attempts[1]
    assert first_attempt is attachment
    assert second_attempt is attachment
2829
+
2830
+
2831
def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login):
    """An ExternalAttachment logged on a span is recorded as an upload attempt on flush."""
    ext_attachment = ExternalAttachment(
        url="s3://bucket/key.pdf",
        filename="external.pdf",
        content_type="application/pdf",
    )

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(input={"file": ext_attachment})
    span.end()
    test_logger.flush()

    # Exactly one attempt, for the same ExternalAttachment instance.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 1
    assert attempts[0] is ext_attachment
2848
+
2849
+
2850
def test_json_attachment_upload_tracked(with_memory_logger, with_simulate_login):
    """A JSONAttachment logged on a span is recorded as an upload attempt on flush."""
    payload = {"key": "value", "nested": {"array": [1, 2, 3]}}
    json_attachment = JSONAttachment(payload, filename="data.json")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(output={"data": json_attachment})
    span.end()
    test_logger.flush()

    # Exactly one attempt, for the same JSONAttachment instance.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 1
    assert attempts[0] is json_attachment
2864
+
2865
+
2866
def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_login):
    """Attachment, JSONAttachment and ExternalAttachment logged together are all tracked."""
    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
    json_attachment = JSONAttachment({"key": "value"}, filename="data.json")
    ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="file.pdf", content_type="application/pdf")

    test_logger = init_test_logger(__name__)
    span = test_logger.start_span(name="test_span")
    span.log(
        input=attachment,
        output=json_attachment,
        metadata={"file": ext_attachment},
    )
    span.end()
    test_logger.flush()

    # One recorded attempt per attachment, regardless of its type.
    attempts = with_memory_logger.upload_attempts
    assert len(attempts) == 3
    assert attachment in attempts
    assert json_attachment in attempts
    assert ext_attachment in attempts