PyPI - braintrust - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

braintrust 0.4.0py3-none-any.whl → 0.4.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

braintrust/bt_json.py +178 -19
braintrust/db_fields.py +1 -0
braintrust/framework.py +13 -4
braintrust/logger.py +30 -120
braintrust/otel/__init__.py +24 -15
braintrust/test_bt_json.py +644 -0
braintrust/test_framework.py +81 -0
braintrust/test_logger.py +245 -107
braintrust/test_otel.py +118 -26
braintrust/test_util.py +51 -1
braintrust/util.py +24 -3
braintrust/version.py +2 -2
braintrust/wrappers/google_genai/__init__.py +2 -15
braintrust/wrappers/litellm.py +43 -0
braintrust/wrappers/pydantic_ai.py +209 -95
braintrust/wrappers/test_google_genai.py +62 -1
braintrust/wrappers/test_litellm.py +73 -0
braintrust/wrappers/test_pydantic_ai_integration.py +819 -22
{braintrust-0.4.0.dist-info → braintrust-0.4.2.dist-info}/METADATA +1 -1
{braintrust-0.4.0.dist-info → braintrust-0.4.2.dist-info}/RECORD +23 -22
{braintrust-0.4.0.dist-info → braintrust-0.4.2.dist-info}/WHEEL +0 -0
{braintrust-0.4.0.dist-info → braintrust-0.4.2.dist-info}/entry_points.txt +0 -0
{braintrust-0.4.0.dist-info → braintrust-0.4.2.dist-info}/top_level.txt +0 -0

braintrust/test_framework.py CHANGED Viewed

@@ -241,6 +241,62 @@ async def test_hooks_trial_index_multiple_inputs():
     assert sorted(input_2_trials) == [0, 1]
+@pytest.mark.asyncio
+async def test_scorer_spans_have_purpose_attribute(with_memory_logger, with_simulate_login):
+    """Test that scorer spans have span_attributes.purpose='scorer' and propagate to subspans."""
+    # Define test data
+    data = [
+        EvalCase(input="hello", expected="hello"),
+    ]
+    def simple_task(input_value):
+        return input_value
+    def purpose_scorer(input_value, output, expected):
+        return 1.0 if output == expected else 0.0
+    evaluator = Evaluator(
+        project_name="test-project",
+        eval_name="test-scorer-purpose",
+        data=data,
+        task=simple_task,
+        scores=[purpose_scorer],
+        experiment_name="test-scorer-purpose",
+        metadata=None,
+    )
+    # Create experiment so spans get logged
+    exp = init_test_exp("test-scorer-purpose", "test-project")
+    # Run evaluator
+    result = await run_evaluator(experiment=exp, evaluator=evaluator, position=None, filters=[])
+    assert len(result.results) == 1
+    assert result.results[0].scores.get("purpose_scorer") == 1.0
+    # Check the logged spans
+    logs = with_memory_logger.pop()
+    # Find the scorer span (has type="score")
+    scorer_spans = [log for log in logs if log.get("span_attributes", {}).get("type") == "score"]
+    assert len(scorer_spans) == 1, f"Expected 1 scorer span, found {len(scorer_spans)}"
+    scorer_span = scorer_spans[0]
+    # Verify the scorer span has purpose='scorer'
+    assert scorer_span["span_attributes"].get("purpose") == "scorer", (
+        f"Scorer span should have purpose='scorer', got: {scorer_span['span_attributes']}"
+    )
+    # Verify that non-scorer spans (task, eval) do NOT have purpose='scorer'
+    non_scorer_spans = [log for log in logs if log.get("span_attributes", {}).get("type") != "score"]
+    assert len(non_scorer_spans) > 0, "Expected at least one non-scorer span"
+    for span in non_scorer_spans:
+        assert span.get("span_attributes", {}).get("purpose") != "scorer", (
+            f"Non-scorer span should NOT have purpose='scorer', got: {span['span_attributes']}"
+        )
 @pytest.fixture
 def simple_scorer():
     def simple_scorer_function(input, output, expected):
@@ -287,6 +343,31 @@ async def test_eval_no_send_logs_true(with_memory_logger, simple_scorer):
     assert len(logs) == 0
+@pytest.mark.asyncio
+async def test_eval_no_send_logs_with_none_score(with_memory_logger):
+    """Test that scorers returning None don't crash local mode."""
+    def sometimes_none_scorer(input, output, expected):
+        # Return None for first input, score for second
+        if input == "hello":
+            return {"name": "conditional", "score": None}
+        return {"name": "conditional", "score": 1.0}
+    result = await Eval(
+        "test-none-score",
+        data=[
+            {"input": "hello", "expected": "hello world"},
+            {"input": "test", "expected": "test world"},
+        ],
+        task=lambda input_val: input_val + " world",
+        scores=[sometimes_none_scorer],
+        no_send_logs=True,
+    )
+    # Should not crash and should calculate average from non-None scores only
+    assert result.summary.scores["conditional"].score == 1.0  # Only the second score counts
 @pytest.mark.asyncio
 async def test_hooks_tags_append(with_memory_logger, with_simulate_login, simple_scorer):
     """Test that hooks.tags can be appended to and logged."""

braintrust/test_logger.py CHANGED Viewed

@@ -1,3 +1,5 @@
+# pyright: reportUnknownVariableType=false
+# pyright: reportPrivateUsage=false
 import asyncio
 import json
 import logging
@@ -20,7 +22,12 @@ from braintrust import (
     logger,
 )
 from braintrust.id_gen import OTELIDGenerator, get_id_generator
-from braintrust.logger import _deep_copy_event, _extract_attachments, parent_context, render_message, render_mustache
+from braintrust.logger import (
+    _extract_attachments,
+    parent_context,
+    render_message,
+    render_mustache,
+)
 from braintrust.prompt import PromptChatBlock, PromptData, PromptMessage, PromptSchema
 from braintrust.test_helpers import (
     assert_dict_matches,
@@ -170,113 +177,7 @@ class TestLogger(TestCase):
             },
         )
-    def test_deep_copy_event_basic(self):
-        original = {
-            "input": {"foo": "bar", "null": None, "empty": {}},
-            "output": [1, 2, "3", None, {}],
-        }
-        copy = _deep_copy_event(original)
-        self.assertEqual(copy, original)
-        self.assertIsNot(copy, original)
-        self.assertIsNot(copy["input"], original["input"])
-        self.assertIsNot(copy["output"], original["output"])
-    def test_deep_copy_event_with_attachments(self):
-        attachment1 = Attachment(
-            data=b"data",
-            filename="filename",
-            content_type="text/plain",
-        )
-        attachment2 = Attachment(
-            data=b"data2",
-            filename="filename2",
-            content_type="text/plain",
-        )
-        attachment3 = ExternalAttachment(
-            url="s3://bucket/path/to/key.pdf",
-            filename="filename3",
-            content_type="application/pdf",
-        )
-        date = "2024-10-23T05:02:48.796Z"
-        original = {
-            "input": "Testing",
-            "output": {
-                "span": "<span>",
-                "myIllegalObjects": ["<experiment>", "<dataset>", "<logger>"],
-                "myOtherWeirdObjects": [None, date, None, None],
-                "attachment": attachment1,
-                "another_attachment": attachment3,
-                "attachmentList": [attachment1, attachment2, "string", attachment3],
-                "nestedAttachment": {
-                    "attachment": attachment2,
-                    "another_attachment": attachment3,
-                },
-                "fake": {
-                    "_bt_internal_saved_attachment": "not a number",
-                },
-            },
-        }
-        copy = _deep_copy_event(original)
-        self.assertEqual(
-            copy,
-            {
-                "input": "Testing",
-                "output": {
-                    "span": "<span>",
-                    "myIllegalObjects": ["<experiment>", "<dataset>", "<logger>"],
-                    "myOtherWeirdObjects": [None, date, None, None],
-                    "attachment": attachment1,
-                    "another_attachment": attachment3,
-                    "attachmentList": [attachment1, attachment2, "string", attachment3],
-                    "nestedAttachment": {
-                        "attachment": attachment2,
-                        "another_attachment": attachment3,
-                    },
-                    "fake": {
-                        "_bt_internal_saved_attachment": "not a number",
-                    },
-                },
-            },
-        )
-        self.assertIsNot(copy, original)
-        self.assertIs(copy["output"]["attachment"], attachment1)
-        self.assertIs(copy["output"]["another_attachment"], attachment3)
-        self.assertIs(copy["output"]["nestedAttachment"]["attachment"], attachment2)
-        self.assertIs(copy["output"]["nestedAttachment"]["another_attachment"], attachment3)
-        self.assertIs(copy["output"]["attachmentList"][0], attachment1)
-        self.assertIs(copy["output"]["attachmentList"][1], attachment2)
-        self.assertIs(copy["output"]["attachmentList"][3], attachment3)
-    def test_check_json_serializable_catches_circular_references(self):
-        """Test that _check_json_serializable properly handles circular references.
-        After fix, _check_json_serializable should catch ValueError from circular
-        references and convert them to a more appropriate exception or handle them.
-        """
-        from braintrust.logger import _check_json_serializable
-        # Create data with circular reference
-        data = {"a": "b"}
-        data["self"] = data
-        # Should either succeed (by handling circular refs) or raise a clear exception
-        # The error message should indicate the data is not serializable
-        try:
-            result = _check_json_serializable(data)
-            # If it succeeds, it should return a serialized string
-            self.assertIsInstance(result, str)
-        except Exception as e:
-            # If it raises an exception, it should mention serialization issue
-            error_msg = str(e).lower()
-            self.assertTrue(
-                "json-serializable" in error_msg or "circular" in error_msg,
-                f"Expected error message to mention serialization issue, got: {e}",
-            )
     def test_prompt_build_with_structured_output_templating(self):
         self.maxDiff = None
@@ -756,6 +657,25 @@ def test_span_log_handles_infinity_gracefully(with_memory_logger):
     assert logs[0]["output"]["neg"] == "-Infinity"
+def test_span_log_with_binary_data(with_memory_logger):
+    """Test how span.log() currently handles binary data."""
+    logger = init_test_logger(__name__)
+    with logger.start_span(name="test_span") as span:
+        span.log(
+            input={"file": "image.png"},
+            output={"embedding": b"\x00\x01\x02\x03" * 100},
+        )
+    logs = with_memory_logger.pop()
+    assert len(logs) == 1
+    # Document actual behavior - binary data goes through deep_copy_and_sanitize_dict
+    # which uses bt_dumps/bt_loads roundtrip
+    assert logs[0]["input"]["file"] == "image.png"
+    # The embedding should be present (converted to some serializable form)
+    assert "embedding" in logs[0]["output"]
 def test_span_log_handles_unstringifiable_object_gracefully(with_memory_logger):
     """Test that span.log() should handle objects with bad __str__ gracefully without raising.
@@ -929,6 +849,40 @@ def test_span_link_with_unresolved_experiment(with_simulate_login, with_memory_l
     assert link == "https://www.braintrust.dev/error-generating-link?msg=resolve-experiment-id"
+def test_experiment_span_link_uses_env_vars_when_logged_out(with_memory_logger):
+    """Verify EXPERIMENT spans use BRAINTRUST_ORG_NAME env var when not logged in."""
+    simulate_logout()
+    assert_logged_out()
+    keys = ["BRAINTRUST_APP_URL", "BRAINTRUST_ORG_NAME"]
+    originals = {k: os.environ.get(k) for k in keys}
+    try:
+        os.environ["BRAINTRUST_APP_URL"] = "https://test-app.example.com"
+        os.environ["BRAINTRUST_ORG_NAME"] = "env-org-name"
+        experiment = braintrust.init(
+            project="test-project",
+            experiment="test-experiment",
+        )
+        # Create span with resolved experiment ID
+        span = experiment.start_span(name="test-span")
+        span.parent_object_id = LazyValue(lambda: "test-exp-id", use_mutex=False)
+        span.end()
+        link = span.link()
+        # Should use env var org name and app url
+        assert "env-org-name" in link
+        assert "test-app.example.com" in link
+        assert "test-exp-id" in link
+    finally:
+        for k, v in originals.items():
+            os.environ.pop(k, None)
+            if v:
+                os.environ[k] = v
 def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_logger):
     logger = init_logger(
         project="test-project",
@@ -2746,3 +2700,187 @@ class TestDatasetInternalBtql(TestCase):
         # Verify that the custom batch_size is used
         self.assertEqual(query_json["limit"], custom_batch_size)
+def test_attachment_identity_preserved_through_bt_safe_deep_copy():
+    """Test that attachment object identity is preserved through bt_safe_deep_copy."""
+    from braintrust.bt_json import bt_safe_deep_copy
+    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
+    original_id = id(attachment)
+    # Simulate what happens in Span.log
+    partial_record = {"input": {"file": attachment}}
+    copied = bt_safe_deep_copy(partial_record)
+    # Verify identity preserved
+    assert copied["input"]["file"] is attachment
+    assert id(copied["input"]["file"]) == original_id
+def test_extract_attachments_collects_and_replaces():
+    """Test that _extract_attachments properly collects attachments and replaces them with references."""
+    from braintrust.logger import _extract_attachments
+    attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
+    attachment2 = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")
+    ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="file3.pdf", content_type="application/pdf")
+    event = {
+        "input": {"file": attachment1},
+        "output": {"file": attachment2},
+        "metadata": {"files": [attachment1, ext_attachment]}
+    }
+    attachments = []
+    _extract_attachments(event, attachments)
+    # Should have collected all 4 attachment instances (attachment1 appears twice)
+    assert len(attachments) == 4
+    assert attachments[0] is attachment1
+    assert attachments[1] is attachment2
+    assert attachments[2] is attachment1  # Same instance collected again
+    assert attachments[3] is ext_attachment
+    # Event should have been modified to contain references
+    assert event["input"]["file"] == attachment1.reference
+    assert event["output"]["file"] == attachment2.reference
+    assert event["metadata"]["files"][0] == attachment1.reference
+    assert event["metadata"]["files"][1] == ext_attachment.reference
+def test_extract_attachments_preserves_identity():
+    """Test that the same attachment instance is collected multiple times when it appears in different places."""
+    from braintrust.logger import _extract_attachments
+    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
+    original_id = id(attachment)
+    event = {
+        "input": attachment,
+        "output": attachment,  # Same instance
+        "metadata": {"file": attachment}  # Same instance again
+    }
+    attachments = []
+    _extract_attachments(event, attachments)
+    # Should collect the same instance 3 times
+    assert len(attachments) == 3
+    assert all(att is attachment for att in attachments)
+    assert all(id(att) == original_id for att in attachments)
+def test_attachment_upload_tracked_on_flush(with_memory_logger, with_simulate_login):
+    """Test that attachment upload is tracked when attachments are logged and flushed."""
+    attachment = Attachment(data=b"test data", filename="test.txt", content_type="text/plain")
+    logger = init_test_logger(__name__)
+    span = logger.start_span(name="test_span")
+    span.log(input={"file": attachment})
+    span.end()
+    # No upload attempts yet
+    assert len(with_memory_logger.upload_attempts) == 0
+    # Flush should track upload attempt
+    logger.flush()
+    # Now upload should be tracked
+    assert len(with_memory_logger.upload_attempts) == 1
+    assert with_memory_logger.upload_attempts[0] is attachment
+def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_login):
+    """Test that upload is tracked for multiple attachments."""
+    attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
+    attachment2 = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")
+    logger = init_test_logger(__name__)
+    span = logger.start_span(name="test_span")
+    span.log(
+        input={"file1": attachment1},
+        output={"file2": attachment2}
+    )
+    span.end()
+    logger.flush()
+    # Both attachments should be tracked
+    assert len(with_memory_logger.upload_attempts) == 2
+    assert attachment1 in with_memory_logger.upload_attempts
+    assert attachment2 in with_memory_logger.upload_attempts
+def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_simulate_login):
+    """Test that same attachment logged twice appears twice in upload attempts."""
+    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
+    logger = init_test_logger(__name__)
+    span = logger.start_span(name="test_span")
+    span.log(input={"file": attachment})
+    span.log(metadata={"same_file": attachment})
+    span.end()
+    logger.flush()
+    # Same attachment should be tracked twice (once for each log call)
+    assert len(with_memory_logger.upload_attempts) == 2
+    assert with_memory_logger.upload_attempts[0] is attachment
+    assert with_memory_logger.upload_attempts[1] is attachment
+def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login):
+    """Test that ExternalAttachment upload is also tracked."""
+    ext_attachment = ExternalAttachment(
+        url="s3://bucket/key.pdf",
+        filename="external.pdf",
+        content_type="application/pdf"
+    )
+    logger = init_test_logger(__name__)
+    span = logger.start_span(name="test_span")
+    span.log(input={"file": ext_attachment})
+    span.end()
+    logger.flush()
+    # ExternalAttachment should be tracked
+    assert len(with_memory_logger.upload_attempts) == 1
+    assert with_memory_logger.upload_attempts[0] is ext_attachment
+def test_json_attachment_upload_tracked(with_memory_logger, with_simulate_login):
+    """Test that JSONAttachment upload is tracked."""
+    data = {"key": "value", "nested": {"array": [1, 2, 3]}}
+    json_attachment = JSONAttachment(data, filename="data.json")
+    logger = init_test_logger(__name__)
+    span = logger.start_span(name="test_span")
+    span.log(output={"data": json_attachment})
+    span.end()
+    logger.flush()
+    # JSONAttachment should be tracked
+    assert len(with_memory_logger.upload_attempts) == 1
+    assert with_memory_logger.upload_attempts[0] is json_attachment
+def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_login):
+    """Test that different attachment types are all tracked."""
+    attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
+    json_attachment = JSONAttachment({"key": "value"}, filename="data.json")
+    ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="file.pdf", content_type="application/pdf")
+    logger = init_test_logger(__name__)
+    span = logger.start_span(name="test_span")
+    span.log(
+        input=attachment,
+        output=json_attachment,
+        metadata={"file": ext_attachment}
+    )
+    span.end()
+    logger.flush()
+    # All three types should be tracked
+    assert len(with_memory_logger.upload_attempts) == 3
+    assert attachment in with_memory_logger.upload_attempts
+    assert json_attachment in with_memory_logger.upload_attempts
+    assert ext_attachment in with_memory_logger.upload_attempts

braintrust 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

braintrust 0.4.0py3-none-any.whl → 0.4.2py3-none-any.whl