braintrust 0.3.15__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +737 -672
- braintrust/audit.py +2 -2
- braintrust/bt_json.py +178 -19
- braintrust/cli/eval.py +6 -7
- braintrust/cli/push.py +11 -11
- braintrust/context.py +12 -17
- braintrust/contrib/temporal/__init__.py +16 -27
- braintrust/contrib/temporal/test_temporal.py +8 -3
- braintrust/devserver/auth.py +8 -8
- braintrust/devserver/cache.py +3 -4
- braintrust/devserver/cors.py +8 -7
- braintrust/devserver/dataset.py +3 -5
- braintrust/devserver/eval_hooks.py +7 -6
- braintrust/devserver/schemas.py +22 -19
- braintrust/devserver/server.py +19 -12
- braintrust/devserver/test_cached_login.py +4 -4
- braintrust/framework.py +139 -142
- braintrust/framework2.py +88 -87
- braintrust/functions/invoke.py +66 -59
- braintrust/functions/stream.py +3 -2
- braintrust/generated_types.py +3 -1
- braintrust/git_fields.py +11 -11
- braintrust/gitutil.py +2 -3
- braintrust/graph_util.py +10 -10
- braintrust/id_gen.py +2 -2
- braintrust/logger.py +373 -471
- braintrust/merge_row_batch.py +10 -9
- braintrust/oai.py +21 -20
- braintrust/otel/__init__.py +49 -49
- braintrust/otel/context.py +16 -30
- braintrust/otel/test_distributed_tracing.py +14 -11
- braintrust/otel/test_otel_bt_integration.py +32 -31
- braintrust/parameters.py +8 -8
- braintrust/prompt.py +14 -14
- braintrust/prompt_cache/disk_cache.py +5 -4
- braintrust/prompt_cache/lru_cache.py +3 -2
- braintrust/prompt_cache/prompt_cache.py +13 -14
- braintrust/queue.py +4 -4
- braintrust/score.py +4 -4
- braintrust/serializable_data_class.py +4 -4
- braintrust/span_identifier_v1.py +1 -2
- braintrust/span_identifier_v2.py +3 -4
- braintrust/span_identifier_v3.py +23 -20
- braintrust/span_identifier_v4.py +34 -25
- braintrust/test_bt_json.py +644 -0
- braintrust/test_framework.py +72 -6
- braintrust/test_helpers.py +5 -5
- braintrust/test_id_gen.py +2 -3
- braintrust/test_logger.py +211 -107
- braintrust/test_otel.py +61 -53
- braintrust/test_queue.py +0 -1
- braintrust/test_score.py +1 -3
- braintrust/test_span_components.py +29 -44
- braintrust/util.py +9 -8
- braintrust/version.py +2 -2
- braintrust/wrappers/_anthropic_utils.py +4 -4
- braintrust/wrappers/agno/__init__.py +3 -4
- braintrust/wrappers/agno/agent.py +1 -2
- braintrust/wrappers/agno/function_call.py +1 -2
- braintrust/wrappers/agno/model.py +1 -2
- braintrust/wrappers/agno/team.py +1 -2
- braintrust/wrappers/agno/utils.py +12 -12
- braintrust/wrappers/anthropic.py +7 -8
- braintrust/wrappers/claude_agent_sdk/__init__.py +3 -4
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +29 -27
- braintrust/wrappers/dspy.py +15 -17
- braintrust/wrappers/google_genai/__init__.py +17 -30
- braintrust/wrappers/langchain.py +22 -24
- braintrust/wrappers/litellm.py +4 -3
- braintrust/wrappers/openai.py +15 -15
- braintrust/wrappers/pydantic_ai.py +225 -110
- braintrust/wrappers/test_agno.py +0 -1
- braintrust/wrappers/test_dspy.py +0 -1
- braintrust/wrappers/test_google_genai.py +64 -4
- braintrust/wrappers/test_litellm.py +0 -1
- braintrust/wrappers/test_pydantic_ai_integration.py +819 -22
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/METADATA +3 -2
- braintrust-0.4.1.dist-info/RECORD +121 -0
- braintrust-0.3.15.dist-info/RECORD +0 -120
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/WHEEL +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/entry_points.txt +0 -0
- {braintrust-0.3.15.dist-info → braintrust-0.4.1.dist-info}/top_level.txt +0 -0
braintrust/test_framework.py
CHANGED
|
@@ -240,12 +240,71 @@ async def test_hooks_trial_index_multiple_inputs():
|
|
|
240
240
|
assert sorted(input_1_trials) == [0, 1]
|
|
241
241
|
assert sorted(input_2_trials) == [0, 1]
|
|
242
242
|
|
|
243
|
+
|
|
244
|
+
@pytest.mark.asyncio
|
|
245
|
+
async def test_scorer_spans_have_purpose_attribute(with_memory_logger, with_simulate_login):
|
|
246
|
+
"""Test that scorer spans have span_attributes.purpose='scorer' and propagate to subspans."""
|
|
247
|
+
# Define test data
|
|
248
|
+
data = [
|
|
249
|
+
EvalCase(input="hello", expected="hello"),
|
|
250
|
+
]
|
|
251
|
+
|
|
252
|
+
def simple_task(input_value):
|
|
253
|
+
return input_value
|
|
254
|
+
|
|
255
|
+
def purpose_scorer(input_value, output, expected):
|
|
256
|
+
return 1.0 if output == expected else 0.0
|
|
257
|
+
|
|
258
|
+
evaluator = Evaluator(
|
|
259
|
+
project_name="test-project",
|
|
260
|
+
eval_name="test-scorer-purpose",
|
|
261
|
+
data=data,
|
|
262
|
+
task=simple_task,
|
|
263
|
+
scores=[purpose_scorer],
|
|
264
|
+
experiment_name="test-scorer-purpose",
|
|
265
|
+
metadata=None,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Create experiment so spans get logged
|
|
269
|
+
exp = init_test_exp("test-scorer-purpose", "test-project")
|
|
270
|
+
|
|
271
|
+
# Run evaluator
|
|
272
|
+
result = await run_evaluator(experiment=exp, evaluator=evaluator, position=None, filters=[])
|
|
273
|
+
|
|
274
|
+
assert len(result.results) == 1
|
|
275
|
+
assert result.results[0].scores.get("purpose_scorer") == 1.0
|
|
276
|
+
|
|
277
|
+
# Check the logged spans
|
|
278
|
+
logs = with_memory_logger.pop()
|
|
279
|
+
|
|
280
|
+
# Find the scorer span (has type="score")
|
|
281
|
+
scorer_spans = [log for log in logs if log.get("span_attributes", {}).get("type") == "score"]
|
|
282
|
+
assert len(scorer_spans) == 1, f"Expected 1 scorer span, found {len(scorer_spans)}"
|
|
283
|
+
|
|
284
|
+
scorer_span = scorer_spans[0]
|
|
285
|
+
|
|
286
|
+
# Verify the scorer span has purpose='scorer'
|
|
287
|
+
assert scorer_span["span_attributes"].get("purpose") == "scorer", (
|
|
288
|
+
f"Scorer span should have purpose='scorer', got: {scorer_span['span_attributes']}"
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
# Verify that non-scorer spans (task, eval) do NOT have purpose='scorer'
|
|
292
|
+
non_scorer_spans = [log for log in logs if log.get("span_attributes", {}).get("type") != "score"]
|
|
293
|
+
assert len(non_scorer_spans) > 0, "Expected at least one non-scorer span"
|
|
294
|
+
for span in non_scorer_spans:
|
|
295
|
+
assert span.get("span_attributes", {}).get("purpose") != "scorer", (
|
|
296
|
+
f"Non-scorer span should NOT have purpose='scorer', got: {span['span_attributes']}"
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
|
|
243
300
|
@pytest.fixture
|
|
244
301
|
def simple_scorer():
|
|
245
302
|
def simple_scorer_function(input, output, expected):
|
|
246
303
|
return {"name": "simple_scorer", "score": 0.8}
|
|
304
|
+
|
|
247
305
|
return simple_scorer_function
|
|
248
306
|
|
|
307
|
+
|
|
249
308
|
@pytest.mark.asyncio
|
|
250
309
|
async def test_eval_no_send_logs_true(with_memory_logger, simple_scorer):
|
|
251
310
|
"""Test that Eval with no_send_logs=True runs locally without creating experiment."""
|
|
@@ -286,7 +345,7 @@ async def test_eval_no_send_logs_true(with_memory_logger, simple_scorer):
|
|
|
286
345
|
|
|
287
346
|
@pytest.mark.asyncio
|
|
288
347
|
async def test_hooks_tags_append(with_memory_logger, with_simulate_login, simple_scorer):
|
|
289
|
-
"""
|
|
348
|
+
"""Test that hooks.tags can be appended to and logged."""
|
|
290
349
|
|
|
291
350
|
initial_tags = ["cookies n cream"]
|
|
292
351
|
appended_tags = ["chocolate", "vanilla", "strawberry"]
|
|
@@ -321,9 +380,12 @@ async def test_hooks_tags_append(with_memory_logger, with_simulate_login, simple
|
|
|
321
380
|
|
|
322
381
|
|
|
323
382
|
@pytest.mark.asyncio
|
|
324
|
-
@pytest.mark.parametrize(
|
|
383
|
+
@pytest.mark.parametrize(
|
|
384
|
+
("tags", "expected_tags"),
|
|
385
|
+
[(None, None), ([], None), (["chocolate", "vanilla", "strawberry"], ["chocolate", "vanilla", "strawberry"])],
|
|
386
|
+
)
|
|
325
387
|
async def test_hooks_tags_list(with_memory_logger, with_simulate_login, simple_scorer, tags, expected_tags):
|
|
326
|
-
"""
|
|
388
|
+
"""Test that hooks.tags can be set to a list."""
|
|
327
389
|
|
|
328
390
|
def task_with_hooks(input, hooks):
|
|
329
391
|
hooks.tags = tags
|
|
@@ -351,9 +413,10 @@ async def test_hooks_tags_list(with_memory_logger, with_simulate_login, simple_s
|
|
|
351
413
|
assert len(root_span) == 1
|
|
352
414
|
assert root_span[0].get("tags") == expected_tags
|
|
353
415
|
|
|
416
|
+
|
|
354
417
|
@pytest.mark.asyncio
|
|
355
418
|
async def test_hooks_tags_with_failing_scorer(with_memory_logger, with_simulate_login, simple_scorer):
|
|
356
|
-
"""
|
|
419
|
+
"""Test that hooks.tags can be set to a list."""
|
|
357
420
|
|
|
358
421
|
expected_tags = ["chocolate", "vanilla", "strawberry"]
|
|
359
422
|
|
|
@@ -386,9 +449,11 @@ async def test_hooks_tags_with_failing_scorer(with_memory_logger, with_simulate_
|
|
|
386
449
|
assert len(root_span) == 1
|
|
387
450
|
assert root_span[0].get("tags") == expected_tags
|
|
388
451
|
|
|
452
|
+
|
|
389
453
|
@pytest.mark.asyncio
|
|
390
454
|
async def test_hooks_tags_with_invalid_type(with_memory_logger, with_simulate_login, simple_scorer):
|
|
391
|
-
"""
|
|
455
|
+
"""Test that result contains an error for cases where hooks.tags is set to an invalid type."""
|
|
456
|
+
|
|
392
457
|
def task_with_hooks(input, hooks):
|
|
393
458
|
hooks.tags = 123
|
|
394
459
|
return input
|
|
@@ -411,7 +476,8 @@ async def test_hooks_tags_with_invalid_type(with_memory_logger, with_simulate_lo
|
|
|
411
476
|
|
|
412
477
|
@pytest.mark.asyncio
|
|
413
478
|
async def test_hooks_without_setting_tags(with_memory_logger, with_simulate_login, simple_scorer):
|
|
414
|
-
"""
|
|
479
|
+
"""Test where hooks.tags is not set"""
|
|
480
|
+
|
|
415
481
|
def task_with_hooks(input, hooks):
|
|
416
482
|
return input
|
|
417
483
|
|
braintrust/test_helpers.py
CHANGED
|
@@ -2,7 +2,6 @@ import os
|
|
|
2
2
|
from contextlib import contextmanager
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
|
-
|
|
6
5
|
from braintrust import logger
|
|
7
6
|
from braintrust.logger import ObjectMetadata, OrgProjectMetadata, ProjectExperimentMetadata
|
|
8
7
|
from braintrust.util import LazyValue
|
|
@@ -15,10 +14,8 @@ TEST_ORG_NAME = "test-org-name"
|
|
|
15
14
|
def has_devserver_installed() -> bool:
|
|
16
15
|
"""Check if devserver dependencies (starlette, uvicorn) are installed."""
|
|
17
16
|
import importlib.util
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
and importlib.util.find_spec("uvicorn") is not None
|
|
21
|
-
)
|
|
17
|
+
|
|
18
|
+
return importlib.util.find_spec("starlette") is not None and importlib.util.find_spec("uvicorn") is not None
|
|
22
19
|
|
|
23
20
|
|
|
24
21
|
def simulate_login() -> None:
|
|
@@ -68,12 +65,14 @@ def with_memory_logger():
|
|
|
68
65
|
# Clean up global state to prevent test contamination
|
|
69
66
|
logger._state.reset_parent_state()
|
|
70
67
|
|
|
68
|
+
|
|
71
69
|
@pytest.fixture
|
|
72
70
|
def memory_logger():
|
|
73
71
|
with logger._internal_with_memory_background_logger() as bgl:
|
|
74
72
|
yield bgl
|
|
75
73
|
logger._state.current_experiment = None
|
|
76
74
|
|
|
75
|
+
|
|
77
76
|
@contextmanager
|
|
78
77
|
def preserve_env_vars(*vars):
|
|
79
78
|
original_env = {v: os.environ.get(v) for v in vars}
|
|
@@ -114,6 +113,7 @@ def init_test_logger(project_name: str):
|
|
|
114
113
|
logger._compute_logger_metadata = fake_compute_logger_metadata
|
|
115
114
|
return l
|
|
116
115
|
|
|
116
|
+
|
|
117
117
|
def init_test_exp(experiment_name: str, project_name: str = None):
|
|
118
118
|
"""
|
|
119
119
|
Initialize an experiment for testing with fake project and experiment metadata.
|
braintrust/test_id_gen.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
|
|
2
1
|
import os
|
|
3
2
|
import uuid
|
|
4
3
|
|
|
5
4
|
import pytest
|
|
6
|
-
|
|
7
5
|
from braintrust import id_gen
|
|
8
6
|
|
|
9
7
|
|
|
@@ -76,7 +74,8 @@ def test_id_get_env_var(reset_id_generator_state):
|
|
|
76
74
|
|
|
77
75
|
|
|
78
76
|
def _is_hex(s):
|
|
79
|
-
return all(c in
|
|
77
|
+
return all(c in "0123456789abcdef" for c in s.lower())
|
|
78
|
+
|
|
80
79
|
|
|
81
80
|
def _assert_is_hex(x):
|
|
82
81
|
assert _is_hex(x)
|
braintrust/test_logger.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# pyright: reportUnknownVariableType=false
|
|
2
|
+
# pyright: reportPrivateUsage=false
|
|
1
3
|
import asyncio
|
|
2
4
|
import json
|
|
3
5
|
import logging
|
|
@@ -20,7 +22,12 @@ from braintrust import (
|
|
|
20
22
|
logger,
|
|
21
23
|
)
|
|
22
24
|
from braintrust.id_gen import OTELIDGenerator, get_id_generator
|
|
23
|
-
from braintrust.logger import
|
|
25
|
+
from braintrust.logger import (
|
|
26
|
+
_extract_attachments,
|
|
27
|
+
parent_context,
|
|
28
|
+
render_message,
|
|
29
|
+
render_mustache,
|
|
30
|
+
)
|
|
24
31
|
from braintrust.prompt import PromptChatBlock, PromptData, PromptMessage, PromptSchema
|
|
25
32
|
from braintrust.test_helpers import (
|
|
26
33
|
assert_dict_matches,
|
|
@@ -170,113 +177,7 @@ class TestLogger(TestCase):
|
|
|
170
177
|
},
|
|
171
178
|
)
|
|
172
179
|
|
|
173
|
-
def test_deep_copy_event_basic(self):
|
|
174
|
-
original = {
|
|
175
|
-
"input": {"foo": "bar", "null": None, "empty": {}},
|
|
176
|
-
"output": [1, 2, "3", None, {}],
|
|
177
|
-
}
|
|
178
|
-
copy = _deep_copy_event(original)
|
|
179
|
-
self.assertEqual(copy, original)
|
|
180
|
-
self.assertIsNot(copy, original)
|
|
181
|
-
self.assertIsNot(copy["input"], original["input"])
|
|
182
|
-
self.assertIsNot(copy["output"], original["output"])
|
|
183
|
-
|
|
184
|
-
def test_deep_copy_event_with_attachments(self):
|
|
185
|
-
attachment1 = Attachment(
|
|
186
|
-
data=b"data",
|
|
187
|
-
filename="filename",
|
|
188
|
-
content_type="text/plain",
|
|
189
|
-
)
|
|
190
|
-
attachment2 = Attachment(
|
|
191
|
-
data=b"data2",
|
|
192
|
-
filename="filename2",
|
|
193
|
-
content_type="text/plain",
|
|
194
|
-
)
|
|
195
|
-
attachment3 = ExternalAttachment(
|
|
196
|
-
url="s3://bucket/path/to/key.pdf",
|
|
197
|
-
filename="filename3",
|
|
198
|
-
content_type="application/pdf",
|
|
199
|
-
)
|
|
200
|
-
date = "2024-10-23T05:02:48.796Z"
|
|
201
|
-
|
|
202
|
-
original = {
|
|
203
|
-
"input": "Testing",
|
|
204
|
-
"output": {
|
|
205
|
-
"span": "<span>",
|
|
206
|
-
"myIllegalObjects": ["<experiment>", "<dataset>", "<logger>"],
|
|
207
|
-
"myOtherWeirdObjects": [None, date, None, None],
|
|
208
|
-
"attachment": attachment1,
|
|
209
|
-
"another_attachment": attachment3,
|
|
210
|
-
"attachmentList": [attachment1, attachment2, "string", attachment3],
|
|
211
|
-
"nestedAttachment": {
|
|
212
|
-
"attachment": attachment2,
|
|
213
|
-
"another_attachment": attachment3,
|
|
214
|
-
},
|
|
215
|
-
"fake": {
|
|
216
|
-
"_bt_internal_saved_attachment": "not a number",
|
|
217
|
-
},
|
|
218
|
-
},
|
|
219
|
-
}
|
|
220
180
|
|
|
221
|
-
copy = _deep_copy_event(original)
|
|
222
|
-
|
|
223
|
-
self.assertEqual(
|
|
224
|
-
copy,
|
|
225
|
-
{
|
|
226
|
-
"input": "Testing",
|
|
227
|
-
"output": {
|
|
228
|
-
"span": "<span>",
|
|
229
|
-
"myIllegalObjects": ["<experiment>", "<dataset>", "<logger>"],
|
|
230
|
-
"myOtherWeirdObjects": [None, date, None, None],
|
|
231
|
-
"attachment": attachment1,
|
|
232
|
-
"another_attachment": attachment3,
|
|
233
|
-
"attachmentList": [attachment1, attachment2, "string", attachment3],
|
|
234
|
-
"nestedAttachment": {
|
|
235
|
-
"attachment": attachment2,
|
|
236
|
-
"another_attachment": attachment3,
|
|
237
|
-
},
|
|
238
|
-
"fake": {
|
|
239
|
-
"_bt_internal_saved_attachment": "not a number",
|
|
240
|
-
},
|
|
241
|
-
},
|
|
242
|
-
},
|
|
243
|
-
)
|
|
244
|
-
|
|
245
|
-
self.assertIsNot(copy, original)
|
|
246
|
-
|
|
247
|
-
self.assertIs(copy["output"]["attachment"], attachment1)
|
|
248
|
-
self.assertIs(copy["output"]["another_attachment"], attachment3)
|
|
249
|
-
self.assertIs(copy["output"]["nestedAttachment"]["attachment"], attachment2)
|
|
250
|
-
self.assertIs(copy["output"]["nestedAttachment"]["another_attachment"], attachment3)
|
|
251
|
-
self.assertIs(copy["output"]["attachmentList"][0], attachment1)
|
|
252
|
-
self.assertIs(copy["output"]["attachmentList"][1], attachment2)
|
|
253
|
-
self.assertIs(copy["output"]["attachmentList"][3], attachment3)
|
|
254
|
-
|
|
255
|
-
def test_check_json_serializable_catches_circular_references(self):
|
|
256
|
-
"""Test that _check_json_serializable properly handles circular references.
|
|
257
|
-
|
|
258
|
-
After fix, _check_json_serializable should catch ValueError from circular
|
|
259
|
-
references and convert them to a more appropriate exception or handle them.
|
|
260
|
-
"""
|
|
261
|
-
from braintrust.logger import _check_json_serializable
|
|
262
|
-
|
|
263
|
-
# Create data with circular reference
|
|
264
|
-
data = {"a": "b"}
|
|
265
|
-
data["self"] = data
|
|
266
|
-
|
|
267
|
-
# Should either succeed (by handling circular refs) or raise a clear exception
|
|
268
|
-
# The error message should indicate the data is not serializable
|
|
269
|
-
try:
|
|
270
|
-
result = _check_json_serializable(data)
|
|
271
|
-
# If it succeeds, it should return a serialized string
|
|
272
|
-
self.assertIsInstance(result, str)
|
|
273
|
-
except Exception as e:
|
|
274
|
-
# If it raises an exception, it should mention serialization issue
|
|
275
|
-
error_msg = str(e).lower()
|
|
276
|
-
self.assertTrue(
|
|
277
|
-
"json-serializable" in error_msg or "circular" in error_msg,
|
|
278
|
-
f"Expected error message to mention serialization issue, got: {e}",
|
|
279
|
-
)
|
|
280
181
|
|
|
281
182
|
def test_prompt_build_with_structured_output_templating(self):
|
|
282
183
|
self.maxDiff = None
|
|
@@ -756,6 +657,25 @@ def test_span_log_handles_infinity_gracefully(with_memory_logger):
|
|
|
756
657
|
assert logs[0]["output"]["neg"] == "-Infinity"
|
|
757
658
|
|
|
758
659
|
|
|
660
|
+
def test_span_log_with_binary_data(with_memory_logger):
|
|
661
|
+
"""Test how span.log() currently handles binary data."""
|
|
662
|
+
logger = init_test_logger(__name__)
|
|
663
|
+
|
|
664
|
+
with logger.start_span(name="test_span") as span:
|
|
665
|
+
span.log(
|
|
666
|
+
input={"file": "image.png"},
|
|
667
|
+
output={"embedding": b"\x00\x01\x02\x03" * 100},
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
logs = with_memory_logger.pop()
|
|
671
|
+
assert len(logs) == 1
|
|
672
|
+
# Document actual behavior - binary data goes through deep_copy_and_sanitize_dict
|
|
673
|
+
# which uses bt_dumps/bt_loads roundtrip
|
|
674
|
+
assert logs[0]["input"]["file"] == "image.png"
|
|
675
|
+
# The embedding should be present (converted to some serializable form)
|
|
676
|
+
assert "embedding" in logs[0]["output"]
|
|
677
|
+
|
|
678
|
+
|
|
759
679
|
def test_span_log_handles_unstringifiable_object_gracefully(with_memory_logger):
|
|
760
680
|
"""Test that span.log() should handle objects with bad __str__ gracefully without raising.
|
|
761
681
|
|
|
@@ -2746,3 +2666,187 @@ class TestDatasetInternalBtql(TestCase):
|
|
|
2746
2666
|
|
|
2747
2667
|
# Verify that the custom batch_size is used
|
|
2748
2668
|
self.assertEqual(query_json["limit"], custom_batch_size)
|
|
2669
|
+
|
|
2670
|
+
|
|
2671
|
+
def test_attachment_identity_preserved_through_bt_safe_deep_copy():
|
|
2672
|
+
"""Test that attachment object identity is preserved through bt_safe_deep_copy."""
|
|
2673
|
+
from braintrust.bt_json import bt_safe_deep_copy
|
|
2674
|
+
|
|
2675
|
+
attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
|
|
2676
|
+
original_id = id(attachment)
|
|
2677
|
+
|
|
2678
|
+
# Simulate what happens in Span.log
|
|
2679
|
+
partial_record = {"input": {"file": attachment}}
|
|
2680
|
+
copied = bt_safe_deep_copy(partial_record)
|
|
2681
|
+
|
|
2682
|
+
# Verify identity preserved
|
|
2683
|
+
assert copied["input"]["file"] is attachment
|
|
2684
|
+
assert id(copied["input"]["file"]) == original_id
|
|
2685
|
+
|
|
2686
|
+
|
|
2687
|
+
def test_extract_attachments_collects_and_replaces():
|
|
2688
|
+
"""Test that _extract_attachments properly collects attachments and replaces them with references."""
|
|
2689
|
+
from braintrust.logger import _extract_attachments
|
|
2690
|
+
|
|
2691
|
+
attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
|
|
2692
|
+
attachment2 = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")
|
|
2693
|
+
ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="file3.pdf", content_type="application/pdf")
|
|
2694
|
+
|
|
2695
|
+
event = {
|
|
2696
|
+
"input": {"file": attachment1},
|
|
2697
|
+
"output": {"file": attachment2},
|
|
2698
|
+
"metadata": {"files": [attachment1, ext_attachment]}
|
|
2699
|
+
}
|
|
2700
|
+
|
|
2701
|
+
attachments = []
|
|
2702
|
+
_extract_attachments(event, attachments)
|
|
2703
|
+
|
|
2704
|
+
# Should have collected all 4 attachment instances (attachment1 appears twice)
|
|
2705
|
+
assert len(attachments) == 4
|
|
2706
|
+
assert attachments[0] is attachment1
|
|
2707
|
+
assert attachments[1] is attachment2
|
|
2708
|
+
assert attachments[2] is attachment1 # Same instance collected again
|
|
2709
|
+
assert attachments[3] is ext_attachment
|
|
2710
|
+
|
|
2711
|
+
# Event should have been modified to contain references
|
|
2712
|
+
assert event["input"]["file"] == attachment1.reference
|
|
2713
|
+
assert event["output"]["file"] == attachment2.reference
|
|
2714
|
+
assert event["metadata"]["files"][0] == attachment1.reference
|
|
2715
|
+
assert event["metadata"]["files"][1] == ext_attachment.reference
|
|
2716
|
+
|
|
2717
|
+
|
|
2718
|
+
def test_extract_attachments_preserves_identity():
|
|
2719
|
+
"""Test that the same attachment instance is collected multiple times when it appears in different places."""
|
|
2720
|
+
from braintrust.logger import _extract_attachments
|
|
2721
|
+
|
|
2722
|
+
attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
|
|
2723
|
+
original_id = id(attachment)
|
|
2724
|
+
|
|
2725
|
+
event = {
|
|
2726
|
+
"input": attachment,
|
|
2727
|
+
"output": attachment, # Same instance
|
|
2728
|
+
"metadata": {"file": attachment} # Same instance again
|
|
2729
|
+
}
|
|
2730
|
+
|
|
2731
|
+
attachments = []
|
|
2732
|
+
_extract_attachments(event, attachments)
|
|
2733
|
+
|
|
2734
|
+
# Should collect the same instance 3 times
|
|
2735
|
+
assert len(attachments) == 3
|
|
2736
|
+
assert all(att is attachment for att in attachments)
|
|
2737
|
+
assert all(id(att) == original_id for att in attachments)
|
|
2738
|
+
|
|
2739
|
+
|
|
2740
|
+
def test_attachment_upload_tracked_on_flush(with_memory_logger, with_simulate_login):
|
|
2741
|
+
"""Test that attachment upload is tracked when attachments are logged and flushed."""
|
|
2742
|
+
attachment = Attachment(data=b"test data", filename="test.txt", content_type="text/plain")
|
|
2743
|
+
|
|
2744
|
+
logger = init_test_logger(__name__)
|
|
2745
|
+
span = logger.start_span(name="test_span")
|
|
2746
|
+
span.log(input={"file": attachment})
|
|
2747
|
+
span.end()
|
|
2748
|
+
|
|
2749
|
+
# No upload attempts yet
|
|
2750
|
+
assert len(with_memory_logger.upload_attempts) == 0
|
|
2751
|
+
|
|
2752
|
+
# Flush should track upload attempt
|
|
2753
|
+
logger.flush()
|
|
2754
|
+
|
|
2755
|
+
# Now upload should be tracked
|
|
2756
|
+
assert len(with_memory_logger.upload_attempts) == 1
|
|
2757
|
+
assert with_memory_logger.upload_attempts[0] is attachment
|
|
2758
|
+
|
|
2759
|
+
|
|
2760
|
+
def test_multiple_attachments_upload_tracked(with_memory_logger, with_simulate_login):
|
|
2761
|
+
"""Test that upload is tracked for multiple attachments."""
|
|
2762
|
+
attachment1 = Attachment(data=b"data1", filename="file1.txt", content_type="text/plain")
|
|
2763
|
+
attachment2 = Attachment(data=b"data2", filename="file2.txt", content_type="text/plain")
|
|
2764
|
+
|
|
2765
|
+
logger = init_test_logger(__name__)
|
|
2766
|
+
span = logger.start_span(name="test_span")
|
|
2767
|
+
span.log(
|
|
2768
|
+
input={"file1": attachment1},
|
|
2769
|
+
output={"file2": attachment2}
|
|
2770
|
+
)
|
|
2771
|
+
span.end()
|
|
2772
|
+
logger.flush()
|
|
2773
|
+
|
|
2774
|
+
# Both attachments should be tracked
|
|
2775
|
+
assert len(with_memory_logger.upload_attempts) == 2
|
|
2776
|
+
assert attachment1 in with_memory_logger.upload_attempts
|
|
2777
|
+
assert attachment2 in with_memory_logger.upload_attempts
|
|
2778
|
+
|
|
2779
|
+
|
|
2780
|
+
def test_same_attachment_logged_twice_tracked_twice(with_memory_logger, with_simulate_login):
|
|
2781
|
+
"""Test that same attachment logged twice appears twice in upload attempts."""
|
|
2782
|
+
attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
|
|
2783
|
+
|
|
2784
|
+
logger = init_test_logger(__name__)
|
|
2785
|
+
span = logger.start_span(name="test_span")
|
|
2786
|
+
span.log(input={"file": attachment})
|
|
2787
|
+
span.log(metadata={"same_file": attachment})
|
|
2788
|
+
span.end()
|
|
2789
|
+
logger.flush()
|
|
2790
|
+
|
|
2791
|
+
# Same attachment should be tracked twice (once for each log call)
|
|
2792
|
+
assert len(with_memory_logger.upload_attempts) == 2
|
|
2793
|
+
assert with_memory_logger.upload_attempts[0] is attachment
|
|
2794
|
+
assert with_memory_logger.upload_attempts[1] is attachment
|
|
2795
|
+
|
|
2796
|
+
|
|
2797
|
+
def test_external_attachment_upload_tracked(with_memory_logger, with_simulate_login):
|
|
2798
|
+
"""Test that ExternalAttachment upload is also tracked."""
|
|
2799
|
+
ext_attachment = ExternalAttachment(
|
|
2800
|
+
url="s3://bucket/key.pdf",
|
|
2801
|
+
filename="external.pdf",
|
|
2802
|
+
content_type="application/pdf"
|
|
2803
|
+
)
|
|
2804
|
+
|
|
2805
|
+
logger = init_test_logger(__name__)
|
|
2806
|
+
span = logger.start_span(name="test_span")
|
|
2807
|
+
span.log(input={"file": ext_attachment})
|
|
2808
|
+
span.end()
|
|
2809
|
+
logger.flush()
|
|
2810
|
+
|
|
2811
|
+
# ExternalAttachment should be tracked
|
|
2812
|
+
assert len(with_memory_logger.upload_attempts) == 1
|
|
2813
|
+
assert with_memory_logger.upload_attempts[0] is ext_attachment
|
|
2814
|
+
|
|
2815
|
+
|
|
2816
|
+
def test_json_attachment_upload_tracked(with_memory_logger, with_simulate_login):
|
|
2817
|
+
"""Test that JSONAttachment upload is tracked."""
|
|
2818
|
+
data = {"key": "value", "nested": {"array": [1, 2, 3]}}
|
|
2819
|
+
json_attachment = JSONAttachment(data, filename="data.json")
|
|
2820
|
+
|
|
2821
|
+
logger = init_test_logger(__name__)
|
|
2822
|
+
span = logger.start_span(name="test_span")
|
|
2823
|
+
span.log(output={"data": json_attachment})
|
|
2824
|
+
span.end()
|
|
2825
|
+
logger.flush()
|
|
2826
|
+
|
|
2827
|
+
# JSONAttachment should be tracked
|
|
2828
|
+
assert len(with_memory_logger.upload_attempts) == 1
|
|
2829
|
+
assert with_memory_logger.upload_attempts[0] is json_attachment
|
|
2830
|
+
|
|
2831
|
+
|
|
2832
|
+
def test_multiple_attachment_types_tracked(with_memory_logger, with_simulate_login):
|
|
2833
|
+
"""Test that different attachment types are all tracked."""
|
|
2834
|
+
attachment = Attachment(data=b"data", filename="file.txt", content_type="text/plain")
|
|
2835
|
+
json_attachment = JSONAttachment({"key": "value"}, filename="data.json")
|
|
2836
|
+
ext_attachment = ExternalAttachment(url="s3://bucket/key", filename="file.pdf", content_type="application/pdf")
|
|
2837
|
+
|
|
2838
|
+
logger = init_test_logger(__name__)
|
|
2839
|
+
span = logger.start_span(name="test_span")
|
|
2840
|
+
span.log(
|
|
2841
|
+
input=attachment,
|
|
2842
|
+
output=json_attachment,
|
|
2843
|
+
metadata={"file": ext_attachment}
|
|
2844
|
+
)
|
|
2845
|
+
span.end()
|
|
2846
|
+
logger.flush()
|
|
2847
|
+
|
|
2848
|
+
# All three types should be tracked
|
|
2849
|
+
assert len(with_memory_logger.upload_attempts) == 3
|
|
2850
|
+
assert attachment in with_memory_logger.upload_attempts
|
|
2851
|
+
assert json_attachment in with_memory_logger.upload_attempts
|
|
2852
|
+
assert ext_attachment in with_memory_logger.upload_attempts
|