braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. braintrust/_generated_types.py +224 -122
  2. braintrust/cli/install/api.py +1 -1
  3. braintrust/conftest.py +24 -0
  4. braintrust/db_fields.py +1 -0
  5. braintrust/devserver/test_server_integration.py +0 -11
  6. braintrust/framework.py +2 -2
  7. braintrust/functions/invoke.py +1 -8
  8. braintrust/generated_types.py +7 -7
  9. braintrust/logger.py +30 -38
  10. braintrust/otel/__init__.py +24 -15
  11. braintrust/prompt_cache/test_disk_cache.py +3 -3
  12. braintrust/span_types.py +3 -0
  13. braintrust/test_bt_json.py +23 -19
  14. braintrust/test_framework.py +25 -0
  15. braintrust/test_logger.py +34 -0
  16. braintrust/test_otel.py +118 -26
  17. braintrust/test_util.py +51 -1
  18. braintrust/util.py +24 -3
  19. braintrust/version.py +2 -2
  20. braintrust/wrappers/langsmith_wrapper.py +517 -0
  21. braintrust/wrappers/litellm.py +43 -0
  22. braintrust/wrappers/test_agno.py +0 -12
  23. braintrust/wrappers/test_anthropic.py +1 -11
  24. braintrust/wrappers/test_dspy.py +0 -11
  25. braintrust/wrappers/test_google_genai.py +6 -1
  26. braintrust/wrappers/test_langsmith_wrapper.py +338 -0
  27. braintrust/wrappers/test_litellm.py +73 -10
  28. braintrust/wrappers/test_oai_attachments.py +0 -10
  29. braintrust/wrappers/test_openai.py +3 -12
  30. braintrust/wrappers/test_openrouter.py +0 -9
  31. braintrust/wrappers/test_pydantic_ai_integration.py +0 -11
  32. braintrust/wrappers/test_pydantic_ai_wrap_openai.py +2 -0
  33. {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/METADATA +1 -1
  34. {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/RECORD +37 -35
  35. {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/WHEEL +0 -0
  36. {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/entry_points.txt +0 -0
  37. {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/top_level.txt +0 -0
braintrust/functions/invoke.py CHANGED
@@ -2,7 +2,7 @@ from typing import Any, Literal, TypedDict, TypeVar, overload
2
2
 
3
3
  from sseclient import SSEClient
4
4
 
5
- from .._generated_types import FunctionTypeEnum, InvokeContext
5
+ from .._generated_types import FunctionTypeEnum
6
6
  from ..logger import Exportable, get_span_parent_object, login, proxy_conn
7
7
  from ..util import response_raise_for_status
8
8
  from .constants import INVOKE_API_VERSION
@@ -43,7 +43,6 @@ def invoke(
43
43
  # arguments to the function
44
44
  input: Any = None,
45
45
  messages: list[Any] | None = None,
46
- context: InvokeContext | None = None,
47
46
  metadata: dict[str, Any] | None = None,
48
47
  tags: list[str] | None = None,
49
48
  parent: Exportable | str | None = None,
@@ -72,7 +71,6 @@ def invoke(
72
71
  # arguments to the function
73
72
  input: Any = None,
74
73
  messages: list[Any] | None = None,
75
- context: InvokeContext | None = None,
76
74
  metadata: dict[str, Any] | None = None,
77
75
  tags: list[str] | None = None,
78
76
  parent: Exportable | str | None = None,
@@ -100,7 +98,6 @@ def invoke(
100
98
  # arguments to the function
101
99
  input: Any = None,
102
100
  messages: list[Any] | None = None,
103
- context: InvokeContext | None = None,
104
101
  metadata: dict[str, Any] | None = None,
105
102
  tags: list[str] | None = None,
106
103
  parent: Exportable | str | None = None,
@@ -119,8 +116,6 @@ def invoke(
119
116
  Args:
120
117
  input: The input to the function. This will be logged as the `input` field in the span.
121
118
  messages: Additional OpenAI-style messages to add to the prompt (only works for llm functions).
122
- context: Context for functions that operate on spans/traces (e.g., facets). Should contain
123
- `object_type`, `object_id`, and `scope` fields.
124
119
  metadata: Additional metadata to add to the span. This will be logged as the `metadata` field in the span.
125
120
  It will also be available as the {{metadata}} field in the prompt and as the `metadata` argument
126
121
  to the function.
@@ -195,8 +190,6 @@ def invoke(
195
190
  )
196
191
  if messages is not None:
197
192
  request["messages"] = messages
198
- if context is not None:
199
- request["context"] = context
200
193
  if mode is not None:
201
194
  request["mode"] = mode
202
195
  if strict is not None:
braintrust/generated_types.py CHANGED
@@ -1,4 +1,4 @@
1
- """Auto-generated file (internal git SHA 547fa17c0937e0e25fdf9214487be6f31c91a37a) -- do not modify"""
1
+ """Auto-generated file (internal git SHA 87ac73f4945a47eff2d4e42775ba4dbc58854c73) -- do not modify"""
2
2
 
3
3
  from ._generated_types import (
4
4
  Acl,
@@ -10,6 +10,7 @@ from ._generated_types import (
10
10
  AsyncScoringState,
11
11
  AttachmentReference,
12
12
  AttachmentStatus,
13
+ BatchedFacetData,
13
14
  BraintrustAttachmentReference,
14
15
  BraintrustModelParams,
15
16
  CallEvent,
@@ -47,15 +48,13 @@ from ._generated_types import (
47
48
  GraphEdge,
48
49
  GraphNode,
49
50
  Group,
51
+ GroupScope,
50
52
  IfExists,
51
- InvokeContext,
52
53
  InvokeFunction,
53
54
  InvokeParent,
54
- InvokeScope,
55
55
  MCPServer,
56
56
  MessageRole,
57
57
  ModelParams,
58
- NullableFunctionTypeEnum,
59
58
  NullableSavedFunctionId,
60
59
  ObjectReference,
61
60
  ObjectReferenceNullish,
@@ -99,6 +98,7 @@ from ._generated_types import (
99
98
  StreamingMode,
100
99
  ToolFunctionDefinition,
101
100
  TraceScope,
101
+ TriggeredFunctionState,
102
102
  UploadStatus,
103
103
  User,
104
104
  View,
@@ -117,6 +117,7 @@ __all__ = [
117
117
  "AsyncScoringState",
118
118
  "AttachmentReference",
119
119
  "AttachmentStatus",
120
+ "BatchedFacetData",
120
121
  "BraintrustAttachmentReference",
121
122
  "BraintrustModelParams",
122
123
  "CallEvent",
@@ -154,15 +155,13 @@ __all__ = [
154
155
  "GraphEdge",
155
156
  "GraphNode",
156
157
  "Group",
158
+ "GroupScope",
157
159
  "IfExists",
158
- "InvokeContext",
159
160
  "InvokeFunction",
160
161
  "InvokeParent",
161
- "InvokeScope",
162
162
  "MCPServer",
163
163
  "MessageRole",
164
164
  "ModelParams",
165
- "NullableFunctionTypeEnum",
166
165
  "NullableSavedFunctionId",
167
166
  "ObjectReference",
168
167
  "ObjectReferenceNullish",
@@ -206,6 +205,7 @@ __all__ = [
206
205
  "StreamingMode",
207
206
  "ToolFunctionDefinition",
208
207
  "TraceScope",
208
+ "TriggeredFunctionState",
209
209
  "UploadStatus",
210
210
  "User",
211
211
  "View",
braintrust/logger.py CHANGED
@@ -454,24 +454,22 @@ class BraintrustState:
454
454
 
455
455
  def copy_state(self, other: "BraintrustState"):
456
456
  """Copy login information from another BraintrustState instance."""
457
- self.__dict__.update(
458
- {
459
- k: v
460
- for (k, v) in other.__dict__.items()
461
- if k
462
- not in (
463
- "current_experiment",
464
- "current_logger",
465
- "current_parent",
466
- "current_span",
467
- "_global_bg_logger",
468
- "_override_bg_logger",
469
- "_context_manager",
470
- "_last_otel_setting",
471
- "_context_manager_lock",
472
- )
473
- }
474
- )
457
+ self.__dict__.update({
458
+ k: v
459
+ for (k, v) in other.__dict__.items()
460
+ if k
461
+ not in (
462
+ "current_experiment",
463
+ "current_logger",
464
+ "current_parent",
465
+ "current_span",
466
+ "_global_bg_logger",
467
+ "_override_bg_logger",
468
+ "_context_manager",
469
+ "_last_otel_setting",
470
+ "_context_manager_lock",
471
+ )
472
+ })
475
473
 
476
474
  def login(
477
475
  self,
@@ -2344,6 +2342,7 @@ def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any])
2344
2342
  SKIP_ASYNC_SCORING_FIELD,
2345
2343
  "span_id",
2346
2344
  "root_span_id",
2345
+ "_bt_internal_override_pagination_key",
2347
2346
  }
2348
2347
  if forbidden_keys:
2349
2348
  raise ValueError(f"The following keys are not permitted: {forbidden_keys}")
@@ -3856,9 +3855,6 @@ class SpanImpl(Span):
3856
3855
  if serializable_partial_record.get("metrics", {}).get("end") is not None:
3857
3856
  self._logged_end_time = serializable_partial_record["metrics"]["end"]
3858
3857
 
3859
- if len(serializable_partial_record.get("tags", [])) > 0 and self.span_parents:
3860
- raise Exception("Tags can only be logged to the root span")
3861
-
3862
3858
  def compute_record() -> dict[str, Any]:
3863
3859
  exporter = _get_exporter()
3864
3860
  return dict(
@@ -4406,24 +4402,20 @@ def render_message(render: Callable[[str], str], message: PromptMessage):
4406
4402
  if c["type"] == "text":
4407
4403
  rendered_content.append({**c, "text": render(c["text"])})
4408
4404
  elif c["type"] == "image_url":
4409
- rendered_content.append(
4410
- {
4411
- **c,
4412
- "image_url": {**c["image_url"], "url": render(c["image_url"]["url"])},
4413
- }
4414
- )
4405
+ rendered_content.append({
4406
+ **c,
4407
+ "image_url": {**c["image_url"], "url": render(c["image_url"]["url"])},
4408
+ })
4415
4409
  elif c["type"] == "file":
4416
- rendered_content.append(
4417
- {
4418
- **c,
4419
- "file": {
4420
- **c["file"],
4421
- "file_data": render(c["file"]["file_data"]),
4422
- **({} if "file_id" not in c["file"] else {"file_id": render(c["file"]["file_id"])}),
4423
- **({} if "filename" not in c["file"] else {"filename": render(c["file"]["filename"])}),
4424
- },
4425
- }
4426
- )
4410
+ rendered_content.append({
4411
+ **c,
4412
+ "file": {
4413
+ **c["file"],
4414
+ "file_data": render(c["file"]["file_data"]),
4415
+ **({} if "file_id" not in c["file"] else {"file_id": render(c["file"]["file_id"])}),
4416
+ **({} if "filename" not in c["file"] else {"filename": render(c["file"]["filename"])}),
4417
+ },
4418
+ })
4427
4419
  else:
4428
4420
  raise ValueError(f"Unknown content type: {c['type']}")
4429
4421
 
braintrust/otel/__init__.py CHANGED
@@ -90,18 +90,13 @@ class AISpanProcessor:
90
90
  def _should_keep_filtered_span(self, span):
91
91
  """
92
92
  Keep spans if:
93
- 1. It's a root span (no parent)
94
- 2. Custom filter returns True/False (if provided)
95
- 3. Span name starts with 'gen_ai.', 'braintrust.', 'llm.', 'ai.', or 'traceloop.'
96
- 4. Any attribute name starts with those prefixes
93
+ 1. Custom filter returns True/False (if provided)
94
+ 2. Span name starts with 'gen_ai.', 'braintrust.', 'llm.', 'ai.', or 'traceloop.'
95
+ 3. Any attribute name starts with those prefixes
97
96
  """
98
97
  if not span:
99
98
  return False
100
99
 
101
- # Braintrust requires root spans, so always keep them
102
- if span.parent is None:
103
- return True
104
-
105
100
  # Apply custom filter if provided
106
101
  if self._custom_filter:
107
102
  custom_result = self._custom_filter(span)
@@ -384,6 +379,9 @@ def _get_braintrust_parent(object_type, object_id: str | None = None, compute_ar
384
379
 
385
380
  return None
386
381
 
382
+ def is_root_span(span) -> bool:
383
+ """Returns True if the span is a root span (no parent span)."""
384
+ return getattr(span, "parent", None) is None
387
385
 
388
386
  def context_from_span_export(export_str: str):
389
387
  """
@@ -522,15 +520,17 @@ def add_span_parent_to_baggage(span, ctx=None):
522
520
  return add_parent_to_baggage(parent_value, ctx=ctx)
523
521
 
524
522
 
525
- def parent_from_headers(headers: dict[str, str]) -> str | None:
523
+ def parent_from_headers(headers: dict[str, str], propagator=None) -> str | None:
526
524
  """
527
- Extract a Braintrust-compatible parent string from W3C Trace Context headers.
525
+ Extract a Braintrust-compatible parent string from trace context headers.
528
526
 
529
- This converts OTEL trace context headers (traceparent/baggage) into a format
530
- that can be passed as the 'parent' parameter to Braintrust's start_span() method.
527
+ This converts OTEL trace context headers into a format that can be passed
528
+ as the 'parent' parameter to Braintrust's start_span() method.
531
529
 
532
530
  Args:
533
- headers: Dictionary with 'traceparent' and optionally 'baggage' keys
531
+ headers: Dictionary with trace context headers (e.g., 'traceparent'/'baggage' for W3C)
532
+ propagator: Optional custom TextMapPropagator. If not provided, uses the
533
+ globally registered propagator (W3C TraceContext by default).
534
534
 
535
535
  Returns:
536
536
  Braintrust V4 export string that can be used as parent parameter,
@@ -545,6 +545,12 @@ def parent_from_headers(headers: dict[str, str]) -> str | None:
545
545
  >>> parent = parent_from_headers(headers)
546
546
  >>> with project.start_span(name="service_c", parent=parent) as span:
547
547
  >>> span.log(input="BT span as child of OTEL parent")
548
+
549
+ >>> # Using a custom propagator (e.g., B3 format)
550
+ >>> from opentelemetry.propagators.b3 import B3MultiFormat
551
+ >>> propagator = B3MultiFormat()
552
+ >>> headers = {'X-B3-TraceId': '...', 'X-B3-SpanId': '...', 'baggage': '...'}
553
+ >>> parent = parent_from_headers(headers, propagator=propagator)
548
554
  """
549
555
  if not OTEL_AVAILABLE:
550
556
  raise ImportError(INSTALL_ERR_MSG)
@@ -553,8 +559,11 @@ def parent_from_headers(headers: dict[str, str]) -> str | None:
553
559
  from opentelemetry import baggage, trace
554
560
  from opentelemetry.propagate import extract
555
561
 
556
- # Extract context from headers using W3C Trace Context propagator
557
- ctx = extract(headers)
562
+ # Extract context from headers using provided propagator or global propagator
563
+ if propagator is not None:
564
+ ctx = propagator.extract(headers)
565
+ else:
566
+ ctx = extract(headers)
558
567
 
559
568
  # Get span from context
560
569
  span = trace.get_current_span(ctx)
braintrust/prompt_cache/test_disk_cache.py CHANGED
@@ -39,7 +39,7 @@ class TestDiskCache(unittest.TestCase):
39
39
  "a\nb",
40
40
  ]
41
41
  for k in weird_keys:
42
- time.sleep(0.05) # make sure the mtimes are different
42
+ time.sleep(0.01) # make sure the mtimes are different
43
43
  self.cache.set(k, data)
44
44
  result = self.cache.get(k)
45
45
  assert data == result
@@ -61,7 +61,7 @@ class TestDiskCache(unittest.TestCase):
61
61
  # Fill cache beyond max size (3).
62
62
  for i in range(3):
63
63
  self.cache.set(f"key{i}", {"value": i})
64
- time.sleep(0.1) # wait to ensure different mtimes
64
+ time.sleep(0.01) # wait to ensure different mtimes
65
65
 
66
66
  # Add one more to trigger eviction.
67
67
  self.cache.set("key3", {"value": 3})
@@ -75,7 +75,7 @@ class TestDiskCache(unittest.TestCase):
75
75
  # Fill cache beyond max size (3).
76
76
  for i in range(3):
77
77
  self.cache.set(f"key{i}", {"value": i})
78
- time.sleep(0.1) # wait to ensure different mtimes
78
+ time.sleep(0.01) # wait to ensure different mtimes
79
79
 
80
80
  # Add one more to trigger eviction.
81
81
  self.cache.set("key3", {"value": 3})
braintrust/span_types.py CHANGED
@@ -13,6 +13,9 @@ class SpanTypeAttribute(str, Enum):
13
13
  EVAL = "eval"
14
14
  TASK = "task"
15
15
  TOOL = "tool"
16
+ AUTOMATION = "automation"
17
+ FACET = "facet"
18
+ PREPROCESSOR = "preprocessor"
16
19
 
17
20
 
18
21
  class SpanPurpose(str, Enum):
braintrust/test_bt_json.py CHANGED
@@ -5,6 +5,7 @@ import json
5
5
  from typing import Any
6
6
  from unittest import TestCase
7
7
 
8
+ import pytest
8
9
  from braintrust.bt_json import bt_dumps, bt_safe_deep_copy
9
10
  from braintrust.logger import Attachment, ExternalAttachment
10
11
 
@@ -281,30 +282,33 @@ class TestBTJson(TestCase):
281
282
  self.assertTrue("(1, 2)" in result or "1, 2" in result)
282
283
  self.assertIn("None", result)
283
284
 
284
- def test_to_bt_safe_special_objects(self):
285
- """Test _to_bt_safe handling of Span, Experiment, Dataset, Logger objects."""
286
- from braintrust import init, init_dataset, init_logger
285
+ @pytest.mark.vcr
286
+ def test_to_bt_safe_special_objects():
287
+ """Test _to_bt_safe handling of Span, Experiment, Dataset, Logger objects."""
288
+ from braintrust import init, init_dataset, init_logger
287
289
 
288
- # Create actual objects
289
- exp = init(project="test", experiment="test")
290
- dataset = init_dataset(project="test", name="test")
291
- logger = init_logger(project="test")
292
- span = exp.start_span()
290
+ # Create actual objects
291
+ exp = init(project="test", experiment="test")
292
+ dataset = init_dataset(project="test", name="test")
293
+ logger = init_logger(project="test")
294
+ span = exp.start_span()
293
295
 
294
- # Import _to_bt_safe
295
- from braintrust.bt_json import _to_bt_safe
296
+ # Import _to_bt_safe
297
+ from braintrust.bt_json import _to_bt_safe
298
+
299
+ # Test each special object
300
+ assert _to_bt_safe(span) == "<span>"
301
+ assert _to_bt_safe(exp) == "<experiment>"
302
+ assert _to_bt_safe(dataset) == "<dataset>"
303
+ assert _to_bt_safe(logger) == "<logger>"
296
304
 
297
- # Test each special object
298
- self.assertEqual(_to_bt_safe(span), "<span>")
299
- self.assertEqual(_to_bt_safe(exp), "<experiment>")
300
- self.assertEqual(_to_bt_safe(dataset), "<dataset>")
301
- self.assertEqual(_to_bt_safe(logger), "<logger>")
305
+ # Clean up
306
+ exp.flush()
307
+ dataset.flush()
308
+ logger.flush()
302
309
 
303
- # Clean up
304
- exp.flush()
305
- dataset.flush()
306
- logger.flush()
307
310
 
311
+ class TestBTJsonAttachments(TestCase):
308
312
  def test_to_bt_safe_attachments(self):
309
313
  """Test _to_bt_safe preserves BaseAttachment and converts ReadonlyAttachment to reference."""
310
314
  from braintrust.bt_json import _to_bt_safe
braintrust/test_framework.py CHANGED
@@ -343,6 +343,31 @@ async def test_eval_no_send_logs_true(with_memory_logger, simple_scorer):
343
343
  assert len(logs) == 0
344
344
 
345
345
 
346
+ @pytest.mark.asyncio
347
+ async def test_eval_no_send_logs_with_none_score(with_memory_logger):
348
+ """Test that scorers returning None don't crash local mode."""
349
+
350
+ def sometimes_none_scorer(input, output, expected):
351
+ # Return None for first input, score for second
352
+ if input == "hello":
353
+ return {"name": "conditional", "score": None}
354
+ return {"name": "conditional", "score": 1.0}
355
+
356
+ result = await Eval(
357
+ "test-none-score",
358
+ data=[
359
+ {"input": "hello", "expected": "hello world"},
360
+ {"input": "test", "expected": "test world"},
361
+ ],
362
+ task=lambda input_val: input_val + " world",
363
+ scores=[sometimes_none_scorer],
364
+ no_send_logs=True,
365
+ )
366
+
367
+ # Should not crash and should calculate average from non-None scores only
368
+ assert result.summary.scores["conditional"].score == 1.0 # Only the second score counts
369
+
370
+
346
371
  @pytest.mark.asyncio
347
372
  async def test_hooks_tags_append(with_memory_logger, with_simulate_login, simple_scorer):
348
373
  """Test that hooks.tags can be appended to and logged."""
braintrust/test_logger.py CHANGED
@@ -849,6 +849,40 @@ def test_span_link_with_unresolved_experiment(with_simulate_login, with_memory_l
849
849
  assert link == "https://www.braintrust.dev/error-generating-link?msg=resolve-experiment-id"
850
850
 
851
851
 
852
+ def test_experiment_span_link_uses_env_vars_when_logged_out(with_memory_logger):
853
+ """Verify EXPERIMENT spans use BRAINTRUST_ORG_NAME env var when not logged in."""
854
+ simulate_logout()
855
+ assert_logged_out()
856
+
857
+ keys = ["BRAINTRUST_APP_URL", "BRAINTRUST_ORG_NAME"]
858
+ originals = {k: os.environ.get(k) for k in keys}
859
+ try:
860
+ os.environ["BRAINTRUST_APP_URL"] = "https://test-app.example.com"
861
+ os.environ["BRAINTRUST_ORG_NAME"] = "env-org-name"
862
+
863
+ experiment = braintrust.init(
864
+ project="test-project",
865
+ experiment="test-experiment",
866
+ )
867
+
868
+ # Create span with resolved experiment ID
869
+ span = experiment.start_span(name="test-span")
870
+ span.parent_object_id = LazyValue(lambda: "test-exp-id", use_mutex=False)
871
+ span.end()
872
+
873
+ link = span.link()
874
+
875
+ # Should use env var org name and app url
876
+ assert "env-org-name" in link
877
+ assert "test-app.example.com" in link
878
+ assert "test-exp-id" in link
879
+ finally:
880
+ for k, v in originals.items():
881
+ os.environ.pop(k, None)
882
+ if v:
883
+ os.environ[k] = v
884
+
885
+
852
886
  def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_logger):
853
887
  logger = init_logger(
854
888
  project="test-project",