braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +224 -122
- braintrust/cli/install/api.py +1 -1
- braintrust/conftest.py +24 -0
- braintrust/db_fields.py +1 -0
- braintrust/devserver/test_server_integration.py +0 -11
- braintrust/framework.py +2 -2
- braintrust/functions/invoke.py +1 -8
- braintrust/generated_types.py +7 -7
- braintrust/logger.py +30 -38
- braintrust/otel/__init__.py +24 -15
- braintrust/prompt_cache/test_disk_cache.py +3 -3
- braintrust/span_types.py +3 -0
- braintrust/test_bt_json.py +23 -19
- braintrust/test_framework.py +25 -0
- braintrust/test_logger.py +34 -0
- braintrust/test_otel.py +118 -26
- braintrust/test_util.py +51 -1
- braintrust/util.py +24 -3
- braintrust/version.py +2 -2
- braintrust/wrappers/langsmith_wrapper.py +517 -0
- braintrust/wrappers/litellm.py +43 -0
- braintrust/wrappers/test_agno.py +0 -12
- braintrust/wrappers/test_anthropic.py +1 -11
- braintrust/wrappers/test_dspy.py +0 -11
- braintrust/wrappers/test_google_genai.py +6 -1
- braintrust/wrappers/test_langsmith_wrapper.py +338 -0
- braintrust/wrappers/test_litellm.py +73 -10
- braintrust/wrappers/test_oai_attachments.py +0 -10
- braintrust/wrappers/test_openai.py +3 -12
- braintrust/wrappers/test_openrouter.py +0 -9
- braintrust/wrappers/test_pydantic_ai_integration.py +0 -11
- braintrust/wrappers/test_pydantic_ai_wrap_openai.py +2 -0
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/METADATA +1 -1
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/RECORD +37 -35
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/WHEEL +0 -0
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/top_level.txt +0 -0
braintrust/functions/invoke.py
CHANGED
|
@@ -2,7 +2,7 @@ from typing import Any, Literal, TypedDict, TypeVar, overload
|
|
|
2
2
|
|
|
3
3
|
from sseclient import SSEClient
|
|
4
4
|
|
|
5
|
-
from .._generated_types import FunctionTypeEnum
|
|
5
|
+
from .._generated_types import FunctionTypeEnum
|
|
6
6
|
from ..logger import Exportable, get_span_parent_object, login, proxy_conn
|
|
7
7
|
from ..util import response_raise_for_status
|
|
8
8
|
from .constants import INVOKE_API_VERSION
|
|
@@ -43,7 +43,6 @@ def invoke(
|
|
|
43
43
|
# arguments to the function
|
|
44
44
|
input: Any = None,
|
|
45
45
|
messages: list[Any] | None = None,
|
|
46
|
-
context: InvokeContext | None = None,
|
|
47
46
|
metadata: dict[str, Any] | None = None,
|
|
48
47
|
tags: list[str] | None = None,
|
|
49
48
|
parent: Exportable | str | None = None,
|
|
@@ -72,7 +71,6 @@ def invoke(
|
|
|
72
71
|
# arguments to the function
|
|
73
72
|
input: Any = None,
|
|
74
73
|
messages: list[Any] | None = None,
|
|
75
|
-
context: InvokeContext | None = None,
|
|
76
74
|
metadata: dict[str, Any] | None = None,
|
|
77
75
|
tags: list[str] | None = None,
|
|
78
76
|
parent: Exportable | str | None = None,
|
|
@@ -100,7 +98,6 @@ def invoke(
|
|
|
100
98
|
# arguments to the function
|
|
101
99
|
input: Any = None,
|
|
102
100
|
messages: list[Any] | None = None,
|
|
103
|
-
context: InvokeContext | None = None,
|
|
104
101
|
metadata: dict[str, Any] | None = None,
|
|
105
102
|
tags: list[str] | None = None,
|
|
106
103
|
parent: Exportable | str | None = None,
|
|
@@ -119,8 +116,6 @@ def invoke(
|
|
|
119
116
|
Args:
|
|
120
117
|
input: The input to the function. This will be logged as the `input` field in the span.
|
|
121
118
|
messages: Additional OpenAI-style messages to add to the prompt (only works for llm functions).
|
|
122
|
-
context: Context for functions that operate on spans/traces (e.g., facets). Should contain
|
|
123
|
-
`object_type`, `object_id`, and `scope` fields.
|
|
124
119
|
metadata: Additional metadata to add to the span. This will be logged as the `metadata` field in the span.
|
|
125
120
|
It will also be available as the {{metadata}} field in the prompt and as the `metadata` argument
|
|
126
121
|
to the function.
|
|
@@ -195,8 +190,6 @@ def invoke(
|
|
|
195
190
|
)
|
|
196
191
|
if messages is not None:
|
|
197
192
|
request["messages"] = messages
|
|
198
|
-
if context is not None:
|
|
199
|
-
request["context"] = context
|
|
200
193
|
if mode is not None:
|
|
201
194
|
request["mode"] = mode
|
|
202
195
|
if strict is not None:
|
braintrust/generated_types.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Auto-generated file (internal git SHA
|
|
1
|
+
"""Auto-generated file (internal git SHA 87ac73f4945a47eff2d4e42775ba4dbc58854c73) -- do not modify"""
|
|
2
2
|
|
|
3
3
|
from ._generated_types import (
|
|
4
4
|
Acl,
|
|
@@ -10,6 +10,7 @@ from ._generated_types import (
|
|
|
10
10
|
AsyncScoringState,
|
|
11
11
|
AttachmentReference,
|
|
12
12
|
AttachmentStatus,
|
|
13
|
+
BatchedFacetData,
|
|
13
14
|
BraintrustAttachmentReference,
|
|
14
15
|
BraintrustModelParams,
|
|
15
16
|
CallEvent,
|
|
@@ -47,15 +48,13 @@ from ._generated_types import (
|
|
|
47
48
|
GraphEdge,
|
|
48
49
|
GraphNode,
|
|
49
50
|
Group,
|
|
51
|
+
GroupScope,
|
|
50
52
|
IfExists,
|
|
51
|
-
InvokeContext,
|
|
52
53
|
InvokeFunction,
|
|
53
54
|
InvokeParent,
|
|
54
|
-
InvokeScope,
|
|
55
55
|
MCPServer,
|
|
56
56
|
MessageRole,
|
|
57
57
|
ModelParams,
|
|
58
|
-
NullableFunctionTypeEnum,
|
|
59
58
|
NullableSavedFunctionId,
|
|
60
59
|
ObjectReference,
|
|
61
60
|
ObjectReferenceNullish,
|
|
@@ -99,6 +98,7 @@ from ._generated_types import (
|
|
|
99
98
|
StreamingMode,
|
|
100
99
|
ToolFunctionDefinition,
|
|
101
100
|
TraceScope,
|
|
101
|
+
TriggeredFunctionState,
|
|
102
102
|
UploadStatus,
|
|
103
103
|
User,
|
|
104
104
|
View,
|
|
@@ -117,6 +117,7 @@ __all__ = [
|
|
|
117
117
|
"AsyncScoringState",
|
|
118
118
|
"AttachmentReference",
|
|
119
119
|
"AttachmentStatus",
|
|
120
|
+
"BatchedFacetData",
|
|
120
121
|
"BraintrustAttachmentReference",
|
|
121
122
|
"BraintrustModelParams",
|
|
122
123
|
"CallEvent",
|
|
@@ -154,15 +155,13 @@ __all__ = [
|
|
|
154
155
|
"GraphEdge",
|
|
155
156
|
"GraphNode",
|
|
156
157
|
"Group",
|
|
158
|
+
"GroupScope",
|
|
157
159
|
"IfExists",
|
|
158
|
-
"InvokeContext",
|
|
159
160
|
"InvokeFunction",
|
|
160
161
|
"InvokeParent",
|
|
161
|
-
"InvokeScope",
|
|
162
162
|
"MCPServer",
|
|
163
163
|
"MessageRole",
|
|
164
164
|
"ModelParams",
|
|
165
|
-
"NullableFunctionTypeEnum",
|
|
166
165
|
"NullableSavedFunctionId",
|
|
167
166
|
"ObjectReference",
|
|
168
167
|
"ObjectReferenceNullish",
|
|
@@ -206,6 +205,7 @@ __all__ = [
|
|
|
206
205
|
"StreamingMode",
|
|
207
206
|
"ToolFunctionDefinition",
|
|
208
207
|
"TraceScope",
|
|
208
|
+
"TriggeredFunctionState",
|
|
209
209
|
"UploadStatus",
|
|
210
210
|
"User",
|
|
211
211
|
"View",
|
braintrust/logger.py
CHANGED
|
@@ -454,24 +454,22 @@ class BraintrustState:
|
|
|
454
454
|
|
|
455
455
|
def copy_state(self, other: "BraintrustState"):
|
|
456
456
|
"""Copy login information from another BraintrustState instance."""
|
|
457
|
-
self.__dict__.update(
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
}
|
|
474
|
-
)
|
|
457
|
+
self.__dict__.update({
|
|
458
|
+
k: v
|
|
459
|
+
for (k, v) in other.__dict__.items()
|
|
460
|
+
if k
|
|
461
|
+
not in (
|
|
462
|
+
"current_experiment",
|
|
463
|
+
"current_logger",
|
|
464
|
+
"current_parent",
|
|
465
|
+
"current_span",
|
|
466
|
+
"_global_bg_logger",
|
|
467
|
+
"_override_bg_logger",
|
|
468
|
+
"_context_manager",
|
|
469
|
+
"_last_otel_setting",
|
|
470
|
+
"_context_manager_lock",
|
|
471
|
+
)
|
|
472
|
+
})
|
|
475
473
|
|
|
476
474
|
def login(
|
|
477
475
|
self,
|
|
@@ -2344,6 +2342,7 @@ def _validate_and_sanitize_experiment_log_partial_args(event: Mapping[str, Any])
|
|
|
2344
2342
|
SKIP_ASYNC_SCORING_FIELD,
|
|
2345
2343
|
"span_id",
|
|
2346
2344
|
"root_span_id",
|
|
2345
|
+
"_bt_internal_override_pagination_key",
|
|
2347
2346
|
}
|
|
2348
2347
|
if forbidden_keys:
|
|
2349
2348
|
raise ValueError(f"The following keys are not permitted: {forbidden_keys}")
|
|
@@ -3856,9 +3855,6 @@ class SpanImpl(Span):
|
|
|
3856
3855
|
if serializable_partial_record.get("metrics", {}).get("end") is not None:
|
|
3857
3856
|
self._logged_end_time = serializable_partial_record["metrics"]["end"]
|
|
3858
3857
|
|
|
3859
|
-
if len(serializable_partial_record.get("tags", [])) > 0 and self.span_parents:
|
|
3860
|
-
raise Exception("Tags can only be logged to the root span")
|
|
3861
|
-
|
|
3862
3858
|
def compute_record() -> dict[str, Any]:
|
|
3863
3859
|
exporter = _get_exporter()
|
|
3864
3860
|
return dict(
|
|
@@ -4406,24 +4402,20 @@ def render_message(render: Callable[[str], str], message: PromptMessage):
|
|
|
4406
4402
|
if c["type"] == "text":
|
|
4407
4403
|
rendered_content.append({**c, "text": render(c["text"])})
|
|
4408
4404
|
elif c["type"] == "image_url":
|
|
4409
|
-
rendered_content.append(
|
|
4410
|
-
|
|
4411
|
-
|
|
4412
|
-
|
|
4413
|
-
}
|
|
4414
|
-
)
|
|
4405
|
+
rendered_content.append({
|
|
4406
|
+
**c,
|
|
4407
|
+
"image_url": {**c["image_url"], "url": render(c["image_url"]["url"])},
|
|
4408
|
+
})
|
|
4415
4409
|
elif c["type"] == "file":
|
|
4416
|
-
rendered_content.append(
|
|
4417
|
-
|
|
4418
|
-
|
|
4419
|
-
"file"
|
|
4420
|
-
|
|
4421
|
-
|
|
4422
|
-
|
|
4423
|
-
|
|
4424
|
-
|
|
4425
|
-
}
|
|
4426
|
-
)
|
|
4410
|
+
rendered_content.append({
|
|
4411
|
+
**c,
|
|
4412
|
+
"file": {
|
|
4413
|
+
**c["file"],
|
|
4414
|
+
"file_data": render(c["file"]["file_data"]),
|
|
4415
|
+
**({} if "file_id" not in c["file"] else {"file_id": render(c["file"]["file_id"])}),
|
|
4416
|
+
**({} if "filename" not in c["file"] else {"filename": render(c["file"]["filename"])}),
|
|
4417
|
+
},
|
|
4418
|
+
})
|
|
4427
4419
|
else:
|
|
4428
4420
|
raise ValueError(f"Unknown content type: {c['type']}")
|
|
4429
4421
|
|
braintrust/otel/__init__.py
CHANGED
|
@@ -90,18 +90,13 @@ class AISpanProcessor:
|
|
|
90
90
|
def _should_keep_filtered_span(self, span):
|
|
91
91
|
"""
|
|
92
92
|
Keep spans if:
|
|
93
|
-
1.
|
|
94
|
-
2.
|
|
95
|
-
3.
|
|
96
|
-
4. Any attribute name starts with those prefixes
|
|
93
|
+
1. Custom filter returns True/False (if provided)
|
|
94
|
+
2. Span name starts with 'gen_ai.', 'braintrust.', 'llm.', 'ai.', or 'traceloop.'
|
|
95
|
+
3. Any attribute name starts with those prefixes
|
|
97
96
|
"""
|
|
98
97
|
if not span:
|
|
99
98
|
return False
|
|
100
99
|
|
|
101
|
-
# Braintrust requires root spans, so always keep them
|
|
102
|
-
if span.parent is None:
|
|
103
|
-
return True
|
|
104
|
-
|
|
105
100
|
# Apply custom filter if provided
|
|
106
101
|
if self._custom_filter:
|
|
107
102
|
custom_result = self._custom_filter(span)
|
|
@@ -384,6 +379,9 @@ def _get_braintrust_parent(object_type, object_id: str | None = None, compute_ar
|
|
|
384
379
|
|
|
385
380
|
return None
|
|
386
381
|
|
|
382
|
+
def is_root_span(span) -> bool:
|
|
383
|
+
"""Returns True if the span is a root span (no parent span)."""
|
|
384
|
+
return getattr(span, "parent", None) is None
|
|
387
385
|
|
|
388
386
|
def context_from_span_export(export_str: str):
|
|
389
387
|
"""
|
|
@@ -522,15 +520,17 @@ def add_span_parent_to_baggage(span, ctx=None):
|
|
|
522
520
|
return add_parent_to_baggage(parent_value, ctx=ctx)
|
|
523
521
|
|
|
524
522
|
|
|
525
|
-
def parent_from_headers(headers: dict[str, str]) -> str | None:
|
|
523
|
+
def parent_from_headers(headers: dict[str, str], propagator=None) -> str | None:
|
|
526
524
|
"""
|
|
527
|
-
Extract a Braintrust-compatible parent string from
|
|
525
|
+
Extract a Braintrust-compatible parent string from trace context headers.
|
|
528
526
|
|
|
529
|
-
This converts OTEL trace context headers
|
|
530
|
-
|
|
527
|
+
This converts OTEL trace context headers into a format that can be passed
|
|
528
|
+
as the 'parent' parameter to Braintrust's start_span() method.
|
|
531
529
|
|
|
532
530
|
Args:
|
|
533
|
-
headers: Dictionary with
|
|
531
|
+
headers: Dictionary with trace context headers (e.g., 'traceparent'/'baggage' for W3C)
|
|
532
|
+
propagator: Optional custom TextMapPropagator. If not provided, uses the
|
|
533
|
+
globally registered propagator (W3C TraceContext by default).
|
|
534
534
|
|
|
535
535
|
Returns:
|
|
536
536
|
Braintrust V4 export string that can be used as parent parameter,
|
|
@@ -545,6 +545,12 @@ def parent_from_headers(headers: dict[str, str]) -> str | None:
|
|
|
545
545
|
>>> parent = parent_from_headers(headers)
|
|
546
546
|
>>> with project.start_span(name="service_c", parent=parent) as span:
|
|
547
547
|
>>> span.log(input="BT span as child of OTEL parent")
|
|
548
|
+
|
|
549
|
+
>>> # Using a custom propagator (e.g., B3 format)
|
|
550
|
+
>>> from opentelemetry.propagators.b3 import B3MultiFormat
|
|
551
|
+
>>> propagator = B3MultiFormat()
|
|
552
|
+
>>> headers = {'X-B3-TraceId': '...', 'X-B3-SpanId': '...', 'baggage': '...'}
|
|
553
|
+
>>> parent = parent_from_headers(headers, propagator=propagator)
|
|
548
554
|
"""
|
|
549
555
|
if not OTEL_AVAILABLE:
|
|
550
556
|
raise ImportError(INSTALL_ERR_MSG)
|
|
@@ -553,8 +559,11 @@ def parent_from_headers(headers: dict[str, str]) -> str | None:
|
|
|
553
559
|
from opentelemetry import baggage, trace
|
|
554
560
|
from opentelemetry.propagate import extract
|
|
555
561
|
|
|
556
|
-
# Extract context from headers using
|
|
557
|
-
|
|
562
|
+
# Extract context from headers using provided propagator or global propagator
|
|
563
|
+
if propagator is not None:
|
|
564
|
+
ctx = propagator.extract(headers)
|
|
565
|
+
else:
|
|
566
|
+
ctx = extract(headers)
|
|
558
567
|
|
|
559
568
|
# Get span from context
|
|
560
569
|
span = trace.get_current_span(ctx)
|
|
braintrust/prompt_cache/test_disk_cache.py
CHANGED
|
@@ -39,7 +39,7 @@ class TestDiskCache(unittest.TestCase):
|
|
|
39
39
|
"a\nb",
|
|
40
40
|
]
|
|
41
41
|
for k in weird_keys:
|
|
42
|
-
time.sleep(0.
|
|
42
|
+
time.sleep(0.01) # make sure the mtimes are different
|
|
43
43
|
self.cache.set(k, data)
|
|
44
44
|
result = self.cache.get(k)
|
|
45
45
|
assert data == result
|
|
@@ -61,7 +61,7 @@ class TestDiskCache(unittest.TestCase):
|
|
|
61
61
|
# Fill cache beyond max size (3).
|
|
62
62
|
for i in range(3):
|
|
63
63
|
self.cache.set(f"key{i}", {"value": i})
|
|
64
|
-
time.sleep(0.
|
|
64
|
+
time.sleep(0.01) # wait to ensure different mtimes
|
|
65
65
|
|
|
66
66
|
# Add one more to trigger eviction.
|
|
67
67
|
self.cache.set("key3", {"value": 3})
|
|
@@ -75,7 +75,7 @@ class TestDiskCache(unittest.TestCase):
|
|
|
75
75
|
# Fill cache beyond max size (3).
|
|
76
76
|
for i in range(3):
|
|
77
77
|
self.cache.set(f"key{i}", {"value": i})
|
|
78
|
-
time.sleep(0.
|
|
78
|
+
time.sleep(0.01) # wait to ensure different mtimes
|
|
79
79
|
|
|
80
80
|
# Add one more to trigger eviction.
|
|
81
81
|
self.cache.set("key3", {"value": 3})
|
braintrust/span_types.py
CHANGED
braintrust/test_bt_json.py
CHANGED
|
@@ -5,6 +5,7 @@ import json
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
from unittest import TestCase
|
|
7
7
|
|
|
8
|
+
import pytest
|
|
8
9
|
from braintrust.bt_json import bt_dumps, bt_safe_deep_copy
|
|
9
10
|
from braintrust.logger import Attachment, ExternalAttachment
|
|
10
11
|
|
|
@@ -281,30 +282,33 @@ class TestBTJson(TestCase):
|
|
|
281
282
|
self.assertTrue("(1, 2)" in result or "1, 2" in result)
|
|
282
283
|
self.assertIn("None", result)
|
|
283
284
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
285
|
+
@pytest.mark.vcr
|
|
286
|
+
def test_to_bt_safe_special_objects():
|
|
287
|
+
"""Test _to_bt_safe handling of Span, Experiment, Dataset, Logger objects."""
|
|
288
|
+
from braintrust import init, init_dataset, init_logger
|
|
287
289
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
290
|
+
# Create actual objects
|
|
291
|
+
exp = init(project="test", experiment="test")
|
|
292
|
+
dataset = init_dataset(project="test", name="test")
|
|
293
|
+
logger = init_logger(project="test")
|
|
294
|
+
span = exp.start_span()
|
|
293
295
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
+
# Import _to_bt_safe
|
|
297
|
+
from braintrust.bt_json import _to_bt_safe
|
|
298
|
+
|
|
299
|
+
# Test each special object
|
|
300
|
+
assert _to_bt_safe(span) == "<span>"
|
|
301
|
+
assert _to_bt_safe(exp) == "<experiment>"
|
|
302
|
+
assert _to_bt_safe(dataset) == "<dataset>"
|
|
303
|
+
assert _to_bt_safe(logger) == "<logger>"
|
|
296
304
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
self.assertEqual(_to_bt_safe(logger), "<logger>")
|
|
305
|
+
# Clean up
|
|
306
|
+
exp.flush()
|
|
307
|
+
dataset.flush()
|
|
308
|
+
logger.flush()
|
|
302
309
|
|
|
303
|
-
# Clean up
|
|
304
|
-
exp.flush()
|
|
305
|
-
dataset.flush()
|
|
306
|
-
logger.flush()
|
|
307
310
|
|
|
311
|
+
class TestBTJsonAttachments(TestCase):
|
|
308
312
|
def test_to_bt_safe_attachments(self):
|
|
309
313
|
"""Test _to_bt_safe preserves BaseAttachment and converts ReadonlyAttachment to reference."""
|
|
310
314
|
from braintrust.bt_json import _to_bt_safe
|
braintrust/test_framework.py
CHANGED
|
@@ -343,6 +343,31 @@ async def test_eval_no_send_logs_true(with_memory_logger, simple_scorer):
|
|
|
343
343
|
assert len(logs) == 0
|
|
344
344
|
|
|
345
345
|
|
|
346
|
+
@pytest.mark.asyncio
|
|
347
|
+
async def test_eval_no_send_logs_with_none_score(with_memory_logger):
|
|
348
|
+
"""Test that scorers returning None don't crash local mode."""
|
|
349
|
+
|
|
350
|
+
def sometimes_none_scorer(input, output, expected):
|
|
351
|
+
# Return None for first input, score for second
|
|
352
|
+
if input == "hello":
|
|
353
|
+
return {"name": "conditional", "score": None}
|
|
354
|
+
return {"name": "conditional", "score": 1.0}
|
|
355
|
+
|
|
356
|
+
result = await Eval(
|
|
357
|
+
"test-none-score",
|
|
358
|
+
data=[
|
|
359
|
+
{"input": "hello", "expected": "hello world"},
|
|
360
|
+
{"input": "test", "expected": "test world"},
|
|
361
|
+
],
|
|
362
|
+
task=lambda input_val: input_val + " world",
|
|
363
|
+
scores=[sometimes_none_scorer],
|
|
364
|
+
no_send_logs=True,
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
# Should not crash and should calculate average from non-None scores only
|
|
368
|
+
assert result.summary.scores["conditional"].score == 1.0 # Only the second score counts
|
|
369
|
+
|
|
370
|
+
|
|
346
371
|
@pytest.mark.asyncio
|
|
347
372
|
async def test_hooks_tags_append(with_memory_logger, with_simulate_login, simple_scorer):
|
|
348
373
|
"""Test that hooks.tags can be appended to and logged."""
|
braintrust/test_logger.py
CHANGED
|
@@ -849,6 +849,40 @@ def test_span_link_with_unresolved_experiment(with_simulate_login, with_memory_l
|
|
|
849
849
|
assert link == "https://www.braintrust.dev/error-generating-link?msg=resolve-experiment-id"
|
|
850
850
|
|
|
851
851
|
|
|
852
|
+
def test_experiment_span_link_uses_env_vars_when_logged_out(with_memory_logger):
|
|
853
|
+
"""Verify EXPERIMENT spans use BRAINTRUST_ORG_NAME env var when not logged in."""
|
|
854
|
+
simulate_logout()
|
|
855
|
+
assert_logged_out()
|
|
856
|
+
|
|
857
|
+
keys = ["BRAINTRUST_APP_URL", "BRAINTRUST_ORG_NAME"]
|
|
858
|
+
originals = {k: os.environ.get(k) for k in keys}
|
|
859
|
+
try:
|
|
860
|
+
os.environ["BRAINTRUST_APP_URL"] = "https://test-app.example.com"
|
|
861
|
+
os.environ["BRAINTRUST_ORG_NAME"] = "env-org-name"
|
|
862
|
+
|
|
863
|
+
experiment = braintrust.init(
|
|
864
|
+
project="test-project",
|
|
865
|
+
experiment="test-experiment",
|
|
866
|
+
)
|
|
867
|
+
|
|
868
|
+
# Create span with resolved experiment ID
|
|
869
|
+
span = experiment.start_span(name="test-span")
|
|
870
|
+
span.parent_object_id = LazyValue(lambda: "test-exp-id", use_mutex=False)
|
|
871
|
+
span.end()
|
|
872
|
+
|
|
873
|
+
link = span.link()
|
|
874
|
+
|
|
875
|
+
# Should use env var org name and app url
|
|
876
|
+
assert "env-org-name" in link
|
|
877
|
+
assert "test-app.example.com" in link
|
|
878
|
+
assert "test-exp-id" in link
|
|
879
|
+
finally:
|
|
880
|
+
for k, v in originals.items():
|
|
881
|
+
os.environ.pop(k, None)
|
|
882
|
+
if v:
|
|
883
|
+
os.environ[k] = v
|
|
884
|
+
|
|
885
|
+
|
|
852
886
|
def test_permalink_with_valid_span_logged_in(with_simulate_login, with_memory_logger):
|
|
853
887
|
logger = init_logger(
|
|
854
888
|
project="test-project",
|