braintrust 0.3.11__tar.gz → 0.3.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {braintrust-0.3.11 → braintrust-0.3.13}/PKG-INFO +19 -1
- {braintrust-0.3.11 → braintrust-0.3.13}/README.md +18 -0
- braintrust-0.3.13/src/braintrust/bt_json.py +116 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/auth.py +3 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/cors.py +1 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/server.py +6 -3
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/logger.py +35 -11
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_logger.py +160 -5
- braintrust-0.3.13/src/braintrust/version.py +4 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_openai.py +0 -32
- braintrust-0.3.13/src/braintrust/wrappers/test_openrouter.py +144 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust.egg-info/PKG-INFO +19 -1
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust.egg-info/SOURCES.txt +1 -0
- braintrust-0.3.11/src/braintrust/bt_json.py +0 -28
- braintrust-0.3.11/src/braintrust/version.py +0 -4
- {braintrust-0.3.11 → braintrust-0.3.13}/setup.cfg +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/setup.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/_generated_types.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/audit.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/aws.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/__main__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/eval.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/install/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/install/api.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/install/bump_versions.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/install/logs.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/install/redshift.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/install/run_migrations.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/cli/push.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/conftest.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/context.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/contrib/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/contrib/temporal/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/contrib/temporal/test_temporal.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/db_fields.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/dataset.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/eval_hooks.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/schemas.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/test_cached_login.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/test_lru_cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/devserver/test_server_integration.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/framework.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/framework2.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/functions/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/functions/constants.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/functions/invoke.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/functions/stream.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/generated_types.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/git_fields.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/gitutil.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/graph_util.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/http_headers.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/id_gen.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/merge_row_batch.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/oai.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/object.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/otel/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/otel/context.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/otel/test_distributed_tracing.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/otel/test_otel_bt_integration.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/parameters.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt_cache/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt_cache/disk_cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt_cache/lru_cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt_cache/prompt_cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt_cache/test_disk_cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt_cache/test_lru_cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/prompt_cache/test_prompt_cache.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/py.typed +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/queue.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/resource_manager.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/score.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/serializable_data_class.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/span_identifier_v1.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/span_identifier_v2.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/span_identifier_v3.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/span_identifier_v4.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/span_types.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_framework.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_framework2.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_helpers.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_id_gen.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_otel.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_queue.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_serializable_data_class.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_span_components.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_util.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/test_version.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/util.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/_anthropic_utils.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/agno/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/agno/agent.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/agno/function_call.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/agno/model.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/agno/team.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/agno/utils.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/anthropic.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/claude_agent_sdk/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/dspy.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/google_genai/__init__.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/langchain.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/litellm.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/openai.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_agno.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_anthropic.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_dspy.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_google_genai.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_litellm.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_pydantic_ai.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/wrappers/test_utils.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust/xact_ids.py +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust.egg-info/dependency_links.txt +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust.egg-info/entry_points.txt +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust.egg-info/requires.txt +0 -0
- {braintrust-0.3.11 → braintrust-0.3.13}/src/braintrust.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: braintrust
|
|
3
|
-
Version: 0.3.11
|
|
3
|
+
Version: 0.3.13
|
|
4
4
|
Summary: SDK for integrating Braintrust
|
|
5
5
|
Home-page: https://www.braintrust.dev
|
|
6
6
|
Author: Braintrust
|
|
@@ -75,6 +75,20 @@ Install the library with pip.
|
|
|
75
75
|
pip install braintrust
|
|
76
76
|
```
|
|
77
77
|
|
|
78
|
+
**Performance tip**: For 3-5x faster JSON serialization, install with the optional `performance` extra:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install braintrust[performance]
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Or install `orjson` separately:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip install orjson
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The SDK automatically detects and uses orjson if available, with seamless fallback to standard json. See [ORJSON_OPTIMIZATION.md](ORJSON_OPTIMIZATION.md) for details.
|
|
91
|
+
|
|
78
92
|
Then, run a simple experiment with the following code (replace `YOUR_API_KEY` with
|
|
79
93
|
your Braintrust API key):
|
|
80
94
|
|
|
@@ -100,3 +114,7 @@ Eval(
|
|
|
100
114
|
scores=[is_equal],
|
|
101
115
|
)
|
|
102
116
|
```
|
|
117
|
+
|
|
118
|
+
# Performance Optimization
|
|
119
|
+
|
|
120
|
+
For 3-5x faster JSON serialization, install `orjson`. The SDK automatically detects and uses orjson if available, with seamless fallback to standard json.
|
|
@@ -12,6 +12,20 @@ Install the library with pip.
|
|
|
12
12
|
pip install braintrust
|
|
13
13
|
```
|
|
14
14
|
|
|
15
|
+
**Performance tip**: For 3-5x faster JSON serialization, install with the optional `performance` extra:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install braintrust[performance]
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Or install `orjson` separately:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install orjson
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The SDK automatically detects and uses orjson if available, with seamless fallback to standard json. See [ORJSON_OPTIMIZATION.md](ORJSON_OPTIMIZATION.md) for details.
|
|
28
|
+
|
|
15
29
|
Then, run a simple experiment with the following code (replace `YOUR_API_KEY` with
|
|
16
30
|
your Braintrust API key):
|
|
17
31
|
|
|
@@ -37,3 +51,7 @@ Eval(
|
|
|
37
51
|
scores=[is_equal],
|
|
38
52
|
)
|
|
39
53
|
```
|
|
54
|
+
|
|
55
|
+
# Performance Optimization
|
|
56
|
+
|
|
57
|
+
For 3-5x faster JSON serialization, install `orjson`. The SDK automatically detects and uses orjson if available, with seamless fallback to standard json.
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import json
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
|
|
5
|
+
# Try to import orjson for better performance
|
|
6
|
+
# If not available, we'll use standard json
|
|
7
|
+
try:
|
|
8
|
+
import orjson
|
|
9
|
+
|
|
10
|
+
_HAS_ORJSON = True
|
|
11
|
+
except ImportError:
|
|
12
|
+
_HAS_ORJSON = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _to_dict(obj: Any) -> Any:
    """
    Function-based default handler for non-JSON-serializable objects.

    Conversion strategies are tried in order; a failure in one strategy falls
    through to the next one instead of aborting serialization:

    - dataclass instances (``dataclasses.asdict``)
    - Pydantic v2 ``BaseModel`` (``model_dump()``)
    - Pydantic v1 ``BaseModel`` (``dict()``)
    - ``str(obj)`` for unknown types
    - a ``"<non-serializable: TypeName>"`` placeholder if ``str()`` itself raises

    Args:
        obj: The object the JSON encoder could not serialize natively.

    Returns:
        The value produced by the first strategy that succeeds.
    """
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        try:
            return dataclasses.asdict(obj)
        except Exception:
            # asdict() deep-copies field values and raises for fields that
            # cannot be copied (e.g. locks). Keep going so such an instance is
            # still logged through one of the fallbacks below.
            pass

    # Attempt to dump a Pydantic v2 `BaseModel`.
    try:
        return cast(Any, obj).model_dump()
    except (AttributeError, TypeError):
        pass

    # Attempt to dump a Pydantic v1 `BaseModel`.
    try:
        return cast(Any, obj).dict()
    except (AttributeError, TypeError):
        pass

    # When everything fails, try to return the string representation of the object
    try:
        return str(obj)
    except Exception:
        # If str() fails, return an error placeholder
        return f"<non-serializable: {type(obj).__name__}>"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class BraintrustJSONEncoder(json.JSONEncoder):
    """
    ``json.JSONEncoder`` subclass for the standard-library serialization path.

    It is the encoder used when orjson is not available or fails; any object
    that ``json`` cannot encode natively is converted with ``_to_dict``.
    """

    def default(self, o: Any):
        # Delegate to the shared conversion helper.
        converted = _to_dict(o)
        return converted
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def bt_dumps(obj, **kwargs) -> str:
    """
    Serialize obj to a JSON-formatted string.

    Uses orjson when it is installed and no keyword arguments are supplied,
    with fallback to the standard json library if orjson is not installed or
    fails. orjson cannot honor ``json.dumps`` keyword arguments, so a call that
    passes ``kwargs`` always goes through the standard library; this keeps the
    result independent of whether orjson happens to be installed.

    Args:
        obj: Object to serialize
        **kwargs: Additional arguments forwarded to json.dumps. They take
            precedence over the defaults used here (``cls``, ``allow_nan=False``,
            ``sort_keys=True``) instead of colliding with them.

    Returns:
        JSON string representation of obj
    """
    if _HAS_ORJSON and not kwargs:
        # Try orjson first for better performance
        try:
            # pylint: disable=no-member  # orjson is a C extension, pylint can't introspect it
            return orjson.dumps(  # type: ignore[possibly-unbound]
                obj,
                default=_to_dict,
                # options match json.dumps behavior for bc
                option=orjson.OPT_SORT_KEYS | orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS,  # type: ignore[possibly-unbound]
            ).decode("utf-8")
        except Exception:
            # Deliberate best-effort: whatever orjson rejected gets a second
            # chance with the standard json encoder below.
            pass

    # Use standard json (orjson not available, it failed, or kwargs were given).
    # sort_keys=True gives deterministic output (matches orjson OPT_SORT_KEYS).
    # Merging through a dict lets callers override a default without triggering
    # "got multiple values for keyword argument".
    json_options = {"cls": BraintrustJSONEncoder, "allow_nan": False, "sort_keys": True}
    json_options.update(kwargs)
    return json.dumps(obj, **json_options)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def bt_loads(s: str, **kwargs) -> Any:
    """
    Deserialize s (a str containing a JSON document) to a Python object.

    Uses orjson when it is installed and no keyword arguments are supplied,
    with fallback to the standard json library if orjson is not installed or
    fails. orjson cannot honor ``json.loads`` keyword arguments (for example
    ``object_hook``), so a call that passes ``kwargs`` always goes through the
    standard library rather than silently ignoring them.

    Args:
        s: JSON string to deserialize
        **kwargs: Additional arguments forwarded to json.loads

    Returns:
        Python object representation of JSON string
    """
    if _HAS_ORJSON and not kwargs:
        # Try orjson first for better performance
        try:
            # pylint: disable=no-member  # orjson is a C extension, pylint can't introspect it
            return orjson.loads(s)  # type: ignore[possibly-unbound]
        except Exception:
            # Deliberate best-effort: retry with the standard json parser below.
            pass

    # Use standard json (orjson not available, it failed, or kwargs were given)
    return json.loads(s, **kwargs)
|
|
@@ -10,6 +10,7 @@ from ..logger import BraintrustState
|
|
|
10
10
|
ORIGIN_HEADER = "origin"
|
|
11
11
|
BRAINTRUST_AUTH_TOKEN_HEADER = "x-bt-auth-token"
|
|
12
12
|
BRAINTRUST_ORG_NAME_HEADER = "x-bt-org-name"
|
|
13
|
+
BRAINTRUST_PROJECT_ID_HEADER = "x-bt-project-id"
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
@dataclass
|
|
@@ -17,6 +18,7 @@ class RequestContext:
|
|
|
17
18
|
app_origin: Optional[str]
|
|
18
19
|
token: Optional[str]
|
|
19
20
|
org_name: Optional[str]
|
|
21
|
+
project_id: Optional[str]
|
|
20
22
|
state: Optional[BraintrustState]
|
|
21
23
|
|
|
22
24
|
|
|
@@ -56,6 +58,7 @@ class AuthorizationMiddleware(BaseHTTPMiddleware):
|
|
|
56
58
|
app_origin=extract_allowed_origin(request.headers.get(ORIGIN_HEADER)),
|
|
57
59
|
token=None,
|
|
58
60
|
org_name=request.headers.get(BRAINTRUST_ORG_NAME_HEADER),
|
|
61
|
+
project_id=request.headers.get(BRAINTRUST_PROJECT_ID_HEADER),
|
|
59
62
|
state=None,
|
|
60
63
|
)
|
|
61
64
|
|
|
@@ -196,7 +196,7 @@ async def run_eval(request: Request) -> Union[JSONResponse, StreamingResponse]:
|
|
|
196
196
|
"state": state,
|
|
197
197
|
"scores": evaluator.scores
|
|
198
198
|
+ [
|
|
199
|
-
make_scorer(state, score["name"], score["function_id"])
|
|
199
|
+
make_scorer(state, score["name"], score["function_id"], ctx.project_id)
|
|
200
200
|
for score in eval_data.get("scores", [])
|
|
201
201
|
],
|
|
202
202
|
"stream": stream_fn,
|
|
@@ -305,7 +305,7 @@ def snake_to_camel(snake_str: str) -> str:
|
|
|
305
305
|
return components[0] + "".join(x.title() for x in components[1:]) if components else snake_str
|
|
306
306
|
|
|
307
307
|
|
|
308
|
-
def make_scorer(state: BraintrustState, name: str, score: FunctionId) -> EvalScorer[Any, Any]:
|
|
308
|
+
def make_scorer(state: BraintrustState, name: str, score: FunctionId, project_id: Optional[str] = None) -> EvalScorer[Any, Any]:
|
|
309
309
|
def scorer_fn(input, output, expected, metadata):
|
|
310
310
|
request = {
|
|
311
311
|
**score,
|
|
@@ -315,7 +315,10 @@ def make_scorer(state: BraintrustState, name: str, score: FunctionId) -> EvalSco
|
|
|
315
315
|
"mode": "auto",
|
|
316
316
|
"strict": True,
|
|
317
317
|
}
|
|
318
|
-
|
|
318
|
+
headers = {"Accept": "application/json"}
|
|
319
|
+
if project_id:
|
|
320
|
+
headers["x-bt-project-id"] = project_id
|
|
321
|
+
result = state.proxy_conn().post("function/invoke", json=request, headers=headers)
|
|
319
322
|
result.raise_for_status()
|
|
320
323
|
data = result.json()
|
|
321
324
|
return data
|
|
@@ -9,6 +9,7 @@ import inspect
|
|
|
9
9
|
import io
|
|
10
10
|
import json
|
|
11
11
|
import logging
|
|
12
|
+
import math
|
|
12
13
|
import os
|
|
13
14
|
import sys
|
|
14
15
|
import textwrap
|
|
@@ -53,7 +54,7 @@ from requests.adapters import HTTPAdapter
|
|
|
53
54
|
from urllib3.util.retry import Retry
|
|
54
55
|
|
|
55
56
|
from . import context, id_gen
|
|
56
|
-
from .bt_json import bt_dumps
|
|
57
|
+
from .bt_json import bt_dumps, bt_loads
|
|
57
58
|
from .db_fields import (
|
|
58
59
|
ASYNC_SCORING_CONTROL_FIELD,
|
|
59
60
|
AUDIT_METADATA_FIELD,
|
|
@@ -2471,7 +2472,15 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
|
|
|
2471
2472
|
# `json.dumps`. However, that runs at log upload time, while we want to
|
|
2472
2473
|
# cut out all the references to user objects synchronously in this
|
|
2473
2474
|
# function.
|
|
2474
|
-
|
|
2475
|
+
result = {}
|
|
2476
|
+
for k in v:
|
|
2477
|
+
try:
|
|
2478
|
+
key_str = str(k)
|
|
2479
|
+
except Exception:
|
|
2480
|
+
# If str() fails on the key, use a fallback representation
|
|
2481
|
+
key_str = f"<non-stringifiable-key: {type(k).__name__}>"
|
|
2482
|
+
result[key_str] = _deep_copy_object(v[k], depth + 1)
|
|
2483
|
+
return result
|
|
2475
2484
|
elif isinstance(v, (List, Tuple, Set)):
|
|
2476
2485
|
return [_deep_copy_object(x, depth + 1) for x in v]
|
|
2477
2486
|
finally:
|
|
@@ -2491,7 +2500,14 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
|
|
|
2491
2500
|
return v
|
|
2492
2501
|
elif isinstance(v, ReadonlyAttachment):
|
|
2493
2502
|
return v.reference
|
|
2494
|
-
elif isinstance(v,
|
|
2503
|
+
elif isinstance(v, float):
|
|
2504
|
+
# Handle NaN and Infinity for JSON compatibility
|
|
2505
|
+
if math.isnan(v):
|
|
2506
|
+
return "NaN"
|
|
2507
|
+
elif math.isinf(v):
|
|
2508
|
+
return "Infinity" if v > 0 else "-Infinity"
|
|
2509
|
+
return v
|
|
2510
|
+
elif isinstance(v, (int, str, bool)) or v is None:
|
|
2495
2511
|
# Skip roundtrip for primitive types.
|
|
2496
2512
|
return v
|
|
2497
2513
|
else:
|
|
@@ -2500,7 +2516,7 @@ def _deep_copy_event(event: Mapping[str, Any]) -> Dict[str, Any]:
|
|
|
2500
2516
|
# E.g. the original type could have a `__del__` method that alters
|
|
2501
2517
|
# some shared internal state, and we need this deep copy to be
|
|
2502
2518
|
# fully-independent from the original.
|
|
2503
|
-
return
|
|
2519
|
+
return bt_loads(bt_dumps(v))
|
|
2504
2520
|
|
|
2505
2521
|
return _deep_copy_object(event)
|
|
2506
2522
|
|
|
@@ -2523,7 +2539,7 @@ class ObjectIterator(Generic[T]):
|
|
|
2523
2539
|
return value
|
|
2524
2540
|
|
|
2525
2541
|
|
|
2526
|
-
|
|
2542
|
+
DEFAULT_FETCH_BATCH_SIZE = 1000
|
|
2527
2543
|
MAX_BTQL_ITERATIONS = 10000
|
|
2528
2544
|
|
|
2529
2545
|
|
|
@@ -2550,7 +2566,7 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2550
2566
|
self._fetched_data: Optional[List[TMapping]] = None
|
|
2551
2567
|
self._internal_btql = _internal_btql
|
|
2552
2568
|
|
|
2553
|
-
def fetch(self) -> Iterator[TMapping]:
|
|
2569
|
+
def fetch(self, batch_size: Optional[int] = None) -> Iterator[TMapping]:
|
|
2554
2570
|
"""
|
|
2555
2571
|
Fetch all records.
|
|
2556
2572
|
|
|
@@ -2563,9 +2579,10 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2563
2579
|
print(record)
|
|
2564
2580
|
```
|
|
2565
2581
|
|
|
2582
|
+
:param batch_size: The number of records to fetch per request. Defaults to 1000.
|
|
2566
2583
|
:returns: An iterator over the records.
|
|
2567
2584
|
"""
|
|
2568
|
-
return ObjectIterator(self._refetch)
|
|
2585
|
+
return ObjectIterator(lambda: self._refetch(batch_size=batch_size))
|
|
2569
2586
|
|
|
2570
2587
|
def __iter__(self) -> Iterator[TMapping]:
|
|
2571
2588
|
return self.fetch()
|
|
@@ -2584,8 +2601,9 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2584
2601
|
@abstractmethod
|
|
2585
2602
|
def id(self) -> str: ...
|
|
2586
2603
|
|
|
2587
|
-
def _refetch(self) -> List[TMapping]:
|
|
2604
|
+
def _refetch(self, batch_size: Optional[int] = None) -> List[TMapping]:
|
|
2588
2605
|
state = self._get_state()
|
|
2606
|
+
limit = batch_size if batch_size is not None else DEFAULT_FETCH_BATCH_SIZE
|
|
2589
2607
|
if self._fetched_data is None:
|
|
2590
2608
|
cursor = None
|
|
2591
2609
|
data = None
|
|
@@ -2610,7 +2628,7 @@ class ObjectFetcher(ABC, Generic[TMapping]):
|
|
|
2610
2628
|
],
|
|
2611
2629
|
},
|
|
2612
2630
|
"cursor": cursor,
|
|
2613
|
-
"limit":
|
|
2631
|
+
"limit": limit,
|
|
2614
2632
|
**(self._internal_btql or {}),
|
|
2615
2633
|
},
|
|
2616
2634
|
"use_columnstore": False,
|
|
@@ -3761,8 +3779,14 @@ class ReadonlyExperiment(ObjectFetcher[ExperimentEvent]):
|
|
|
3761
3779
|
self._lazy_metadata.get()
|
|
3762
3780
|
return self.state
|
|
3763
3781
|
|
|
3764
|
-
def as_dataset(self) -> Iterator[_ExperimentDatasetEvent]:
|
|
3765
|
-
|
|
3782
|
+
def as_dataset(self, batch_size: Optional[int] = None) -> Iterator[_ExperimentDatasetEvent]:
|
|
3783
|
+
"""
|
|
3784
|
+
Return the experiment's data as a dataset iterator.
|
|
3785
|
+
|
|
3786
|
+
:param batch_size: The number of records to fetch per request. Defaults to 1000.
|
|
3787
|
+
:returns: An iterator over the experiment data as dataset records.
|
|
3788
|
+
"""
|
|
3789
|
+
return ExperimentDatasetIterator(self.fetch(batch_size=batch_size))
|
|
3766
3790
|
|
|
3767
3791
|
|
|
3768
3792
|
_EXEC_COUNTER_LOCK = threading.Lock()
|
|
@@ -716,6 +716,107 @@ def test_span_log_with_large_document_many_pages(with_memory_logger):
|
|
|
716
716
|
assert logged_output["pages"][0]["lines"][0]["words"][0]["content"] == "word_0"
|
|
717
717
|
|
|
718
718
|
|
|
719
|
+
def test_span_log_handles_nan_gracefully(with_memory_logger):
    """span.log() accepts a NaN output and records it as the string "NaN"."""
    test_logger = init_test_logger(__name__)

    # Logging must not raise even though the output holds a NaN.
    with test_logger.start_span(name="test_span") as active_span:
        active_span.log(input={"test": "input"}, output={"value": float("nan")})

    # Exactly one row is recorded and the regular input is untouched.
    recorded = with_memory_logger.pop()
    assert len(recorded) == 1
    entry = recorded[0]
    assert entry["input"]["test"] == "input"
    # The NaN is stored as the string "NaN" for JSON compatibility.
    assert entry["output"]["value"] == "NaN"
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
def test_span_log_handles_infinity_gracefully(with_memory_logger):
    """span.log() accepts +/- infinity and records "Infinity" / "-Infinity" strings."""
    test_logger = init_test_logger(__name__)

    # Logging must not raise even though the output holds infinite floats.
    with test_logger.start_span(name="test_span") as active_span:
        active_span.log(
            input={"test": "input"},
            output={"value": float("inf"), "neg": float("-inf")},
        )

    # Exactly one row is recorded and the regular input is untouched.
    recorded = with_memory_logger.pop()
    assert len(recorded) == 1
    entry = recorded[0]
    assert entry["input"]["test"] == "input"
    # Each infinity is stored as its string form for JSON compatibility.
    logged_output = entry["output"]
    assert logged_output["value"] == "Infinity"
    assert logged_output["neg"] == "-Infinity"
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def test_span_log_handles_unstringifiable_object_gracefully(with_memory_logger):
    """Test that span.log() handles objects with a bad __str__ gracefully without raising.

    The logged value is expected to be a placeholder that indicates the object
    could not be serialized.
    """
    logger = init_test_logger(__name__)

    class BadStrObject:
        def __str__(self):
            raise RuntimeError("Cannot convert to string!")

        def __repr__(self):
            raise RuntimeError("Cannot convert to repr!")

    with logger.start_span(name="test_span") as span:
        # Should NOT raise - should handle gracefully
        span.log(
            input={"test": "input"},
            output={"result": BadStrObject()},
        )

    # Verify the log was recorded with a fallback representation
    logs = with_memory_logger.pop()
    assert len(logs) == 1
    assert logs[0]["input"]["test"] == "input"
    # The bad object should have been replaced with some error placeholder
    assert "result" in logs[0]["output"]
    output_str = str(logs[0]["output"]["result"])
    # Should contain some indication of serialization failure
    assert "error" in output_str.lower() or "serializ" in output_str.lower()
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
def test_span_log_handles_bad_dict_keys_gracefully(with_memory_logger):
    """Test that span.log() handles non-stringifiable dict keys gracefully.

    A key whose ``str()`` raises is expected to be logged under a
    ``<non-stringifiable-key: TypeName>`` placeholder, with its value kept.
    """
    logger = init_test_logger(__name__)

    class BadKey:
        def __str__(self):
            raise ValueError("Key cannot be stringified!")

        def __repr__(self):
            raise ValueError("Key cannot be stringified!")

    with logger.start_span(name="test_span") as span:
        # Should NOT raise - should handle gracefully
        span.log(
            input={"test": "input"},
            output={BadKey(): "value"},
        )

    # Verify the log was recorded with the problematic key handled
    logs = with_memory_logger.pop()
    assert len(logs) == 1
    assert logs[0]["input"]["test"] == "input"
    # The output should exist but the bad key should be replaced
    assert "output" in logs[0]
    # Pin the replacement itself, not just the presence of "output":
    # the placeholder key must carry the original value.
    assert logs[0]["output"] == {"<non-stringifiable-key: BadKey>": "value"}
|
|
818
|
+
|
|
819
|
+
|
|
719
820
|
def test_span_link_logged_out(with_memory_logger):
|
|
720
821
|
simulate_logout()
|
|
721
822
|
assert_logged_out()
|
|
@@ -2491,7 +2592,7 @@ class TestDatasetInternalBtql(TestCase):
|
|
|
2491
2592
|
|
|
2492
2593
|
@patch("braintrust.logger.BraintrustState")
|
|
2493
2594
|
def test_dataset_internal_btql_limit_not_overwritten(self, mock_state_class):
|
|
2494
|
-
"""Test that custom limit in _internal_btql is not overwritten by
|
|
2595
|
+
"""Test that custom limit in _internal_btql is not overwritten by DEFAULT_FETCH_BATCH_SIZE."""
|
|
2495
2596
|
# Set up mock state
|
|
2496
2597
|
mock_state = MagicMock()
|
|
2497
2598
|
mock_state_class.return_value = mock_state
|
|
@@ -2538,7 +2639,7 @@ class TestDatasetInternalBtql(TestCase):
|
|
|
2538
2639
|
call_args = mock_api_conn.post.call_args
|
|
2539
2640
|
query_json = call_args[1]["json"]["query"]
|
|
2540
2641
|
|
|
2541
|
-
# Verify that the custom limit is present (not overwritten by
|
|
2642
|
+
# Verify that the custom limit is present (not overwritten by DEFAULT_FETCH_BATCH_SIZE)
|
|
2542
2643
|
self.assertEqual(query_json["limit"], custom_limit)
|
|
2543
2644
|
|
|
2544
2645
|
# Verify that other _internal_btql fields are also present
|
|
@@ -2546,8 +2647,14 @@ class TestDatasetInternalBtql(TestCase):
|
|
|
2546
2647
|
|
|
2547
2648
|
@patch("braintrust.logger.BraintrustState")
|
|
2548
2649
|
def test_dataset_default_limit_when_not_specified(self, mock_state_class):
|
|
2549
|
-
"""Test that
|
|
2550
|
-
from braintrust.logger import
|
|
2650
|
+
"""Test that DEFAULT_FETCH_BATCH_SIZE is used when no custom limit is specified."""
|
|
2651
|
+
from braintrust.logger import (
|
|
2652
|
+
DEFAULT_FETCH_BATCH_SIZE,
|
|
2653
|
+
Dataset,
|
|
2654
|
+
LazyValue,
|
|
2655
|
+
ObjectMetadata,
|
|
2656
|
+
ProjectDatasetMetadata,
|
|
2657
|
+
)
|
|
2551
2658
|
|
|
2552
2659
|
# Set up mock state
|
|
2553
2660
|
mock_state = MagicMock()
|
|
@@ -2590,4 +2697,52 @@ class TestDatasetInternalBtql(TestCase):
|
|
|
2590
2697
|
query_json = call_args[1]["json"]["query"]
|
|
2591
2698
|
|
|
2592
2699
|
# Verify that the default limit is used
|
|
2593
|
-
self.assertEqual(query_json["limit"],
|
|
2700
|
+
self.assertEqual(query_json["limit"], DEFAULT_FETCH_BATCH_SIZE)
|
|
2701
|
+
|
|
2702
|
+
@patch("braintrust.logger.BraintrustState")
|
|
2703
|
+
def test_dataset_custom_batch_size_in_fetch(self, mock_state_class):
|
|
2704
|
+
"""Test that custom batch_size in fetch() is properly passed to BTQL query."""
|
|
2705
|
+
from braintrust.logger import Dataset, LazyValue, ObjectMetadata, ProjectDatasetMetadata
|
|
2706
|
+
|
|
2707
|
+
# Set up mock state
|
|
2708
|
+
mock_state = MagicMock()
|
|
2709
|
+
mock_state_class.return_value = mock_state
|
|
2710
|
+
|
|
2711
|
+
# Mock the API connection and response
|
|
2712
|
+
mock_api_conn = MagicMock()
|
|
2713
|
+
mock_state.api_conn.return_value = mock_api_conn
|
|
2714
|
+
|
|
2715
|
+
# Mock response object
|
|
2716
|
+
mock_response = MagicMock()
|
|
2717
|
+
mock_response.json.return_value = {
|
|
2718
|
+
"data": [{"id": "1", "input": "test1", "expected": "output1"}],
|
|
2719
|
+
"cursor": None,
|
|
2720
|
+
}
|
|
2721
|
+
mock_api_conn.post.return_value = mock_response
|
|
2722
|
+
|
|
2723
|
+
# Create dataset
|
|
2724
|
+
project_metadata = ObjectMetadata(id="test-project", name="test-project", full_info={})
|
|
2725
|
+
dataset_metadata = ObjectMetadata(id="test-dataset", name="test-dataset", full_info={})
|
|
2726
|
+
lazy_metadata = LazyValue(
|
|
2727
|
+
lambda: ProjectDatasetMetadata(project=project_metadata, dataset=dataset_metadata),
|
|
2728
|
+
use_mutex=False,
|
|
2729
|
+
)
|
|
2730
|
+
|
|
2731
|
+
dataset = Dataset(
|
|
2732
|
+
lazy_metadata=lazy_metadata,
|
|
2733
|
+
state=mock_state,
|
|
2734
|
+
)
|
|
2735
|
+
|
|
2736
|
+
# Trigger a fetch with custom batch_size
|
|
2737
|
+
custom_batch_size = 250
|
|
2738
|
+
list(dataset.fetch(batch_size=custom_batch_size))
|
|
2739
|
+
|
|
2740
|
+
# Verify the API was called
|
|
2741
|
+
mock_api_conn.post.assert_called_once()
|
|
2742
|
+
|
|
2743
|
+
# Get the actual call arguments
|
|
2744
|
+
call_args = mock_api_conn.post.call_args
|
|
2745
|
+
query_json = call_args[1]["json"]["query"]
|
|
2746
|
+
|
|
2747
|
+
# Verify that the custom batch_size is used
|
|
2748
|
+
self.assertEqual(query_json["limit"], custom_batch_size)
|
|
@@ -1690,35 +1690,3 @@ def test_braintrust_tracing_processor_trace_metadata_logging(memory_logger):
|
|
|
1690
1690
|
spans = memory_logger.pop()
|
|
1691
1691
|
root_span = spans[0]
|
|
1692
1692
|
assert root_span["metadata"]["conversation_id"] == "test-12345", "Should log trace metadata"
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
def test_parse_metrics_excludes_booleans():
|
|
1696
|
-
"""Test that boolean fields in usage objects are excluded from metrics.
|
|
1697
|
-
|
|
1698
|
-
Reproduces issue where OpenRouter returns is_byok (a boolean) in the usage
|
|
1699
|
-
object, which caused API validation errors: "Expected number, received boolean".
|
|
1700
|
-
|
|
1701
|
-
In Python, bool is a subclass of int, so isinstance(True, int) returns True.
|
|
1702
|
-
The fix ensures _is_numeric explicitly excludes booleans.
|
|
1703
|
-
"""
|
|
1704
|
-
from braintrust.oai import _parse_metrics_from_usage
|
|
1705
|
-
|
|
1706
|
-
# Simulate OpenRouter's usage object with boolean field
|
|
1707
|
-
usage = {
|
|
1708
|
-
"completion_tokens": 11,
|
|
1709
|
-
"prompt_tokens": 8,
|
|
1710
|
-
"total_tokens": 19,
|
|
1711
|
-
"cost": 0.000104,
|
|
1712
|
-
"is_byok": False, # This boolean should be filtered out
|
|
1713
|
-
}
|
|
1714
|
-
|
|
1715
|
-
metrics = _parse_metrics_from_usage(usage)
|
|
1716
|
-
|
|
1717
|
-
# Numeric fields should be included
|
|
1718
|
-
assert metrics["completion_tokens"] == 11
|
|
1719
|
-
assert metrics["prompt_tokens"] == 8
|
|
1720
|
-
assert metrics["tokens"] == 19 # total_tokens gets renamed
|
|
1721
|
-
assert metrics["cost"] == 0.000104
|
|
1722
|
-
|
|
1723
|
-
# Boolean field should NOT be in metrics (this was the bug)
|
|
1724
|
-
assert "is_byok" not in metrics
|