braintrust 0.5.0__tar.gz → 0.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {braintrust-0.5.0 → braintrust-0.5.3}/PKG-INFO +1 -1
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/__init__.py +14 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/_generated_types.py +56 -3
- braintrust-0.5.3/src/braintrust/auto.py +179 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/conftest.py +23 -4
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/db_fields.py +10 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/framework.py +18 -5
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/generated_types.py +3 -1
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/logger.py +369 -134
- braintrust-0.5.3/src/braintrust/merge_row_batch.py +183 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/oai.py +51 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_bt_json.py +0 -5
- braintrust-0.5.3/src/braintrust/test_context.py +1264 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_framework.py +37 -0
- braintrust-0.5.3/src/braintrust/test_http.py +444 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_logger.py +179 -5
- braintrust-0.5.3/src/braintrust/test_merge_row_batch.py +160 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_util.py +58 -1
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/util.py +20 -0
- braintrust-0.5.3/src/braintrust/version.py +4 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/agno/__init__.py +2 -3
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/anthropic.py +64 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/claude_agent_sdk/__init__.py +2 -3
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +9 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/dspy.py +52 -1
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/google_genai/__init__.py +9 -6
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/litellm.py +6 -43
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/pydantic_ai.py +2 -3
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_agno.py +9 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_anthropic.py +156 -0
- braintrust-0.5.3/src/braintrust/wrappers/test_dspy.py +177 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_google_genai.py +9 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_litellm.py +57 -55
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_openai.py +253 -1
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_pydantic_ai_integration.py +9 -0
- braintrust-0.5.3/src/braintrust/wrappers/test_utils.py +91 -0
- braintrust-0.5.3/src/braintrust/wrappers/threads.py +114 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust.egg-info/PKG-INFO +1 -1
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust.egg-info/SOURCES.txt +5 -1
- braintrust-0.5.0/src/braintrust/graph_util.py +0 -147
- braintrust-0.5.0/src/braintrust/merge_row_batch.py +0 -243
- braintrust-0.5.0/src/braintrust/version.py +0 -4
- braintrust-0.5.0/src/braintrust/wrappers/test_dspy.py +0 -60
- braintrust-0.5.0/src/braintrust/wrappers/test_utils.py +0 -12
- {braintrust-0.5.0 → braintrust-0.5.3}/README.md +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/setup.cfg +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/setup.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/audit.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/aws.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/bt_json.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/__main__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/eval.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/install/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/install/api.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/install/bump_versions.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/install/logs.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/install/redshift.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/install/run_migrations.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/cli/push.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/context.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/contrib/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/contrib/temporal/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/contrib/temporal/test_temporal.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/auth.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/cors.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/dataset.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/eval_hooks.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/schemas.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/server.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/test_cached_login.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/test_lru_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/devserver/test_server_integration.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/framework2.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/functions/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/functions/constants.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/functions/invoke.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/functions/stream.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/functions/test_invoke.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/git_fields.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/gitutil.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/http_headers.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/id_gen.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/object.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/otel/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/otel/context.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/otel/test_distributed_tracing.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/otel/test_otel_bt_integration.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/parameters.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt_cache/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt_cache/disk_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt_cache/lru_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt_cache/prompt_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt_cache/test_disk_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt_cache/test_lru_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/prompt_cache/test_prompt_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/py.typed +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/queue.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/resource_manager.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/score.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/serializable_data_class.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/span_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/span_identifier_v1.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/span_identifier_v2.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/span_identifier_v3.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/span_identifier_v4.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/span_types.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_framework2.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_helpers.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_id_gen.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_otel.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_queue.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_score.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_serializable_data_class.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_span_cache.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_span_components.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_trace.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/test_version.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/trace.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/__init__.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/_anthropic_utils.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/agno/agent.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/agno/function_call.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/agno/model.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/agno/team.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/agno/utils.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/langchain.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/langsmith_wrapper.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/openai.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_langsmith_wrapper.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_oai_attachments.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_openrouter.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust/xact_ids.py +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust.egg-info/dependency_links.txt +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust.egg-info/entry_points.txt +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust.egg-info/requires.txt +0 -0
- {braintrust-0.5.0 → braintrust-0.5.3}/src/braintrust.egg-info/top_level.txt +0 -0
|
@@ -49,7 +49,21 @@ BRAINTRUST_API_KEY=<YOUR_BRAINTRUST_API_KEY> braintrust eval eval_hello.py
|
|
|
49
49
|
### API Reference
|
|
50
50
|
"""
|
|
51
51
|
|
|
52
|
+
# Check env var at import time for auto-instrumentation
|
|
53
|
+
import os
|
|
54
|
+
|
|
55
|
+
if os.getenv("BRAINTRUST_INSTRUMENT_THREADS", "").lower() in ("true", "1", "yes"):
|
|
56
|
+
try:
|
|
57
|
+
from .wrappers.threads import setup_threads
|
|
58
|
+
|
|
59
|
+
setup_threads()
|
|
60
|
+
except Exception:
|
|
61
|
+
pass # Never break on import
|
|
62
|
+
|
|
52
63
|
from .audit import *
|
|
64
|
+
from .auto import (
|
|
65
|
+
auto_instrument, # noqa: F401 # type: ignore[reportUnusedImport]
|
|
66
|
+
)
|
|
53
67
|
from .framework import *
|
|
54
68
|
from .framework2 import *
|
|
55
69
|
from .functions.invoke import *
|
|
@@ -167,6 +167,10 @@ AsyncScoringState: TypeAlias = AsyncScoringStateAsyncScoringState | AsyncScoring
|
|
|
167
167
|
class PreprocessorPreprocessor(TypedDict):
|
|
168
168
|
type: Literal['function']
|
|
169
169
|
id: str
|
|
170
|
+
version: NotRequired[str | None]
|
|
171
|
+
"""
|
|
172
|
+
The version of the function
|
|
173
|
+
"""
|
|
170
174
|
|
|
171
175
|
|
|
172
176
|
class PreprocessorPreprocessor2(TypedDict):
|
|
@@ -638,6 +642,10 @@ class ExperimentEventContext(TypedDict):
|
|
|
638
642
|
class ExtendedSavedFunctionIdExtendedSavedFunctionId(TypedDict):
|
|
639
643
|
type: Literal['function']
|
|
640
644
|
id: str
|
|
645
|
+
version: NotRequired[str | None]
|
|
646
|
+
"""
|
|
647
|
+
The version of the function
|
|
648
|
+
"""
|
|
641
649
|
|
|
642
650
|
|
|
643
651
|
class ExtendedSavedFunctionIdExtendedSavedFunctionId2(TypedDict):
|
|
@@ -668,6 +676,10 @@ class ExternalAttachmentReference(TypedDict):
|
|
|
668
676
|
class Preprocessor1Preprocessor1(TypedDict):
|
|
669
677
|
type: Literal['function']
|
|
670
678
|
id: str
|
|
679
|
+
version: NotRequired[str | None]
|
|
680
|
+
"""
|
|
681
|
+
The version of the function
|
|
682
|
+
"""
|
|
671
683
|
|
|
672
684
|
|
|
673
685
|
class Preprocessor1Preprocessor12(TypedDict):
|
|
@@ -803,7 +815,7 @@ FunctionOutputType: TypeAlias = Literal['completion', 'score', 'facet', 'classif
|
|
|
803
815
|
|
|
804
816
|
|
|
805
817
|
FunctionTypeEnum: TypeAlias = Literal[
|
|
806
|
-
'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier'
|
|
818
|
+
'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier', 'tag'
|
|
807
819
|
]
|
|
808
820
|
"""
|
|
809
821
|
The type of global function. Defaults to 'scorer'.
|
|
@@ -811,7 +823,7 @@ The type of global function. Defaults to 'scorer'.
|
|
|
811
823
|
|
|
812
824
|
|
|
813
825
|
FunctionTypeEnumNullish: TypeAlias = Literal[
|
|
814
|
-
'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier'
|
|
826
|
+
'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier', 'tag'
|
|
815
827
|
]
|
|
816
828
|
|
|
817
829
|
|
|
@@ -1108,6 +1120,12 @@ class GroupScope(TypedDict):
|
|
|
1108
1120
|
IfExists: TypeAlias = Literal['error', 'ignore', 'replace']
|
|
1109
1121
|
|
|
1110
1122
|
|
|
1123
|
+
ImageRenderingMode: TypeAlias = Literal['auto', 'click_to_load', 'blocked']
|
|
1124
|
+
"""
|
|
1125
|
+
Controls how images are rendered in the UI: 'auto' loads images automatically, 'click_to_load' shows a placeholder until clicked, 'blocked' prevents image loading entirely
|
|
1126
|
+
"""
|
|
1127
|
+
|
|
1128
|
+
|
|
1111
1129
|
class InvokeFunctionInvokeFunction(TypedDict):
|
|
1112
1130
|
function_id: str
|
|
1113
1131
|
"""
|
|
@@ -1311,6 +1329,10 @@ class ModelParamsModelParams4(TypedDict):
|
|
|
1311
1329
|
class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
|
|
1312
1330
|
type: Literal['function']
|
|
1313
1331
|
id: str
|
|
1332
|
+
version: NotRequired[str | None]
|
|
1333
|
+
"""
|
|
1334
|
+
The version of the function
|
|
1335
|
+
"""
|
|
1314
1336
|
|
|
1315
1337
|
|
|
1316
1338
|
class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict):
|
|
@@ -1390,6 +1412,7 @@ class Organization(TypedDict):
|
|
|
1390
1412
|
"""
|
|
1391
1413
|
Date of organization creation
|
|
1392
1414
|
"""
|
|
1415
|
+
image_rendering_mode: NotRequired[ImageRenderingMode | None]
|
|
1393
1416
|
|
|
1394
1417
|
|
|
1395
1418
|
Permission: TypeAlias = Literal[
|
|
@@ -2078,6 +2101,10 @@ class RunEvalMcpAuth(TypedDict):
|
|
|
2078
2101
|
class SavedFunctionIdSavedFunctionId(TypedDict):
|
|
2079
2102
|
type: Literal['function']
|
|
2080
2103
|
id: str
|
|
2104
|
+
version: NotRequired[str | None]
|
|
2105
|
+
"""
|
|
2106
|
+
The version of the function
|
|
2107
|
+
"""
|
|
2081
2108
|
|
|
2082
2109
|
|
|
2083
2110
|
class SavedFunctionIdSavedFunctionId1(TypedDict):
|
|
@@ -2590,6 +2617,14 @@ class DatasetEvent(TypedDict):
|
|
|
2590
2617
|
"""
|
|
2591
2618
|
Optional list of audit entries attached to this event
|
|
2592
2619
|
"""
|
|
2620
|
+
facets: NotRequired[Mapping[str, Any] | None]
|
|
2621
|
+
"""
|
|
2622
|
+
Facets for categorization (dictionary from facet id to value)
|
|
2623
|
+
"""
|
|
2624
|
+
classifications: NotRequired[Mapping[str, Any] | None]
|
|
2625
|
+
"""
|
|
2626
|
+
Classifications for this event (dictionary from classification name to items)
|
|
2627
|
+
"""
|
|
2593
2628
|
|
|
2594
2629
|
|
|
2595
2630
|
class EvalStatusPage(TypedDict):
|
|
@@ -3101,6 +3136,14 @@ class ExperimentEvent(TypedDict):
|
|
|
3101
3136
|
"""
|
|
3102
3137
|
Optional list of audit entries attached to this event
|
|
3103
3138
|
"""
|
|
3139
|
+
facets: NotRequired[Mapping[str, Any] | None]
|
|
3140
|
+
"""
|
|
3141
|
+
Facets for categorization (dictionary from facet id to value)
|
|
3142
|
+
"""
|
|
3143
|
+
classifications: NotRequired[Mapping[str, Any] | None]
|
|
3144
|
+
"""
|
|
3145
|
+
Classifications for this event (dictionary from classification name to items)
|
|
3146
|
+
"""
|
|
3104
3147
|
|
|
3105
3148
|
|
|
3106
3149
|
class GraphNodeGraphNode7(TypedDict):
|
|
@@ -3223,6 +3266,14 @@ class ProjectLogsEvent(TypedDict):
|
|
|
3223
3266
|
"""
|
|
3224
3267
|
The async scoring state for this event
|
|
3225
3268
|
"""
|
|
3269
|
+
facets: NotRequired[Mapping[str, Any] | None]
|
|
3270
|
+
"""
|
|
3271
|
+
Facets for categorization (dictionary from facet id to value)
|
|
3272
|
+
"""
|
|
3273
|
+
classifications: NotRequired[Mapping[str, Any] | None]
|
|
3274
|
+
"""
|
|
3275
|
+
Classifications for this event (dictionary from classification name to items)
|
|
3276
|
+
"""
|
|
3226
3277
|
|
|
3227
3278
|
|
|
3228
3279
|
class ProjectScore(TypedDict):
|
|
@@ -3330,7 +3381,9 @@ class View(TypedDict):
|
|
|
3330
3381
|
'classifiers',
|
|
3331
3382
|
'logs',
|
|
3332
3383
|
'monitor',
|
|
3333
|
-
'
|
|
3384
|
+
'for_review_project_log',
|
|
3385
|
+
'for_review_experiments',
|
|
3386
|
+
'for_review_datasets',
|
|
3334
3387
|
]
|
|
3335
3388
|
"""
|
|
3336
3389
|
Type of object that the view corresponds to.
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Auto-instrumentation for AI/ML libraries.
|
|
3
|
+
|
|
4
|
+
Provides one-line instrumentation for supported libraries.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
|
|
12
|
+
__all__ = ["auto_instrument"]
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@contextmanager
|
|
18
|
+
def _try_patch():
|
|
19
|
+
"""Context manager that suppresses ImportError and logs other exceptions."""
|
|
20
|
+
try:
|
|
21
|
+
yield
|
|
22
|
+
except ImportError:
|
|
23
|
+
pass
|
|
24
|
+
except Exception:
|
|
25
|
+
logger.exception("Failed to instrument")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def auto_instrument(
|
|
29
|
+
*,
|
|
30
|
+
openai: bool = True,
|
|
31
|
+
anthropic: bool = True,
|
|
32
|
+
litellm: bool = True,
|
|
33
|
+
pydantic_ai: bool = True,
|
|
34
|
+
google_genai: bool = True,
|
|
35
|
+
agno: bool = True,
|
|
36
|
+
claude_agent_sdk: bool = True,
|
|
37
|
+
dspy: bool = True,
|
|
38
|
+
) -> dict[str, bool]:
|
|
39
|
+
"""
|
|
40
|
+
Auto-instrument supported AI/ML libraries for Braintrust tracing.
|
|
41
|
+
|
|
42
|
+
Safe to call multiple times - already instrumented libraries are skipped.
|
|
43
|
+
|
|
44
|
+
Note on import order: If you use `from openai import OpenAI` style imports,
|
|
45
|
+
call auto_instrument() first. If you use `import openai` style imports,
|
|
46
|
+
order doesn't matter since attribute lookup happens dynamically.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
openai: Enable OpenAI instrumentation (default: True)
|
|
50
|
+
anthropic: Enable Anthropic instrumentation (default: True)
|
|
51
|
+
litellm: Enable LiteLLM instrumentation (default: True)
|
|
52
|
+
pydantic_ai: Enable Pydantic AI instrumentation (default: True)
|
|
53
|
+
google_genai: Enable Google GenAI instrumentation (default: True)
|
|
54
|
+
agno: Enable Agno instrumentation (default: True)
|
|
55
|
+
claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
|
|
56
|
+
dspy: Enable DSPy instrumentation (default: True)
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
Dict mapping integration name to whether it was successfully instrumented.
|
|
60
|
+
|
|
61
|
+
Example:
|
|
62
|
+
```python
|
|
63
|
+
import braintrust
|
|
64
|
+
braintrust.auto_instrument()
|
|
65
|
+
|
|
66
|
+
# OpenAI
|
|
67
|
+
import openai
|
|
68
|
+
client = openai.OpenAI()
|
|
69
|
+
client.chat.completions.create(model="gpt-4o-mini", messages=[...])
|
|
70
|
+
|
|
71
|
+
# Anthropic
|
|
72
|
+
import anthropic
|
|
73
|
+
client = anthropic.Anthropic()
|
|
74
|
+
client.messages.create(model="claude-sonnet-4-20250514", messages=[...])
|
|
75
|
+
|
|
76
|
+
# LiteLLM
|
|
77
|
+
import litellm
|
|
78
|
+
litellm.completion(model="gpt-4o-mini", messages=[...])
|
|
79
|
+
|
|
80
|
+
# DSPy
|
|
81
|
+
import dspy
|
|
82
|
+
lm = dspy.LM("openai/gpt-4o-mini")
|
|
83
|
+
dspy.configure(lm=lm)
|
|
84
|
+
|
|
85
|
+
# Pydantic AI
|
|
86
|
+
from pydantic_ai import Agent
|
|
87
|
+
agent = Agent("openai:gpt-4o-mini")
|
|
88
|
+
result = agent.run_sync("Hello!")
|
|
89
|
+
|
|
90
|
+
# Google GenAI
|
|
91
|
+
from google.genai import Client
|
|
92
|
+
client = Client()
|
|
93
|
+
client.models.generate_content(model="gemini-2.0-flash", contents="Hello!")
|
|
94
|
+
```
|
|
95
|
+
"""
|
|
96
|
+
results = {}
|
|
97
|
+
|
|
98
|
+
if openai:
|
|
99
|
+
results["openai"] = _instrument_openai()
|
|
100
|
+
if anthropic:
|
|
101
|
+
results["anthropic"] = _instrument_anthropic()
|
|
102
|
+
if litellm:
|
|
103
|
+
results["litellm"] = _instrument_litellm()
|
|
104
|
+
if pydantic_ai:
|
|
105
|
+
results["pydantic_ai"] = _instrument_pydantic_ai()
|
|
106
|
+
if google_genai:
|
|
107
|
+
results["google_genai"] = _instrument_google_genai()
|
|
108
|
+
if agno:
|
|
109
|
+
results["agno"] = _instrument_agno()
|
|
110
|
+
if claude_agent_sdk:
|
|
111
|
+
results["claude_agent_sdk"] = _instrument_claude_agent_sdk()
|
|
112
|
+
if dspy:
|
|
113
|
+
results["dspy"] = _instrument_dspy()
|
|
114
|
+
|
|
115
|
+
return results
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _instrument_openai() -> bool:
|
|
119
|
+
with _try_patch():
|
|
120
|
+
from braintrust.oai import patch_openai
|
|
121
|
+
|
|
122
|
+
return patch_openai()
|
|
123
|
+
return False
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _instrument_anthropic() -> bool:
|
|
127
|
+
with _try_patch():
|
|
128
|
+
from braintrust.wrappers.anthropic import patch_anthropic
|
|
129
|
+
|
|
130
|
+
return patch_anthropic()
|
|
131
|
+
return False
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _instrument_litellm() -> bool:
|
|
135
|
+
with _try_patch():
|
|
136
|
+
from braintrust.wrappers.litellm import patch_litellm
|
|
137
|
+
|
|
138
|
+
return patch_litellm()
|
|
139
|
+
return False
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _instrument_pydantic_ai() -> bool:
|
|
143
|
+
with _try_patch():
|
|
144
|
+
from braintrust.wrappers.pydantic_ai import setup_pydantic_ai
|
|
145
|
+
|
|
146
|
+
return setup_pydantic_ai()
|
|
147
|
+
return False
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _instrument_google_genai() -> bool:
|
|
151
|
+
with _try_patch():
|
|
152
|
+
from braintrust.wrappers.google_genai import setup_genai
|
|
153
|
+
|
|
154
|
+
return setup_genai()
|
|
155
|
+
return False
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _instrument_agno() -> bool:
|
|
159
|
+
with _try_patch():
|
|
160
|
+
from braintrust.wrappers.agno import setup_agno
|
|
161
|
+
|
|
162
|
+
return setup_agno()
|
|
163
|
+
return False
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _instrument_claude_agent_sdk() -> bool:
|
|
167
|
+
with _try_patch():
|
|
168
|
+
from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk
|
|
169
|
+
|
|
170
|
+
return setup_claude_agent_sdk()
|
|
171
|
+
return False
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _instrument_dspy() -> bool:
|
|
175
|
+
with _try_patch():
|
|
176
|
+
from braintrust.wrappers.dspy import patch_dspy
|
|
177
|
+
|
|
178
|
+
return patch_dspy()
|
|
179
|
+
return False
|
|
@@ -48,16 +48,29 @@ def reset_braintrust_state():
|
|
|
48
48
|
logger._state = logger.BraintrustState()
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
@pytest.fixture(scope="session")
|
|
52
|
-
def vcr_config():
|
|
51
|
+
@pytest.fixture(autouse=True)
|
|
52
|
+
def skip_vcr_tests_in_wheel_mode(request):
|
|
53
|
+
"""Skip VCR tests when running from an installed wheel.
|
|
54
|
+
|
|
55
|
+
Wheel mode (BRAINTRUST_TESTING_WHEEL=1) is a pre-release sanity check
|
|
56
|
+
that verifies the built package installs and runs correctly. It's not
|
|
57
|
+
intended to be a full test suite - VCR cassettes are not included in
|
|
58
|
+
the wheel, so we skip those tests here. The full test suite with VCR
|
|
59
|
+
tests runs against source code during normal CI.
|
|
60
|
+
"""
|
|
61
|
+
if os.environ.get("BRAINTRUST_TESTING_WHEEL") == "1":
|
|
62
|
+
if request.node.get_closest_marker("vcr"):
|
|
63
|
+
pytest.skip("VCR tests skipped in wheel mode (pre-release sanity check only)")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_vcr_config():
|
|
53
67
|
"""
|
|
54
|
-
VCR configuration for recording/playing back HTTP interactions.
|
|
68
|
+
Get VCR configuration for recording/playing back HTTP interactions.
|
|
55
69
|
|
|
56
70
|
In CI, use "none" to fail if cassette is missing.
|
|
57
71
|
Locally, use "once" to record new cassettes if they don't exist.
|
|
58
72
|
"""
|
|
59
73
|
record_mode = "none" if (os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS")) else "once"
|
|
60
|
-
|
|
61
74
|
return {
|
|
62
75
|
"record_mode": record_mode,
|
|
63
76
|
"filter_headers": [
|
|
@@ -70,3 +83,9 @@ def vcr_config():
|
|
|
70
83
|
"x-bt-auth-token",
|
|
71
84
|
],
|
|
72
85
|
}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@pytest.fixture(scope="session")
|
|
89
|
+
def vcr_config():
|
|
90
|
+
"""Pytest fixture wrapper for get_vcr_config()."""
|
|
91
|
+
return get_vcr_config()
|
|
@@ -15,3 +15,13 @@ PARENT_ID_FIELD = "_parent_id"
|
|
|
15
15
|
|
|
16
16
|
ASYNC_SCORING_CONTROL_FIELD = "_async_scoring_control"
|
|
17
17
|
SKIP_ASYNC_SCORING_FIELD = "_skip_async_scoring"
|
|
18
|
+
|
|
19
|
+
# Keys that identify which object (experiment, dataset, project logs, etc.) a row belongs to.
|
|
20
|
+
OBJECT_ID_KEYS = (
|
|
21
|
+
"experiment_id",
|
|
22
|
+
"dataset_id",
|
|
23
|
+
"prompt_session_id",
|
|
24
|
+
"project_id",
|
|
25
|
+
"log_id",
|
|
26
|
+
"function_data",
|
|
27
|
+
)
|
|
@@ -673,6 +673,7 @@ def _EvalCommon(
|
|
|
673
673
|
stream: Callable[[SSEProgressEvent], None] | None = None,
|
|
674
674
|
parent: str | None = None,
|
|
675
675
|
state: BraintrustState | None = None,
|
|
676
|
+
enable_cache: bool = True,
|
|
676
677
|
) -> Callable[[], Coroutine[Any, Any, EvalResultWithSummary[Input, Output]]]:
|
|
677
678
|
"""
|
|
678
679
|
This helper is needed because in case of `_lazy_load`, we need to update
|
|
@@ -759,7 +760,7 @@ def _EvalCommon(
|
|
|
759
760
|
async def run_to_completion():
|
|
760
761
|
with parent_context(parent, state):
|
|
761
762
|
try:
|
|
762
|
-
ret = await run_evaluator(experiment, evaluator, 0, [], stream, state)
|
|
763
|
+
ret = await run_evaluator(experiment, evaluator, 0, [], stream, state, enable_cache)
|
|
763
764
|
reporter.report_eval(evaluator, ret, verbose=True, jsonl=False)
|
|
764
765
|
return ret
|
|
765
766
|
finally:
|
|
@@ -798,6 +799,7 @@ async def EvalAsync(
|
|
|
798
799
|
stream: Callable[[SSEProgressEvent], None] | None = None,
|
|
799
800
|
parent: str | None = None,
|
|
800
801
|
state: BraintrustState | None = None,
|
|
802
|
+
enable_cache: bool = True,
|
|
801
803
|
) -> EvalResultWithSummary[Input, Output]:
|
|
802
804
|
"""
|
|
803
805
|
A function you can use to define an evaluator. This is a convenience wrapper around the `Evaluator` class.
|
|
@@ -855,6 +857,8 @@ async def EvalAsync(
|
|
|
855
857
|
:param parent: If specified, instead of creating a new experiment object, the Eval() will populate
|
|
856
858
|
the object or span specified by this parent.
|
|
857
859
|
:param state: Optional BraintrustState to use for the evaluation. If not specified, the global login state will be used.
|
|
860
|
+
:param enable_cache: Whether to enable the span cache for this evaluation. Defaults to True. The span cache stores
|
|
861
|
+
span data on disk to minimize memory usage and allow scorers to read spans without server round-trips.
|
|
858
862
|
:return: An `EvalResultWithSummary` object, which contains all results and a summary.
|
|
859
863
|
"""
|
|
860
864
|
f = _EvalCommon(
|
|
@@ -883,6 +887,7 @@ async def EvalAsync(
|
|
|
883
887
|
stream=stream,
|
|
884
888
|
parent=parent,
|
|
885
889
|
state=state,
|
|
890
|
+
enable_cache=enable_cache,
|
|
886
891
|
)
|
|
887
892
|
|
|
888
893
|
return await f()
|
|
@@ -918,6 +923,7 @@ def Eval(
|
|
|
918
923
|
stream: Callable[[SSEProgressEvent], None] | None = None,
|
|
919
924
|
parent: str | None = None,
|
|
920
925
|
state: BraintrustState | None = None,
|
|
926
|
+
enable_cache: bool = True,
|
|
921
927
|
) -> EvalResultWithSummary[Input, Output]:
|
|
922
928
|
"""
|
|
923
929
|
A function you can use to define an evaluator. This is a convenience wrapper around the `Evaluator` class.
|
|
@@ -975,6 +981,8 @@ def Eval(
|
|
|
975
981
|
:param parent: If specified, instead of creating a new experiment object, the Eval() will populate
|
|
976
982
|
the object or span specified by this parent.
|
|
977
983
|
:param state: Optional BraintrustState to use for the evaluation. If not specified, the global login state will be used.
|
|
984
|
+
:param enable_cache: Whether to enable the span cache for this evaluation. Defaults to True. The span cache stores
|
|
985
|
+
span data on disk to minimize memory usage and allow scorers to read spans without server round-trips.
|
|
978
986
|
:return: An `EvalResultWithSummary` object, which contains all results and a summary.
|
|
979
987
|
"""
|
|
980
988
|
|
|
@@ -1005,6 +1013,7 @@ def Eval(
|
|
|
1005
1013
|
stream=stream,
|
|
1006
1014
|
parent=parent,
|
|
1007
1015
|
state=state,
|
|
1016
|
+
enable_cache=enable_cache,
|
|
1008
1017
|
)
|
|
1009
1018
|
|
|
1010
1019
|
# https://stackoverflow.com/questions/55409641/asyncio-run-cannot-be-called-from-a-running-event-loop-when-using-jupyter-no
|
|
@@ -1249,10 +1258,11 @@ async def run_evaluator(
|
|
|
1249
1258
|
filters: list[Filter],
|
|
1250
1259
|
stream: Callable[[SSEProgressEvent], None] | None = None,
|
|
1251
1260
|
state: BraintrustState | None = None,
|
|
1261
|
+
enable_cache: bool = True,
|
|
1252
1262
|
) -> EvalResultWithSummary[Input, Output]:
|
|
1253
1263
|
"""Wrapper on _run_evaluator_internal that times out execution after evaluator.timeout."""
|
|
1254
1264
|
results = await asyncio.wait_for(
|
|
1255
|
-
_run_evaluator_internal(experiment, evaluator, position, filters, stream, state), evaluator.timeout
|
|
1265
|
+
_run_evaluator_internal(experiment, evaluator, position, filters, stream, state, enable_cache), evaluator.timeout
|
|
1256
1266
|
)
|
|
1257
1267
|
|
|
1258
1268
|
if experiment:
|
|
@@ -1280,6 +1290,7 @@ async def _run_evaluator_internal(
|
|
|
1280
1290
|
filters: list[Filter],
|
|
1281
1291
|
stream: Callable[[SSEProgressEvent], None] | None = None,
|
|
1282
1292
|
state: BraintrustState | None = None,
|
|
1293
|
+
enable_cache: bool = True,
|
|
1283
1294
|
):
|
|
1284
1295
|
# Start span cache for this eval (it's disabled by default to avoid temp files outside of evals)
|
|
1285
1296
|
if state is None:
|
|
@@ -1287,13 +1298,15 @@ async def _run_evaluator_internal(
|
|
|
1287
1298
|
|
|
1288
1299
|
state = _internal_get_global_state()
|
|
1289
1300
|
|
|
1290
|
-
state.span_cache.start()
|
1301
|
+
if enable_cache:
|
|
1302
|
+
state.span_cache.start()
|
|
1291
1303
|
try:
|
|
1292
1304
|
return await _run_evaluator_internal_impl(experiment, evaluator, position, filters, stream, state)
|
|
1293
1305
|
finally:
|
|
1294
1306
|
# Clean up disk-based span cache after eval completes and stop caching
|
|
1295
|
-
state.span_cache.dispose()
|
1296
|
-
state.span_cache.stop()
|
1307
|
+
if enable_cache:
|
|
1308
|
+
state.span_cache.dispose()
|
|
1309
|
+
state.span_cache.stop()
|
|
1297
1310
|
|
|
1298
1311
|
|
|
1299
1312
|
async def _run_evaluator_internal_impl(
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Auto-generated file (internal git SHA
|
|
1
|
+
"""Auto-generated file (internal git SHA 7178200dd3c7869f27b677260303cda0b798bf42) -- do not modify"""
|
|
2
2
|
|
|
3
3
|
from ._generated_types import (
|
|
4
4
|
Acl,
|
|
@@ -53,6 +53,7 @@ from ._generated_types import (
|
|
|
53
53
|
Group,
|
|
54
54
|
GroupScope,
|
|
55
55
|
IfExists,
|
|
56
|
+
ImageRenderingMode,
|
|
56
57
|
InvokeFunction,
|
|
57
58
|
InvokeParent,
|
|
58
59
|
MCPServer,
|
|
@@ -163,6 +164,7 @@ __all__ = [
|
|
|
163
164
|
"Group",
|
|
164
165
|
"GroupScope",
|
|
165
166
|
"IfExists",
|
|
167
|
+
"ImageRenderingMode",
|
|
166
168
|
"InvokeFunction",
|
|
167
169
|
"InvokeParent",
|
|
168
170
|
"MCPServer",
|