braintrust-0.4.1-py3-none-any.whl → braintrust-0.4.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +224 -122
- braintrust/cli/install/api.py +1 -1
- braintrust/conftest.py +24 -0
- braintrust/db_fields.py +1 -0
- braintrust/devserver/test_server_integration.py +0 -11
- braintrust/framework.py +2 -2
- braintrust/functions/invoke.py +1 -8
- braintrust/generated_types.py +7 -7
- braintrust/logger.py +30 -38
- braintrust/otel/__init__.py +24 -15
- braintrust/prompt_cache/test_disk_cache.py +3 -3
- braintrust/span_types.py +3 -0
- braintrust/test_bt_json.py +23 -19
- braintrust/test_framework.py +25 -0
- braintrust/test_logger.py +34 -0
- braintrust/test_otel.py +118 -26
- braintrust/test_util.py +51 -1
- braintrust/util.py +24 -3
- braintrust/version.py +2 -2
- braintrust/wrappers/langsmith_wrapper.py +517 -0
- braintrust/wrappers/litellm.py +43 -0
- braintrust/wrappers/test_agno.py +0 -12
- braintrust/wrappers/test_anthropic.py +1 -11
- braintrust/wrappers/test_dspy.py +0 -11
- braintrust/wrappers/test_google_genai.py +6 -1
- braintrust/wrappers/test_langsmith_wrapper.py +338 -0
- braintrust/wrappers/test_litellm.py +73 -10
- braintrust/wrappers/test_oai_attachments.py +0 -10
- braintrust/wrappers/test_openai.py +3 -12
- braintrust/wrappers/test_openrouter.py +0 -9
- braintrust/wrappers/test_pydantic_ai_integration.py +0 -11
- braintrust/wrappers/test_pydantic_ai_wrap_openai.py +2 -0
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/METADATA +1 -1
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/RECORD +37 -35
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/WHEEL +0 -0
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/top_level.txt +0 -0
braintrust/_generated_types.py
CHANGED
|
@@ -106,7 +106,7 @@ class ApiKey(TypedDict):
|
|
|
106
106
|
|
|
107
107
|
class AsyncScoringControlAsyncScoringControl(TypedDict):
|
|
108
108
|
kind: Literal['score_update']
|
|
109
|
-
token: str
|
|
109
|
+
token: NotRequired[str | None]
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
class AsyncScoringControlAsyncScoringControl2(TypedDict):
|
|
@@ -117,11 +117,39 @@ class AsyncScoringControlAsyncScoringControl3(TypedDict):
|
|
|
117
117
|
kind: Literal['state_enabled_force_rescore']
|
|
118
118
|
|
|
119
119
|
|
|
120
|
+
class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope(TypedDict):
|
|
121
|
+
type: Literal['span']
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1(TypedDict):
|
|
125
|
+
type: Literal['trace']
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class AsyncScoringControlAsyncScoringControl4TriggeredFunction(TypedDict):
|
|
129
|
+
function_id: NotRequired[Any | None]
|
|
130
|
+
scope: (
|
|
131
|
+
AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope
|
|
132
|
+
| AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class AsyncScoringControlAsyncScoringControl4(TypedDict):
|
|
137
|
+
kind: Literal['trigger_functions']
|
|
138
|
+
triggered_functions: Sequence[AsyncScoringControlAsyncScoringControl4TriggeredFunction]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class AsyncScoringControlAsyncScoringControl5(TypedDict):
|
|
142
|
+
kind: Literal['complete_triggered_functions']
|
|
143
|
+
function_ids: Sequence[Any]
|
|
144
|
+
triggered_xact_id: str
|
|
145
|
+
|
|
146
|
+
|
|
120
147
|
class AsyncScoringStateAsyncScoringState(TypedDict):
|
|
121
148
|
status: Literal['enabled']
|
|
122
149
|
token: str
|
|
123
150
|
function_ids: Sequence[Any]
|
|
124
151
|
skip_logging: NotRequired[bool | None]
|
|
152
|
+
triggered_functions: NotRequired[Mapping[str, Any] | None]
|
|
125
153
|
|
|
126
154
|
|
|
127
155
|
class AsyncScoringStateAsyncScoringState1(TypedDict):
|
|
@@ -131,6 +159,38 @@ class AsyncScoringStateAsyncScoringState1(TypedDict):
|
|
|
131
159
|
AsyncScoringState: TypeAlias = AsyncScoringStateAsyncScoringState | AsyncScoringStateAsyncScoringState1 | None
|
|
132
160
|
|
|
133
161
|
|
|
162
|
+
class PreprocessorPreprocessor(TypedDict):
|
|
163
|
+
type: Literal['function']
|
|
164
|
+
id: str
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class PreprocessorPreprocessor2(TypedDict):
|
|
168
|
+
pass
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class PreprocessorPreprocessor3(PreprocessorPreprocessor, PreprocessorPreprocessor2):
|
|
172
|
+
pass
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class BatchedFacetDataFacet(TypedDict):
|
|
176
|
+
name: str
|
|
177
|
+
"""
|
|
178
|
+
The name of the facet
|
|
179
|
+
"""
|
|
180
|
+
prompt: str
|
|
181
|
+
"""
|
|
182
|
+
The prompt to use for LLM extraction. The preprocessed text will be provided as context.
|
|
183
|
+
"""
|
|
184
|
+
model: NotRequired[str | None]
|
|
185
|
+
"""
|
|
186
|
+
The model to use for facet extraction
|
|
187
|
+
"""
|
|
188
|
+
no_match_pattern: NotRequired[str | None]
|
|
189
|
+
"""
|
|
190
|
+
Regex pattern to identify outputs that do not match the facet. If the output matches, the facet will be saved as 'no_match'
|
|
191
|
+
"""
|
|
192
|
+
|
|
193
|
+
|
|
134
194
|
class BraintrustAttachmentReference(TypedDict):
|
|
135
195
|
type: Literal['braintrust_attachment']
|
|
136
196
|
"""
|
|
@@ -559,16 +619,16 @@ class ExternalAttachmentReference(TypedDict):
|
|
|
559
619
|
"""
|
|
560
620
|
|
|
561
621
|
|
|
562
|
-
class
|
|
622
|
+
class Preprocessor1Preprocessor1(TypedDict):
|
|
563
623
|
type: Literal['function']
|
|
564
624
|
id: str
|
|
565
625
|
|
|
566
626
|
|
|
567
|
-
class
|
|
627
|
+
class Preprocessor1Preprocessor12(TypedDict):
|
|
568
628
|
pass
|
|
569
629
|
|
|
570
630
|
|
|
571
|
-
class
|
|
631
|
+
class Preprocessor1Preprocessor13(Preprocessor1Preprocessor1, Preprocessor1Preprocessor12):
|
|
572
632
|
pass
|
|
573
633
|
|
|
574
634
|
|
|
@@ -689,7 +749,7 @@ FunctionIdRef: TypeAlias = Mapping[str, Any]
|
|
|
689
749
|
|
|
690
750
|
|
|
691
751
|
FunctionObjectType: TypeAlias = Literal[
|
|
692
|
-
'prompt', 'tool', 'scorer', 'task', '
|
|
752
|
+
'prompt', 'tool', 'scorer', 'task', 'custom_view', 'preprocessor', 'facet'
|
|
693
753
|
]
|
|
694
754
|
|
|
695
755
|
|
|
@@ -697,6 +757,9 @@ FunctionOutputType: TypeAlias = Literal['completion', 'score', 'any']
|
|
|
697
757
|
|
|
698
758
|
|
|
699
759
|
FunctionTypeEnum: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet']
|
|
760
|
+
"""
|
|
761
|
+
The type of global function. Defaults to 'scorer'.
|
|
762
|
+
"""
|
|
700
763
|
|
|
701
764
|
|
|
702
765
|
FunctionTypeEnumNullish: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet']
|
|
@@ -980,6 +1043,18 @@ class Group(TypedDict):
|
|
|
980
1043
|
"""
|
|
981
1044
|
|
|
982
1045
|
|
|
1046
|
+
class GroupScope(TypedDict):
|
|
1047
|
+
type: Literal['group']
|
|
1048
|
+
group_by: str
|
|
1049
|
+
"""
|
|
1050
|
+
Field path to group by, e.g. metadata.session_id
|
|
1051
|
+
"""
|
|
1052
|
+
idle_seconds: NotRequired[float | None]
|
|
1053
|
+
"""
|
|
1054
|
+
Optional: trigger after this many seconds of inactivity
|
|
1055
|
+
"""
|
|
1056
|
+
|
|
1057
|
+
|
|
983
1058
|
IfExists: TypeAlias = Literal['error', 'ignore', 'replace']
|
|
984
1059
|
|
|
985
1060
|
|
|
@@ -1009,6 +1084,14 @@ class InvokeFunctionInvokeFunction1(TypedDict):
|
|
|
1009
1084
|
"""
|
|
1010
1085
|
|
|
1011
1086
|
|
|
1087
|
+
class InvokeFunctionInvokeFunction2(TypedDict):
|
|
1088
|
+
global_function: str
|
|
1089
|
+
"""
|
|
1090
|
+
The name of the global function. Currently, the global namespace includes the functions in autoevals
|
|
1091
|
+
"""
|
|
1092
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1093
|
+
|
|
1094
|
+
|
|
1012
1095
|
class InvokeFunctionInvokeFunction3(TypedDict):
|
|
1013
1096
|
prompt_session_id: str
|
|
1014
1097
|
"""
|
|
@@ -1175,12 +1258,6 @@ class ModelParamsModelParams4(TypedDict):
|
|
|
1175
1258
|
reasoning_budget: NotRequired[float | None]
|
|
1176
1259
|
|
|
1177
1260
|
|
|
1178
|
-
NullableFunctionTypeEnum: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet']
|
|
1179
|
-
"""
|
|
1180
|
-
The type of global function. If unspecified, defaults to 'scorer' for backward compatibility.
|
|
1181
|
-
"""
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
1261
|
class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
|
|
1185
1262
|
type: Literal['function']
|
|
1186
1263
|
id: str
|
|
@@ -1189,7 +1266,7 @@ class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
|
|
|
1189
1266
|
class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict):
|
|
1190
1267
|
type: Literal['global']
|
|
1191
1268
|
name: str
|
|
1192
|
-
function_type: NotRequired[
|
|
1269
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1193
1270
|
|
|
1194
1271
|
|
|
1195
1272
|
NullableSavedFunctionId: TypeAlias = (
|
|
@@ -1829,7 +1906,7 @@ class TaskTask2(TypedDict):
|
|
|
1829
1906
|
"""
|
|
1830
1907
|
The name of the global function. Currently, the global namespace includes the functions in autoevals
|
|
1831
1908
|
"""
|
|
1832
|
-
function_type: NotRequired[
|
|
1909
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1833
1910
|
|
|
1834
1911
|
|
|
1835
1912
|
class TaskTask3(TypedDict):
|
|
@@ -1945,7 +2022,7 @@ class SavedFunctionIdSavedFunctionId(TypedDict):
|
|
|
1945
2022
|
class SavedFunctionIdSavedFunctionId1(TypedDict):
|
|
1946
2023
|
type: Literal['global']
|
|
1947
2024
|
name: str
|
|
1948
|
-
function_type: NotRequired[
|
|
2025
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1949
2026
|
|
|
1950
2027
|
|
|
1951
2028
|
SavedFunctionId: TypeAlias = SavedFunctionIdSavedFunctionId | SavedFunctionIdSavedFunctionId1
|
|
@@ -2024,17 +2101,11 @@ class SpanIFrame(TypedDict):
|
|
|
2024
2101
|
|
|
2025
2102
|
class SpanScope(TypedDict):
|
|
2026
2103
|
type: Literal['span']
|
|
2027
|
-
root_span_id: str
|
|
2028
|
-
"""
|
|
2029
|
-
The root span id is a unique identifier for the trace.
|
|
2030
|
-
"""
|
|
2031
|
-
id: str
|
|
2032
|
-
"""
|
|
2033
|
-
A unique identifier for the span.
|
|
2034
|
-
"""
|
|
2035
2104
|
|
|
2036
2105
|
|
|
2037
|
-
SpanType: TypeAlias = Literal[
|
|
2106
|
+
SpanType: TypeAlias = Literal[
|
|
2107
|
+
'llm', 'score', 'function', 'eval', 'task', 'tool', 'automation', 'facet', 'preprocessor'
|
|
2108
|
+
]
|
|
2038
2109
|
"""
|
|
2039
2110
|
Type of the span, for display purposes only
|
|
2040
2111
|
"""
|
|
@@ -2079,9 +2150,42 @@ class ToolFunctionDefinition(TypedDict):
|
|
|
2079
2150
|
|
|
2080
2151
|
class TraceScope(TypedDict):
|
|
2081
2152
|
type: Literal['trace']
|
|
2082
|
-
|
|
2153
|
+
idle_seconds: NotRequired[float | None]
|
|
2083
2154
|
"""
|
|
2084
|
-
|
|
2155
|
+
Consider trace complete after this many seconds of inactivity (default: 30)
|
|
2156
|
+
"""
|
|
2157
|
+
|
|
2158
|
+
|
|
2159
|
+
class TriggeredFunctionStateScope(TypedDict):
|
|
2160
|
+
type: Literal['span']
|
|
2161
|
+
|
|
2162
|
+
|
|
2163
|
+
class TriggeredFunctionStateScope1(TypedDict):
|
|
2164
|
+
type: Literal['trace']
|
|
2165
|
+
|
|
2166
|
+
|
|
2167
|
+
class TriggeredFunctionStateScope2(TypedDict):
|
|
2168
|
+
type: Literal['group']
|
|
2169
|
+
key: str
|
|
2170
|
+
value: str
|
|
2171
|
+
|
|
2172
|
+
|
|
2173
|
+
class TriggeredFunctionState(TypedDict):
|
|
2174
|
+
triggered_xact_id: str
|
|
2175
|
+
"""
|
|
2176
|
+
The xact_id when this function was triggered
|
|
2177
|
+
"""
|
|
2178
|
+
completed_xact_id: NotRequired[str | None]
|
|
2179
|
+
"""
|
|
2180
|
+
The xact_id when this function completed (matches triggered_xact_id if done)
|
|
2181
|
+
"""
|
|
2182
|
+
attempts: NotRequired[int | None]
|
|
2183
|
+
"""
|
|
2184
|
+
Number of execution attempts (for retry tracking)
|
|
2185
|
+
"""
|
|
2186
|
+
scope: TriggeredFunctionStateScope | TriggeredFunctionStateScope1 | TriggeredFunctionStateScope2
|
|
2187
|
+
"""
|
|
2188
|
+
The scope of data this function operates on
|
|
2085
2189
|
"""
|
|
2086
2190
|
|
|
2087
2191
|
|
|
@@ -2278,6 +2382,8 @@ AsyncScoringControl: TypeAlias = (
|
|
|
2278
2382
|
| AsyncScoringControlAsyncScoringControl1
|
|
2279
2383
|
| AsyncScoringControlAsyncScoringControl2
|
|
2280
2384
|
| AsyncScoringControlAsyncScoringControl3
|
|
2385
|
+
| AsyncScoringControlAsyncScoringControl4
|
|
2386
|
+
| AsyncScoringControlAsyncScoringControl5
|
|
2281
2387
|
)
|
|
2282
2388
|
|
|
2283
2389
|
|
|
@@ -2292,6 +2398,25 @@ class AttachmentStatus(TypedDict):
|
|
|
2292
2398
|
"""
|
|
2293
2399
|
|
|
2294
2400
|
|
|
2401
|
+
class PreprocessorPreprocessor1(TypedDict):
|
|
2402
|
+
type: Literal['global']
|
|
2403
|
+
name: str
|
|
2404
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2405
|
+
|
|
2406
|
+
|
|
2407
|
+
class PreprocessorPreprocessor4(PreprocessorPreprocessor1, PreprocessorPreprocessor2):
|
|
2408
|
+
pass
|
|
2409
|
+
|
|
2410
|
+
|
|
2411
|
+
Preprocessor: TypeAlias = PreprocessorPreprocessor3 | PreprocessorPreprocessor4
|
|
2412
|
+
|
|
2413
|
+
|
|
2414
|
+
class BatchedFacetData(TypedDict):
|
|
2415
|
+
type: Literal['batched_facet']
|
|
2416
|
+
preprocessor: NotRequired[Preprocessor | None]
|
|
2417
|
+
facets: Sequence[BatchedFacetDataFacet]
|
|
2418
|
+
|
|
2419
|
+
|
|
2295
2420
|
ChatCompletionContentPart: TypeAlias = (
|
|
2296
2421
|
ChatCompletionContentPartTextWithTitle
|
|
2297
2422
|
| ChatCompletionContentPartImageWithTitle
|
|
@@ -2384,7 +2509,7 @@ class DatasetEvent(TypedDict):
|
|
|
2384
2509
|
"""
|
|
2385
2510
|
span_id: str
|
|
2386
2511
|
"""
|
|
2387
|
-
A unique identifier used to link different dataset events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/
|
|
2512
|
+
A unique identifier used to link different dataset events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
|
|
2388
2513
|
"""
|
|
2389
2514
|
root_span_id: str
|
|
2390
2515
|
"""
|
|
@@ -2468,7 +2593,7 @@ class Experiment(TypedDict):
|
|
|
2468
2593
|
class ExtendedSavedFunctionIdExtendedSavedFunctionId1(TypedDict):
|
|
2469
2594
|
type: Literal['global']
|
|
2470
2595
|
name: str
|
|
2471
|
-
function_type: NotRequired[
|
|
2596
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2472
2597
|
|
|
2473
2598
|
|
|
2474
2599
|
ExtendedSavedFunctionId: TypeAlias = (
|
|
@@ -2478,22 +2603,22 @@ ExtendedSavedFunctionId: TypeAlias = (
|
|
|
2478
2603
|
)
|
|
2479
2604
|
|
|
2480
2605
|
|
|
2481
|
-
class
|
|
2606
|
+
class Preprocessor1Preprocessor11(TypedDict):
|
|
2482
2607
|
type: Literal['global']
|
|
2483
2608
|
name: str
|
|
2484
|
-
function_type: NotRequired[
|
|
2609
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2485
2610
|
|
|
2486
2611
|
|
|
2487
|
-
class
|
|
2612
|
+
class Preprocessor1Preprocessor14(Preprocessor1Preprocessor11, Preprocessor1Preprocessor12):
|
|
2488
2613
|
pass
|
|
2489
2614
|
|
|
2490
2615
|
|
|
2491
|
-
|
|
2616
|
+
Preprocessor1: TypeAlias = Preprocessor1Preprocessor13 | Preprocessor1Preprocessor14
|
|
2492
2617
|
|
|
2493
2618
|
|
|
2494
2619
|
class FacetData(TypedDict):
|
|
2495
2620
|
type: Literal['facet']
|
|
2496
|
-
preprocessor: NotRequired[
|
|
2621
|
+
preprocessor: NotRequired[Preprocessor1 | None]
|
|
2497
2622
|
prompt: str
|
|
2498
2623
|
"""
|
|
2499
2624
|
The prompt to use for LLM extraction. The preprocessed text will be provided as context.
|
|
@@ -2511,7 +2636,7 @@ class FacetData(TypedDict):
|
|
|
2511
2636
|
class FunctionDataFunctionData3(TypedDict):
|
|
2512
2637
|
type: Literal['global']
|
|
2513
2638
|
name: str
|
|
2514
|
-
function_type: NotRequired[
|
|
2639
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2515
2640
|
config: NotRequired[Mapping[str, Any] | None]
|
|
2516
2641
|
"""
|
|
2517
2642
|
Configuration options to pass to the global function (e.g., for preprocessor customization)
|
|
@@ -2523,21 +2648,68 @@ class FunctionIdFunctionId2(TypedDict):
|
|
|
2523
2648
|
"""
|
|
2524
2649
|
The name of the global function. Currently, the global namespace includes the functions in autoevals
|
|
2525
2650
|
"""
|
|
2526
|
-
function_type: NotRequired[
|
|
2651
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2527
2652
|
|
|
2528
2653
|
|
|
2529
|
-
class
|
|
2530
|
-
|
|
2654
|
+
class InvokeFunctionInvokeFunction7(TypedDict):
|
|
2655
|
+
input: NotRequired[Any | None]
|
|
2531
2656
|
"""
|
|
2532
|
-
|
|
2657
|
+
Argument to the function, which can be any JSON serializable value
|
|
2658
|
+
"""
|
|
2659
|
+
expected: NotRequired[Any | None]
|
|
2660
|
+
"""
|
|
2661
|
+
The expected output of the function
|
|
2662
|
+
"""
|
|
2663
|
+
metadata: NotRequired[Mapping[str, Any] | None]
|
|
2664
|
+
"""
|
|
2665
|
+
Any relevant metadata. This will be logged and available as the `metadata` argument.
|
|
2666
|
+
"""
|
|
2667
|
+
tags: NotRequired[Sequence[str] | None]
|
|
2668
|
+
"""
|
|
2669
|
+
Any relevant tags to log on the span.
|
|
2670
|
+
"""
|
|
2671
|
+
messages: NotRequired[Sequence[ChatCompletionMessageParam] | None]
|
|
2672
|
+
"""
|
|
2673
|
+
If the function is an LLM, additional messages to pass along to it
|
|
2674
|
+
"""
|
|
2675
|
+
parent: NotRequired[InvokeParent | None]
|
|
2676
|
+
stream: NotRequired[bool | None]
|
|
2677
|
+
"""
|
|
2678
|
+
Whether to stream the response. If true, results will be returned in the Braintrust SSE format.
|
|
2679
|
+
"""
|
|
2680
|
+
mode: NotRequired[StreamingMode | None]
|
|
2681
|
+
strict: NotRequired[bool | None]
|
|
2682
|
+
"""
|
|
2683
|
+
If true, throw an error if one of the variables in the prompt is not present in the input
|
|
2684
|
+
"""
|
|
2685
|
+
mcp_auth: NotRequired[Mapping[str, InvokeFunctionMcpAuth] | None]
|
|
2686
|
+
"""
|
|
2687
|
+
Map of MCP server URL to auth credentials
|
|
2688
|
+
"""
|
|
2689
|
+
overrides: NotRequired[Mapping[str, Any] | None]
|
|
2690
|
+
"""
|
|
2691
|
+
Partial function definition to merge with the function being invoked. Fields are validated against the function type's schema at runtime. For facets: { preprocessor?, prompt?, model? }. For prompts: { model?, ... }.
|
|
2533
2692
|
"""
|
|
2534
|
-
function_type: NotRequired[NullableFunctionTypeEnum | None]
|
|
2535
2693
|
|
|
2536
2694
|
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2695
|
+
class InvokeFunctionInvokeFunction8(InvokeFunctionInvokeFunction, InvokeFunctionInvokeFunction7):
|
|
2696
|
+
pass
|
|
2697
|
+
|
|
2698
|
+
|
|
2699
|
+
class InvokeFunctionInvokeFunction9(InvokeFunctionInvokeFunction1, InvokeFunctionInvokeFunction7):
|
|
2700
|
+
pass
|
|
2701
|
+
|
|
2702
|
+
|
|
2703
|
+
class InvokeFunctionInvokeFunction10(InvokeFunctionInvokeFunction2, InvokeFunctionInvokeFunction7):
|
|
2704
|
+
pass
|
|
2705
|
+
|
|
2706
|
+
|
|
2707
|
+
class InvokeFunctionInvokeFunction11(InvokeFunctionInvokeFunction3, InvokeFunctionInvokeFunction7):
|
|
2708
|
+
pass
|
|
2709
|
+
|
|
2710
|
+
|
|
2711
|
+
class InvokeFunctionInvokeFunction12(InvokeFunctionInvokeFunction4, InvokeFunctionInvokeFunction7):
|
|
2712
|
+
pass
|
|
2541
2713
|
|
|
2542
2714
|
|
|
2543
2715
|
class ModelParamsModelParams(TypedDict):
|
|
@@ -2580,7 +2752,7 @@ class OnlineScoreConfig(TypedDict):
|
|
|
2580
2752
|
"""
|
|
2581
2753
|
scorers: Sequence[SavedFunctionId]
|
|
2582
2754
|
"""
|
|
2583
|
-
The list of
|
|
2755
|
+
The list of functions to run for online scoring. Can include scorers, facets, or other function types.
|
|
2584
2756
|
"""
|
|
2585
2757
|
btql_filter: NotRequired[str | None]
|
|
2586
2758
|
"""
|
|
@@ -2588,16 +2760,20 @@ class OnlineScoreConfig(TypedDict):
|
|
|
2588
2760
|
"""
|
|
2589
2761
|
apply_to_root_span: NotRequired[bool | None]
|
|
2590
2762
|
"""
|
|
2591
|
-
Whether to trigger online scoring on the root span of each trace
|
|
2763
|
+
Whether to trigger online scoring on the root span of each trace. Only applies when scope is 'span' or unset.
|
|
2592
2764
|
"""
|
|
2593
2765
|
apply_to_span_names: NotRequired[Sequence[str] | None]
|
|
2594
2766
|
"""
|
|
2595
|
-
Trigger online scoring on any spans with a name in this list
|
|
2767
|
+
Trigger online scoring on any spans with a name in this list. Only applies when scope is 'span' or unset.
|
|
2596
2768
|
"""
|
|
2597
2769
|
skip_logging: NotRequired[bool | None]
|
|
2598
2770
|
"""
|
|
2599
2771
|
Whether to skip adding scorer spans when computing scores
|
|
2600
2772
|
"""
|
|
2773
|
+
scope: NotRequired[SpanScope | TraceScope | GroupScope | None]
|
|
2774
|
+
"""
|
|
2775
|
+
The scope at which to run the functions. Defaults to span-level execution. Trace/group scope requires all functions to be facets.
|
|
2776
|
+
"""
|
|
2601
2777
|
|
|
2602
2778
|
|
|
2603
2779
|
class Project(TypedDict):
|
|
@@ -2802,7 +2978,7 @@ class ExperimentEvent(TypedDict):
|
|
|
2802
2978
|
"""
|
|
2803
2979
|
span_id: str
|
|
2804
2980
|
"""
|
|
2805
|
-
A unique identifier used to link different experiment events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/
|
|
2981
|
+
A unique identifier used to link different experiment events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
|
|
2806
2982
|
"""
|
|
2807
2983
|
span_parents: NotRequired[Sequence[str] | None]
|
|
2808
2984
|
"""
|
|
@@ -2853,80 +3029,6 @@ GraphNode: TypeAlias = (
|
|
|
2853
3029
|
)
|
|
2854
3030
|
|
|
2855
3031
|
|
|
2856
|
-
class InvokeContext(TypedDict):
|
|
2857
|
-
object_type: Literal['project_logs', 'experiment', 'dataset', 'playground_logs']
|
|
2858
|
-
"""
|
|
2859
|
-
The type of object containing the span data
|
|
2860
|
-
"""
|
|
2861
|
-
object_id: str
|
|
2862
|
-
"""
|
|
2863
|
-
The ID of the object containing the span data
|
|
2864
|
-
"""
|
|
2865
|
-
scope: InvokeScope
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
class InvokeFunctionInvokeFunction7(TypedDict):
|
|
2869
|
-
input: NotRequired[Any | None]
|
|
2870
|
-
"""
|
|
2871
|
-
Argument to the function, which can be any JSON serializable value
|
|
2872
|
-
"""
|
|
2873
|
-
expected: NotRequired[Any | None]
|
|
2874
|
-
"""
|
|
2875
|
-
The expected output of the function
|
|
2876
|
-
"""
|
|
2877
|
-
metadata: NotRequired[Mapping[str, Any] | None]
|
|
2878
|
-
"""
|
|
2879
|
-
Any relevant metadata. This will be logged and available as the `metadata` argument.
|
|
2880
|
-
"""
|
|
2881
|
-
tags: NotRequired[Sequence[str] | None]
|
|
2882
|
-
"""
|
|
2883
|
-
Any relevant tags to log on the span.
|
|
2884
|
-
"""
|
|
2885
|
-
messages: NotRequired[Sequence[ChatCompletionMessageParam] | None]
|
|
2886
|
-
"""
|
|
2887
|
-
If the function is an LLM, additional messages to pass along to it
|
|
2888
|
-
"""
|
|
2889
|
-
context: NotRequired[InvokeContext | None]
|
|
2890
|
-
parent: NotRequired[InvokeParent | None]
|
|
2891
|
-
stream: NotRequired[bool | None]
|
|
2892
|
-
"""
|
|
2893
|
-
Whether to stream the response. If true, results will be returned in the Braintrust SSE format.
|
|
2894
|
-
"""
|
|
2895
|
-
mode: NotRequired[StreamingMode | None]
|
|
2896
|
-
strict: NotRequired[bool | None]
|
|
2897
|
-
"""
|
|
2898
|
-
If true, throw an error if one of the variables in the prompt is not present in the input
|
|
2899
|
-
"""
|
|
2900
|
-
mcp_auth: NotRequired[Mapping[str, InvokeFunctionMcpAuth] | None]
|
|
2901
|
-
"""
|
|
2902
|
-
Map of MCP server URL to auth credentials
|
|
2903
|
-
"""
|
|
2904
|
-
overrides: NotRequired[Mapping[str, Any] | None]
|
|
2905
|
-
"""
|
|
2906
|
-
Partial function definition to merge with the function being invoked. Fields are validated against the function type's schema at runtime. For facets: { preprocessor?, prompt?, model? }. For prompts: { model?, ... }.
|
|
2907
|
-
"""
|
|
2908
|
-
|
|
2909
|
-
|
|
2910
|
-
class InvokeFunctionInvokeFunction8(InvokeFunctionInvokeFunction, InvokeFunctionInvokeFunction7):
|
|
2911
|
-
pass
|
|
2912
|
-
|
|
2913
|
-
|
|
2914
|
-
class InvokeFunctionInvokeFunction9(InvokeFunctionInvokeFunction1, InvokeFunctionInvokeFunction7):
|
|
2915
|
-
pass
|
|
2916
|
-
|
|
2917
|
-
|
|
2918
|
-
class InvokeFunctionInvokeFunction10(InvokeFunctionInvokeFunction2, InvokeFunctionInvokeFunction7):
|
|
2919
|
-
pass
|
|
2920
|
-
|
|
2921
|
-
|
|
2922
|
-
class InvokeFunctionInvokeFunction11(InvokeFunctionInvokeFunction3, InvokeFunctionInvokeFunction7):
|
|
2923
|
-
pass
|
|
2924
|
-
|
|
2925
|
-
|
|
2926
|
-
class InvokeFunctionInvokeFunction12(InvokeFunctionInvokeFunction4, InvokeFunctionInvokeFunction7):
|
|
2927
|
-
pass
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
3032
|
class ProjectLogsEvent(TypedDict):
|
|
2931
3033
|
id: str
|
|
2932
3034
|
"""
|
|
@@ -2994,7 +3096,7 @@ class ProjectLogsEvent(TypedDict):
|
|
|
2994
3096
|
"""
|
|
2995
3097
|
span_id: str
|
|
2996
3098
|
"""
|
|
2997
|
-
A unique identifier used to link different project logs events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/
|
|
3099
|
+
A unique identifier used to link different project logs events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
|
|
2998
3100
|
"""
|
|
2999
3101
|
span_parents: NotRequired[Sequence[str] | None]
|
|
3000
3102
|
"""
|
|
@@ -3127,7 +3229,6 @@ class View(TypedDict):
|
|
|
3127
3229
|
'tools',
|
|
3128
3230
|
'scorers',
|
|
3129
3231
|
'logs',
|
|
3130
|
-
'agents',
|
|
3131
3232
|
'monitor',
|
|
3132
3233
|
'for_review',
|
|
3133
3234
|
]
|
|
@@ -3362,6 +3463,7 @@ FunctionData: TypeAlias = (
|
|
|
3362
3463
|
| FunctionDataFunctionData2
|
|
3363
3464
|
| FunctionDataFunctionData3
|
|
3364
3465
|
| FacetData
|
|
3466
|
+
| BatchedFacetData
|
|
3365
3467
|
)
|
|
3366
3468
|
|
|
3367
3469
|
|
braintrust/cli/install/api.py
CHANGED
|
@@ -326,7 +326,7 @@ def main(args):
|
|
|
326
326
|
textwrap.dedent(
|
|
327
327
|
f"""\
|
|
328
328
|
Stack with name {args.name} does not exist. Either create it manually by following
|
|
329
|
-
https://www.braintrust.dev/docs/
|
|
329
|
+
https://www.braintrust.dev/docs/admin/self-hosting/aws or use the --create flag."""
|
|
330
330
|
)
|
|
331
331
|
)
|
|
332
332
|
exit(1)
|
braintrust/conftest.py
CHANGED
|
@@ -46,3 +46,27 @@ def reset_braintrust_state():
|
|
|
46
46
|
from braintrust import logger
|
|
47
47
|
|
|
48
48
|
logger._state = logger.BraintrustState()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@pytest.fixture(scope="session")
|
|
52
|
+
def vcr_config():
|
|
53
|
+
"""
|
|
54
|
+
VCR configuration for recording/playing back HTTP interactions.
|
|
55
|
+
|
|
56
|
+
In CI, use "none" to fail if cassette is missing.
|
|
57
|
+
Locally, use "once" to record new cassettes if they don't exist.
|
|
58
|
+
"""
|
|
59
|
+
record_mode = "none" if (os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS")) else "once"
|
|
60
|
+
|
|
61
|
+
return {
|
|
62
|
+
"record_mode": record_mode,
|
|
63
|
+
"filter_headers": [
|
|
64
|
+
"authorization",
|
|
65
|
+
"openai-organization",
|
|
66
|
+
"x-api-key",
|
|
67
|
+
"api-key",
|
|
68
|
+
"openai-api-key",
|
|
69
|
+
"x-goog-api-key",
|
|
70
|
+
"x-bt-auth-token",
|
|
71
|
+
],
|
|
72
|
+
}
|
braintrust/db_fields.py
CHANGED
(hunk not captured in this extract: +1 -0)
braintrust/devserver/test_server_integration.py
CHANGED
|
@@ -8,17 +8,6 @@ from braintrust.framework import _evals
|
|
|
8
8
|
from braintrust.test_helpers import has_devserver_installed
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
@pytest.fixture(scope="module")
|
|
12
|
-
def vcr_config():
|
|
13
|
-
"""VCR configuration to filter sensitive headers."""
|
|
14
|
-
return {
|
|
15
|
-
"filter_headers": [
|
|
16
|
-
"x-bt-auth-token",
|
|
17
|
-
"authorization",
|
|
18
|
-
]
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
|
|
22
11
|
@pytest.fixture
|
|
23
12
|
def client():
|
|
24
13
|
"""Create test client using the real simple_eval.py example."""
|
braintrust/framework.py
CHANGED
|
@@ -1559,9 +1559,9 @@ def build_local_summary(
|
|
|
1559
1559
|
scores_by_name = defaultdict(lambda: (0, 0))
|
|
1560
1560
|
for result in results:
|
|
1561
1561
|
for name, score in result.scores.items():
|
|
1562
|
-
curr = scores_by_name[name]
|
|
1563
|
-
if curr is None:
|
|
1562
|
+
if score is None:
|
|
1564
1563
|
continue
|
|
1564
|
+
curr = scores_by_name[name]
|
|
1565
1565
|
scores_by_name[name] = (curr[0] + score, curr[1] + 1)
|
|
1566
1566
|
longest_score_name = max(len(name) for name in scores_by_name) if scores_by_name else 0
|
|
1567
1567
|
avg_scores = {
|