braintrust 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braintrust/_generated_types.py +328 -126
- braintrust/cli/install/api.py +1 -1
- braintrust/conftest.py +24 -0
- braintrust/devserver/test_server_integration.py +0 -11
- braintrust/framework.py +98 -1
- braintrust/functions/invoke.py +4 -9
- braintrust/functions/test_invoke.py +61 -0
- braintrust/generated_types.py +13 -7
- braintrust/logger.py +107 -66
- braintrust/prompt_cache/test_disk_cache.py +3 -3
- braintrust/span_cache.py +337 -0
- braintrust/span_identifier_v3.py +21 -0
- braintrust/span_types.py +3 -0
- braintrust/test_bt_json.py +23 -19
- braintrust/test_logger.py +116 -0
- braintrust/test_span_cache.py +344 -0
- braintrust/test_trace.py +267 -0
- braintrust/trace.py +385 -0
- braintrust/version.py +2 -2
- braintrust/wrappers/claude_agent_sdk/_wrapper.py +48 -6
- braintrust/wrappers/claude_agent_sdk/test_wrapper.py +106 -0
- braintrust/wrappers/langsmith_wrapper.py +517 -0
- braintrust/wrappers/test_agno.py +0 -12
- braintrust/wrappers/test_anthropic.py +1 -11
- braintrust/wrappers/test_dspy.py +0 -11
- braintrust/wrappers/test_google_genai.py +6 -1
- braintrust/wrappers/test_langsmith_wrapper.py +338 -0
- braintrust/wrappers/test_litellm.py +0 -10
- braintrust/wrappers/test_oai_attachments.py +0 -10
- braintrust/wrappers/test_openai.py +3 -12
- braintrust/wrappers/test_openrouter.py +0 -9
- braintrust/wrappers/test_pydantic_ai_integration.py +0 -11
- braintrust/wrappers/test_pydantic_ai_wrap_openai.py +2 -0
- {braintrust-0.4.2.dist-info → braintrust-0.5.0.dist-info}/METADATA +1 -1
- {braintrust-0.4.2.dist-info → braintrust-0.5.0.dist-info}/RECORD +38 -31
- {braintrust-0.4.2.dist-info → braintrust-0.5.0.dist-info}/WHEEL +1 -1
- {braintrust-0.4.2.dist-info → braintrust-0.5.0.dist-info}/entry_points.txt +0 -0
- {braintrust-0.4.2.dist-info → braintrust-0.5.0.dist-info}/top_level.txt +0 -0
braintrust/_generated_types.py
CHANGED
|
@@ -106,7 +106,7 @@ class ApiKey(TypedDict):
|
|
|
106
106
|
|
|
107
107
|
class AsyncScoringControlAsyncScoringControl(TypedDict):
|
|
108
108
|
kind: Literal['score_update']
|
|
109
|
-
token: str
|
|
109
|
+
token: NotRequired[str | None]
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
class AsyncScoringControlAsyncScoringControl2(TypedDict):
|
|
@@ -117,11 +117,44 @@ class AsyncScoringControlAsyncScoringControl3(TypedDict):
|
|
|
117
117
|
kind: Literal['state_enabled_force_rescore']
|
|
118
118
|
|
|
119
119
|
|
|
120
|
+
class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope(TypedDict):
|
|
121
|
+
type: Literal['span']
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1(TypedDict):
|
|
125
|
+
type: Literal['trace']
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class AsyncScoringControlAsyncScoringControl4TriggeredFunction(TypedDict):
|
|
129
|
+
function_id: NotRequired[Any | None]
|
|
130
|
+
scope: (
|
|
131
|
+
AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope
|
|
132
|
+
| AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class AsyncScoringControlAsyncScoringControl4(TypedDict):
|
|
137
|
+
kind: Literal['trigger_functions']
|
|
138
|
+
triggered_functions: Sequence[AsyncScoringControlAsyncScoringControl4TriggeredFunction]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class AsyncScoringControlAsyncScoringControl5(TypedDict):
|
|
142
|
+
kind: Literal['complete_triggered_functions']
|
|
143
|
+
function_ids: Sequence[Any]
|
|
144
|
+
triggered_xact_id: str
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class AsyncScoringControlAsyncScoringControl6(TypedDict):
|
|
148
|
+
kind: Literal['mark_attempt_failed']
|
|
149
|
+
function_ids: Sequence[Any]
|
|
150
|
+
|
|
151
|
+
|
|
120
152
|
class AsyncScoringStateAsyncScoringState(TypedDict):
|
|
121
153
|
status: Literal['enabled']
|
|
122
154
|
token: str
|
|
123
155
|
function_ids: Sequence[Any]
|
|
124
156
|
skip_logging: NotRequired[bool | None]
|
|
157
|
+
triggered_functions: NotRequired[Mapping[str, Any] | None]
|
|
125
158
|
|
|
126
159
|
|
|
127
160
|
class AsyncScoringStateAsyncScoringState1(TypedDict):
|
|
@@ -131,6 +164,38 @@ class AsyncScoringStateAsyncScoringState1(TypedDict):
|
|
|
131
164
|
AsyncScoringState: TypeAlias = AsyncScoringStateAsyncScoringState | AsyncScoringStateAsyncScoringState1 | None
|
|
132
165
|
|
|
133
166
|
|
|
167
|
+
class PreprocessorPreprocessor(TypedDict):
|
|
168
|
+
type: Literal['function']
|
|
169
|
+
id: str
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class PreprocessorPreprocessor2(TypedDict):
|
|
173
|
+
pass
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class PreprocessorPreprocessor3(PreprocessorPreprocessor, PreprocessorPreprocessor2):
|
|
177
|
+
pass
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class BatchedFacetDataFacet(TypedDict):
|
|
181
|
+
name: str
|
|
182
|
+
"""
|
|
183
|
+
The name of the facet
|
|
184
|
+
"""
|
|
185
|
+
prompt: str
|
|
186
|
+
"""
|
|
187
|
+
The prompt to use for LLM extraction. The preprocessed text will be provided as context.
|
|
188
|
+
"""
|
|
189
|
+
model: NotRequired[str | None]
|
|
190
|
+
"""
|
|
191
|
+
The model to use for facet extraction
|
|
192
|
+
"""
|
|
193
|
+
no_match_pattern: NotRequired[str | None]
|
|
194
|
+
"""
|
|
195
|
+
Regex pattern to identify outputs that do not match the facet. If the output matches, the facet will be saved as 'no_match'
|
|
196
|
+
"""
|
|
197
|
+
|
|
198
|
+
|
|
134
199
|
class BraintrustAttachmentReference(TypedDict):
|
|
135
200
|
type: Literal['braintrust_attachment']
|
|
136
201
|
"""
|
|
@@ -424,6 +489,10 @@ class Dataset(TypedDict):
|
|
|
424
489
|
"""
|
|
425
490
|
User-controlled metadata about the dataset
|
|
426
491
|
"""
|
|
492
|
+
url_slug: str
|
|
493
|
+
"""
|
|
494
|
+
URL slug for the dataset. used to construct dataset URLs
|
|
495
|
+
"""
|
|
427
496
|
|
|
428
497
|
|
|
429
498
|
class DatasetEventMetadata(TypedDict):
|
|
@@ -472,6 +541,43 @@ class EnvVar(TypedDict):
|
|
|
472
541
|
"""
|
|
473
542
|
|
|
474
543
|
|
|
544
|
+
class EvalStatusPageConfig(TypedDict):
|
|
545
|
+
score_columns: NotRequired[Sequence[str] | None]
|
|
546
|
+
"""
|
|
547
|
+
The score columns to display on the page
|
|
548
|
+
"""
|
|
549
|
+
metric_columns: NotRequired[Sequence[str] | None]
|
|
550
|
+
"""
|
|
551
|
+
The metric columns to display on the page
|
|
552
|
+
"""
|
|
553
|
+
grouping_field: NotRequired[str | None]
|
|
554
|
+
"""
|
|
555
|
+
The metadata field to use for grouping experiments (model)
|
|
556
|
+
"""
|
|
557
|
+
filter: NotRequired[str | None]
|
|
558
|
+
"""
|
|
559
|
+
BTQL filter to apply to experiment data
|
|
560
|
+
"""
|
|
561
|
+
sort_by: NotRequired[str | None]
|
|
562
|
+
"""
|
|
563
|
+
Field to sort results by (format: 'score:<name>' or 'metric:<name>')
|
|
564
|
+
"""
|
|
565
|
+
sort_order: NotRequired[Literal['asc', 'desc'] | None]
|
|
566
|
+
"""
|
|
567
|
+
Sort order (ascending or descending)
|
|
568
|
+
"""
|
|
569
|
+
api_key: NotRequired[str | None]
|
|
570
|
+
"""
|
|
571
|
+
The API key used for fetching experiment data
|
|
572
|
+
"""
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
EvalStatusPageTheme: TypeAlias = Literal['light', 'dark']
|
|
576
|
+
"""
|
|
577
|
+
The theme for the page
|
|
578
|
+
"""
|
|
579
|
+
|
|
580
|
+
|
|
475
581
|
class ExperimentEventMetadata(TypedDict):
|
|
476
582
|
model: NotRequired[str | None]
|
|
477
583
|
"""
|
|
@@ -559,16 +665,16 @@ class ExternalAttachmentReference(TypedDict):
|
|
|
559
665
|
"""
|
|
560
666
|
|
|
561
667
|
|
|
562
|
-
class
|
|
668
|
+
class Preprocessor1Preprocessor1(TypedDict):
|
|
563
669
|
type: Literal['function']
|
|
564
670
|
id: str
|
|
565
671
|
|
|
566
672
|
|
|
567
|
-
class
|
|
673
|
+
class Preprocessor1Preprocessor12(TypedDict):
|
|
568
674
|
pass
|
|
569
675
|
|
|
570
676
|
|
|
571
|
-
class
|
|
677
|
+
class Preprocessor1Preprocessor13(Preprocessor1Preprocessor1, Preprocessor1Preprocessor12):
|
|
572
678
|
pass
|
|
573
679
|
|
|
574
680
|
|
|
@@ -689,17 +795,24 @@ FunctionIdRef: TypeAlias = Mapping[str, Any]
|
|
|
689
795
|
|
|
690
796
|
|
|
691
797
|
FunctionObjectType: TypeAlias = Literal[
|
|
692
|
-
'prompt', 'tool', 'scorer', 'task', '
|
|
798
|
+
'prompt', 'tool', 'scorer', 'task', 'workflow', 'custom_view', 'preprocessor', 'facet', 'classifier'
|
|
693
799
|
]
|
|
694
800
|
|
|
695
801
|
|
|
696
|
-
FunctionOutputType: TypeAlias = Literal['completion', 'score', 'any']
|
|
802
|
+
FunctionOutputType: TypeAlias = Literal['completion', 'score', 'facet', 'classification', 'any']
|
|
697
803
|
|
|
698
804
|
|
|
699
|
-
FunctionTypeEnum: TypeAlias = Literal[
|
|
805
|
+
FunctionTypeEnum: TypeAlias = Literal[
|
|
806
|
+
'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier'
|
|
807
|
+
]
|
|
808
|
+
"""
|
|
809
|
+
The type of global function. Defaults to 'scorer'.
|
|
810
|
+
"""
|
|
700
811
|
|
|
701
812
|
|
|
702
|
-
FunctionTypeEnumNullish: TypeAlias = Literal[
|
|
813
|
+
FunctionTypeEnumNullish: TypeAlias = Literal[
|
|
814
|
+
'llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet', 'classifier'
|
|
815
|
+
]
|
|
703
816
|
|
|
704
817
|
|
|
705
818
|
class GitMetadataSettings(TypedDict):
|
|
@@ -980,6 +1093,18 @@ class Group(TypedDict):
|
|
|
980
1093
|
"""
|
|
981
1094
|
|
|
982
1095
|
|
|
1096
|
+
class GroupScope(TypedDict):
|
|
1097
|
+
type: Literal['group']
|
|
1098
|
+
group_by: str
|
|
1099
|
+
"""
|
|
1100
|
+
Field path to group by, e.g. metadata.session_id
|
|
1101
|
+
"""
|
|
1102
|
+
idle_seconds: NotRequired[float | None]
|
|
1103
|
+
"""
|
|
1104
|
+
Optional: trigger after this many seconds of inactivity
|
|
1105
|
+
"""
|
|
1106
|
+
|
|
1107
|
+
|
|
983
1108
|
IfExists: TypeAlias = Literal['error', 'ignore', 'replace']
|
|
984
1109
|
|
|
985
1110
|
|
|
@@ -1009,6 +1134,14 @@ class InvokeFunctionInvokeFunction1(TypedDict):
|
|
|
1009
1134
|
"""
|
|
1010
1135
|
|
|
1011
1136
|
|
|
1137
|
+
class InvokeFunctionInvokeFunction2(TypedDict):
|
|
1138
|
+
global_function: str
|
|
1139
|
+
"""
|
|
1140
|
+
The name of the global function. Currently, the global namespace includes the functions in autoevals
|
|
1141
|
+
"""
|
|
1142
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1143
|
+
|
|
1144
|
+
|
|
1012
1145
|
class InvokeFunctionInvokeFunction3(TypedDict):
|
|
1013
1146
|
prompt_session_id: str
|
|
1014
1147
|
"""
|
|
@@ -1175,12 +1308,6 @@ class ModelParamsModelParams4(TypedDict):
|
|
|
1175
1308
|
reasoning_budget: NotRequired[float | None]
|
|
1176
1309
|
|
|
1177
1310
|
|
|
1178
|
-
NullableFunctionTypeEnum: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet']
|
|
1179
|
-
"""
|
|
1180
|
-
The type of global function. If unspecified, defaults to 'scorer' for backward compatibility.
|
|
1181
|
-
"""
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
1311
|
class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
|
|
1185
1312
|
type: Literal['function']
|
|
1186
1313
|
id: str
|
|
@@ -1189,7 +1316,7 @@ class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
|
|
|
1189
1316
|
class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict):
|
|
1190
1317
|
type: Literal['global']
|
|
1191
1318
|
name: str
|
|
1192
|
-
function_type: NotRequired[
|
|
1319
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1193
1320
|
|
|
1194
1321
|
|
|
1195
1322
|
NullableSavedFunctionId: TypeAlias = (
|
|
@@ -1597,7 +1724,18 @@ class PromptDataNullishOrigin(TypedDict):
|
|
|
1597
1724
|
class PromptParserNullish(TypedDict):
|
|
1598
1725
|
type: Literal['llm_classifier']
|
|
1599
1726
|
use_cot: bool
|
|
1600
|
-
choice_scores: Mapping[str, float]
|
|
1727
|
+
choice_scores: NotRequired[Mapping[str, float] | None]
|
|
1728
|
+
"""
|
|
1729
|
+
Map of choices to scores (0-1). Used by scorers.
|
|
1730
|
+
"""
|
|
1731
|
+
choice: NotRequired[Sequence[str] | None]
|
|
1732
|
+
"""
|
|
1733
|
+
List of valid choices without score mapping. Used by classifiers that deposit output to tags.
|
|
1734
|
+
"""
|
|
1735
|
+
allow_no_match: NotRequired[bool | None]
|
|
1736
|
+
"""
|
|
1737
|
+
If true, adds a 'No match' option. When selected, no tag is deposited.
|
|
1738
|
+
"""
|
|
1601
1739
|
|
|
1602
1740
|
|
|
1603
1741
|
class PromptSessionEvent(TypedDict):
|
|
@@ -1829,7 +1967,7 @@ class TaskTask2(TypedDict):
|
|
|
1829
1967
|
"""
|
|
1830
1968
|
The name of the global function. Currently, the global namespace includes the functions in autoevals
|
|
1831
1969
|
"""
|
|
1832
|
-
function_type: NotRequired[
|
|
1970
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1833
1971
|
|
|
1834
1972
|
|
|
1835
1973
|
class TaskTask3(TypedDict):
|
|
@@ -1945,7 +2083,7 @@ class SavedFunctionIdSavedFunctionId(TypedDict):
|
|
|
1945
2083
|
class SavedFunctionIdSavedFunctionId1(TypedDict):
|
|
1946
2084
|
type: Literal['global']
|
|
1947
2085
|
name: str
|
|
1948
|
-
function_type: NotRequired[
|
|
2086
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
1949
2087
|
|
|
1950
2088
|
|
|
1951
2089
|
SavedFunctionId: TypeAlias = SavedFunctionIdSavedFunctionId | SavedFunctionIdSavedFunctionId1
|
|
@@ -2024,17 +2162,11 @@ class SpanIFrame(TypedDict):
|
|
|
2024
2162
|
|
|
2025
2163
|
class SpanScope(TypedDict):
|
|
2026
2164
|
type: Literal['span']
|
|
2027
|
-
root_span_id: str
|
|
2028
|
-
"""
|
|
2029
|
-
The root span id is a unique identifier for the trace.
|
|
2030
|
-
"""
|
|
2031
|
-
id: str
|
|
2032
|
-
"""
|
|
2033
|
-
A unique identifier for the span.
|
|
2034
|
-
"""
|
|
2035
2165
|
|
|
2036
2166
|
|
|
2037
|
-
SpanType: TypeAlias = Literal[
|
|
2167
|
+
SpanType: TypeAlias = Literal[
|
|
2168
|
+
'llm', 'score', 'function', 'eval', 'task', 'tool', 'automation', 'facet', 'preprocessor', 'classifier'
|
|
2169
|
+
]
|
|
2038
2170
|
"""
|
|
2039
2171
|
Type of the span, for display purposes only
|
|
2040
2172
|
"""
|
|
@@ -2079,9 +2211,42 @@ class ToolFunctionDefinition(TypedDict):
|
|
|
2079
2211
|
|
|
2080
2212
|
class TraceScope(TypedDict):
|
|
2081
2213
|
type: Literal['trace']
|
|
2082
|
-
|
|
2214
|
+
idle_seconds: NotRequired[float | None]
|
|
2215
|
+
"""
|
|
2216
|
+
Consider trace complete after this many seconds of inactivity (default: 30)
|
|
2217
|
+
"""
|
|
2218
|
+
|
|
2219
|
+
|
|
2220
|
+
class TriggeredFunctionStateScope(TypedDict):
|
|
2221
|
+
type: Literal['span']
|
|
2222
|
+
|
|
2223
|
+
|
|
2224
|
+
class TriggeredFunctionStateScope1(TypedDict):
|
|
2225
|
+
type: Literal['trace']
|
|
2226
|
+
|
|
2227
|
+
|
|
2228
|
+
class TriggeredFunctionStateScope2(TypedDict):
|
|
2229
|
+
type: Literal['group']
|
|
2230
|
+
key: str
|
|
2231
|
+
value: str
|
|
2232
|
+
|
|
2233
|
+
|
|
2234
|
+
class TriggeredFunctionState(TypedDict):
|
|
2235
|
+
triggered_xact_id: str
|
|
2236
|
+
"""
|
|
2237
|
+
The xact_id when this function was triggered
|
|
2083
2238
|
"""
|
|
2084
|
-
|
|
2239
|
+
completed_xact_id: NotRequired[str | None]
|
|
2240
|
+
"""
|
|
2241
|
+
The xact_id when this function completed (matches triggered_xact_id if done)
|
|
2242
|
+
"""
|
|
2243
|
+
attempts: NotRequired[int | None]
|
|
2244
|
+
"""
|
|
2245
|
+
Number of execution attempts (for retry tracking)
|
|
2246
|
+
"""
|
|
2247
|
+
scope: TriggeredFunctionStateScope | TriggeredFunctionStateScope1 | TriggeredFunctionStateScope2
|
|
2248
|
+
"""
|
|
2249
|
+
The scope of data this function operates on
|
|
2085
2250
|
"""
|
|
2086
2251
|
|
|
2087
2252
|
|
|
@@ -2278,6 +2443,9 @@ AsyncScoringControl: TypeAlias = (
|
|
|
2278
2443
|
| AsyncScoringControlAsyncScoringControl1
|
|
2279
2444
|
| AsyncScoringControlAsyncScoringControl2
|
|
2280
2445
|
| AsyncScoringControlAsyncScoringControl3
|
|
2446
|
+
| AsyncScoringControlAsyncScoringControl4
|
|
2447
|
+
| AsyncScoringControlAsyncScoringControl5
|
|
2448
|
+
| AsyncScoringControlAsyncScoringControl6
|
|
2281
2449
|
)
|
|
2282
2450
|
|
|
2283
2451
|
|
|
@@ -2292,6 +2460,25 @@ class AttachmentStatus(TypedDict):
|
|
|
2292
2460
|
"""
|
|
2293
2461
|
|
|
2294
2462
|
|
|
2463
|
+
class PreprocessorPreprocessor1(TypedDict):
|
|
2464
|
+
type: Literal['global']
|
|
2465
|
+
name: str
|
|
2466
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2467
|
+
|
|
2468
|
+
|
|
2469
|
+
class PreprocessorPreprocessor4(PreprocessorPreprocessor1, PreprocessorPreprocessor2):
|
|
2470
|
+
pass
|
|
2471
|
+
|
|
2472
|
+
|
|
2473
|
+
Preprocessor: TypeAlias = PreprocessorPreprocessor3 | PreprocessorPreprocessor4
|
|
2474
|
+
|
|
2475
|
+
|
|
2476
|
+
class BatchedFacetData(TypedDict):
|
|
2477
|
+
type: Literal['batched_facet']
|
|
2478
|
+
preprocessor: NotRequired[Preprocessor | None]
|
|
2479
|
+
facets: Sequence[BatchedFacetDataFacet]
|
|
2480
|
+
|
|
2481
|
+
|
|
2295
2482
|
ChatCompletionContentPart: TypeAlias = (
|
|
2296
2483
|
ChatCompletionContentPartTextWithTitle
|
|
2297
2484
|
| ChatCompletionContentPartImageWithTitle
|
|
@@ -2384,7 +2571,7 @@ class DatasetEvent(TypedDict):
|
|
|
2384
2571
|
"""
|
|
2385
2572
|
span_id: str
|
|
2386
2573
|
"""
|
|
2387
|
-
A unique identifier used to link different dataset events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/
|
|
2574
|
+
A unique identifier used to link different dataset events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
|
|
2388
2575
|
"""
|
|
2389
2576
|
root_span_id: str
|
|
2390
2577
|
"""
|
|
@@ -2405,6 +2592,43 @@ class DatasetEvent(TypedDict):
|
|
|
2405
2592
|
"""
|
|
2406
2593
|
|
|
2407
2594
|
|
|
2595
|
+
class EvalStatusPage(TypedDict):
|
|
2596
|
+
id: str
|
|
2597
|
+
"""
|
|
2598
|
+
Unique identifier for the eval status page
|
|
2599
|
+
"""
|
|
2600
|
+
project_id: str
|
|
2601
|
+
"""
|
|
2602
|
+
Unique identifier for the project that the eval status page belongs under
|
|
2603
|
+
"""
|
|
2604
|
+
user_id: NotRequired[str | None]
|
|
2605
|
+
"""
|
|
2606
|
+
Identifies the user who created the eval status page
|
|
2607
|
+
"""
|
|
2608
|
+
created: NotRequired[str | None]
|
|
2609
|
+
"""
|
|
2610
|
+
Date of eval status page creation
|
|
2611
|
+
"""
|
|
2612
|
+
deleted_at: NotRequired[str | None]
|
|
2613
|
+
"""
|
|
2614
|
+
Date of eval status page deletion, or null if the eval status page is still active
|
|
2615
|
+
"""
|
|
2616
|
+
name: str
|
|
2617
|
+
"""
|
|
2618
|
+
Name of the eval status page
|
|
2619
|
+
"""
|
|
2620
|
+
description: NotRequired[str | None]
|
|
2621
|
+
"""
|
|
2622
|
+
Textual description of the eval status page
|
|
2623
|
+
"""
|
|
2624
|
+
logo_url: NotRequired[str | None]
|
|
2625
|
+
"""
|
|
2626
|
+
URL of the logo to display on the page
|
|
2627
|
+
"""
|
|
2628
|
+
theme: EvalStatusPageTheme
|
|
2629
|
+
config: EvalStatusPageConfig
|
|
2630
|
+
|
|
2631
|
+
|
|
2408
2632
|
class Experiment(TypedDict):
|
|
2409
2633
|
id: str
|
|
2410
2634
|
"""
|
|
@@ -2468,7 +2692,7 @@ class Experiment(TypedDict):
|
|
|
2468
2692
|
class ExtendedSavedFunctionIdExtendedSavedFunctionId1(TypedDict):
|
|
2469
2693
|
type: Literal['global']
|
|
2470
2694
|
name: str
|
|
2471
|
-
function_type: NotRequired[
|
|
2695
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2472
2696
|
|
|
2473
2697
|
|
|
2474
2698
|
ExtendedSavedFunctionId: TypeAlias = (
|
|
@@ -2478,22 +2702,22 @@ ExtendedSavedFunctionId: TypeAlias = (
|
|
|
2478
2702
|
)
|
|
2479
2703
|
|
|
2480
2704
|
|
|
2481
|
-
class
|
|
2705
|
+
class Preprocessor1Preprocessor11(TypedDict):
|
|
2482
2706
|
type: Literal['global']
|
|
2483
2707
|
name: str
|
|
2484
|
-
function_type: NotRequired[
|
|
2708
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2485
2709
|
|
|
2486
2710
|
|
|
2487
|
-
class
|
|
2711
|
+
class Preprocessor1Preprocessor14(Preprocessor1Preprocessor11, Preprocessor1Preprocessor12):
|
|
2488
2712
|
pass
|
|
2489
2713
|
|
|
2490
2714
|
|
|
2491
|
-
|
|
2715
|
+
Preprocessor1: TypeAlias = Preprocessor1Preprocessor13 | Preprocessor1Preprocessor14
|
|
2492
2716
|
|
|
2493
2717
|
|
|
2494
2718
|
class FacetData(TypedDict):
|
|
2495
2719
|
type: Literal['facet']
|
|
2496
|
-
preprocessor: NotRequired[
|
|
2720
|
+
preprocessor: NotRequired[Preprocessor1 | None]
|
|
2497
2721
|
prompt: str
|
|
2498
2722
|
"""
|
|
2499
2723
|
The prompt to use for LLM extraction. The preprocessed text will be provided as context.
|
|
@@ -2511,7 +2735,7 @@ class FacetData(TypedDict):
|
|
|
2511
2735
|
class FunctionDataFunctionData3(TypedDict):
|
|
2512
2736
|
type: Literal['global']
|
|
2513
2737
|
name: str
|
|
2514
|
-
function_type: NotRequired[
|
|
2738
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2515
2739
|
config: NotRequired[Mapping[str, Any] | None]
|
|
2516
2740
|
"""
|
|
2517
2741
|
Configuration options to pass to the global function (e.g., for preprocessor customization)
|
|
@@ -2523,21 +2747,68 @@ class FunctionIdFunctionId2(TypedDict):
|
|
|
2523
2747
|
"""
|
|
2524
2748
|
The name of the global function. Currently, the global namespace includes the functions in autoevals
|
|
2525
2749
|
"""
|
|
2526
|
-
function_type: NotRequired[
|
|
2750
|
+
function_type: NotRequired[FunctionTypeEnum | None]
|
|
2527
2751
|
|
|
2528
2752
|
|
|
2529
|
-
class
|
|
2530
|
-
|
|
2753
|
+
class InvokeFunctionInvokeFunction7(TypedDict):
|
|
2754
|
+
input: NotRequired[Any | None]
|
|
2531
2755
|
"""
|
|
2532
|
-
|
|
2756
|
+
Argument to the function, which can be any JSON serializable value
|
|
2757
|
+
"""
|
|
2758
|
+
expected: NotRequired[Any | None]
|
|
2759
|
+
"""
|
|
2760
|
+
The expected output of the function
|
|
2761
|
+
"""
|
|
2762
|
+
metadata: NotRequired[Mapping[str, Any] | None]
|
|
2763
|
+
"""
|
|
2764
|
+
Any relevant metadata. This will be logged and available as the `metadata` argument.
|
|
2765
|
+
"""
|
|
2766
|
+
tags: NotRequired[Sequence[str] | None]
|
|
2767
|
+
"""
|
|
2768
|
+
Any relevant tags to log on the span.
|
|
2769
|
+
"""
|
|
2770
|
+
messages: NotRequired[Sequence[ChatCompletionMessageParam] | None]
|
|
2771
|
+
"""
|
|
2772
|
+
If the function is an LLM, additional messages to pass along to it
|
|
2773
|
+
"""
|
|
2774
|
+
parent: NotRequired[InvokeParent | None]
|
|
2775
|
+
stream: NotRequired[bool | None]
|
|
2776
|
+
"""
|
|
2777
|
+
Whether to stream the response. If true, results will be returned in the Braintrust SSE format.
|
|
2778
|
+
"""
|
|
2779
|
+
mode: NotRequired[StreamingMode | None]
|
|
2780
|
+
strict: NotRequired[bool | None]
|
|
2781
|
+
"""
|
|
2782
|
+
If true, throw an error if one of the variables in the prompt is not present in the input
|
|
2783
|
+
"""
|
|
2784
|
+
mcp_auth: NotRequired[Mapping[str, InvokeFunctionMcpAuth] | None]
|
|
2785
|
+
"""
|
|
2786
|
+
Map of MCP server URL to auth credentials
|
|
2787
|
+
"""
|
|
2788
|
+
overrides: NotRequired[Mapping[str, Any] | None]
|
|
2789
|
+
"""
|
|
2790
|
+
Partial function definition to merge with the function being invoked. Fields are validated against the function type's schema at runtime. For facets: { preprocessor?, prompt?, model? }. For prompts: { model?, ... }.
|
|
2533
2791
|
"""
|
|
2534
|
-
function_type: NotRequired[NullableFunctionTypeEnum | None]
|
|
2535
2792
|
|
|
2536
2793
|
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2794
|
+
class InvokeFunctionInvokeFunction8(InvokeFunctionInvokeFunction, InvokeFunctionInvokeFunction7):
|
|
2795
|
+
pass
|
|
2796
|
+
|
|
2797
|
+
|
|
2798
|
+
class InvokeFunctionInvokeFunction9(InvokeFunctionInvokeFunction1, InvokeFunctionInvokeFunction7):
|
|
2799
|
+
pass
|
|
2800
|
+
|
|
2801
|
+
|
|
2802
|
+
class InvokeFunctionInvokeFunction10(InvokeFunctionInvokeFunction2, InvokeFunctionInvokeFunction7):
|
|
2803
|
+
pass
|
|
2804
|
+
|
|
2805
|
+
|
|
2806
|
+
class InvokeFunctionInvokeFunction11(InvokeFunctionInvokeFunction3, InvokeFunctionInvokeFunction7):
|
|
2807
|
+
pass
|
|
2808
|
+
|
|
2809
|
+
|
|
2810
|
+
class InvokeFunctionInvokeFunction12(InvokeFunctionInvokeFunction4, InvokeFunctionInvokeFunction7):
|
|
2811
|
+
pass
|
|
2541
2812
|
|
|
2542
2813
|
|
|
2543
2814
|
class ModelParamsModelParams(TypedDict):
|
|
@@ -2580,7 +2851,7 @@ class OnlineScoreConfig(TypedDict):
|
|
|
2580
2851
|
"""
|
|
2581
2852
|
scorers: Sequence[SavedFunctionId]
|
|
2582
2853
|
"""
|
|
2583
|
-
The list of
|
|
2854
|
+
The list of functions to run for online scoring. Can include scorers, facets, or other function types.
|
|
2584
2855
|
"""
|
|
2585
2856
|
btql_filter: NotRequired[str | None]
|
|
2586
2857
|
"""
|
|
@@ -2588,16 +2859,20 @@ class OnlineScoreConfig(TypedDict):
|
|
|
2588
2859
|
"""
|
|
2589
2860
|
apply_to_root_span: NotRequired[bool | None]
|
|
2590
2861
|
"""
|
|
2591
|
-
Whether to trigger online scoring on the root span of each trace
|
|
2862
|
+
Whether to trigger online scoring on the root span of each trace. Only applies when scope is 'span' or unset.
|
|
2592
2863
|
"""
|
|
2593
2864
|
apply_to_span_names: NotRequired[Sequence[str] | None]
|
|
2594
2865
|
"""
|
|
2595
|
-
Trigger online scoring on any spans with a name in this list
|
|
2866
|
+
Trigger online scoring on any spans with a name in this list. Only applies when scope is 'span' or unset.
|
|
2596
2867
|
"""
|
|
2597
2868
|
skip_logging: NotRequired[bool | None]
|
|
2598
2869
|
"""
|
|
2599
2870
|
Whether to skip adding scorer spans when computing scores
|
|
2600
2871
|
"""
|
|
2872
|
+
scope: NotRequired[SpanScope | TraceScope | GroupScope | None]
|
|
2873
|
+
"""
|
|
2874
|
+
The scope at which to run the functions. Defaults to span-level execution. Trace/group scope requires all functions to be facets.
|
|
2875
|
+
"""
|
|
2601
2876
|
|
|
2602
2877
|
|
|
2603
2878
|
class Project(TypedDict):
|
|
@@ -2802,7 +3077,7 @@ class ExperimentEvent(TypedDict):
|
|
|
2802
3077
|
"""
|
|
2803
3078
|
span_id: str
|
|
2804
3079
|
"""
|
|
2805
|
-
A unique identifier used to link different experiment events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/
|
|
3080
|
+
A unique identifier used to link different experiment events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
|
|
2806
3081
|
"""
|
|
2807
3082
|
span_parents: NotRequired[Sequence[str] | None]
|
|
2808
3083
|
"""
|
|
@@ -2853,80 +3128,6 @@ GraphNode: TypeAlias = (
|
|
|
2853
3128
|
)
|
|
2854
3129
|
|
|
2855
3130
|
|
|
2856
|
-
class InvokeContext(TypedDict):
|
|
2857
|
-
object_type: Literal['project_logs', 'experiment', 'dataset', 'playground_logs']
|
|
2858
|
-
"""
|
|
2859
|
-
The type of object containing the span data
|
|
2860
|
-
"""
|
|
2861
|
-
object_id: str
|
|
2862
|
-
"""
|
|
2863
|
-
The ID of the object containing the span data
|
|
2864
|
-
"""
|
|
2865
|
-
scope: InvokeScope
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
class InvokeFunctionInvokeFunction7(TypedDict):
|
|
2869
|
-
input: NotRequired[Any | None]
|
|
2870
|
-
"""
|
|
2871
|
-
Argument to the function, which can be any JSON serializable value
|
|
2872
|
-
"""
|
|
2873
|
-
expected: NotRequired[Any | None]
|
|
2874
|
-
"""
|
|
2875
|
-
The expected output of the function
|
|
2876
|
-
"""
|
|
2877
|
-
metadata: NotRequired[Mapping[str, Any] | None]
|
|
2878
|
-
"""
|
|
2879
|
-
Any relevant metadata. This will be logged and available as the `metadata` argument.
|
|
2880
|
-
"""
|
|
2881
|
-
tags: NotRequired[Sequence[str] | None]
|
|
2882
|
-
"""
|
|
2883
|
-
Any relevant tags to log on the span.
|
|
2884
|
-
"""
|
|
2885
|
-
messages: NotRequired[Sequence[ChatCompletionMessageParam] | None]
|
|
2886
|
-
"""
|
|
2887
|
-
If the function is an LLM, additional messages to pass along to it
|
|
2888
|
-
"""
|
|
2889
|
-
context: NotRequired[InvokeContext | None]
|
|
2890
|
-
parent: NotRequired[InvokeParent | None]
|
|
2891
|
-
stream: NotRequired[bool | None]
|
|
2892
|
-
"""
|
|
2893
|
-
Whether to stream the response. If true, results will be returned in the Braintrust SSE format.
|
|
2894
|
-
"""
|
|
2895
|
-
mode: NotRequired[StreamingMode | None]
|
|
2896
|
-
strict: NotRequired[bool | None]
|
|
2897
|
-
"""
|
|
2898
|
-
If true, throw an error if one of the variables in the prompt is not present in the input
|
|
2899
|
-
"""
|
|
2900
|
-
mcp_auth: NotRequired[Mapping[str, InvokeFunctionMcpAuth] | None]
|
|
2901
|
-
"""
|
|
2902
|
-
Map of MCP server URL to auth credentials
|
|
2903
|
-
"""
|
|
2904
|
-
overrides: NotRequired[Mapping[str, Any] | None]
|
|
2905
|
-
"""
|
|
2906
|
-
Partial function definition to merge with the function being invoked. Fields are validated against the function type's schema at runtime. For facets: { preprocessor?, prompt?, model? }. For prompts: { model?, ... }.
|
|
2907
|
-
"""
|
|
2908
|
-
|
|
2909
|
-
|
|
2910
|
-
class InvokeFunctionInvokeFunction8(InvokeFunctionInvokeFunction, InvokeFunctionInvokeFunction7):
|
|
2911
|
-
pass
|
|
2912
|
-
|
|
2913
|
-
|
|
2914
|
-
class InvokeFunctionInvokeFunction9(InvokeFunctionInvokeFunction1, InvokeFunctionInvokeFunction7):
|
|
2915
|
-
pass
|
|
2916
|
-
|
|
2917
|
-
|
|
2918
|
-
class InvokeFunctionInvokeFunction10(InvokeFunctionInvokeFunction2, InvokeFunctionInvokeFunction7):
|
|
2919
|
-
pass
|
|
2920
|
-
|
|
2921
|
-
|
|
2922
|
-
class InvokeFunctionInvokeFunction11(InvokeFunctionInvokeFunction3, InvokeFunctionInvokeFunction7):
|
|
2923
|
-
pass
|
|
2924
|
-
|
|
2925
|
-
|
|
2926
|
-
class InvokeFunctionInvokeFunction12(InvokeFunctionInvokeFunction4, InvokeFunctionInvokeFunction7):
|
|
2927
|
-
pass
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
3131
|
class ProjectLogsEvent(TypedDict):
|
|
2931
3132
|
id: str
|
|
2932
3133
|
"""
|
|
@@ -2994,7 +3195,7 @@ class ProjectLogsEvent(TypedDict):
|
|
|
2994
3195
|
"""
|
|
2995
3196
|
span_id: str
|
|
2996
3197
|
"""
|
|
2997
|
-
A unique identifier used to link different project logs events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/
|
|
3198
|
+
A unique identifier used to link different project logs events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
|
|
2998
3199
|
"""
|
|
2999
3200
|
span_parents: NotRequired[Sequence[str] | None]
|
|
3000
3201
|
"""
|
|
@@ -3126,8 +3327,8 @@ class View(TypedDict):
|
|
|
3126
3327
|
'prompts',
|
|
3127
3328
|
'tools',
|
|
3128
3329
|
'scorers',
|
|
3330
|
+
'classifiers',
|
|
3129
3331
|
'logs',
|
|
3130
|
-
'agents',
|
|
3131
3332
|
'monitor',
|
|
3132
3333
|
'for_review',
|
|
3133
3334
|
]
|
|
@@ -3362,6 +3563,7 @@ FunctionData: TypeAlias = (
|
|
|
3362
3563
|
| FunctionDataFunctionData2
|
|
3363
3564
|
| FunctionDataFunctionData3
|
|
3364
3565
|
| FacetData
|
|
3566
|
+
| BatchedFacetData
|
|
3365
3567
|
)
|
|
3366
3568
|
|
|
3367
3569
|
|