PyPI - braintrust - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

braintrust/_generated_types.py +224 -122
braintrust/cli/install/api.py +1 -1
braintrust/conftest.py +24 -0
braintrust/db_fields.py +1 -0
braintrust/devserver/test_server_integration.py +0 -11
braintrust/framework.py +2 -2
braintrust/functions/invoke.py +1 -8
braintrust/generated_types.py +7 -7
braintrust/logger.py +30 -38
braintrust/otel/__init__.py +24 -15
braintrust/prompt_cache/test_disk_cache.py +3 -3
braintrust/span_types.py +3 -0
braintrust/test_bt_json.py +23 -19
braintrust/test_framework.py +25 -0
braintrust/test_logger.py +34 -0
braintrust/test_otel.py +118 -26
braintrust/test_util.py +51 -1
braintrust/util.py +24 -3
braintrust/version.py +2 -2
braintrust/wrappers/langsmith_wrapper.py +517 -0
braintrust/wrappers/litellm.py +43 -0
braintrust/wrappers/test_agno.py +0 -12
braintrust/wrappers/test_anthropic.py +1 -11
braintrust/wrappers/test_dspy.py +0 -11
braintrust/wrappers/test_google_genai.py +6 -1
braintrust/wrappers/test_langsmith_wrapper.py +338 -0
braintrust/wrappers/test_litellm.py +73 -10
braintrust/wrappers/test_oai_attachments.py +0 -10
braintrust/wrappers/test_openai.py +3 -12
braintrust/wrappers/test_openrouter.py +0 -9
braintrust/wrappers/test_pydantic_ai_integration.py +0 -11
braintrust/wrappers/test_pydantic_ai_wrap_openai.py +2 -0
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/METADATA +1 -1
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/RECORD +37 -35
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/WHEEL +0 -0
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/entry_points.txt +0 -0
{braintrust-0.4.1.dist-info → braintrust-0.4.3.dist-info}/top_level.txt +0 -0

braintrust/_generated_types.py CHANGED Viewed

@@ -106,7 +106,7 @@ class ApiKey(TypedDict):
 class AsyncScoringControlAsyncScoringControl(TypedDict):
     kind: Literal['score_update']
-    token: str
+    token: NotRequired[str | None]
 class AsyncScoringControlAsyncScoringControl2(TypedDict):
@@ -117,11 +117,39 @@ class AsyncScoringControlAsyncScoringControl3(TypedDict):
     kind: Literal['state_enabled_force_rescore']
+class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope(TypedDict):
+    type: Literal['span']
+class AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1(TypedDict):
+    type: Literal['trace']
+class AsyncScoringControlAsyncScoringControl4TriggeredFunction(TypedDict):
+    function_id: NotRequired[Any | None]
+    scope: (
+        AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope
+        | AsyncScoringControlAsyncScoringControl4TriggeredFunctionScope1
+    )
+class AsyncScoringControlAsyncScoringControl4(TypedDict):
+    kind: Literal['trigger_functions']
+    triggered_functions: Sequence[AsyncScoringControlAsyncScoringControl4TriggeredFunction]
+class AsyncScoringControlAsyncScoringControl5(TypedDict):
+    kind: Literal['complete_triggered_functions']
+    function_ids: Sequence[Any]
+    triggered_xact_id: str
 class AsyncScoringStateAsyncScoringState(TypedDict):
     status: Literal['enabled']
     token: str
     function_ids: Sequence[Any]
     skip_logging: NotRequired[bool | None]
+    triggered_functions: NotRequired[Mapping[str, Any] | None]
 class AsyncScoringStateAsyncScoringState1(TypedDict):
@@ -131,6 +159,38 @@ class AsyncScoringStateAsyncScoringState1(TypedDict):
 AsyncScoringState: TypeAlias = AsyncScoringStateAsyncScoringState | AsyncScoringStateAsyncScoringState1 | None
+class PreprocessorPreprocessor(TypedDict):
+    type: Literal['function']
+    id: str
+class PreprocessorPreprocessor2(TypedDict):
+    pass
+class PreprocessorPreprocessor3(PreprocessorPreprocessor, PreprocessorPreprocessor2):
+    pass
+class BatchedFacetDataFacet(TypedDict):
+    name: str
+    """
+    The name of the facet
+    """
+    prompt: str
+    """
+    The prompt to use for LLM extraction. The preprocessed text will be provided as context.
+    """
+    model: NotRequired[str | None]
+    """
+    The model to use for facet extraction
+    """
+    no_match_pattern: NotRequired[str | None]
+    """
+    Regex pattern to identify outputs that do not match the facet. If the output matches, the facet will be saved as 'no_match'
+    """
 class BraintrustAttachmentReference(TypedDict):
     type: Literal['braintrust_attachment']
     """
@@ -559,16 +619,16 @@ class ExternalAttachmentReference(TypedDict):
     """
-class PreprocessorPreprocessor(TypedDict):
+class Preprocessor1Preprocessor1(TypedDict):
     type: Literal['function']
     id: str
-class PreprocessorPreprocessor2(TypedDict):
+class Preprocessor1Preprocessor12(TypedDict):
     pass
-class PreprocessorPreprocessor3(PreprocessorPreprocessor, PreprocessorPreprocessor2):
+class Preprocessor1Preprocessor13(Preprocessor1Preprocessor1, Preprocessor1Preprocessor12):
     pass
@@ -689,7 +749,7 @@ FunctionIdRef: TypeAlias = Mapping[str, Any]
 FunctionObjectType: TypeAlias = Literal[
-    'prompt', 'tool', 'scorer', 'task', 'agent', 'custom_view', 'preprocessor', 'facet'
+    'prompt', 'tool', 'scorer', 'task', 'custom_view', 'preprocessor', 'facet'
 ]
@@ -697,6 +757,9 @@ FunctionOutputType: TypeAlias = Literal['completion', 'score', 'any']
 FunctionTypeEnum: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet']
+"""
+The type of global function. Defaults to 'scorer'.
+"""
 FunctionTypeEnumNullish: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet']
@@ -980,6 +1043,18 @@ class Group(TypedDict):
     """
+class GroupScope(TypedDict):
+    type: Literal['group']
+    group_by: str
+    """
+    Field path to group by, e.g. metadata.session_id
+    """
+    idle_seconds: NotRequired[float | None]
+    """
+    Optional: trigger after this many seconds of inactivity
+    """
 IfExists: TypeAlias = Literal['error', 'ignore', 'replace']
@@ -1009,6 +1084,14 @@ class InvokeFunctionInvokeFunction1(TypedDict):
     """
+class InvokeFunctionInvokeFunction2(TypedDict):
+    global_function: str
+    """
+    The name of the global function. Currently, the global namespace includes the functions in autoevals
+    """
+    function_type: NotRequired[FunctionTypeEnum | None]
 class InvokeFunctionInvokeFunction3(TypedDict):
     prompt_session_id: str
     """
@@ -1175,12 +1258,6 @@ class ModelParamsModelParams4(TypedDict):
     reasoning_budget: NotRequired[float | None]
-NullableFunctionTypeEnum: TypeAlias = Literal['llm', 'scorer', 'task', 'tool', 'custom_view', 'preprocessor', 'facet']
-"""
-The type of global function. If unspecified, defaults to 'scorer' for backward compatibility.
-"""
 class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
     type: Literal['function']
     id: str
@@ -1189,7 +1266,7 @@ class NullableSavedFunctionIdNullableSavedFunctionId(TypedDict):
 class NullableSavedFunctionIdNullableSavedFunctionId1(TypedDict):
     type: Literal['global']
     name: str
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
+    function_type: NotRequired[FunctionTypeEnum | None]
 NullableSavedFunctionId: TypeAlias = (
@@ -1829,7 +1906,7 @@ class TaskTask2(TypedDict):
     """
     The name of the global function. Currently, the global namespace includes the functions in autoevals
     """
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
+    function_type: NotRequired[FunctionTypeEnum | None]
 class TaskTask3(TypedDict):
@@ -1945,7 +2022,7 @@ class SavedFunctionIdSavedFunctionId(TypedDict):
 class SavedFunctionIdSavedFunctionId1(TypedDict):
     type: Literal['global']
     name: str
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
+    function_type: NotRequired[FunctionTypeEnum | None]
 SavedFunctionId: TypeAlias = SavedFunctionIdSavedFunctionId | SavedFunctionIdSavedFunctionId1
@@ -2024,17 +2101,11 @@ class SpanIFrame(TypedDict):
 class SpanScope(TypedDict):
     type: Literal['span']
-    root_span_id: str
-    """
-    The root span id is a unique identifier for the trace.
-    """
-    id: str
-    """
-    A unique identifier for the span.
-    """
-SpanType: TypeAlias = Literal['llm', 'score', 'function', 'eval', 'task', 'tool']
+SpanType: TypeAlias = Literal[
+    'llm', 'score', 'function', 'eval', 'task', 'tool', 'automation', 'facet', 'preprocessor'
+]
 """
 Type of the span, for display purposes only
 """
@@ -2079,9 +2150,42 @@ class ToolFunctionDefinition(TypedDict):
 class TraceScope(TypedDict):
     type: Literal['trace']
-    root_span_id: str
+    idle_seconds: NotRequired[float | None]
     """
-    The root span id is a unique identifier for the trace.
+    Consider trace complete after this many seconds of inactivity (default: 30)
+    """
+class TriggeredFunctionStateScope(TypedDict):
+    type: Literal['span']
+class TriggeredFunctionStateScope1(TypedDict):
+    type: Literal['trace']
+class TriggeredFunctionStateScope2(TypedDict):
+    type: Literal['group']
+    key: str
+    value: str
+class TriggeredFunctionState(TypedDict):
+    triggered_xact_id: str
+    """
+    The xact_id when this function was triggered
+    """
+    completed_xact_id: NotRequired[str | None]
+    """
+    The xact_id when this function completed (matches triggered_xact_id if done)
+    """
+    attempts: NotRequired[int | None]
+    """
+    Number of execution attempts (for retry tracking)
+    """
+    scope: TriggeredFunctionStateScope | TriggeredFunctionStateScope1 | TriggeredFunctionStateScope2
+    """
+    The scope of data this function operates on
     """
@@ -2278,6 +2382,8 @@ AsyncScoringControl: TypeAlias = (
     | AsyncScoringControlAsyncScoringControl1
     | AsyncScoringControlAsyncScoringControl2
     | AsyncScoringControlAsyncScoringControl3
+    | AsyncScoringControlAsyncScoringControl4
+    | AsyncScoringControlAsyncScoringControl5
 )
@@ -2292,6 +2398,25 @@ class AttachmentStatus(TypedDict):
     """
+class PreprocessorPreprocessor1(TypedDict):
+    type: Literal['global']
+    name: str
+    function_type: NotRequired[FunctionTypeEnum | None]
+class PreprocessorPreprocessor4(PreprocessorPreprocessor1, PreprocessorPreprocessor2):
+    pass
+Preprocessor: TypeAlias = PreprocessorPreprocessor3 | PreprocessorPreprocessor4
+class BatchedFacetData(TypedDict):
+    type: Literal['batched_facet']
+    preprocessor: NotRequired[Preprocessor | None]
+    facets: Sequence[BatchedFacetDataFacet]
 ChatCompletionContentPart: TypeAlias = (
     ChatCompletionContentPartTextWithTitle
     | ChatCompletionContentPartImageWithTitle
@@ -2384,7 +2509,7 @@ class DatasetEvent(TypedDict):
     """
     span_id: str
     """
-    A unique identifier used to link different dataset events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/guides/tracing) for full details on tracing
+    A unique identifier used to link different dataset events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
     """
     root_span_id: str
     """
@@ -2468,7 +2593,7 @@ class Experiment(TypedDict):
 class ExtendedSavedFunctionIdExtendedSavedFunctionId1(TypedDict):
     type: Literal['global']
     name: str
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
+    function_type: NotRequired[FunctionTypeEnum | None]
 ExtendedSavedFunctionId: TypeAlias = (
@@ -2478,22 +2603,22 @@ ExtendedSavedFunctionId: TypeAlias = (
 )
-class PreprocessorPreprocessor1(TypedDict):
+class Preprocessor1Preprocessor11(TypedDict):
     type: Literal['global']
     name: str
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
+    function_type: NotRequired[FunctionTypeEnum | None]
-class PreprocessorPreprocessor4(PreprocessorPreprocessor1, PreprocessorPreprocessor2):
+class Preprocessor1Preprocessor14(Preprocessor1Preprocessor11, Preprocessor1Preprocessor12):
     pass
-Preprocessor: TypeAlias = PreprocessorPreprocessor3 | PreprocessorPreprocessor4
+Preprocessor1: TypeAlias = Preprocessor1Preprocessor13 | Preprocessor1Preprocessor14
 class FacetData(TypedDict):
     type: Literal['facet']
-    preprocessor: NotRequired[Preprocessor | None]
+    preprocessor: NotRequired[Preprocessor1 | None]
     prompt: str
     """
     The prompt to use for LLM extraction. The preprocessed text will be provided as context.
@@ -2511,7 +2636,7 @@ class FacetData(TypedDict):
 class FunctionDataFunctionData3(TypedDict):
     type: Literal['global']
     name: str
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
+    function_type: NotRequired[FunctionTypeEnum | None]
     config: NotRequired[Mapping[str, Any] | None]
     """
     Configuration options to pass to the global function (e.g., for preprocessor customization)
@@ -2523,21 +2648,68 @@ class FunctionIdFunctionId2(TypedDict):
     """
     The name of the global function. Currently, the global namespace includes the functions in autoevals
     """
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
+    function_type: NotRequired[FunctionTypeEnum | None]
-class InvokeFunctionInvokeFunction2(TypedDict):
-    global_function: str
+class InvokeFunctionInvokeFunction7(TypedDict):
+    input: NotRequired[Any | None]
     """
-    The name of the global function. Currently, the global namespace includes the functions in autoevals
+    Argument to the function, which can be any JSON serializable value
+    """
+    expected: NotRequired[Any | None]
+    """
+    The expected output of the function
+    """
+    metadata: NotRequired[Mapping[str, Any] | None]
+    """
+    Any relevant metadata. This will be logged and available as the `metadata` argument.
+    """
+    tags: NotRequired[Sequence[str] | None]
+    """
+    Any relevant tags to log on the span.
+    """
+    messages: NotRequired[Sequence[ChatCompletionMessageParam] | None]
+    """
+    If the function is an LLM, additional messages to pass along to it
+    """
+    parent: NotRequired[InvokeParent | None]
+    stream: NotRequired[bool | None]
+    """
+    Whether to stream the response. If true, results will be returned in the Braintrust SSE format.
+    """
+    mode: NotRequired[StreamingMode | None]
+    strict: NotRequired[bool | None]
+    """
+    If true, throw an error if one of the variables in the prompt is not present in the input
+    """
+    mcp_auth: NotRequired[Mapping[str, InvokeFunctionMcpAuth] | None]
+    """
+    Map of MCP server URL to auth credentials
+    """
+    overrides: NotRequired[Mapping[str, Any] | None]
+    """
+    Partial function definition to merge with the function being invoked. Fields are validated against the function type's schema at runtime. For facets: { preprocessor?, prompt?, model? }. For prompts: { model?, ... }.
     """
-    function_type: NotRequired[NullableFunctionTypeEnum | None]
-InvokeScope: TypeAlias = SpanScope | TraceScope
-"""
-The scope at which to operate (span or trace)
-"""
+class InvokeFunctionInvokeFunction8(InvokeFunctionInvokeFunction, InvokeFunctionInvokeFunction7):
+    pass
+class InvokeFunctionInvokeFunction9(InvokeFunctionInvokeFunction1, InvokeFunctionInvokeFunction7):
+    pass
+class InvokeFunctionInvokeFunction10(InvokeFunctionInvokeFunction2, InvokeFunctionInvokeFunction7):
+    pass
+class InvokeFunctionInvokeFunction11(InvokeFunctionInvokeFunction3, InvokeFunctionInvokeFunction7):
+    pass
+class InvokeFunctionInvokeFunction12(InvokeFunctionInvokeFunction4, InvokeFunctionInvokeFunction7):
+    pass
 class ModelParamsModelParams(TypedDict):
@@ -2580,7 +2752,7 @@ class OnlineScoreConfig(TypedDict):
     """
     scorers: Sequence[SavedFunctionId]
     """
-    The list of scorers to use for online scoring
+    The list of functions to run for online scoring. Can include scorers, facets, or other function types.
     """
     btql_filter: NotRequired[str | None]
     """
@@ -2588,16 +2760,20 @@ class OnlineScoreConfig(TypedDict):
     """
     apply_to_root_span: NotRequired[bool | None]
     """
-    Whether to trigger online scoring on the root span of each trace
+    Whether to trigger online scoring on the root span of each trace. Only applies when scope is 'span' or unset.
     """
     apply_to_span_names: NotRequired[Sequence[str] | None]
     """
-    Trigger online scoring on any spans with a name in this list
+    Trigger online scoring on any spans with a name in this list. Only applies when scope is 'span' or unset.
     """
     skip_logging: NotRequired[bool | None]
     """
     Whether to skip adding scorer spans when computing scores
     """
+    scope: NotRequired[SpanScope | TraceScope | GroupScope | None]
+    """
+    The scope at which to run the functions. Defaults to span-level execution. Trace/group scope requires all functions to be facets.
+    """
 class Project(TypedDict):
@@ -2802,7 +2978,7 @@ class ExperimentEvent(TypedDict):
     """
     span_id: str
     """
-    A unique identifier used to link different experiment events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/guides/tracing) for full details on tracing
+    A unique identifier used to link different experiment events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
     """
     span_parents: NotRequired[Sequence[str] | None]
     """
@@ -2853,80 +3029,6 @@ GraphNode: TypeAlias = (
 )
-class InvokeContext(TypedDict):
-    object_type: Literal['project_logs', 'experiment', 'dataset', 'playground_logs']
-    """
-    The type of object containing the span data
-    """
-    object_id: str
-    """
-    The ID of the object containing the span data
-    """
-    scope: InvokeScope
-class InvokeFunctionInvokeFunction7(TypedDict):
-    input: NotRequired[Any | None]
-    """
-    Argument to the function, which can be any JSON serializable value
-    """
-    expected: NotRequired[Any | None]
-    """
-    The expected output of the function
-    """
-    metadata: NotRequired[Mapping[str, Any] | None]
-    """
-    Any relevant metadata. This will be logged and available as the `metadata` argument.
-    """
-    tags: NotRequired[Sequence[str] | None]
-    """
-    Any relevant tags to log on the span.
-    """
-    messages: NotRequired[Sequence[ChatCompletionMessageParam] | None]
-    """
-    If the function is an LLM, additional messages to pass along to it
-    """
-    context: NotRequired[InvokeContext | None]
-    parent: NotRequired[InvokeParent | None]
-    stream: NotRequired[bool | None]
-    """
-    Whether to stream the response. If true, results will be returned in the Braintrust SSE format.
-    """
-    mode: NotRequired[StreamingMode | None]
-    strict: NotRequired[bool | None]
-    """
-    If true, throw an error if one of the variables in the prompt is not present in the input
-    """
-    mcp_auth: NotRequired[Mapping[str, InvokeFunctionMcpAuth] | None]
-    """
-    Map of MCP server URL to auth credentials
-    """
-    overrides: NotRequired[Mapping[str, Any] | None]
-    """
-    Partial function definition to merge with the function being invoked. Fields are validated against the function type's schema at runtime. For facets: { preprocessor?, prompt?, model? }. For prompts: { model?, ... }.
-    """
-class InvokeFunctionInvokeFunction8(InvokeFunctionInvokeFunction, InvokeFunctionInvokeFunction7):
-    pass
-class InvokeFunctionInvokeFunction9(InvokeFunctionInvokeFunction1, InvokeFunctionInvokeFunction7):
-    pass
-class InvokeFunctionInvokeFunction10(InvokeFunctionInvokeFunction2, InvokeFunctionInvokeFunction7):
-    pass
-class InvokeFunctionInvokeFunction11(InvokeFunctionInvokeFunction3, InvokeFunctionInvokeFunction7):
-    pass
-class InvokeFunctionInvokeFunction12(InvokeFunctionInvokeFunction4, InvokeFunctionInvokeFunction7):
-    pass
 class ProjectLogsEvent(TypedDict):
     id: str
     """
@@ -2994,7 +3096,7 @@ class ProjectLogsEvent(TypedDict):
     """
     span_id: str
     """
-    A unique identifier used to link different project logs events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/guides/tracing) for full details on tracing
+    A unique identifier used to link different project logs events together as part of a full trace. See the [tracing guide](https://www.braintrust.dev/docs/instrument) for full details on tracing
     """
     span_parents: NotRequired[Sequence[str] | None]
     """
@@ -3127,7 +3229,6 @@ class View(TypedDict):
         'tools',
         'scorers',
         'logs',
-        'agents',
         'monitor',
         'for_review',
     ]
@@ -3362,6 +3463,7 @@ FunctionData: TypeAlias = (
     | FunctionDataFunctionData2
     | FunctionDataFunctionData3
     | FacetData
+    | BatchedFacetData
 )

braintrust/cli/install/api.py CHANGED Viewed

@@ -326,7 +326,7 @@ def main(args):
             textwrap.dedent(
                 f"""\
             Stack with name {args.name} does not exist. Either create it manually by following
-            https://www.braintrust.dev/docs/guides/self-hosting/aws or use the --create flag."""
+            https://www.braintrust.dev/docs/admin/self-hosting/aws or use the --create flag."""
             )
         )
         exit(1)

braintrust/conftest.py CHANGED Viewed

@@ -46,3 +46,27 @@ def reset_braintrust_state():
     from braintrust import logger
     logger._state = logger.BraintrustState()
+@pytest.fixture(scope="session")
+def vcr_config():
+    """
+    VCR configuration for recording/playing back HTTP interactions.
+    In CI, use "none" to fail if cassette is missing.
+    Locally, use "once" to record new cassettes if they don't exist.
+    """
+    record_mode = "none" if (os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS")) else "once"
+    return {
+        "record_mode": record_mode,
+        "filter_headers": [
+            "authorization",
+            "openai-organization",
+            "x-api-key",
+            "api-key",
+            "openai-api-key",
+            "x-goog-api-key",
+            "x-bt-auth-token",
+        ],
+    }

braintrust/db_fields.py CHANGED Viewed

@@ -5,6 +5,7 @@ ID_FIELD = "id"
 IS_MERGE_FIELD = "_is_merge"
 MERGE_PATHS_FIELD = "_merge_paths"
+ARRAY_DELETE_FIELD = "_array_delete"
 AUDIT_SOURCE_FIELD = "_audit_source"
 AUDIT_METADATA_FIELD = "_audit_metadata"

braintrust/devserver/test_server_integration.py CHANGED Viewed

@@ -8,17 +8,6 @@ from braintrust.framework import _evals
 from braintrust.test_helpers import has_devserver_installed
-@pytest.fixture(scope="module")
-def vcr_config():
-    """VCR configuration to filter sensitive headers."""
-    return {
-        "filter_headers": [
-            "x-bt-auth-token",
-            "authorization",
-        ]
-    }
 @pytest.fixture
 def client():
     """Create test client using the real simple_eval.py example."""

braintrust/framework.py CHANGED Viewed

@@ -1559,9 +1559,9 @@ def build_local_summary(
     scores_by_name = defaultdict(lambda: (0, 0))
     for result in results:
         for name, score in result.scores.items():
-            curr = scores_by_name[name]
-            if curr is None:
+            if score is None:
                 continue
+            curr = scores_by_name[name]
             scores_by_name[name] = (curr[0] + score, curr[1] + 1)
     longest_score_name = max(len(name) for name in scores_by_name) if scores_by_name else 0
     avg_scores = {

braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

braintrust 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl