inspect-ai 0.3.72__py3-none-any.whl → 0.3.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +14 -3
- inspect_ai/_cli/sandbox.py +3 -3
- inspect_ai/_cli/score.py +6 -4
- inspect_ai/_cli/trace.py +53 -6
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +2 -1
- inspect_ai/_display/core/footer.py +6 -6
- inspect_ai/_display/plain/display.py +11 -6
- inspect_ai/_display/rich/display.py +23 -13
- inspect_ai/_display/textual/app.py +10 -9
- inspect_ai/_display/textual/display.py +2 -2
- inspect_ai/_display/textual/widgets/footer.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +14 -5
- inspect_ai/_eval/context.py +1 -2
- inspect_ai/_eval/eval.py +54 -41
- inspect_ai/_eval/loader.py +9 -2
- inspect_ai/_eval/run.py +148 -81
- inspect_ai/_eval/score.py +13 -8
- inspect_ai/_eval/task/images.py +31 -21
- inspect_ai/_eval/task/run.py +62 -59
- inspect_ai/_eval/task/rundir.py +16 -9
- inspect_ai/_eval/task/sandbox.py +7 -8
- inspect_ai/_eval/task/util.py +7 -0
- inspect_ai/_util/_async.py +118 -10
- inspect_ai/_util/constants.py +0 -2
- inspect_ai/_util/file.py +15 -29
- inspect_ai/_util/future.py +37 -0
- inspect_ai/_util/http.py +3 -99
- inspect_ai/_util/httpx.py +60 -0
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/json.py +5 -52
- inspect_ai/_util/logger.py +30 -86
- inspect_ai/_util/retry.py +10 -61
- inspect_ai/_util/trace.py +2 -2
- inspect_ai/_view/server.py +86 -3
- inspect_ai/_view/www/dist/assets/index.js +25837 -13269
- inspect_ai/_view/www/log-schema.json +253 -186
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
- inspect_ai/_view/www/src/types/log.d.ts +122 -94
- inspect_ai/approval/_human/manager.py +6 -10
- inspect_ai/approval/_human/panel.py +2 -2
- inspect_ai/dataset/_sources/util.py +7 -6
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +35 -61
- inspect_ai/log/_log.py +18 -1
- inspect_ai/log/_recorders/eval.py +14 -23
- inspect_ai/log/_recorders/json.py +3 -18
- inspect_ai/log/_samples.py +27 -2
- inspect_ai/log/_transcript.py +8 -8
- inspect_ai/model/__init__.py +2 -1
- inspect_ai/model/_call_tools.py +60 -40
- inspect_ai/model/_chat_message.py +3 -2
- inspect_ai/model/_generate_config.py +25 -0
- inspect_ai/model/_model.py +74 -36
- inspect_ai/model/_openai.py +9 -1
- inspect_ai/model/_providers/anthropic.py +24 -26
- inspect_ai/model/_providers/azureai.py +11 -9
- inspect_ai/model/_providers/bedrock.py +33 -24
- inspect_ai/model/_providers/cloudflare.py +8 -9
- inspect_ai/model/_providers/goodfire.py +7 -3
- inspect_ai/model/_providers/google.py +47 -13
- inspect_ai/model/_providers/groq.py +15 -15
- inspect_ai/model/_providers/hf.py +24 -17
- inspect_ai/model/_providers/mistral.py +36 -20
- inspect_ai/model/_providers/openai.py +30 -25
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +3 -4
- inspect_ai/model/_providers/util/__init__.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +6 -19
- inspect_ai/model/_providers/util/hooks.py +165 -0
- inspect_ai/model/_providers/vertex.py +20 -3
- inspect_ai/model/_providers/vllm.py +16 -19
- inspect_ai/scorer/_multi.py +5 -2
- inspect_ai/solver/_bridge/patch.py +31 -1
- inspect_ai/solver/_fork.py +5 -3
- inspect_ai/solver/_human_agent/agent.py +3 -2
- inspect_ai/tool/__init__.py +8 -2
- inspect_ai/tool/_tool_info.py +4 -90
- inspect_ai/tool/_tool_params.py +4 -34
- inspect_ai/tool/_tools/_web_search.py +30 -24
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_concurrency.py +5 -6
- inspect_ai/util/_display.py +6 -0
- inspect_ai/util/_json.py +170 -0
- inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
- inspect_ai/util/_sandbox/docker/docker.py +5 -0
- inspect_ai/util/_sandbox/environment.py +56 -9
- inspect_ai/util/_sandbox/service.py +12 -5
- inspect_ai/util/_subprocess.py +94 -113
- inspect_ai/util/_subtask.py +2 -4
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +99 -99
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
- inspect_ai/_util/timeouts.py +0 -160
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/model/_providers/util/tracker.py +0 -92
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
inspect_ai/model/_model.py
CHANGED
@@ -13,6 +13,7 @@ from typing import Any, AsyncIterator, Callable, Literal, Type, cast
 
 from pydantic_core import to_jsonable_python
 from tenacity import (
+    RetryCallState,
    retry,
    retry_if_exception,
    stop_after_attempt,
@@ -20,8 +21,9 @@ from tenacity import (
    stop_never,
    wait_exponential_jitter,
 )
+from tenacity.stop import StopBaseT
 
-from inspect_ai._util.constants import DEFAULT_MAX_CONNECTIONS
+from inspect_ai._util.constants import DEFAULT_MAX_CONNECTIONS, HTTP
 from inspect_ai._util.content import (
     Content,
     ContentImage,
@@ -30,6 +32,7 @@ from inspect_ai._util.content import (
 )
 from inspect_ai._util.hooks import init_hooks, override_api_key, send_telemetry
 from inspect_ai._util.interrupt import check_sample_interrupt
+from inspect_ai._util.logger import warn_once
 from inspect_ai._util.platform import platform_init
 from inspect_ai._util.registry import (
     RegistryInfo,
@@ -37,7 +40,7 @@ from inspect_ai._util.registry import (
     registry_info,
     registry_unqualified_name,
 )
-from inspect_ai._util.retry import log_rate_limit_retry
+from inspect_ai._util.retry import report_http_retry
 from inspect_ai._util.trace import trace_action
 from inspect_ai._util.working import report_sample_waiting_time, sample_working_time
 from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
@@ -173,11 +176,11 @@ class ModelAPI(abc.ABC):
        """Scope for enforcement of max_connections."""
        return "default"
 
-    def is_rate_limit(self, ex: BaseException) -> bool:
-        """Is this exception a rate limit error?
+    def should_retry(self, ex: Exception) -> bool:
+        """Should this exception be retried?
 
        Args:
-           ex: Exception to check for rate limit
+           ex: Exception to check for retry
        """
        return False
 
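should_retry() is now the provider-facing extension point: subclasses classify their SDK's exceptions and the shared retry loop handles the backoff. A minimal sketch of an override (the MyAPI class and its use of builtin exception types are illustrative, not taken from this diff):

    from inspect_ai.model import ModelAPI


    class MyAPI(ModelAPI):
        # hypothetical provider: treat dropped connections and timeouts
        # as transient so the shared retry loop will re-attempt them
        def should_retry(self, ex: Exception) -> bool:
            return isinstance(ex, ConnectionError | TimeoutError)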
@@ -331,14 +334,17 @@ class Model:
        start_time = datetime.now()
        working_start = sample_working_time()
        async with self._connection_concurrency(config):
+            from inspect_ai.log._samples import track_active_sample_retries
+
            # generate
-            output = await self._generate(
-                input=input,
-                tools=tools,
-                tool_choice=tool_choice,
-                config=config,
-                cache=cache,
-            )
+            with track_active_sample_retries():
+                output = await self._generate(
+                    input=input,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    config=config,
+                    cache=cache,
+                )
 
            # update the most recent ModelEvent with the actual start/completed
            # times as well as a computation of working time (events are
@@ -418,27 +424,27 @@ class Model:
        if self.api.collapse_assistant_messages():
            input = collapse_consecutive_assistant_messages(input)
 
-        # retry for rate limit errors
+        # retry for transient http errors:
+        # - no default timeout or max_retries (try forever)
+        # - exponential backoff starting at 3 seconds (will wait 25 minutes
+        #   on the 10th retry,then will wait no longer than 30 minutes on
+        #   subsequent retries)
+        if config.max_retries is not None and config.timeout is not None:
+            stop: StopBaseT = stop_after_attempt(config.max_retries) | stop_after_delay(
+                config.timeout
+            )
+        elif config.max_retries is not None:
+            stop = stop_after_attempt(config.max_retries)
+        elif config.timeout is not None:
+            stop = stop_after_delay(config.timeout)
+        else:
+            stop = stop_never
+
        @retry(
-            wait=wait_exponential_jitter(max=(30 * 60), jitter=5),
-            retry=retry_if_exception(self.is_rate_limit),
-            stop=(
-                (
-                    stop_after_delay(config.timeout)
-                    | stop_after_attempt(config.max_retries)
-                )
-                if config.timeout and config.max_retries
-                else (
-                    stop_after_delay(config.timeout)
-                    if config.timeout
-                    else (
-                        stop_after_attempt(config.max_retries)
-                        if config.max_retries
-                        else stop_never
-                    )
-                )
-            ),
-            before_sleep=functools.partial(log_rate_limit_retry, self.api.model_name),
+            wait=wait_exponential_jitter(initial=3, max=(30 * 60), jitter=3),
+            retry=retry_if_exception(self.should_retry),
+            stop=stop,
+            before_sleep=functools.partial(log_model_retry, self.api.model_name),
        )
        async def generate() -> ModelOutput:
            check_sample_interrupt()
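tenacity stop conditions compose with |, halting as soon as either side fires, and the comment's arithmetic follows from initial=3 doubling each attempt (3 * 2**9 = 1536s, about 25 minutes, capped by max=30 * 60). A standalone sketch of the same combination (the function and the limits are illustrative):

    from tenacity import (
        retry,
        retry_if_exception_type,
        stop_after_attempt,
        stop_after_delay,
        wait_exponential_jitter,
    )


    # stop at 5 attempts or 120 seconds of retrying, whichever comes first
    @retry(
        wait=wait_exponential_jitter(initial=3, max=30 * 60, jitter=3),
        retry=retry_if_exception_type(ConnectionError),
        stop=stop_after_attempt(5) | stop_after_delay(120),
    )
    def flaky_call() -> None:
        raise ConnectionError("transient")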
@@ -555,6 +561,30 @@ class Model:
        # return results
        return model_output
 
+    def should_retry(self, ex: BaseException) -> bool:
+        if isinstance(ex, Exception):
+            # check standard should_retry() method
+            retry = self.api.should_retry(ex)
+            if retry:
+                report_http_retry()
+                return True
+
+            # see if the API implements legacy is_rate_limit() method
+            is_rate_limit = getattr(self.api, "is_rate_limit", None)
+            if is_rate_limit:
+                warn_once(
+                    logger,
+                    f"provider '{self.name}' implements deprecated is_rate_limit() method, "
+                    + "please change to should_retry()",
+                )
+                retry = cast(bool, is_rate_limit(ex))
+                if retry:
+                    report_http_retry()
+                    return True
+
+        # no retry
+        return False
+
    # function to verify that its okay to call model apis
    def verify_model_apis(self) -> None:
        if (
@@ -1064,6 +1094,7 @@ def tool_result_images_reducer(
            messages
            + [
                ChatMessageTool(
+                    id=message.id,
                    content=edited_tool_message_content,
                    tool_call_id=message.tool_call_id,
                    function=message.function,
@@ -1170,19 +1201,26 @@ def combine_messages(
    a: ChatMessage, b: ChatMessage, message_type: Type[ChatMessage]
 ) -> ChatMessage:
    if isinstance(a.content, str) and isinstance(b.content, str):
-        return message_type(content=f"{a.content}\n{b.content}")
+        return message_type(id=a.id, content=f"{a.content}\n{b.content}")
    elif isinstance(a.content, list) and isinstance(b.content, list):
-        return message_type(content=a.content + b.content)
+        return message_type(id=a.id, content=a.content + b.content)
    elif isinstance(a.content, str) and isinstance(b.content, list):
-        return message_type(content=[ContentText(text=a.content), *b.content])
+        return message_type(id=a.id, content=[ContentText(text=a.content), *b.content])
    elif isinstance(a.content, list) and isinstance(b.content, str):
-        return message_type(content=a.content + [ContentText(text=b.content)])
+        return message_type(id=a.id, content=a.content + [ContentText(text=b.content)])
    else:
        raise TypeError(
            f"Cannot combine messages with invalid content types: {a.content!r}, {b.content!r}"
        )
 
 
+def log_model_retry(model_name: str, retry_state: RetryCallState) -> None:
+    logger.log(
+        HTTP,
+        f"-> {model_name} retry {retry_state.attempt_number} after waiting for {retry_state.idle_for}",
+    )
+
+
 def init_active_model(model: Model, config: GenerateConfig) -> None:
    active_model_context_var.set(model)
    set_active_generate_config(config)
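log_model_retry() writes to inspect's custom HTTP log level using tenacity's RetryCallState, where attempt_number counts attempts and idle_for is the cumulative seconds slept. An illustrative log line (model name and timing invented, not captured output):

    -> openai/gpt-4o retry 3 after waiting for 21.7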
inspect_ai/model/_openai.py
CHANGED
@@ -52,7 +52,7 @@ from ._model_output import ModelUsage, StopReason, as_stop_reason
 
 
 def is_o_series(name: str) -> bool:
-    return bool(re.match(r"^o\d+", name))
+    return bool(re.match(r"(^|.*\/)o\d+", name))
 
 
 def is_o1_mini(name: str) -> bool:
@@ -396,6 +396,9 @@ def content_from_openai(
    content: ChatCompletionContentPartParam | ChatCompletionContentPartRefusalParam,
    parse_reasoning: bool = False,
 ) -> list[Content]:
+    # Some providers omit the type tag and use "object-with-a-single-field" encoding
+    if "type" not in content and len(content) == 1:
+        content["type"] = list(content.keys())[0]  # type: ignore[arg-type]
    if content["type"] == "text":
        text = content["text"]
        if parse_reasoning:
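The added normalization promotes a lone key to the missing type tag, so an untagged part dispatches through the same branches below. A runnable sketch of the transform on a plain dict (the payload is illustrative):

    # untagged single-field part, as some OpenAI-compatible providers emit
    part: dict[str, str] = {"reasoning": "chain of thought..."}

    # promote the lone key to the "type" tag, mirroring the check above
    if "type" not in part and len(part) == 1:
        part["type"] = list(part.keys())[0]

    assert part["type"] == "reasoning"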
@@ -413,6 +416,8 @@
            return [ContentText(text=text)]
        else:
            return [ContentText(text=text)]
+    elif content["type"] == "reasoning":  # type: ignore[comparison-overlap]
+        return [ContentReasoning(reasoning=content["reasoning"])]
    elif content["type"] == "image_url":
        return [
            ContentImage(
@@ -428,6 +433,9 @@
        ]
    elif content["type"] == "refusal":
        return [ContentText(text=content["refusal"])]
+    else:
+        content_type = content["type"]
+        raise ValueError(f"Unexpected content type '{content_type}' in message.")
 
 
 def chat_message_assistant_from_openai(
inspect_ai/model/_providers/anthropic.py
CHANGED
@@ -6,7 +6,12 @@ from copy import copy
 from logging import getLogger
 from typing import Any, Literal, Optional, Tuple, TypedDict, cast
 
-from .util.tracker import HttpTimeTracker
+import httpcore
+import httpx
+
+from inspect_ai._util.http import is_retryable_http_status
+
+from .util.hooks import HttpxHooks
 
 if sys.version_info >= (3, 11):
    from typing import NotRequired
@@ -16,13 +21,12 @@ else:
 from anthropic import (
    APIConnectionError,
    APIStatusError,
+    APITimeoutError,
    AsyncAnthropic,
    AsyncAnthropicBedrock,
    AsyncAnthropicVertex,
    BadRequestError,
-    InternalServerError,
    NotGiven,
-    RateLimitError,
 )
 from anthropic._types import Body
 from anthropic.types import (
@@ -46,7 +50,6 @@ from typing_extensions import override
 
 from inspect_ai._util.constants import (
    BASE_64_DATA_REMOVED,
-    DEFAULT_MAX_RETRIES,
    NO_CONTENT,
 )
 from inspect_ai._util.content import (
@@ -125,9 +128,6 @@ class AnthropicAPI(ModelAPI):
                AsyncAnthropic | AsyncAnthropicBedrock | AsyncAnthropicVertex
            ) = AsyncAnthropicBedrock(
                base_url=base_url,
-                max_retries=(
-                    config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
-                ),
                aws_region=aws_region,
                **model_args,
            )
@@ -141,9 +141,6 @@ class AnthropicAPI(ModelAPI):
                region=region,
                project_id=project_id,
                base_url=base_url,
-                max_retries=(
-                    config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
-                ),
                **model_args,
            )
        else:
@@ -156,14 +153,11 @@ class AnthropicAPI(ModelAPI):
            self.client = AsyncAnthropic(
                base_url=base_url,
                api_key=self.api_key,
-                max_retries=(
-                    config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
-                ),
                **model_args,
            )
 
        # create time tracker
-        self._time_tracker = HttpTimeTracker(self.client._client)
+        self._http_hooks = HttpxHooks(self.client._client)
 
    @override
    async def close(self) -> None:
@@ -183,7 +177,7 @@ class AnthropicAPI(ModelAPI):
        config: GenerateConfig,
    ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
        # allocate request_id (so we can see it from ModelCall)
-        request_id = self._time_tracker.start_request()
+        request_id = self._http_hooks.start_request()
 
        # setup request and response for ModelCall
        request: dict[str, Any] = {}
@@ -194,7 +188,7 @@ class AnthropicAPI(ModelAPI):
                request=request,
                response=response,
                filter=model_call_filter,
-                time=self._time_tracker.end_request(request_id),
+                time=self._http_hooks.end_request(request_id),
            )
 
        # generate
@@ -223,7 +217,7 @@ class AnthropicAPI(ModelAPI):
            request = request | req
 
            # extra headers (for time tracker and computer use)
-            extra_headers = headers | {HttpTimeTracker.REQUEST_ID_HEADER: request_id}
+            extra_headers = headers | {HttpxHooks.REQUEST_ID_HEADER: request_id}
            if computer_use:
                betas.append("computer-use-2025-01-24")
            if len(betas) > 0:
@@ -291,8 +285,6 @@ class AnthropicAPI(ModelAPI):
                betas.append("output-128k-2025-02-19")
 
        # config that applies to all models
-        if config.timeout is not None:
-            params["timeout"] = float(config.timeout)
        if config.stop_seqs is not None:
            params["stop_sequences"] = config.stop_seqs
 
@@ -334,13 +326,19 @@
        return str(self.api_key)
 
    @override
-    def is_rate_limit(self, ex: Exception) -> bool:
-        return isinstance(
-            ex,
-            APIConnectionError
-            | InternalServerError
-            | RateLimitError,
-        )
+    def should_retry(self, ex: Exception) -> bool:
+        if isinstance(ex, APIStatusError):
+            return is_retryable_http_status(ex.status_code)
+        elif isinstance(
+            ex,
+            APIConnectionError
+            | APITimeoutError
+            | httpx.RemoteProtocolError
+            | httpcore.RemoteProtocolError,
+        ):
+            return True
+        else:
+            return False
 
    @override
    def collapse_user_messages(self) -> bool:
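Status errors now defer to the shared is_retryable_http_status() helper, while connection-level failures, including protocol errors raised by httpx/httpcore underneath the Anthropic SDK, are always retried. An illustrative check, assuming api is a constructed AnthropicAPI instance:

    import httpx

    # a dropped connection is now classified as retryable
    assert api.should_retry(httpx.RemoteProtocolError("Server disconnected"))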
inspect_ai/model/_providers/azureai.py
CHANGED
@@ -27,11 +27,16 @@ from azure.ai.inference.models import (
    UserMessage,
 )
 from azure.core.credentials import AzureKeyCredential
-from azure.core.exceptions import AzureError, HttpResponseError
+from azure.core.exceptions import (
+    AzureError,
+    HttpResponseError,
+    ServiceResponseError,
+)
 from typing_extensions import override
 
 from inspect_ai._util.constants import DEFAULT_MAX_TOKENS
 from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai.tool import ToolChoice, ToolInfo
 from inspect_ai.tool._tool_call import ToolCall
@@ -232,14 +237,11 @@ class AzureAIAPI(ModelAPI):
        return DEFAULT_MAX_TOKENS
 
    @override
-    def is_rate_limit(self, ex: Exception) -> bool:
-        if isinstance(ex, HttpResponseError):
-            return (
-                ex.status_code == 408
-                or ex.status_code == 409
-                or ex.status_code == 429
-                or ex.status_code == 500
-            )
+    def should_retry(self, ex: Exception) -> bool:
+        if isinstance(ex, HttpResponseError) and ex.status_code is not None:
+            return is_retryable_http_status(ex.status_code)
+        elif isinstance(ex, ServiceResponseError):
+            return True
        else:
            return False
 
inspect_ai/model/_providers/bedrock.py
CHANGED
@@ -1,16 +1,14 @@
 import base64
+from logging import getLogger
 from typing import Any, Literal, Tuple, Union, cast
 
 from pydantic import BaseModel, Field
 from typing_extensions import override
 
-from inspect_ai._util.constants import (
-    DEFAULT_MAX_RETRIES,
-    DEFAULT_MAX_TOKENS,
-    DEFAULT_TIMEOUT,
-)
+from inspect_ai._util._async import current_async_backend
+from inspect_ai._util.constants import DEFAULT_MAX_TOKENS
 from inspect_ai._util.content import Content, ContentImage, ContentText
-from inspect_ai._util.error import pip_dependency_error
+from inspect_ai._util.error import PrerequisiteError, pip_dependency_error
 from inspect_ai._util.images import file_as_data
 from inspect_ai._util.version import verify_required_version
 from inspect_ai.tool import ToolChoice, ToolInfo
@@ -31,7 +29,9 @@ from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage
 from .util import (
    model_base_url,
 )
-from .util.tracker import ConverseTimeTracker
+from .util.hooks import ConverseHooks
+
+logger = getLogger(__name__)
 
 # Model for Bedrock Converse API (Response)
 # generated from: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#converse
@@ -245,6 +245,12 @@ class BedrockAPI(ModelAPI):
            config=config,
        )
 
+        # raise if we are using trio
+        if current_async_backend() == "trio":
+            raise PrerequisiteError(
+                "ERROR: The bedrock provider does not work with the trio async backend."
+            )
+
        # save model_args
        self.model_args = model_args
 
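The guard exists because aioboto3 only runs on asyncio; current_async_backend() (added to _util/_async.py in this release) reports the event loop the caller is under. A sketch of the assumed behavior via anyio (return values inferred from the comparison above):

    import anyio

    from inspect_ai._util._async import current_async_backend


    async def report() -> None:
        # assumed to return "asyncio" or "trio" based on the running loop
        print(current_async_backend())


    anyio.run(report)                  # prints "asyncio"
    anyio.run(report, backend="trio")  # prints "trio" (bedrock refuses this)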
@@ -258,7 +264,7 @@
            self.session = aioboto3.Session()
 
            # create time tracker
-            self._time_tracker = ConverseTimeTracker(self.session)
+            self._http_hooks = ConverseHooks(self.session)
 
        except ImportError:
            raise pip_dependency_error("Bedrock API", ["aioboto3"])
@@ -288,15 +294,25 @@
        return DEFAULT_MAX_TOKENS
 
    @override
-    def is_rate_limit(self, ex: Exception) -> bool:
+    def should_retry(self, ex: Exception) -> bool:
        from botocore.exceptions import ClientError
 
        # Look for an explicit throttle exception
        if isinstance(ex, ClientError):
-            error_code = ex.response.get("Error", {}).get("Code", "")
-            return error_code == "ThrottlingException"
-
-        return False
+            error_code = ex.response.get("Error", {}).get("Code", "")
+            return error_code in [
+                "ThrottlingException",
+                "RequestLimitExceeded",
+                "Throttling",
+                "RequestThrottled",
+                "TooManyRequestsException",
+                "ProvisionedThroughputExceededException",
+                "TransactionInProgressException",
+                "RequestTimeout",
+                "ServiceUnavailable",
+            ]
+        else:
+            return False
 
    @override
    def collapse_user_messages(self) -> bool:
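The throttle check reads the service error code out of botocore's ClientError payload. A runnable sketch of the shape being matched (operation name and message are illustrative):

    from botocore.exceptions import ClientError

    err = ClientError(
        error_response={"Error": {"Code": "ThrottlingException", "Message": "Rate exceeded"}},
        operation_name="Converse",
    )
    # the provider matches this code against its retryable list
    assert err.response.get("Error", {}).get("Code", "") == "ThrottlingException"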
@@ -317,20 +333,13 @@
        from botocore.exceptions import ClientError
 
        # The bedrock client
-        request_id = self._time_tracker.start_request()
+        request_id = self._http_hooks.start_request()
        async with self.session.client(  # type: ignore[call-overload]
            service_name="bedrock-runtime",
            endpoint_url=self.base_url,
            config=Config(
-                connect_timeout=config.timeout if config.timeout else DEFAULT_TIMEOUT,
-                read_timeout=config.timeout if config.timeout else DEFAULT_TIMEOUT,
-                retries=dict(
-                    max_attempts=config.max_retries
-                    if config.max_retries
-                    else DEFAULT_MAX_RETRIES,
-                    mode="adaptive",
-                ),
-                user_agent_extra=self._time_tracker.user_agent_extra(request_id),
+                retries=dict(mode="adaptive"),
+                user_agent_extra=self._http_hooks.user_agent_extra(request_id),
            ),
            **self.model_args,
        ) as client:
@@ -370,7 +379,7 @@
                    request.model_dump(exclude_none=True)
                ),
                response=response,
-                time=self._time_tracker.end_request(request_id),
+                time=self._http_hooks.end_request(request_id),
            )
 
            try:
inspect_ai/model/_providers/cloudflare.py
CHANGED
@@ -16,10 +16,10 @@ from .util import (
    chat_api_input,
    chat_api_request,
    environment_prerequisite_error,
-    is_chat_api_rate_limit,
    model_base_url,
+    should_retry_chat_api_error,
 )
-from .util.tracker import HttpTimeTracker
+from .util.hooks import HttpxHooks
 
 # https://developers.cloudflare.com/workers-ai/models/#text-generation
 
@@ -51,7 +51,7 @@ class CloudFlareAPI(ModelAPI):
        if not self.api_key:
            raise environment_prerequisite_error("CloudFlare", CLOUDFLARE_API_TOKEN)
        self.client = httpx.AsyncClient()
-        self._time_tracker = HttpTimeTracker(self.client)
+        self._http_hooks = HttpxHooks(self.client)
        base_url = model_base_url(base_url, "CLOUDFLARE_BASE_URL")
        self.base_url = (
            base_url if base_url else "https://api.cloudflare.com/client/v4/accounts"
@@ -79,7 +79,7 @@
        json["messages"] = chat_api_input(input, tools, self.chat_api_handler())
 
        # request_id
-        request_id = self._time_tracker.start_request()
+        request_id = self._http_hooks.start_request()
 
        # setup response
        response: dict[str, Any] = {}
@@ -88,7 +88,7 @@
            return ModelCall.create(
                request=json,
                response=response,
-                time=self._time_tracker.end_request(request_id),
+                time=self._http_hooks.end_request(request_id),
            )
 
        # make the call
@@ -98,10 +98,9 @@
            url=f"{chat_url}/{self.model_name}",
            headers={
                "Authorization": f"Bearer {self.api_key}",
-                HttpTimeTracker.REQUEST_ID_HEADER: request_id,
+                HttpxHooks.REQUEST_ID_HEADER: request_id,
            },
            json=json,
-            config=config,
        )
 
        # handle response
@@ -127,8 +126,8 @@
            raise RuntimeError(f"Error calling {self.model_name}: {error}")
 
    @override
-    def is_rate_limit(self, ex: Exception) -> bool:
-        return is_chat_api_rate_limit(ex)
+    def should_retry(self, ex: Exception) -> bool:
+        return should_retry_chat_api_error(ex)
 
    # cloudflare enforces rate limits by model for each account
    @override
inspect_ai/model/_providers/goodfire.py
CHANGED
@@ -3,7 +3,11 @@ from typing import Any, List, Literal, get_args
 
 from goodfire import AsyncClient
 from goodfire.api.chat.interfaces import ChatMessage as GoodfireChatMessage
-from goodfire.api.exceptions import InvalidRequestException, RateLimitException
+from goodfire.api.exceptions import (
+    InvalidRequestException,
+    RateLimitException,
+    ServerErrorException,
+)
 from goodfire.variants.variants import SUPPORTED_MODELS, Variant
 from typing_extensions import override
 
@@ -163,9 +167,9 @@ class GoodfireAPI(ModelAPI):
        return ex
 
    @override
-    def is_rate_limit(self, ex: Exception) -> bool:
+    def should_retry(self, ex: Exception) -> bool:
        """Check if exception is due to rate limiting."""
-        return isinstance(ex, RateLimitException)
+        return isinstance(ex, RateLimitException | ServerErrorException)
 
    @override
    def connection_key(self) -> str:
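Across providers the pattern is a mechanical rename: is_rate_limit() is still honored via the getattr() fallback in Model.should_retry() (with a deprecation warning), and overrides typically widen from rate limits alone to all transient errors. A migration sketch for a third-party ModelAPI subclass (MyRateLimitError is hypothetical):

    # before: deprecated hook, still detected at runtime with a warning
    def is_rate_limit(self, ex: BaseException) -> bool:
        return isinstance(ex, MyRateLimitError)

    # after: same predicate under the new hook name
    def should_retry(self, ex: Exception) -> bool:
        return isinstance(ex, MyRateLimitError)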