inspect-ai 0.3.71__py3-none-any.whl → 0.3.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +14 -3
- inspect_ai/_cli/sandbox.py +3 -3
- inspect_ai/_cli/score.py +6 -4
- inspect_ai/_cli/trace.py +53 -6
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +2 -1
- inspect_ai/_display/core/footer.py +6 -6
- inspect_ai/_display/plain/display.py +11 -6
- inspect_ai/_display/rich/display.py +23 -13
- inspect_ai/_display/textual/app.py +10 -9
- inspect_ai/_display/textual/display.py +2 -2
- inspect_ai/_display/textual/widgets/footer.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +14 -5
- inspect_ai/_eval/context.py +1 -2
- inspect_ai/_eval/eval.py +54 -41
- inspect_ai/_eval/loader.py +9 -2
- inspect_ai/_eval/run.py +148 -81
- inspect_ai/_eval/score.py +13 -8
- inspect_ai/_eval/task/images.py +31 -21
- inspect_ai/_eval/task/run.py +62 -59
- inspect_ai/_eval/task/rundir.py +16 -9
- inspect_ai/_eval/task/sandbox.py +7 -8
- inspect_ai/_eval/task/util.py +7 -0
- inspect_ai/_util/_async.py +118 -10
- inspect_ai/_util/constants.py +0 -2
- inspect_ai/_util/file.py +15 -29
- inspect_ai/_util/future.py +37 -0
- inspect_ai/_util/http.py +3 -99
- inspect_ai/_util/httpx.py +60 -0
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/json.py +5 -52
- inspect_ai/_util/logger.py +30 -86
- inspect_ai/_util/retry.py +10 -61
- inspect_ai/_util/trace.py +2 -2
- inspect_ai/_view/server.py +86 -3
- inspect_ai/_view/www/dist/assets/index.js +25837 -13269
- inspect_ai/_view/www/log-schema.json +253 -186
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
- inspect_ai/_view/www/src/types/log.d.ts +122 -94
- inspect_ai/approval/_human/manager.py +6 -10
- inspect_ai/approval/_human/panel.py +2 -2
- inspect_ai/dataset/_sources/util.py +7 -6
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +35 -61
- inspect_ai/log/_log.py +18 -1
- inspect_ai/log/_recorders/eval.py +14 -23
- inspect_ai/log/_recorders/json.py +3 -18
- inspect_ai/log/_samples.py +27 -2
- inspect_ai/log/_transcript.py +8 -8
- inspect_ai/model/__init__.py +2 -1
- inspect_ai/model/_call_tools.py +60 -40
- inspect_ai/model/_chat_message.py +3 -2
- inspect_ai/model/_generate_config.py +25 -0
- inspect_ai/model/_model.py +74 -36
- inspect_ai/model/_openai.py +9 -1
- inspect_ai/model/_providers/anthropic.py +172 -154
- inspect_ai/model/_providers/azureai.py +11 -9
- inspect_ai/model/_providers/bedrock.py +33 -24
- inspect_ai/model/_providers/cloudflare.py +8 -9
- inspect_ai/model/_providers/goodfire.py +7 -3
- inspect_ai/model/_providers/google.py +47 -13
- inspect_ai/model/_providers/groq.py +15 -15
- inspect_ai/model/_providers/hf.py +24 -17
- inspect_ai/model/_providers/mistral.py +36 -20
- inspect_ai/model/_providers/openai.py +30 -25
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +3 -4
- inspect_ai/model/_providers/util/__init__.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +6 -19
- inspect_ai/model/_providers/util/hooks.py +165 -0
- inspect_ai/model/_providers/vertex.py +20 -3
- inspect_ai/model/_providers/vllm.py +16 -19
- inspect_ai/scorer/_multi.py +5 -2
- inspect_ai/solver/_bridge/patch.py +31 -1
- inspect_ai/solver/_fork.py +5 -3
- inspect_ai/solver/_human_agent/agent.py +3 -2
- inspect_ai/tool/__init__.py +8 -2
- inspect_ai/tool/_tool_info.py +4 -90
- inspect_ai/tool/_tool_params.py +4 -34
- inspect_ai/tool/_tools/_computer/_common.py +117 -58
- inspect_ai/tool/_tools/_computer/_computer.py +80 -57
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_computer/test_args.py +151 -0
- inspect_ai/tool/_tools/_web_search.py +30 -24
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_concurrency.py +5 -6
- inspect_ai/util/_display.py +6 -0
- inspect_ai/util/_json.py +170 -0
- inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
- inspect_ai/util/_sandbox/docker/docker.py +5 -0
- inspect_ai/util/_sandbox/environment.py +56 -9
- inspect_ai/util/_sandbox/service.py +12 -5
- inspect_ai/util/_subprocess.py +94 -113
- inspect_ai/util/_subtask.py +2 -4
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +111 -103
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
- inspect_ai/_util/timeouts.py +0 -160
- inspect_ai/model/_providers/util/tracker.py +0 -92
- inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/mistral.py
@@ -7,6 +7,7 @@ from httpcore import ReadTimeout
 from httpx import ReadTimeout as AsyncReadTimeout
 from mistralai import (
     ContentChunk,
+    DocumentURLChunk,
     FunctionCall,
     FunctionName,
     ImageURL,
@@ -22,6 +23,12 @@ from mistralai.models import (
     ChatCompletionChoice as MistralChatCompletionChoice,
 )
 from mistralai.models import Function as MistralFunction
+from mistralai.models import (
+    JSONSchema as MistralJSONSchema,
+)
+from mistralai.models import (
+    ResponseFormat as MistralResponseFormat,
+)
 from mistralai.models import SDKError
 from mistralai.models import SystemMessage as MistralSystemMessage
 from mistralai.models import Tool as MistralTool
@@ -38,11 +45,9 @@ from typing_extensions import override

 # TODO: Migration guide:
 # https://github.com/mistralai/client-python/blob/main/MIGRATION.md
-from inspect_ai._util.constants import (
-    DEFAULT_TIMEOUT,
-    NO_CONTENT,
-)
+from inspect_ai._util.constants import NO_CONTENT
 from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo

@@ -61,7 +66,7 @@ from .._model_output import (
     StopReason,
 )
 from .util import environment_prerequisite_error, model_base_url
-from .util.
+from .util.hooks import HttpxHooks

 AZURE_MISTRAL_API_KEY = "AZURE_MISTRAL_API_KEY"
 AZUREAI_MISTRAL_API_KEY = "AZUREAI_MISTRAL_API_KEY"
@@ -127,16 +132,12 @@ class MistralAPI(ModelAPI):
         config: GenerateConfig,
     ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
         # create client
-        with Mistral(
-            api_key=self.api_key,
-            timeout_ms=(config.timeout if config.timeout else DEFAULT_TIMEOUT) * 1000,
-            **self.model_args,
-        ) as client:
+        with Mistral(api_key=self.api_key, **self.model_args) as client:
             # create time tracker
-
+            http_hooks = HttpxHooks(client.sdk_configuration.async_client)

             # build request
-            request_id =
+            request_id = http_hooks.start_request()
             request: dict[str, Any] = dict(
                 model=self.model_name,
                 messages=await mistral_chat_messages(input),
@@ -144,7 +145,7 @@ class MistralAPI(ModelAPI):
                 tool_choice=(
                     mistral_chat_tool_choice(tool_choice) if len(tools) > 0 else None
                 ),
-                http_headers={
+                http_headers={HttpxHooks.REQUEST_ID_HEADER: request_id},
             )
             if config.temperature is not None:
                 request["temperature"] = config.temperature
@@ -154,6 +155,18 @@ class MistralAPI(ModelAPI):
                 request["max_tokens"] = config.max_tokens
             if config.seed is not None:
                 request["random_seed"] = config.seed
+            if config.response_schema is not None:
+                request["response_format"] = MistralResponseFormat(
+                    type="json_schema",
+                    json_schema=MistralJSONSchema(
+                        name=config.response_schema.name,
+                        description=config.response_schema.description,
+                        schema_definition=config.response_schema.json_schema.model_dump(
+                            exclude_none=True
+                        ),
+                        strict=config.response_schema.strict,
+                    ),
+                )

             # prepare response for inclusion in model call
             response: dict[str, Any] = {}
@@ -169,7 +182,7 @@ class MistralAPI(ModelAPI):
             return ModelCall.create(
                 request=req,
                 response=response,
-                time=
+                time=http_hooks.end_request(request_id),
             )

             # send request
@@ -205,12 +218,13 @@ class MistralAPI(ModelAPI):
             ), model_call()

     @override
-    def
-
-
-
-
-
+    def should_retry(self, ex: Exception) -> bool:
+        if isinstance(ex, SDKError):
+            return is_retryable_http_status(ex.status_code)
+        elif isinstance(ex, ReadTimeout | AsyncReadTimeout):
+            return True
+        else:
+            return False

     @override
     def connection_key(self) -> str:
@@ -462,6 +476,8 @@ def completion_content_chunk(content: ContentChunk) -> Content:
         raise TypeError("ReferenceChunk content is not supported by Inspect.")
     elif isinstance(content, TextChunk):
         return ContentText(text=content.text)
+    elif isinstance(content, DocumentURLChunk):
+        return ContentText(text=content.document_url)
     else:
         if isinstance(content.image_url, str):
             return ContentImage(image=content.image_url)
inspect_ai/model/_providers/openai.py
@@ -7,25 +7,22 @@ import httpx
 from openai import (
     DEFAULT_CONNECTION_LIMITS,
     DEFAULT_TIMEOUT,
-
+    APIStatusError,
     APITimeoutError,
     AsyncAzureOpenAI,
     AsyncOpenAI,
     BadRequestError,
-    InternalServerError,
     RateLimitError,
 )
 from openai._types import NOT_GIVEN
-from openai.types.chat import (
-    ChatCompletion,
-)
+from openai.types.chat import ChatCompletion
 from typing_extensions import override

-from inspect_ai._util.constants import DEFAULT_MAX_RETRIES
 from inspect_ai._util.error import PrerequisiteError
+from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.logger import warn_once
 from inspect_ai.model._openai import chat_choices_from_openai
-from inspect_ai.model._providers.util.
+from inspect_ai.model._providers.util.hooks import HttpxHooks
 from inspect_ai.tool import ToolChoice, ToolInfo

 from .._chat_message import ChatMessage
@@ -130,9 +127,6 @@ class OpenAIAPI(ModelAPI):
                 api_key=self.api_key,
                 azure_endpoint=base_url,
                 azure_deployment=model_name,
-                max_retries=(
-                    config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
-                ),
                 http_client=http_client,
                 **model_args,
             )
@@ -140,15 +134,12 @@ class OpenAIAPI(ModelAPI):
             self.client = AsyncOpenAI(
                 api_key=self.api_key,
                 base_url=model_base_url(base_url, "OPENAI_BASE_URL"),
-                max_retries=(
-                    config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
-                ),
                 http_client=http_client,
                 **model_args,
             )

         # create time tracker
-        self.
+        self._http_hooks = HttpxHooks(self.client._client)

     def is_azure(self) -> bool:
         return self.service == "azure"
@@ -186,7 +177,7 @@ class OpenAIAPI(ModelAPI):
             )

         # allocate request_id (so we can see it from ModelCall)
-        request_id = self.
+        request_id = self._http_hooks.start_request()

         # setup request and response for ModelCall
         request: dict[str, Any] = {}
@@ -197,7 +188,7 @@ class OpenAIAPI(ModelAPI):
                 request=request,
                 response=response,
                 filter=image_url_filter,
-                time=self.
+                time=self._http_hooks.end_request(request_id),
             )

         # unlike text models, vision models require a max_tokens (and set it to a very low
@@ -216,7 +207,7 @@ class OpenAIAPI(ModelAPI):
                 tool_choice=openai_chat_tool_choice(tool_choice)
                 if len(tools) > 0
                 else NOT_GIVEN,
-                extra_headers={
+                extra_headers={HttpxHooks.REQUEST_ID_HEADER: request_id},
                 **self.completion_params(config, len(tools) > 0),
             )

@@ -266,17 +257,21 @@ class OpenAIAPI(ModelAPI):
         return chat_choices_from_openai(response, tools)

     @override
-    def
+    def should_retry(self, ex: Exception) -> bool:
         if isinstance(ex, RateLimitError):
             # Do not retry on these rate limit errors
             # The quota exceeded one is related to monthly account quotas.
-            if "You exceeded your current quota"
+            if "You exceeded your current quota" in ex.message:
+                warn_once(logger, f"OpenAI quota exceeded, not retrying: {ex.message}")
+                return False
+            else:
                 return True
-        elif isinstance(
-
-        ):
+        elif isinstance(ex, APIStatusError):
+            return is_retryable_http_status(ex.status_code)
+        elif isinstance(ex, APITimeoutError):
             return True
-
+        else:
+            return False

     @override
     def connection_key(self) -> str:
@@ -315,8 +310,6 @@ class OpenAIAPI(ModelAPI):
             params["temperature"] = 1
         if config.top_p is not None:
             params["top_p"] = config.top_p
-        if config.timeout is not None:
-            params["timeout"] = float(config.timeout)
         if config.num_choices is not None:
             params["n"] = config.num_choices
         if config.logprobs is not None:
@@ -331,6 +324,18 @@ class OpenAIAPI(ModelAPI):
             and not self.is_o1_mini()
         ):
             params["reasoning_effort"] = config.reasoning_effort
+        if config.response_schema is not None:
+            params["response_format"] = dict(
+                type="json_schema",
+                json_schema=dict(
+                    name=config.response_schema.name,
+                    schema=config.response_schema.json_schema.model_dump(
+                        exclude_none=True
+                    ),
+                    description=config.response_schema.description,
+                    strict=config.response_schema.strict,
+                ),
+            )

         return params

inspect_ai/model/_providers/openai_o1.py
@@ -107,7 +107,7 @@ def chat_messages(
 ) -> list[ChatCompletionMessageParam]:
     # o1 does not allow system messages so convert system -> user
     messages: list[ChatMessage] = [
-        ChatMessageUser(content=message.content)
+        ChatMessageUser(id=message.id, content=message.content)
         if message.role == "system"
         else message
         for message in input
inspect_ai/model/_providers/together.py
@@ -34,8 +34,8 @@ from .util import (
     chat_api_input,
     chat_api_request,
     environment_prerequisite_error,
-    is_chat_api_rate_limit,
     model_base_url,
+    should_retry_chat_api_error,
 )


@@ -186,7 +186,6 @@ class TogetherRESTAPI(ModelAPI):
             url=f"{chat_url}",
             headers={"Authorization": f"Bearer {self.api_key}"},
             json=json,
-            config=config,
         )

         if "error" in response:
@@ -215,8 +214,8 @@ class TogetherRESTAPI(ModelAPI):
         return ModelOutput(model=model, choices=choices, usage=usage)

     @override
-    def
-        return
+    def should_retry(self, ex: Exception) -> bool:
+        return should_retry_chat_api_error(ex)

     # cloudflare enforces rate limits by model for each account
     @override
inspect_ai/model/_providers/util/__init__.py
@@ -5,7 +5,7 @@ from .chatapi import (
     ChatAPIMessage,
     chat_api_input,
     chat_api_request,
-
+    should_retry_chat_api_error,
 )
 from .hf_handler import HFHandler
 from .llama31 import Llama31Handler
@@ -19,7 +19,7 @@ __all__ = [
     "as_stop_reason",
     "chat_api_request",
     "chat_api_input",
-    "
+    "should_retry_chat_api_error",
     "model_base_url",
     "parse_tool_call",
     "tool_parse_error_message",
inspect_ai/model/_providers/util/chatapi.py
@@ -7,17 +7,15 @@ from tenacity import (
     retry,
     retry_if_exception,
     stop_after_attempt,
-    stop_after_delay,
     wait_exponential_jitter,
 )

-from inspect_ai._util.
-from inspect_ai._util.
+from inspect_ai._util.http import is_retryable_http_status
+from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
 from inspect_ai.model._chat_message import ChatMessageAssistant, ChatMessageTool
 from inspect_ai.tool._tool_info import ToolInfo

 from ..._chat_message import ChatMessage
-from ..._generate_config import GenerateConfig

 logger = getLogger(__name__)

@@ -75,21 +73,13 @@ async def chat_api_request(
     url: str,
     headers: dict[str, Any],
     json: Any,
-    config: GenerateConfig,
 ) -> Any:
-    # provide default max_retries
-    max_retries = config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
-
     # define call w/ retry policy
     @retry(
         wait=wait_exponential_jitter(),
-        stop=(
-            (stop_after_attempt(max_retries) | stop_after_delay(config.timeout))
-            if config.timeout
-            else stop_after_attempt(max_retries)
-        ),
+        stop=(stop_after_attempt(2)),
         retry=retry_if_exception(httpx_should_retry),
-        before_sleep=
+        before_sleep=log_httpx_retry_attempt(model_name),
     )
     async def call_api() -> Any:
         response = await client.post(url=url, headers=headers, json=json)
@@ -104,14 +94,11 @@ async def chat_api_request(
 # checking for rate limit errors needs to punch through the RetryError and
 # look at its `__cause__`. we've observed Cloudflare giving transient 500
 # status as well as a ReadTimeout, so we count these as rate limit errors
-def
+def should_retry_chat_api_error(ex: BaseException) -> bool:
     return isinstance(ex, RetryError) and (
         (
             isinstance(ex.__cause__, httpx.HTTPStatusError)
-            and (
-                ex.__cause__.response.status_code == 429
-                or ex.__cause__.response.status_code == 500
-            )
+            and is_retryable_http_status(ex.__cause__.response.status_code)
         )
         or isinstance(ex.__cause__, httpx.ReadTimeout)
     )
inspect_ai/model/_providers/util/hooks.py
@@ -0,0 +1,165 @@
+import re
+import time
+from logging import getLogger
+from typing import Any, Mapping, NamedTuple, cast
+
+import httpx
+from shortuuid import uuid
+
+from inspect_ai._util.constants import HTTP
+from inspect_ai._util.retry import report_http_retry
+
+logger = getLogger(__name__)
+
+
+class RequestInfo(NamedTuple):
+    attempts: int
+    last_request: float
+
+
+class HttpHooks:
+    """Class which hooks various HTTP clients for improved tracking/logging.
+
+    A special header is injected into requests which is then read from
+    a request event hook -- this creates a record of when the request
+    started. Note that with retries a single request_id could be started
+    several times; our request hook makes sure we always track the time of
+    the last request.
+
+    There is an 'end_request()' method which gets the total request time
+    for a request_id and then purges the request_id from our tracking (so
+    the dict doesn't grow unbounded)
+
+    Additionally, an http response hook is installed and used for logging
+    requests for the 'http' log-level
+    """
+
+    REQUEST_ID_HEADER = "x-irid"
+
+    def __init__(self) -> None:
+        # track request start times
+        self._requests: dict[str, RequestInfo] = {}
+
+    def start_request(self) -> str:
+        request_id = uuid()
+        self._requests[request_id] = RequestInfo(0, time.monotonic())
+        return request_id
+
+    def end_request(self, request_id: str) -> float:
+        # read the request info (if available) and purge from dict
+        request_info = self._requests.pop(request_id, None)
+        if request_info is None:
+            raise RuntimeError(f"request_id not registered: {request_id}")
+
+        # return elapsed time
+        return time.monotonic() - request_info.last_request
+
+    def update_request_time(self, request_id: str) -> None:
+        request_info = self._requests.get(request_id, None)
+        if not request_info:
+            raise RuntimeError(f"No request registered for request_id: {request_id}")
+
+        # update the attempts and last request time
+        request_info = RequestInfo(request_info.attempts + 1, time.monotonic())
+        self._requests[request_id] = request_info
+
+        # trace a retry if this is attempt > 1
+        if request_info.attempts > 1:
+            report_http_retry()
+
+
+class ConverseHooks(HttpHooks):
+    def __init__(self, session: Any) -> None:
+        from aiobotocore.session import AioSession
+
+        super().__init__()
+
+        # register hooks
+        session = cast(AioSession, session._session)
+        session.register(
+            "before-send.bedrock-runtime.Converse", self.converse_before_send
+        )
+        session.register(
+            "after-call.bedrock-runtime.Converse", self.converse_after_call
+        )
+
+    def converse_before_send(self, **kwargs: Any) -> None:
+        user_agent = kwargs["request"].headers["User-Agent"].decode()
+        match = re.search(rf"{self.USER_AGENT_PREFIX}(\w+)", user_agent)
+        if match:
+            request_id = match.group(1)
+            self.update_request_time(request_id)
+
+    def converse_after_call(self, http_response: Any, **kwargs: Any) -> None:
+        from botocore.awsrequest import AWSResponse
+
+        response = cast(AWSResponse, http_response)
+        logger.log(HTTP, f"POST {response.url} - {response.status_code}")
+
+    def user_agent_extra(self, request_id: str) -> str:
+        return f"{self.USER_AGENT_PREFIX}{request_id}"
+
+    USER_AGENT_PREFIX = "ins/rid#"
+
+
+class HttpxHooks(HttpHooks):
+    def __init__(self, client: httpx.AsyncClient):
+        super().__init__()
+
+        # install hooks
+        client.event_hooks["request"].append(self.request_hook)
+        client.event_hooks["response"].append(self.response_hook)
+
+    async def request_hook(self, request: httpx.Request) -> None:
+        # update the last request time for this request id (as there could be retries)
+        request_id = request.headers.get(self.REQUEST_ID_HEADER, None)
+        if request_id:
+            self.update_request_time(request_id)
+
+    async def response_hook(self, response: httpx.Response) -> None:
+        message = f'{response.request.method} {response.request.url} "{response.http_version} {response.status_code} {response.reason_phrase}" '
+        logger.log(HTTP, message)
+
+
+def urllib3_hooks() -> HttpHooks:
+    import urllib3
+    from urllib3.connectionpool import HTTPConnectionPool
+    from urllib3.response import BaseHTTPResponse
+
+    class Urllib3Hooks(HttpHooks):
+        def request_hook(self, headers: Mapping[str, str]) -> None:
+            # update the last request time for this request id (as there could be retries)
+            request_id = headers.get(self.REQUEST_ID_HEADER, None)
+            if request_id:
+                self.update_request_time(request_id)
+
+        def response_hook(
+            self, method: str, url: str, response: BaseHTTPResponse
+        ) -> None:
+            message = f'{method} {url} "{response.version_string} {response.status} {response.reason}" '
+            logger.log(HTTP, message)
+
+    global _urlilb3_hooks
+    if _urlilb3_hooks is None:
+        # one time patch of urlopen
+        urlilb3_hooks = Urllib3Hooks()
+        original_urlopen = urllib3.connectionpool.HTTPConnectionPool.urlopen
+
+        def patched_urlopen(
+            self: HTTPConnectionPool, method: str, url: str, **kwargs: Any
+        ) -> BaseHTTPResponse:
+            headers = kwargs.get("headers", {})
+            urlilb3_hooks.request_hook(headers)
+            response = original_urlopen(self, method, url, **kwargs)
+            urlilb3_hooks.response_hook(method, f"{self.host}{url}", response)
+            return response
+
+        urllib3.connectionpool.HTTPConnectionPool.urlopen = patched_urlopen  # type: ignore[assignment,method-assign]
+
+        # assign to global hooks instance
+        _urlilb3_hooks = urlilb3_hooks
+
+    return _urlilb3_hooks
+
+
+_urlilb3_hooks: HttpHooks | None = None
inspect_ai/model/_providers/vertex.py
@@ -4,7 +4,13 @@ from copy import copy
 from typing import Any, cast

 import vertexai  # type: ignore
-from google.api_core.exceptions import
+from google.api_core.exceptions import (
+    Aborted,
+    ClientError,
+    DeadlineExceeded,
+    ServiceUnavailable,
+)
+from google.api_core.retry import if_transient_error
 from google.protobuf.json_format import MessageToDict
 from pydantic import JsonValue
 from typing_extensions import override
@@ -31,6 +37,7 @@ from inspect_ai._util.content import (
     ContentText,
     ContentVideo,
 )
+from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data
 from inspect_ai.tool import ToolCall, ToolChoice, ToolInfo

@@ -169,8 +176,18 @@ class VertexAPI(ModelAPI):
         return output, call

     @override
-    def
-
+    def should_retry(self, ex: Exception) -> bool:
+        # google API-specific errors
+        if isinstance(ex, Aborted | DeadlineExceeded | ServiceUnavailable):
+            return True
+        # standard HTTP errors
+        elif isinstance(ex, ClientError) and ex.code is not None:
+            return is_retryable_http_status(ex.code)
+        # additional errors flagged by google as transient
+        elif isinstance(ex, Exception):
+            return if_transient_error(ex)
+        else:
+            return False

     @override
     def connection_key(self) -> str:
inspect_ai/model/_providers/vllm.py
@@ -1,13 +1,15 @@
-import
+import concurrent.futures
 import functools
 import gc
 import os
 import time
+from concurrent.futures import Future
 from dataclasses import dataclass
 from queue import Empty, Queue
 from threading import Thread
 from typing import Any, cast

+import anyio
 from typing_extensions import override
 from vllm import LLM, CompletionOutput, RequestOutput, SamplingParams  # type: ignore

@@ -280,8 +282,7 @@ class VLLMAPI(ModelAPI):
 @dataclass
 class _QueueItem:
     input: GenerateInput
-    future:
-    loop: asyncio.AbstractEventLoop
+    future: Future[list[GenerateOutput]]


 batch_thread: Thread | None = None
@@ -297,15 +298,16 @@ async def batched_generate(input: GenerateInput) -> list[GenerateOutput]:
         batch_thread.start()

     # enqueue the job
-
-
-    batch_queue.put(_QueueItem(input=input, future=future, loop=loop))
+    future = Future[list[GenerateOutput]]()
+    batch_queue.put(_QueueItem(input=input, future=future))

-    # await the
-
-
-
-
+    # await the future
+    while True:
+        try:
+            return future.result(timeout=0.01)
+        except concurrent.futures.TimeoutError:
+            pass
+        await anyio.sleep(1)


 def string_to_bytes(string: str) -> list[int]:
@@ -397,13 +399,12 @@ def post_process_outputs(
 def process_batches() -> None:
     while True:
         # drain the queue (wait until no new messages have shown up for 2 seconds)
-        inputs: list[tuple[GenerateInput,
+        inputs: list[tuple[GenerateInput, Future[list[GenerateOutput]]]] = []
         while True:
            try:
                 input = batch_queue.get(
                     timeout=2
                 )  # wait 2 seconds max TODO: what's optimal wait time?
-                loop = input.loop
                 inputs.append((input.input, input.future))
                 if len(inputs) >= input.input.batch_size:
                     # max batch size reached
@@ -429,14 +430,10 @@ def process_batches() -> None:
             for i, output in enumerate(outputs):
                 future = inputs[i][1]

-
-                # down to this point, so we can mark the future as done in a thread safe manner.
-                # see: https://docs.python.org/3/library/asyncio-dev.html#concurrency-and-multithreading
-                loop.call_soon_threadsafe(
-                    future.set_result,
+                future.set_result(
                     post_process_outputs(output, num_top_logprobs, total_time),
                 )

         except Exception as e:
             for _, future in inputs:
-
+                future.set_exception(e)