inspect-ai 0.3.71__py3-none-any.whl → 0.3.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +14 -3
- inspect_ai/_cli/sandbox.py +3 -3
- inspect_ai/_cli/score.py +6 -4
- inspect_ai/_cli/trace.py +53 -6
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +2 -1
- inspect_ai/_display/core/footer.py +6 -6
- inspect_ai/_display/plain/display.py +11 -6
- inspect_ai/_display/rich/display.py +23 -13
- inspect_ai/_display/textual/app.py +10 -9
- inspect_ai/_display/textual/display.py +2 -2
- inspect_ai/_display/textual/widgets/footer.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +14 -5
- inspect_ai/_eval/context.py +1 -2
- inspect_ai/_eval/eval.py +54 -41
- inspect_ai/_eval/loader.py +9 -2
- inspect_ai/_eval/run.py +148 -81
- inspect_ai/_eval/score.py +13 -8
- inspect_ai/_eval/task/images.py +31 -21
- inspect_ai/_eval/task/run.py +62 -59
- inspect_ai/_eval/task/rundir.py +16 -9
- inspect_ai/_eval/task/sandbox.py +7 -8
- inspect_ai/_eval/task/util.py +7 -0
- inspect_ai/_util/_async.py +118 -10
- inspect_ai/_util/constants.py +0 -2
- inspect_ai/_util/file.py +15 -29
- inspect_ai/_util/future.py +37 -0
- inspect_ai/_util/http.py +3 -99
- inspect_ai/_util/httpx.py +60 -0
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/json.py +5 -52
- inspect_ai/_util/logger.py +30 -86
- inspect_ai/_util/retry.py +10 -61
- inspect_ai/_util/trace.py +2 -2
- inspect_ai/_view/server.py +86 -3
- inspect_ai/_view/www/dist/assets/index.js +25837 -13269
- inspect_ai/_view/www/log-schema.json +253 -186
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
- inspect_ai/_view/www/src/types/log.d.ts +122 -94
- inspect_ai/approval/_human/manager.py +6 -10
- inspect_ai/approval/_human/panel.py +2 -2
- inspect_ai/dataset/_sources/util.py +7 -6
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +35 -61
- inspect_ai/log/_log.py +18 -1
- inspect_ai/log/_recorders/eval.py +14 -23
- inspect_ai/log/_recorders/json.py +3 -18
- inspect_ai/log/_samples.py +27 -2
- inspect_ai/log/_transcript.py +8 -8
- inspect_ai/model/__init__.py +2 -1
- inspect_ai/model/_call_tools.py +60 -40
- inspect_ai/model/_chat_message.py +3 -2
- inspect_ai/model/_generate_config.py +25 -0
- inspect_ai/model/_model.py +74 -36
- inspect_ai/model/_openai.py +9 -1
- inspect_ai/model/_providers/anthropic.py +172 -154
- inspect_ai/model/_providers/azureai.py +11 -9
- inspect_ai/model/_providers/bedrock.py +33 -24
- inspect_ai/model/_providers/cloudflare.py +8 -9
- inspect_ai/model/_providers/goodfire.py +7 -3
- inspect_ai/model/_providers/google.py +47 -13
- inspect_ai/model/_providers/groq.py +15 -15
- inspect_ai/model/_providers/hf.py +24 -17
- inspect_ai/model/_providers/mistral.py +36 -20
- inspect_ai/model/_providers/openai.py +30 -25
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +3 -4
- inspect_ai/model/_providers/util/__init__.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +6 -19
- inspect_ai/model/_providers/util/hooks.py +165 -0
- inspect_ai/model/_providers/vertex.py +20 -3
- inspect_ai/model/_providers/vllm.py +16 -19
- inspect_ai/scorer/_multi.py +5 -2
- inspect_ai/solver/_bridge/patch.py +31 -1
- inspect_ai/solver/_fork.py +5 -3
- inspect_ai/solver/_human_agent/agent.py +3 -2
- inspect_ai/tool/__init__.py +8 -2
- inspect_ai/tool/_tool_info.py +4 -90
- inspect_ai/tool/_tool_params.py +4 -34
- inspect_ai/tool/_tools/_computer/_common.py +117 -58
- inspect_ai/tool/_tools/_computer/_computer.py +80 -57
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_computer/test_args.py +151 -0
- inspect_ai/tool/_tools/_web_search.py +30 -24
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_concurrency.py +5 -6
- inspect_ai/util/_display.py +6 -0
- inspect_ai/util/_json.py +170 -0
- inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
- inspect_ai/util/_sandbox/docker/docker.py +5 -0
- inspect_ai/util/_sandbox/environment.py +56 -9
- inspect_ai/util/_sandbox/service.py +12 -5
- inspect_ai/util/_subprocess.py +94 -113
- inspect_ai/util/_subtask.py +2 -4
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +111 -103
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
- inspect_ai/_util/timeouts.py +0 -160
- inspect_ai/model/_providers/util/tracker.py +0 -92
- inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
inspect_ai/model/_providers/azureai.py (deleted lines truncated by the page capture are marked `…`):

```diff
@@ -27,11 +27,16 @@ from azure.ai.inference.models import (
     UserMessage,
 )
 from azure.core.credentials import AzureKeyCredential
-from azure.core.exceptions import …
+from azure.core.exceptions import (
+    AzureError,
+    HttpResponseError,
+    ServiceResponseError,
+)
 from typing_extensions import override
 
 from inspect_ai._util.constants import DEFAULT_MAX_TOKENS
 from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai.tool import ToolChoice, ToolInfo
 from inspect_ai.tool._tool_call import ToolCall
@@ -232,14 +237,11 @@ class AzureAIAPI(ModelAPI):
         return DEFAULT_MAX_TOKENS
 
     @override
-    def …
-        if isinstance(ex, HttpResponseError):
-            return (
-                …
-                …
-                or ex.status_code == 429
-                or ex.status_code == 500
-            )
+    def should_retry(self, ex: Exception) -> bool:
+        if isinstance(ex, HttpResponseError) and ex.status_code is not None:
+            return is_retryable_http_status(ex.status_code)
+        elif isinstance(ex, ServiceResponseError):
+            return True
         else:
             return False
 
```
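Both hunks show the pattern repeated across the providers below: ad hoc inline status-code checks are replaced by a `should_retry` override that delegates to a shared `is_retryable_http_status` predicate in `inspect_ai/_util/http.py`. A minimal sketch of what such a predicate looks like (the status set here is an assumption; the real helper may differ):

```python
# Hypothetical sketch of a shared retry predicate; the actual
# is_retryable_http_status in inspect_ai/_util/http.py may use a
# different set of status codes.
RETRYABLE_HTTP_STATUSES = {408, 429, 500, 502, 503, 504}


def is_retryable_http_status(status_code: int) -> bool:
    """Return True for transient HTTP errors that are worth retrying."""
    return status_code in RETRYABLE_HTTP_STATUSES
```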
inspect_ai/model/_providers/bedrock.py:

```diff
@@ -1,16 +1,14 @@
 import base64
+from logging import getLogger
 from typing import Any, Literal, Tuple, Union, cast
 
 from pydantic import BaseModel, Field
 from typing_extensions import override
 
-from inspect_ai._util.constants import (
-    …
-    DEFAULT_MAX_TOKENS,
-    DEFAULT_TIMEOUT,
-)
+from inspect_ai._util._async import current_async_backend
+from inspect_ai._util.constants import DEFAULT_MAX_TOKENS
 from inspect_ai._util.content import Content, ContentImage, ContentText
-from inspect_ai._util.error import pip_dependency_error
+from inspect_ai._util.error import PrerequisiteError, pip_dependency_error
 from inspect_ai._util.images import file_as_data
 from inspect_ai._util.version import verify_required_version
 from inspect_ai.tool import ToolChoice, ToolInfo
@@ -31,7 +29,9 @@ from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage
 from .util import (
     model_base_url,
 )
-from .util.…
+from .util.hooks import ConverseHooks
+
+logger = getLogger(__name__)
 
 # Model for Bedrock Converse API (Response)
 # generated from: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#converse
@@ -245,6 +245,12 @@ class BedrockAPI(ModelAPI):
             config=config,
         )
 
+        # raise if we are using trio
+        if current_async_backend() == "trio":
+            raise PrerequisiteError(
+                "ERROR: The bedrock provider does not work with the trio async backend."
+            )
+
         # save model_args
         self.model_args = model_args
 
@@ -258,7 +264,7 @@ class BedrockAPI(ModelAPI):
             self.session = aioboto3.Session()
 
             # create time tracker
-            self.…
+            self._http_hooks = ConverseHooks(self.session)
 
         except ImportError:
             raise pip_dependency_error("Bedrock API", ["aioboto3"])
@@ -288,15 +294,25 @@ class BedrockAPI(ModelAPI):
         return DEFAULT_MAX_TOKENS
 
     @override
-    def …
+    def should_retry(self, ex: Exception) -> bool:
         from botocore.exceptions import ClientError
 
         # Look for an explicit throttle exception
         if isinstance(ex, ClientError):
-            …
-            …
-            …
-            …
+            error_code = ex.response.get("Error", {}).get("Code", "")
+            return error_code in [
+                "ThrottlingException",
+                "RequestLimitExceeded",
+                "Throttling",
+                "RequestThrottled",
+                "TooManyRequestsException",
+                "ProvisionedThroughputExceededException",
+                "TransactionInProgressException",
+                "RequestTimeout",
+                "ServiceUnavailable",
+            ]
+        else:
+            return False
 
     @override
     def collapse_user_messages(self) -> bool:
@@ -317,20 +333,13 @@ class BedrockAPI(ModelAPI):
         from botocore.exceptions import ClientError
 
         # The bedrock client
-        request_id = self.…
+        request_id = self._http_hooks.start_request()
         async with self.session.client(  # type: ignore[call-overload]
             service_name="bedrock-runtime",
             endpoint_url=self.base_url,
             config=Config(
-                …
-                …
-                retries=dict(
-                    max_attempts=config.max_retries
-                    if config.max_retries
-                    else DEFAULT_MAX_RETRIES,
-                    mode="adaptive",
-                ),
-                user_agent_extra=self._time_tracker.user_agent_extra(request_id),
+                retries=dict(mode="adaptive"),
+                user_agent_extra=self._http_hooks.user_agent_extra(request_id),
             ),
             **self.model_args,
         ) as client:
@@ -370,7 +379,7 @@ class BedrockAPI(ModelAPI):
                 request.model_dump(exclude_none=True)
             ),
             response=response,
-            time=self.…
+            time=self._http_hooks.end_request(request_id),
        )
 
         try:
```
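Because boto3 manages its own connection layer, `ConverseHooks` cannot use httpx hooks like the other providers; the diff instead threads a request id through `user_agent_extra` so the request can be recognized when it actually goes on the wire. A rough sketch of that mechanism, with the class internals and marker string assumed (only `session.events.register` and the `before-send` event are documented botocore APIs):

```python
# Sketch (assumed internals): time Bedrock requests by carrying a request
# id in the User-Agent suffix and matching it in botocore's event system.
import time
import uuid

import boto3


class ConverseHooksSketch:
    USER_AGENT_MARKER = "x-irid-"  # hypothetical marker string

    def __init__(self, session: boto3.Session) -> None:
        self._start: dict[str, float] = {}
        # "before-send" is a documented botocore event; the wildcard
        # matches every bedrock-runtime operation
        session.events.register("before-send.bedrock-runtime.*", self._before_send)

    def start_request(self) -> str:
        request_id = uuid.uuid4().hex
        self._start[request_id] = time.monotonic()
        return request_id

    def user_agent_extra(self, request_id: str) -> str:
        # passed by the caller to botocore Config(user_agent_extra=...)
        return f"{self.USER_AGENT_MARKER}{request_id}"

    def _before_send(self, request, **kwargs):
        # restart the clock when the request is actually sent, so local
        # queueing and botocore backoff are not counted as model time
        user_agent = str(request.headers.get("User-Agent", ""))
        for request_id in self._start:
            if self.USER_AGENT_MARKER + request_id in user_agent:
                self._start[request_id] = time.monotonic()

    def end_request(self, request_id: str) -> float:
        return time.monotonic() - self._start.pop(request_id)
```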
inspect_ai/model/_providers/cloudflare.py:

```diff
@@ -16,10 +16,10 @@ from .util import (
     chat_api_input,
     chat_api_request,
     environment_prerequisite_error,
-    is_chat_api_rate_limit,
     model_base_url,
+    should_retry_chat_api_error,
 )
-from .util.…
+from .util.hooks import HttpxHooks
 
 # https://developers.cloudflare.com/workers-ai/models/#text-generation
 
@@ -51,7 +51,7 @@ class CloudFlareAPI(ModelAPI):
         if not self.api_key:
             raise environment_prerequisite_error("CloudFlare", CLOUDFLARE_API_TOKEN)
         self.client = httpx.AsyncClient()
-        self.…
+        self._http_hooks = HttpxHooks(self.client)
         base_url = model_base_url(base_url, "CLOUDFLARE_BASE_URL")
         self.base_url = (
             base_url if base_url else "https://api.cloudflare.com/client/v4/accounts"
@@ -79,7 +79,7 @@ class CloudFlareAPI(ModelAPI):
         json["messages"] = chat_api_input(input, tools, self.chat_api_handler())
 
         # request_id
-        request_id = self.…
+        request_id = self._http_hooks.start_request()
 
         # setup response
         response: dict[str, Any] = {}
@@ -88,7 +88,7 @@ class CloudFlareAPI(ModelAPI):
             return ModelCall.create(
                 request=json,
                 response=response,
-                time=self.…
+                time=self._http_hooks.end_request(request_id),
             )
 
         # make the call
@@ -98,10 +98,9 @@ class CloudFlareAPI(ModelAPI):
             url=f"{chat_url}/{self.model_name}",
             headers={
                 "Authorization": f"Bearer {self.api_key}",
-                …
+                HttpxHooks.REQUEST_ID_HEADER: request_id,
             },
             json=json,
-            config=config,
         )
 
         # handle response
@@ -127,8 +126,8 @@ class CloudFlareAPI(ModelAPI):
             raise RuntimeError(f"Error calling {self.model_name}: {error}")
 
     @override
-    def …
-        return …
+    def should_retry(self, ex: Exception) -> bool:
+        return should_retry_chat_api_error(ex)
 
     # cloudflare enforces rate limits by model for each account
     @override
```
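For httpx-based providers (CloudFlare here, Groq below), `HttpxHooks` can lean on httpx's documented `event_hooks` extension point: the caller stamps each request with an id header, and a request hook restarts the timer when the request is actually sent. A standalone sketch, with the header name and internals assumed:

```python
import time
import uuid

import httpx


class HttpxHooksSketch:
    REQUEST_ID_HEADER = "x-request-id"  # hypothetical header name

    def __init__(self, client: httpx.AsyncClient) -> None:
        self._start: dict[str, float] = {}
        # event_hooks is httpx's documented extension point; hooks
        # registered on an AsyncClient must be async callables
        client.event_hooks["request"].append(self._on_request)

    def start_request(self) -> str:
        request_id = uuid.uuid4().hex
        self._start[request_id] = time.monotonic()
        return request_id

    async def _on_request(self, request: httpx.Request) -> None:
        # restart the clock at actual send time (excludes local queueing)
        request_id = request.headers.get(self.REQUEST_ID_HEADER)
        if request_id in self._start:
            self._start[request_id] = time.monotonic()

    def end_request(self, request_id: str) -> float:
        return time.monotonic() - self._start.pop(request_id)
```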
inspect_ai/model/_providers/goodfire.py:

```diff
@@ -3,7 +3,11 @@ from typing import Any, List, Literal, get_args
 
 from goodfire import AsyncClient
 from goodfire.api.chat.interfaces import ChatMessage as GoodfireChatMessage
-from goodfire.api.exceptions import …
+from goodfire.api.exceptions import (
+    InvalidRequestException,
+    RateLimitException,
+    ServerErrorException,
+)
 from goodfire.variants.variants import SUPPORTED_MODELS, Variant
 from typing_extensions import override
 
@@ -163,9 +167,9 @@ class GoodfireAPI(ModelAPI):
         return ex
 
     @override
-    def …
+    def should_retry(self, ex: Exception) -> bool:
         """Check if exception is due to rate limiting."""
-        return isinstance(ex, RateLimitException)
+        return isinstance(ex, RateLimitException | ServerErrorException)
 
     @override
     def connection_key(self) -> str:
```
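The broadened check relies on Python 3.10+ accepting a PEP 604 union directly as the second argument to `isinstance`; it behaves exactly like the tuple form:

```python
# Stand-ins for the goodfire exception classes, for illustration only.
class RateLimitException(Exception): ...
class ServerErrorException(Exception): ...

ex: Exception = ServerErrorException()
assert isinstance(ex, RateLimitException | ServerErrorException)   # union form
assert isinstance(ex, (RateLimitException, ServerErrorException))  # tuple form
```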
inspect_ai/model/_providers/google.py:

```diff
@@ -1,4 +1,3 @@
-import asyncio
 import functools
 import hashlib
 import json
@@ -9,6 +8,7 @@ from logging import getLogger
 from typing import Any
 
 # SDK Docs: https://googleapis.github.io/python-genai/
+import anyio
 from google.genai import Client  # type: ignore
 from google.genai.errors import APIError, ClientError  # type: ignore
 from google.genai.types import (  # type: ignore
@@ -26,6 +26,7 @@ from google.genai.types import (  # type: ignore
     GenerationConfig,
     HarmBlockThreshold,
     HarmCategory,
+    HttpOptions,
     Part,
     SafetySetting,
     SafetySettingDict,
@@ -49,6 +50,7 @@ from inspect_ai._util.content import (
     ContentVideo,
 )
 from inspect_ai._util.error import PrerequisiteError
+from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data
 from inspect_ai._util.kvstore import inspect_kvstore
 from inspect_ai._util.trace import trace_message
@@ -69,6 +71,7 @@ from inspect_ai.model import (
 )
 from inspect_ai.model._model_call import ModelCall
 from inspect_ai.model._providers.util import model_base_url
+from inspect_ai.model._providers.util.hooks import HttpHooks, urllib3_hooks
 from inspect_ai.tool import (
     ToolCall,
     ToolChoice,
@@ -199,11 +202,15 @@ class GoogleGenAIAPI(ModelAPI):
         tool_choice: ToolChoice,
         config: GenerateConfig,
     ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
+        # generate request_id
+        request_id = urllib3_hooks().start_request()
+
         # Create google-genai types.
         gemini_contents = await as_chat_messages(self.client, input)
         gemini_tools = chat_tools(tools) if len(tools) > 0 else None
         gemini_tool_config = chat_tool_config(tool_choice) if len(tools) > 0 else None
         parameters = GenerateContentConfig(
+            http_options=HttpOptions(headers={HttpHooks.REQUEST_ID_HEADER: request_id}),
             temperature=config.temperature,
             top_p=config.top_p,
             top_k=config.top_k,
@@ -219,6 +226,11 @@ class GoogleGenAIAPI(ModelAPI):
                 self.client, input
             ),
         )
+        if config.response_schema is not None:
+            parameters.response_mime_type = "application/json"
+            parameters.response_schema = schema_from_param(
+                config.response_schema.json_schema, nullable=None
+            )
 
         response: GenerateContentResponse | None = None
 
@@ -230,10 +242,9 @@ class GoogleGenAIAPI(ModelAPI):
                 tools=gemini_tools,
                 tool_config=gemini_tool_config,
                 response=response,
+                time=urllib3_hooks().end_request(request_id),
             )
 
-        # TODO: would need to monkey patch AuthorizedSession.request
-
         try:
             response = await self.client.aio.models.generate_content(
                 model=self.model_name,
@@ -252,11 +263,25 @@ class GoogleGenAIAPI(ModelAPI):
             return output, model_call()
 
     @override
-    def …
-        # …
-        …
-        …
-        )
+    def should_retry(self, ex: Exception) -> bool:
+        import requests  # type: ignore
+
+        # standard http errors
+        if isinstance(ex, APIError):
+            return is_retryable_http_status(ex.status)
+
+        # low-level requests exceptions
+        elif isinstance(ex, requests.exceptions.RequestException):
+            return isinstance(
+                ex,
+                (
+                    requests.exceptions.ConnectionError
+                    | requests.exceptions.ConnectTimeout
+                    | requests.exceptions.ChunkedEncodingError
+                ),
+            )
+        else:
+            return False
 
     @override
     def connection_key(self) -> str:
@@ -296,6 +321,7 @@ def build_model_call(
     tools: list[Tool] | None,
     tool_config: ToolConfig | None,
     response: GenerateContentResponse | None,
+    time: float | None,
 ) -> ModelCall:
     return ModelCall.create(
         request=dict(
@@ -307,6 +333,7 @@ def build_model_call(
         ),
         response=response if response is not None else {},
         filter=model_call_filter,
+        time=time,
     )
 
 
@@ -464,7 +491,9 @@ def chat_tools(tools: list[ToolInfo]) -> list[Tool]:
 
 
 # https://ai.google.dev/gemini-api/tutorials/extract_structured_data#define_the_schema
-def schema_from_param(…
+def schema_from_param(
+    param: ToolParam | ToolParams, nullable: bool | None = False
+) -> Schema:
     if isinstance(param, ToolParams):
         param = ToolParam(
             type=param.type, properties=param.properties, required=param.required
@@ -529,10 +558,13 @@ def chat_tool_config(tool_choice: ToolChoice) -> ToolConfig:
 
 
 def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoice:
-    # check for completion text
-    content = ""
     # content can be None when the finish_reason is SAFETY
-    if candidate.content is …
+    if candidate.content is None:
+        content = ""
+    # content.parts can be None when the finish_reason is MALFORMED_FUNCTION_CALL
+    elif candidate.content.parts is None:
+        content = ""
+    else:
         content = " ".join(
             [
                 part.text
@@ -680,6 +712,8 @@ def finish_reason_to_stop_reason(finish_reason: FinishReason) -> StopReason:
         ):
             return "content_filter"
         case _:
+            # Note: to avoid adding another option to StopReason,
+            # this includes FinishReason.MALFORMED_FUNCTION_CALL
            return "unknown"
 
 
@@ -775,7 +809,7 @@ async def file_for_content(
             file=BytesIO(content_bytes), config=dict(mime_type=mime_type)
         )
         while upload.state.name == "PROCESSING":
-            await asyncio.sleep(3)
+            await anyio.sleep(3)
             upload = client.files.get(name=upload.name)
         if upload.state.name == "FAILED":
             trace(f"Failed to upload file '{upload.name}: {upload.error}")
```
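The `asyncio.sleep` → `anyio.sleep` swap (like the `current_async_backend()` guard added to bedrock.py) lines up with this release supporting trio as well as asyncio: anyio primitives suspend correctly under whichever backend is driving the event loop. A small illustration:

```python
import anyio


async def poll() -> str:
    # anyio.sleep works under asyncio and trio alike; asyncio.sleep
    # would fail with no asyncio event loop running (e.g. under trio)
    await anyio.sleep(0.1)
    return "done"


print(anyio.run(poll))                  # default asyncio backend
print(anyio.run(poll, backend="trio"))  # same coroutine on trio (requires trio)
```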
inspect_ai/model/_providers/groq.py:

```diff
@@ -5,8 +5,9 @@ from typing import Any, Dict, Iterable, List, Optional
 
 import httpx
 from groq import (
+    APIStatusError,
+    APITimeoutError,
     AsyncGroq,
-    RateLimitError,
 )
 from groq.types.chat import (
     ChatCompletion,
@@ -25,10 +26,10 @@ from typing_extensions import override
 
 from inspect_ai._util.constants import (
     BASE_64_DATA_REMOVED,
-    DEFAULT_MAX_RETRIES,
     DEFAULT_MAX_TOKENS,
 )
 from inspect_ai._util.content import Content, ContentReasoning, ContentText
+from inspect_ai._util.http import is_retryable_http_status
 from inspect_ai._util.images import file_as_data_uri
 from inspect_ai._util.url import is_http_url
 from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
@@ -54,7 +55,7 @@ from .util import (
     environment_prerequisite_error,
     model_base_url,
 )
-from .util.…
+from .util.hooks import HttpxHooks
 
 GROQ_API_KEY = "GROQ_API_KEY"
 
@@ -84,18 +85,12 @@ class GroqAPI(ModelAPI):
         self.client = AsyncGroq(
             api_key=self.api_key,
             base_url=model_base_url(base_url, "GROQ_BASE_URL"),
-            max_retries=(
-                config.max_retries
-                if config.max_retries is not None
-                else DEFAULT_MAX_RETRIES
-            ),
-            timeout=config.timeout if config.timeout is not None else 60.0,
             **model_args,
             http_client=httpx.AsyncClient(limits=httpx.Limits(max_connections=None)),
         )
 
         # create time tracker
-        self.…
+        self._http_hooks = HttpxHooks(self.client._client)
 
     @override
     async def close(self) -> None:
@@ -109,7 +104,7 @@ class GroqAPI(ModelAPI):
         config: GenerateConfig,
     ) -> tuple[ModelOutput, ModelCall]:
         # allocate request_id (so we can see it from ModelCall)
-        request_id = self.…
+        request_id = self._http_hooks.start_request()
 
         # setup request and response for ModelCall
         request: dict[str, Any] = {}
@@ -120,7 +115,7 @@ class GroqAPI(ModelAPI):
                 request=request,
                 response=response,
                 filter=model_call_filter,
-                time=self.…
+                time=self._http_hooks.end_request(request_id),
             )
 
         messages = await as_groq_chat_messages(input)
@@ -137,7 +132,7 @@ class GroqAPI(ModelAPI):
         request = dict(
             messages=messages,
             model=self.model_name,
-            extra_headers={…
+            extra_headers={HttpxHooks.REQUEST_ID_HEADER: request_id},
             **params,
         )
 
@@ -215,8 +210,13 @@ class GroqAPI(ModelAPI):
         ]
 
     @override
-    def …
-        …
+    def should_retry(self, ex: Exception) -> bool:
+        if isinstance(ex, APIStatusError):
+            return is_retryable_http_status(ex.status_code)
+        elif isinstance(ex, APITimeoutError):
+            return True
+        else:
+            return False
 
     @override
     def connection_key(self) -> str:
```
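Dropping `max_retries` and `timeout` from the SDK client is consistent with retry policy moving up into inspect_ai itself, where each provider's `should_retry` now just classifies exceptions. A schematic of that division of labor (illustrative only; inspect_ai's real retry loop also handles backoff configuration, rate-limit accounting, and logging):

```python
import anyio


async def generate_with_retry(api, *args, max_attempts: int = 5, **kwargs):
    """Illustrative outer loop driven by a provider's should_retry."""
    for attempt in range(1, max_attempts + 1):
        try:
            return await api.generate(*args, **kwargs)
        except Exception as ex:
            if attempt == max_attempts or not api.should_retry(ex):
                raise
            # exponential backoff (a real loop would also add jitter)
            await anyio.sleep(min(2**attempt, 30))
```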
inspect_ai/model/_providers/hf.py:

```diff
@@ -1,15 +1,19 @@
-import asyncio
+import concurrent
+import concurrent.futures
 import copy
 import functools
 import gc
 import json
 import os
 import time
+from concurrent.futures import Future
 from dataclasses import dataclass
+from logging import getLogger
 from queue import Empty, Queue
 from threading import Thread
 from typing import Any, Literal, Protocol, cast
 
+import anyio
 import numpy as np
 import torch  # type: ignore
 from torch import Tensor  # type: ignore
@@ -23,6 +27,7 @@ from typing_extensions import override
 
 from inspect_ai._util.constants import DEFAULT_MAX_TOKENS
 from inspect_ai._util.content import ContentText
+from inspect_ai._util.trace import trace_action
 from inspect_ai.tool import ToolChoice, ToolInfo
 
 from .._chat_message import ChatMessage, ChatMessageAssistant
@@ -38,6 +43,9 @@ from .._model_output import (
 )
 from .util import ChatAPIHandler, HFHandler
 
+logger = getLogger(__name__)
+
+
 HF_TOKEN = "HF_TOKEN"
 
 
@@ -385,8 +393,7 @@ class GenerateOutput:
 @dataclass
 class _QueueItem:
     input: GenerateInput
-    future: …
-    loop: asyncio.AbstractEventLoop
+    future: Future[GenerateOutput]
 
 
 batch_thread: Thread | None = None
@@ -402,25 +409,26 @@ async def batched_generate(input: GenerateInput) -> GenerateOutput:
         batch_thread.start()
 
     # enqueue the job
-    …
-    …
-    batch_queue.put(_QueueItem(input=input, future=future, loop=loop))
-
-    # await the job
-    await future
+    future = Future[GenerateOutput]()
+    batch_queue.put(_QueueItem(input=input, future=future))
 
-    # …
-    …
+    # await the future
+    with trace_action(logger, "HF Batched Generate", "HF Batched Generate"):
+        while True:
+            try:
+                return future.result(timeout=0.01)
+            except concurrent.futures.TimeoutError:
+                pass
+            await anyio.sleep(1)
 
 
 def process_batches() -> None:
     while True:
         # drain the queue (wait until no new messages have shown up for 2 seconds)
-        inputs: list[tuple[GenerateInput, …
+        inputs: list[tuple[GenerateInput, Future[GenerateOutput]]] = []
         while True:
             try:
                 input = batch_queue.get(timeout=2)
-                loop = input.loop
                 inputs.append((input.input, input.future))
                 if len(inputs) == input.input.batch_size:
                     # max batch size reached
@@ -480,8 +488,7 @@ def process_batches() -> None:
             # asyncio futures are not thread safe, so we need to pass the event loop
             # down to this point, so we can mark the future as done in a thread safe manner.
             # see: https://docs.python.org/3/library/asyncio-dev.html#concurrency-and-multithreading
-            …
-                future.set_result,
+            future.set_result(
                 GenerateOutput(
                     output=output,
                     input_tokens=input_tokens,
@@ -489,13 +496,13 @@ def process_batches() -> None:
                     total_tokens=input_tokens + output_tokens,
                     logprobs=logprobs[i] if logprobs is not None else None,
                     time=total_time,
-                ),
+                )
             )
 
         except Exception as ex:
             for inp in inputs:
                 future = inp[1]
-                …
+                future.set_exception(ex)
 
 
 def extract_logprobs(
```
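The hf.py rewrite replaces asyncio futures, which must be resolved on their owning event loop, with `concurrent.futures.Future`, which the batching thread can complete directly; the async caller then polls `result(timeout=...)` and yields with `anyio.sleep` in between, which also makes the wait backend-agnostic. The same pattern reduced to a runnable sketch:

```python
import concurrent.futures
from concurrent.futures import Future
from queue import Queue
from threading import Thread

import anyio

jobs: "Queue[tuple[int, Future[int]]]" = Queue()


def worker() -> None:
    while True:
        value, future = jobs.get()
        future.set_result(value * 2)  # thread-safe, unlike asyncio futures


async def submit(value: int) -> int:
    future: Future[int] = Future()
    jobs.put((value, future))
    while True:
        try:
            # brief blocking check, then yield control back to the loop
            return future.result(timeout=0.01)
        except concurrent.futures.TimeoutError:
            await anyio.sleep(0.1)


Thread(target=worker, daemon=True).start()
print(anyio.run(submit, 21))  # prints 42
```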