inspect-ai 0.3.69__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. inspect_ai/_cli/eval.py +13 -1
  2. inspect_ai/_display/textual/app.py +3 -2
  3. inspect_ai/_display/textual/widgets/samples.py +4 -10
  4. inspect_ai/_display/textual/widgets/transcript.py +25 -12
  5. inspect_ai/_eval/eval.py +14 -2
  6. inspect_ai/_eval/evalset.py +6 -1
  7. inspect_ai/_eval/run.py +6 -0
  8. inspect_ai/_eval/task/run.py +44 -15
  9. inspect_ai/_eval/task/task.py +26 -3
  10. inspect_ai/_util/interrupt.py +6 -0
  11. inspect_ai/_util/logger.py +19 -0
  12. inspect_ai/_util/rich.py +7 -8
  13. inspect_ai/_util/text.py +13 -0
  14. inspect_ai/_util/transcript.py +10 -2
  15. inspect_ai/_util/working.py +46 -0
  16. inspect_ai/_view/www/dist/assets/index.css +56 -12
  17. inspect_ai/_view/www/dist/assets/index.js +904 -750
  18. inspect_ai/_view/www/log-schema.json +337 -2
  19. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  20. inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
  21. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  22. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
  23. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  24. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
  25. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  26. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
  27. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
  28. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  29. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
  30. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
  31. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
  32. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  33. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  34. inspect_ai/_view/www/src/types/log.d.ts +188 -108
  35. inspect_ai/_view/www/src/utils/format.ts +7 -4
  36. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
  37. inspect_ai/log/__init__.py +2 -0
  38. inspect_ai/log/_condense.py +1 -0
  39. inspect_ai/log/_log.py +72 -12
  40. inspect_ai/log/_samples.py +5 -1
  41. inspect_ai/log/_transcript.py +31 -1
  42. inspect_ai/model/_call_tools.py +1 -1
  43. inspect_ai/model/_conversation.py +1 -1
  44. inspect_ai/model/_model.py +32 -16
  45. inspect_ai/model/_model_call.py +10 -3
  46. inspect_ai/model/_providers/anthropic.py +13 -2
  47. inspect_ai/model/_providers/bedrock.py +7 -0
  48. inspect_ai/model/_providers/cloudflare.py +20 -7
  49. inspect_ai/model/_providers/google.py +2 -0
  50. inspect_ai/model/_providers/groq.py +57 -23
  51. inspect_ai/model/_providers/hf.py +6 -0
  52. inspect_ai/model/_providers/mistral.py +78 -51
  53. inspect_ai/model/_providers/openai.py +9 -0
  54. inspect_ai/model/_providers/providers.py +1 -1
  55. inspect_ai/model/_providers/util/tracker.py +92 -0
  56. inspect_ai/model/_providers/vllm.py +13 -5
  57. inspect_ai/solver/_basic_agent.py +1 -3
  58. inspect_ai/solver/_bridge/patch.py +0 -2
  59. inspect_ai/solver/_limit.py +4 -4
  60. inspect_ai/solver/_plan.py +0 -3
  61. inspect_ai/solver/_task_state.py +7 -0
  62. inspect_ai/tool/_tools/_web_search.py +3 -3
  63. inspect_ai/util/_concurrency.py +14 -8
  64. inspect_ai/util/_sandbox/context.py +15 -0
  65. inspect_ai/util/_sandbox/docker/docker.py +7 -5
  66. inspect_ai/util/_sandbox/environment.py +32 -1
  67. inspect_ai/util/_sandbox/events.py +149 -0
  68. inspect_ai/util/_sandbox/local.py +3 -3
  69. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +3 -3
  70. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +74 -67
  71. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
  72. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
  73. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
  74. {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
inspect_ai/log/__init__.py CHANGED
@@ -41,6 +41,7 @@ from ._transcript import (
     ModelEvent,
     SampleInitEvent,
     SampleLimitEvent,
+    SandboxEvent,
     ScoreEvent,
     StateEvent,
     StepEvent,
@@ -82,6 +83,7 @@ __all__ = [
     "ModelEvent",
     "SampleInitEvent",
     "SampleLimitEvent",
+    "SandboxEvent",
     "ScoreEvent",
     "StateEvent",
     "StepEvent",
inspect_ai/log/_condense.py CHANGED
@@ -217,6 +217,7 @@ def walk_model_call(
         return ModelCall(
             request=walk_json_dict(call.request, content_fn),
             response=walk_json_dict(call.response, content_fn),
+            time=call.time,
         )
     else:
         return None
inspect_ai/log/_log.py CHANGED
@@ -4,7 +4,7 @@ import sys
 import traceback
 from logging import getLogger
 from types import TracebackType
-from typing import Any, Literal, Type, TypedDict
+from typing import Any, Literal, Tuple, Type, TypedDict
 
 import click
 import tenacity
@@ -86,13 +86,16 @@ class EvalConfig(BaseModel):
     """
 
     message_limit: int | None = Field(default=None)
-    """Maximum messages to allow in a chat conversation."""
+    """Maximum messages to allow per sample."""
 
     token_limit: int | None = Field(default=None)
-    """Maximum tokens to allow in a chat conversation."""
+    """Maximum tokens usage per sample."""
 
     time_limit: int | None = Field(default=None)
-    """Maximum seconds for chat conversation."""
+    """Maximum clock time per sample."""
+
+    working_limit: int | None = Field(default=None)
+    """Meximum working time per sample."""
 
     max_samples: int | None = Field(default=None)
     """Maximum number of samples to run in parallel."""
@@ -141,7 +144,9 @@ class EvalConfig(BaseModel):
 class EvalSampleLimit(BaseModel):
     """Limit encontered by sample."""
 
-    type: Literal["context", "time", "message", "token", "operator", "custom"]
+    type: Literal[
+        "context", "time", "working", "message", "token", "operator", "custom"
+    ]
     """The type of limit"""
 
     limit: int
@@ -218,6 +223,15 @@ class EvalSample(BaseModel):
     model_usage: dict[str, ModelUsage] = Field(default_factory=dict)
     """Model token usage for sample."""
 
+    total_time: float | None = Field(default=None)
+    """Total time that the sample was running."""
+
+    working_time: float | None = Field(default=None)
+    """Time spent working (model generation, sandbox calls, etc.)"""
+
+    uuid: str | None = Field(default=None)
+    """Globally unique identifier for sample run (exists for samples created in Inspect >= 0.3.70)"""
+
     error: EvalError | None = Field(default=None)
     """Error that halted sample."""
 
@@ -601,14 +615,15 @@ def eval_error(
     exc_traceback: TracebackType | None,
 ) -> EvalError:
     # get text traceback
-    traceback_text = "\n".join(
-        traceback.format_exception(exc_type, exc_value, exc_traceback)
-    )
+    traceback_text, truncated = truncate_traceback(exc_type, exc_value, exc_traceback)
 
-    with open(os.devnull, "w") as f:
-        console = Console(record=True, file=f, legacy_windows=True)
-        console.print(rich_traceback(exc_type, exc_value, exc_traceback))
-        traceback_ansi = console.export_text(styles=True)
+    if not truncated:
+        with open(os.devnull, "w") as f:
+            console = Console(record=True, file=f, legacy_windows=True)
+            console.print(rich_traceback(exc_type, exc_value, exc_traceback))
+            traceback_ansi = console.export_text(styles=True)
+    else:
+        traceback_ansi = traceback_text
 
     # return error
     return EvalError(
@@ -632,6 +647,51 @@ def rich_traceback(
     return rich_tb
 
 
+def truncate_traceback(
+    exc_type: Type[Any],
+    exc_value: BaseException,
+    exc_traceback: TracebackType | None,
+    max_length: int = 1048576,  # 1MB
+) -> Tuple[str, bool]:
+    tb_list = traceback.format_exception(exc_type, exc_value, exc_traceback)
+
+    # Keep the front and back of the traceback
+    header = tb_list[0]
+    error_msg = tb_list[-1]
+
+    # Join the middle parts (stack frames)
+    frames = "".join(tb_list[1:-1])
+
+    # It all fits, use it as is
+    full_tb = header + frames + error_msg
+    if len(full_tb) <= max_length:
+        return full_tb, False
+
+    ellipsis = "\n...\n"
+
+    # Minimum header size
+    header_size = min(len(header), 1024)
+
+    # Minimum frames size
+    frames_size = min(len(frames), 1024)
+
+    # Remaining space for error message
+    error_msg_size = max(0, max_length - header_size - frames_size)
+
+    def truncate_middle(text: str, size: int) -> str:
+        if len(text) <= size:
+            return text
+        half = (size - len(ellipsis)) // 2
+        return f"{text[:half]}{ellipsis}{text[-half:]}"
+
+    # Truncate each part as needed
+    truncated_header = truncate_middle(header, header_size)
+    truncated_frames = truncate_middle(frames, frames_size)
+    truncated_error = truncate_middle(error_msg, error_msg_size)
+
+    return truncated_header + truncated_frames + truncated_error, True
+
+
 class EvalStats(BaseModel):
     """Timing and usage statistics."""
 
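
To make the truncation strategy above concrete, here is a standalone sketch (not an inspect_ai import) of the same middle-truncation idea applied to an oversized block of stack frames:

    # keep the head and tail of an oversized string, eliding the middle
    ELLIPSIS = "\n...\n"

    def truncate_middle(text: str, size: int) -> str:
        if len(text) <= size:
            return text
        half = (size - len(ELLIPSIS)) // 2
        return f"{text[:half]}{ELLIPSIS}{text[-half:]}"

    frames = 'File "task.py", line 1, in solve\n' * 5000  # pretend stack frames
    print(len(truncate_middle(frames, 1024)))  # bounded to at most 1024 chars
    print(truncate_middle("short", 1024))      # returned unchanged when it fits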
inspect_ai/log/_samples.py CHANGED
@@ -23,6 +23,7 @@ class ActiveSample:
         message_limit: int | None,
         token_limit: int | None,
         time_limit: int | None,
+        working_limit: int | None,
         fails_on_error: bool,
         transcript: Transcript,
         sandboxes: dict[str, SandboxConnection],
@@ -37,6 +38,7 @@ class ActiveSample:
         self.message_limit = message_limit
         self.token_limit = token_limit
         self.time_limit = time_limit
+        self.working_limit = working_limit
         self.fails_on_error = fails_on_error
         self.total_messages = 0
         self.total_tokens = 0
@@ -45,7 +47,7 @@ class ActiveSample:
         self._interrupt_action: Literal["score", "error"] | None = None
 
     @property
-    def execution_time(self) -> float:
+    def running_time(self) -> float:
         if self.started is not None:
             completed = (
                 self.completed
@@ -78,6 +80,7 @@ async def active_sample(
     message_limit: int | None,
     token_limit: int | None,
    time_limit: int | None,
+    working_limit: int | None,
     fails_on_error: bool,
     transcript: Transcript,
 ) -> AsyncGenerator[ActiveSample, None]:
@@ -90,6 +93,7 @@ async def active_sample(
         message_limit=message_limit,
         token_limit=token_limit,
         time_limit=time_limit,
+        working_limit=working_limit,
         sandboxes=await sandbox_connections(),
         fails_on_error=fails_on_error,
         transcript=transcript,
inspect_ai/log/_transcript.py CHANGED
@@ -70,7 +70,7 @@ class SampleLimitEvent(BaseEvent):
     event: Literal["sample_limit"] = Field(default="sample_limit")
     """Event type."""
 
-    type: Literal["message", "time", "token", "operator", "custom"]
+    type: Literal["message", "time", "working", "token", "operator", "custom"]
     """Type of limit that halted processing"""
 
     message: str
@@ -207,6 +207,34 @@ class ToolEvent(BaseEvent):
     """Required so that we can include '_task' as a member."""
 
 
+class SandboxEvent(BaseEvent):
+    """Sandbox execution or I/O"""
+
+    event: Literal["sandbox"] = Field(default="sandbox")
+    """Event type"""
+
+    action: Literal["exec", "read_file", "write_file"]
+    """Sandbox action"""
+
+    cmd: str | None = Field(default=None)
+    """Command (for exec)"""
+
+    options: dict[str, JsonValue] | None = Field(default=None)
+    """Options (for exec)"""
+
+    file: str | None = Field(default=None)
+    """File (for read_file and write_file)"""
+
+    input: str | None = Field(default=None)
+    """Input (for cmd and write_file). Truncated to 100 lines."""
+
+    result: int | None = Field(default=None)
+    """Result (for exec)"""
+
+    output: str | None = Field(default=None)
+    """Output (for exec and read_file). Truncated to 100 lines."""
+
+
 class ApprovalEvent(BaseEvent):
     """Tool approval."""
 
@@ -342,10 +370,12 @@ class SubtaskEvent(BaseEvent):
 Event: TypeAlias = Union[
     SampleInitEvent
     | SampleLimitEvent
+    | SandboxEvent
     | StateEvent
     | StoreEvent
     | ModelEvent
     | ToolEvent
+    | SandboxEvent
     | ApprovalEvent
     | InputEvent
     | ScoreEvent
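
For reference (not part of the diff), a minimal sketch of constructing the new event type, assuming SandboxEvent is imported from inspect_ai.log as the __init__.py hunk above indicates; the field values are purely illustrative:

    from inspect_ai.log import SandboxEvent

    # hypothetical record of a sandbox exec call
    event = SandboxEvent(
        action="exec",
        cmd="python solve.py",
        options={"timeout": 30},
        result=0,
        output="hello\n",
    )
    print(event.event)  # "sandbox"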
inspect_ai/model/_call_tools.py CHANGED
@@ -407,7 +407,7 @@ def tool_param(type_hint: Type[Any], input: Any) -> Any:
         return tuple(input)
     elif origin is dict or origin is Dict:
         if args and len(args) > 1:
-            return {k: tool_param(args[1], v) for k, v in input}
+            return {k: tool_param(args[1], v) for k, v in input.items()}
         else:
             return input
     elif origin is Union or origin is types.UnionType:
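
The one-line fix above matters because iterating a dict yields keys, not key/value pairs, so the old comprehension either raised or silently mangled the mapping. A quick illustration:

    data = {"ab": 1, "cd": 2}

    # old behavior: unpacks each *key* into (k, v) -> {'a': 'b', 'c': 'd'}, silently wrong
    # (and a ValueError for any key whose length isn't exactly 2)
    print({k: v for k, v in data})

    # fixed behavior with .items(): {'ab': 1, 'cd': 2}
    print({k: v for k, v in data.items()})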
inspect_ai/model/_conversation.py CHANGED
@@ -19,7 +19,7 @@ def conversation_tool_mesage(message: ChatMessageTool) -> None:
         message.error.message.strip() if message.error else message.text.strip()
     )
     if output:
-        content = lines_display(output, 100)
+        content = lines_display(output, 50)
 
         conversation_panel(
             title=f"Tool Output: {message.function}",
inspect_ai/model/_model.py CHANGED
@@ -1,5 +1,5 @@
 import abc
-import asyncio
+import contextlib
 import functools
 import json
 import logging
@@ -8,7 +8,7 @@ import time
 from contextvars import ContextVar
 from copy import deepcopy
 from types import TracebackType
-from typing import Any, Callable, Literal, Type, cast
+from typing import Any, AsyncIterator, Callable, Literal, Type, cast
 
 from pydantic_core import to_jsonable_python
 from tenacity import (
@@ -33,6 +33,7 @@ from inspect_ai._util.registry import (
 )
 from inspect_ai._util.retry import log_rate_limit_retry
 from inspect_ai._util.trace import trace_action
+from inspect_ai._util.working import report_sample_waiting_time
 from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
 from inspect_ai.tool._tool_def import ToolDef, tool_defs
 from inspect_ai.util import concurrency
@@ -435,14 +436,16 @@ class Model:
             )
 
             with trace_action(logger, "Model", f"generate ({str(self)})"):
-                time_start = time.perf_counter()
-                result = await self.api.generate(
-                    input=input,
-                    tools=tools,
-                    tool_choice=tool_choice,
-                    config=config,
-                )
-                time_elapsed = time.perf_counter() - time_start
+                time_start = time.monotonic()
+                try:
+                    result = await self.api.generate(
+                        input=input,
+                        tools=tools,
+                        tool_choice=tool_choice,
+                        config=config,
+                    )
+                finally:
+                    time_elapsed = time.monotonic() - time_start
 
             if isinstance(result, tuple):
                 output, call = result
@@ -461,8 +464,12 @@ class Model:
                 error_message = f"{error}\n\nRequest:\n{request}"
                 raise RuntimeError(error_message)
 
-            # update output with time elapsed
-            output.time = time_elapsed
+            # update output with time (call.time captures time spent
+            # on the actual request that succeeds w/ status 200)
+            if call and call.time is not None:
+                output.time = call.time
+            else:
+                output.time = time_elapsed
 
             # add views to tool calls
             for choice in output.choices:
@@ -488,8 +495,13 @@ class Model:
 
             return output
 
-        # call the model
+        # call the model (this will so retries, etc., so report waiting time
+        # as elapsed time - actual time for successful model call)
+        time_start = time.monotonic()
         model_output = await generate()
+        total_time = time.monotonic() - time_start
+        if model_output.time:
+            report_sample_waiting_time(total_time - model_output.time)
 
         # return results
         return model_output
@@ -513,7 +525,10 @@ class Model:
     # override the _connection_key() argument to provide a scope within which
     # to enforce max_connections (e.g. by account/api_key, by endpoint, etc.)
 
-    def _connection_concurrency(self, config: GenerateConfig) -> asyncio.Semaphore:
+    @contextlib.asynccontextmanager
+    async def _connection_concurrency(
+        self, config: GenerateConfig
+    ) -> AsyncIterator[None]:
         """Get the appropriate connection semaphore for this model instance."""
         max_connections = (
             config.max_connections
@@ -521,11 +536,12 @@ class Model:
             else self.api.max_connections()
         )
         model_name = ModelName(self)
-        return concurrency(
+        async with concurrency(
             name=f"{model_name.api}",
             concurrency=max_connections,
             key=f"Model{self.api.connection_key()}",
-        )
+        ):
+            yield
 
     def _record_model_interaction(
         self,
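
The _connection_concurrency change above swaps a method that returned an asyncio.Semaphore for an async context manager built with contextlib.asynccontextmanager. A self-contained sketch of that pattern (stand-in names, not inspect_ai code):

    import asyncio
    import contextlib
    from typing import AsyncIterator

    _slots = asyncio.Semaphore(4)  # stand-in for inspect_ai.util.concurrency()

    @contextlib.asynccontextmanager
    async def connection_concurrency() -> AsyncIterator[None]:
        async with _slots:  # hold a connection slot for the duration of the block
            yield

    async def main() -> None:
        async with connection_concurrency():
            ...  # issue the model request here

    asyncio.run(main())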
inspect_ai/model/_model_call.py CHANGED
@@ -1,6 +1,6 @@
 from typing import Any, Callable
 
-from pydantic import BaseModel, JsonValue
+from pydantic import BaseModel, Field, JsonValue
 
 from inspect_ai._util.json import jsonable_python
 
@@ -22,9 +22,15 @@ class ModelCall(BaseModel):
     response: dict[str, JsonValue]
     """Raw response data from model."""
 
+    time: float | None = Field(default=None)
+    """Time taken for underlying model call."""
+
     @staticmethod
     def create(
-        request: Any, response: Any, filter: ModelCallFilter | None = None
+        request: Any,
+        response: Any,
+        filter: ModelCallFilter | None = None,
+        time: float | None = None,
     ) -> "ModelCall":
         """Create a ModelCall object.
 
@@ -36,6 +42,7 @@ class ModelCall(BaseModel):
            request (Any): Request object (dict, dataclass, BaseModel, etc.)
            response (Any): Response object (dict, dataclass, BaseModel, etc.)
            filter (ModelCallFilter): Function for filtering model call data.
+           time: Time taken for underlying ModelCall
         """
         request_dict = jsonable_python(request)
         if filter:
@@ -43,7 +50,7 @@ class ModelCall(BaseModel):
         response_dict = jsonable_python(response)
         if filter:
             response_dict = _walk_json_value(None, response_dict, filter)
-        return ModelCall(request=request_dict, response=response_dict)
+        return ModelCall(request=request_dict, response=response_dict, time=time)
 
 
 def _walk_json_value(
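
Providers can now attach the measured request time when they record a call. A hedged usage sketch, assuming ModelCall stays exported from inspect_ai.model; the request/response payloads are placeholders:

    import time

    from inspect_ai.model import ModelCall

    start = time.monotonic()
    response = {"id": "resp_123", "content": "..."}  # placeholder provider response
    elapsed = time.monotonic() - start

    call = ModelCall.create(
        request={"model": "example-model", "messages": []},
        response=response,
        time=elapsed,  # new in 0.3.70: time taken for the underlying request
    )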
inspect_ai/model/_providers/anthropic.py CHANGED
@@ -5,6 +5,8 @@ from copy import copy
 from logging import getLogger
 from typing import Any, Literal, Tuple, TypedDict, cast
 
+from .util.tracker import HttpxTimeTracker
+
 if sys.version_info >= (3, 11):
     from typing import NotRequired
 else:
@@ -150,6 +152,9 @@ class AnthropicAPI(ModelAPI):
             **model_args,
         )
 
+        # create time tracker
+        self._time_tracker = HttpxTimeTracker(self.client._client)
+
     @override
     async def close(self) -> None:
         await self.client.close()
@@ -167,6 +172,9 @@ class AnthropicAPI(ModelAPI):
         tool_choice: ToolChoice,
         config: GenerateConfig,
     ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
+        # allocate request_id (so we can see it from ModelCall)
+        request_id = self._time_tracker.start_request()
+
         # setup request and response for ModelCall
         request: dict[str, Any] = {}
         response: dict[str, Any] = {}
@@ -176,6 +184,7 @@ class AnthropicAPI(ModelAPI):
                 request=request,
                 response=response,
                 filter=model_call_filter,
+                time=self._time_tracker.end_request(request_id),
             )
 
         # generate
@@ -200,9 +209,11 @@ class AnthropicAPI(ModelAPI):
             # additional options
             request = request | self.completion_params(config)
 
-            # computer use beta
+            # extra headers (for time tracker and computer use)
+            extra_headers = {HttpxTimeTracker.REQUEST_ID_HEADER: request_id}
             if computer_use:
-                request["extra_headers"] = {"anthropic-beta": "computer-use-2024-10-22"}
+                extra_headers["anthropic-beta"] = "computer-use-2024-10-22"
+            request["extra_headers"] = extra_headers
 
             # extra_body
             if self.extra_body is not None:
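
The tracker interface visible above (start_request(), end_request(), REQUEST_ID_HEADER) suggests per-request timing keyed by a request-id header. The actual util/tracker.py is not shown in this excerpt; the following is only a hypothetical sketch of how such a tracker could be built on httpx event hooks:

    import time
    import uuid

    import httpx

    class RequestTimer:
        REQUEST_ID_HEADER = "x-request-timer-id"  # placeholder header name

        def __init__(self, client: httpx.AsyncClient) -> None:
            self._start: dict[str, float] = {}
            self._elapsed: dict[str, float] = {}
            client.event_hooks["request"].append(self._on_request)
            client.event_hooks["response"].append(self._on_response)

        def start_request(self) -> str:
            # caller adds this id to the outgoing request headers
            return uuid.uuid4().hex

        def end_request(self, request_id: str) -> float | None:
            self._start.pop(request_id, None)
            return self._elapsed.pop(request_id, None)

        async def _on_request(self, request: httpx.Request) -> None:
            request_id = request.headers.get(self.REQUEST_ID_HEADER)
            if request_id:
                self._start[request_id] = time.monotonic()

        async def _on_response(self, response: httpx.Response) -> None:
            request_id = response.request.headers.get(self.REQUEST_ID_HEADER)
            if request_id in self._start and response.status_code == 200:
                self._elapsed[request_id] = time.monotonic() - self._start[request_id]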
inspect_ai/model/_providers/bedrock.py CHANGED
@@ -31,6 +31,7 @@ from .._model_output import ChatCompletionChoice, ModelOutput, ModelUsage
 from .util import (
     model_base_url,
 )
+from .util.tracker import BotoTimeTracker
 
 # Model for Bedrock Converse API (Response)
 # generated from: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#converse
@@ -256,6 +257,9 @@ class BedrockAPI(ModelAPI):
             # Create a shared session to be used when generating
             self.session = aioboto3.Session()
 
+            # create time tracker
+            self._time_tracker = BotoTimeTracker(self.session)
+
         except ImportError:
             raise pip_dependency_error("Bedrock API", ["aioboto3"])
 
@@ -313,6 +317,7 @@ class BedrockAPI(ModelAPI):
         from botocore.exceptions import ClientError
 
         # The bedrock client
+        request_id = self._time_tracker.start_request()
         async with self.session.client(  # type: ignore[call-overload]
             service_name="bedrock-runtime",
             endpoint_url=self.base_url,
@@ -325,6 +330,7 @@ class BedrockAPI(ModelAPI):
                     else DEFAULT_MAX_RETRIES,
                     mode="adaptive",
                 ),
+                user_agent_extra=self._time_tracker.user_agent_extra(request_id),
             ),
             **self.model_args,
         ) as client:
@@ -364,6 +370,7 @@ class BedrockAPI(ModelAPI):
                     request.model_dump(exclude_none=True)
                 ),
                 response=response,
+                time=self._time_tracker.end_request(request_id),
             )
 
             try:
inspect_ai/model/_providers/cloudflare.py CHANGED
@@ -19,6 +19,7 @@ from .util import (
     is_chat_api_rate_limit,
     model_base_url,
 )
+from .util.tracker import HttpxTimeTracker
 
 # https://developers.cloudflare.com/workers-ai/models/#text-generation
 
@@ -50,6 +51,7 @@ class CloudFlareAPI(ModelAPI):
         if not self.api_key:
             raise environment_prerequisite_error("CloudFlare", CLOUDFLARE_API_TOKEN)
         self.client = httpx.AsyncClient()
+        self._time_tracker = HttpxTimeTracker(self.client)
         base_url = model_base_url(base_url, "CLOUDFLARE_BASE_URL")
         self.base_url = (
             base_url if base_url else "https://api.cloudflare.com/client/v4/accounts"
@@ -76,12 +78,28 @@ class CloudFlareAPI(ModelAPI):
         json["max_tokens"] = config.max_tokens
         json["messages"] = chat_api_input(input, tools, self.chat_api_handler())
 
+        # request_id
+        request_id = self._time_tracker.start_request()
+
+        # setup response
+        response: dict[str, Any] = {}
+
+        def model_call() -> ModelCall:
+            return ModelCall.create(
+                request=json,
+                response=response,
+                time=self._time_tracker.end_request(request_id),
+            )
+
         # make the call
         response = await chat_api_request(
             self.client,
             model_name=self.model_name,
             url=f"{chat_url}/{self.model_name}",
-            headers={"Authorization": f"Bearer {self.api_key}"},
+            headers={
+                "Authorization": f"Bearer {self.api_key}",
+                HttpxTimeTracker.REQUEST_ID_HEADER: request_id,
+            },
             json=json,
             config=config,
         )
@@ -102,13 +120,8 @@ class CloudFlareAPI(ModelAPI):
                 ],
             )
 
-            # record call
-            call = ModelCall.create(
-                request=dict(model_name=self.model_name, **json), response=response
-            )
-
             # return
-            return output, call
+            return output, model_call()
         else:
             error = str(response.get("errors", "Unknown"))
             raise RuntimeError(f"Error calling {self.model_name}: {error}")
inspect_ai/model/_providers/google.py CHANGED
@@ -229,6 +229,8 @@ class GoogleGenAIAPI(ModelAPI):
             response=response,
         )
 
+        # TODO: would need to monkey patch AuthorizedSession.request
+
        try:
            response = await self.client.aio.models.generate_content(
                model=self.model_name,