inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. inspect_ai/_cli/eval.py +13 -1
  2. inspect_ai/_display/plain/display.py +9 -11
  3. inspect_ai/_display/textual/app.py +5 -5
  4. inspect_ai/_display/textual/widgets/samples.py +47 -18
  5. inspect_ai/_display/textual/widgets/transcript.py +25 -12
  6. inspect_ai/_eval/eval.py +14 -2
  7. inspect_ai/_eval/evalset.py +6 -1
  8. inspect_ai/_eval/run.py +6 -0
  9. inspect_ai/_eval/task/run.py +44 -15
  10. inspect_ai/_eval/task/task.py +26 -3
  11. inspect_ai/_util/interrupt.py +15 -0
  12. inspect_ai/_util/logger.py +23 -0
  13. inspect_ai/_util/rich.py +7 -8
  14. inspect_ai/_util/text.py +301 -1
  15. inspect_ai/_util/transcript.py +10 -2
  16. inspect_ai/_util/working.py +46 -0
  17. inspect_ai/_view/www/dist/assets/index.css +56 -12
  18. inspect_ai/_view/www/dist/assets/index.js +905 -751
  19. inspect_ai/_view/www/log-schema.json +337 -2
  20. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
  22. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  23. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
  24. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  25. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
  26. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  27. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
  28. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +1 -1
  29. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
  30. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  31. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
  32. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
  33. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
  34. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  35. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  36. inspect_ai/_view/www/src/types/log.d.ts +188 -108
  37. inspect_ai/_view/www/src/utils/format.ts +7 -4
  38. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
  39. inspect_ai/log/__init__.py +2 -0
  40. inspect_ai/log/_condense.py +1 -0
  41. inspect_ai/log/_log.py +72 -12
  42. inspect_ai/log/_samples.py +5 -5
  43. inspect_ai/log/_transcript.py +31 -1
  44. inspect_ai/model/_call_tools.py +1 -1
  45. inspect_ai/model/_conversation.py +1 -1
  46. inspect_ai/model/_model.py +35 -16
  47. inspect_ai/model/_model_call.py +10 -3
  48. inspect_ai/model/_providers/anthropic.py +13 -2
  49. inspect_ai/model/_providers/bedrock.py +7 -0
  50. inspect_ai/model/_providers/cloudflare.py +20 -7
  51. inspect_ai/model/_providers/google.py +358 -302
  52. inspect_ai/model/_providers/groq.py +57 -23
  53. inspect_ai/model/_providers/hf.py +6 -0
  54. inspect_ai/model/_providers/mistral.py +81 -52
  55. inspect_ai/model/_providers/openai.py +9 -0
  56. inspect_ai/model/_providers/providers.py +6 -6
  57. inspect_ai/model/_providers/util/tracker.py +92 -0
  58. inspect_ai/model/_providers/vllm.py +13 -5
  59. inspect_ai/solver/_basic_agent.py +1 -3
  60. inspect_ai/solver/_bridge/patch.py +0 -2
  61. inspect_ai/solver/_limit.py +4 -4
  62. inspect_ai/solver/_plan.py +3 -3
  63. inspect_ai/solver/_solver.py +3 -0
  64. inspect_ai/solver/_task_state.py +10 -1
  65. inspect_ai/tool/_tools/_web_search.py +3 -3
  66. inspect_ai/util/_concurrency.py +14 -8
  67. inspect_ai/util/_sandbox/context.py +15 -0
  68. inspect_ai/util/_sandbox/docker/cleanup.py +8 -3
  69. inspect_ai/util/_sandbox/docker/compose.py +5 -9
  70. inspect_ai/util/_sandbox/docker/docker.py +20 -6
  71. inspect_ai/util/_sandbox/docker/util.py +10 -1
  72. inspect_ai/util/_sandbox/environment.py +32 -1
  73. inspect_ai/util/_sandbox/events.py +149 -0
  74. inspect_ai/util/_sandbox/local.py +3 -3
  75. inspect_ai/util/_sandbox/self_check.py +2 -1
  76. inspect_ai/util/_subprocess.py +4 -1
  77. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +5 -5
  78. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +82 -74
  79. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
  80. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
  81. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
  82. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
--- a/inspect_ai/model/_providers/google.py
+++ b/inspect_ai/model/_providers/google.py
@@ -2,103 +2,96 @@ import asyncio
 import functools
 import hashlib
 import json
+import os
 from copy import copy
 from io import BytesIO
 from logging import getLogger
-from typing import Any, MutableSequence, cast
+from typing import Any
 
-import proto  # type: ignore
-from google.ai.generativelanguage import (
-    Blob,
+# SDK Docs: https://googleapis.github.io/python-genai/
+from google.genai import Client  # type: ignore
+from google.genai.errors import APIError, ClientError  # type: ignore
+from google.genai.types import (  # type: ignore
     Candidate,
-    FunctionCall,
+    Content,
+    File,
+    FinishReason,
     FunctionCallingConfig,
     FunctionDeclaration,
     FunctionResponse,
+    GenerateContentConfig,
+    GenerateContentResponse,
+    GenerateContentResponsePromptFeedback,
+    GenerateContentResponseUsageMetadata,
+    GenerationConfig,
+    HarmBlockThreshold,
+    HarmCategory,
     Part,
+    SafetySetting,
+    SafetySettingDict,
     Schema,
+    Tool,
     ToolConfig,
     Type,
 )
-from google.api_core.exceptions import (
-    GatewayTimeout,
-    InternalServerError,
-    InvalidArgument,
-    ServiceUnavailable,
-    TooManyRequests,
-)
-from google.api_core.retry.retry_base import if_transient_error
-from google.generativeai.client import configure
-from google.generativeai.files import get_file, upload_file
-from google.generativeai.generative_models import GenerativeModel
-from google.generativeai.types import (
-    ContentDict,
-    GenerationConfig,
-    PartDict,
-    PartType,
-    Tool,
-)
-from google.generativeai.types.file_types import File
-from google.generativeai.types.generation_types import AsyncGenerateContentResponse
-from google.generativeai.types.safety_types import (
-    EasySafetySettingDict,
-    HarmBlockThreshold,
-    HarmCategory,
-)
-from google.protobuf.json_format import MessageToDict, ParseDict
-from google.protobuf.struct_pb2 import Struct
 from pydantic import JsonValue
 from typing_extensions import override
 
 from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
+from inspect_ai._util.content import Content as InspectContent
 from inspect_ai._util.content import (
-    Content,
     ContentAudio,
     ContentImage,
     ContentText,
     ContentVideo,
 )
+from inspect_ai._util.error import PrerequisiteError
 from inspect_ai._util.images import file_as_data
 from inspect_ai._util.kvstore import inspect_kvstore
 from inspect_ai._util.trace import trace_message
-from inspect_ai.tool import ToolCall, ToolChoice, ToolInfo, ToolParam, ToolParams
-
-from .._chat_message import (
+from inspect_ai.model import (
+    ChatCompletionChoice,
     ChatMessage,
     ChatMessageAssistant,
-    ChatMessageSystem,
     ChatMessageTool,
     ChatMessageUser,
-)
-from .._generate_config import GenerateConfig
-from .._model import ModelAPI
-from .._model_call import ModelCall
-from .._model_output import (
-    ChatCompletionChoice,
+    GenerateConfig,
     Logprob,
     Logprobs,
+    ModelAPI,
     ModelOutput,
     ModelUsage,
     StopReason,
     TopLogprob,
 )
-from .util import model_base_url
+from inspect_ai.model._model_call import ModelCall
+from inspect_ai.model._providers.util import model_base_url
+from inspect_ai.tool import (
+    ToolCall,
+    ToolChoice,
+    ToolFunction,
+    ToolInfo,
+    ToolParam,
+    ToolParams,
+)
 
 logger = getLogger(__name__)
 
-SAFETY_SETTINGS = "safety_settings"
 
-DEFAULT_SAFETY_SETTINGS: EasySafetySettingDict = {
-    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
-    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+GOOGLE_API_KEY = "GOOGLE_API_KEY"
+VERTEX_API_KEY = "VERTEX_API_KEY"
+
+SAFETY_SETTINGS = "safety_settings"
+DEFAULT_SAFETY_SETTINGS = {
+    HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: HarmBlockThreshold.BLOCK_NONE,
     HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
 }
 
-GOOGLE_API_KEY = "GOOGLE_API_KEY"
-
 
-class GoogleAPI(ModelAPI):
+class GoogleGenAIAPI(ModelAPI):
     def __init__(
         self,
         model_name: str,
@@ -111,11 +104,11 @@ class GoogleAPI(ModelAPI):
             model_name=model_name,
             base_url=base_url,
             api_key=api_key,
-            api_key_vars=[GOOGLE_API_KEY],
+            api_key_vars=[GOOGLE_API_KEY, VERTEX_API_KEY],
             config=config,
         )
 
-        # pick out vertex safety settings and merge against default
+        # pick out user-provided safety settings and merge against default
         self.safety_settings = DEFAULT_SAFETY_SETTINGS.copy()
         if SAFETY_SETTINGS in model_args:
            self.safety_settings.update(
@@ -123,22 +116,79 @@
             )
             del model_args[SAFETY_SETTINGS]
 
-        # configure genai client
-        base_url = model_base_url(base_url, "GOOGLE_BASE_URL")
-        configure(
+        # extract any service prefix from model name
+        parts = model_name.split("/")
+        if len(parts) > 1:
+            self.service: str | None = parts[0]
+            model_name = "/".join(parts[1:])
+        else:
+            self.service = None
+
+        # vertex can also be forced by the GOOGLE_GENAI_USE_VERTEXAI flag
+        if self.service is None:
+            if os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true":
+                self.service = "vertex"
+
+        # ensure we haven't specified an invalid service
+        if self.service is not None and self.service != "vertex":
+            raise RuntimeError(
+                f"Invalid service name for google: {self.service}. "
+                + "Currently 'vertex' is the only supported service."
+            )
+
+        # handle auth (vertex or standard google api key)
+        if self.is_vertex():
+            # see if we are running in express mode (propagate api key if we are)
+            # https://cloud.google.com/vertex-ai/generative-ai/docs/start/express-mode/overview
+            vertex_api_key = os.environ.get(VERTEX_API_KEY, None)
+            if vertex_api_key and not self.api_key:
+                self.api_key = vertex_api_key
+
+            # When not using express mode the GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION
+            # environment variables should be set, OR the 'project' and 'location' should be
+            # passed within the model_args.
+            # https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2
+            if not vertex_api_key:
+                if not os.environ.get(
+                    "GOOGLE_CLOUD_PROJECT", None
+                ) and not model_args.get("project", None):
+                    raise PrerequisiteError(
+                        "Google provider requires either the GOOGLE_CLOUD_PROJECT environment variable "
+                        + "or the 'project' custom model arg (-M) when running against vertex."
+                    )
+                if not os.environ.get(
+                    "GOOGLE_CLOUD_LOCATION", None
+                ) and not model_args.get("location", None):
+                    raise PrerequisiteError(
+                        "Google provider requires either the GOOGLE_CLOUD_LOCATION environment variable "
+                        + "or the 'location' custom model arg (-M) when running against vertex."
+                    )
+
+        # normal google endpoint
+        else:
+            # read api key from env
+            if not self.api_key:
+                self.api_key = os.environ.get(GOOGLE_API_KEY, None)
+
+            # custom base_url
+            base_url = model_base_url(base_url, "GOOGLE_BASE_URL")
+
+        # create client
+        self.client = Client(
+            vertexai=self.is_vertex(),
             api_key=self.api_key,
-            client_options=dict(api_endpoint=base_url),
+            http_options={"base_url": base_url},
             **model_args,
         )
 
-        # create model
-        self.model = GenerativeModel(self.model_name)
-
     @override
     async def close(self) -> None:
         # GenerativeModel uses a cached/shared client so there is no 'close'
         pass
 
+    def is_vertex(self) -> bool:
+        return self.service == "vertex"
+
     async def generate(
         self,
         input: list[ChatMessage],
@@ -146,7 +196,11 @@ class GoogleAPI(ModelAPI):
         tool_choice: ToolChoice,
         config: GenerateConfig,
     ) -> ModelOutput | tuple[ModelOutput | Exception, ModelCall]:
-        parameters = GenerationConfig(
+        # Create google-genai types.
+        gemini_contents = await as_chat_messages(self.client, input)
+        gemini_tools = chat_tools(tools) if len(tools) > 0 else None
+        gemini_tool_config = chat_tool_config(tool_choice) if len(tools) > 0 else None
+        parameters = GenerateContentConfig(
             temperature=config.temperature,
             top_p=config.top_p,
             top_k=config.top_k,
@@ -155,21 +209,19 @@
             candidate_count=config.num_choices,
             presence_penalty=config.presence_penalty,
             frequency_penalty=config.frequency_penalty,
+            safety_settings=safety_settings_to_list(self.safety_settings),
+            tools=gemini_tools,
+            tool_config=gemini_tool_config,
+            system_instruction=await extract_system_message_as_parts(
+                self.client, input
+            ),
         )
 
-        # google-native messages
-        contents = await as_chat_messages(input)
-
-        # tools
-        gemini_tools = chat_tools(tools) if len(tools) > 0 else None
-        gemini_tool_config = chat_tool_config(tool_choice) if len(tools) > 0 else None
-
-        # response for ModelCall
-        response: AsyncGenerateContentResponse | None = None
+        response: GenerateContentResponse | None = None
 
         def model_call() -> ModelCall:
             return build_model_call(
-                contents=contents,
+                contents=gemini_contents,
                 safety_settings=self.safety_settings,
                 generation_config=parameters,
                 tools=gemini_tools,
@@ -177,164 +229,149 @@
                 response=response,
             )
 
+        # TODO: would need to monkey patch AuthorizedSession.request
+
         try:
-            response = await self.model.generate_content_async(
-                contents=contents,
-                safety_settings=self.safety_settings,
-                generation_config=parameters,
-                tools=gemini_tools,
-                tool_config=gemini_tool_config,
+            response = await self.client.aio.models.generate_content(
+                model=self.model_name,
+                contents=gemini_contents,
+                config=parameters,
             )
+        except ClientError as ex:
+            return self.handle_client_error(ex), model_call()
 
-        except InvalidArgument as ex:
-            return self.handle_invalid_argument(ex), model_call()
-
-        # build output
         output = ModelOutput(
             model=self.model_name,
-            choices=completion_choices_from_candidates(response.candidates),
-            usage=ModelUsage(
-                input_tokens=response.usage_metadata.prompt_token_count,
-                output_tokens=response.usage_metadata.candidates_token_count,
-                total_tokens=response.usage_metadata.total_token_count,
-            ),
+            choices=completion_choices_from_candidates(response),
+            usage=usage_metadata_to_model_usage(response.usage_metadata),
         )
 
-        # return
         return output, model_call()
 
-    def handle_invalid_argument(self, ex: InvalidArgument) -> ModelOutput | Exception:
-        if "size exceeds the limit" in ex.message.lower():
-            return ModelOutput.from_content(
-                model=self.model_name, content=ex.message, stop_reason="model_length"
-            )
-        else:
-            return ex
-
     @override
     def is_rate_limit(self, ex: BaseException) -> bool:
-        return isinstance(
-            ex,
-            TooManyRequests | InternalServerError | ServiceUnavailable | GatewayTimeout,
-        )
+        return isinstance(ex, APIError) and ex.code in (429, 500, 503, 504)
 
     @override
     def connection_key(self) -> str:
         """Scope for enforcing max_connections (could also use endpoint)."""
         return self.model_name
 
+    def handle_client_error(self, ex: ClientError) -> ModelOutput | Exception:
+        if (
+            ex.code == 400
+            and ex.message
+            and (
+                "maximum number of tokens" in ex.message
+                or "size exceeds the limit" in ex.message
+            )
+        ):
+            return ModelOutput.from_content(
+                self.model_name, content=ex.message, stop_reason="model_length"
+            )
+        else:
+            raise ex
+
+
+def safety_settings_to_list(safety_settings: SafetySettingDict) -> list[SafetySetting]:
+    return [
+        SafetySetting(
+            category=category,
+            threshold=threshold,
+        )
+        for category, threshold in safety_settings.items()
+    ]
+
 
 def build_model_call(
-    contents: list[ContentDict],
+    contents: list[Content],
     generation_config: GenerationConfig,
-    safety_settings: EasySafetySettingDict,
+    safety_settings: SafetySettingDict,
     tools: list[Tool] | None,
     tool_config: ToolConfig | None,
-    response: AsyncGenerateContentResponse | None,
+    response: GenerateContentResponse | None,
 ) -> ModelCall:
     return ModelCall.create(
         request=dict(
-            contents=[model_call_content(content) for content in contents],
+            contents=contents,
             generation_config=generation_config,
             safety_settings=safety_settings,
-            tools=[MessageToDict(tool._proto._pb) for tool in tools]
-            if tools is not None
-            else None,
-            tool_config=MessageToDict(tool_config._pb)
-            if tool_config is not None
-            else None,
+            tools=tools if tools is not None else None,
+            tool_config=tool_config if tool_config is not None else None,
         ),
-        response=response.to_dict() if response is not None else {},  # type: ignore[no-untyped-call]
+        response=response if response is not None else {},
        filter=model_call_filter,
     )
 
 
 def model_call_filter(key: JsonValue | None, value: JsonValue) -> JsonValue:
-    # remove images from raw api call
     if key == "inline_data" and isinstance(value, dict) and "data" in value:
         value = copy(value)
         value.update(data=BASE_64_DATA_REMOVED)
     return value
 
 
-def model_call_content(content: ContentDict) -> ContentDict:
-    return ContentDict(
-        role=content["role"], parts=[model_call_part(part) for part in content["parts"]]
-    )
-
-
-def model_call_part(part: PartType) -> PartType:
-    if isinstance(part, proto.Message):
-        return cast(PartDict, MessageToDict(part._pb))
-    elif isinstance(part, dict):
-        part = part.copy()
-        keys = list(part.keys())
-        for key in keys:
-            part[key] = model_call_part(part[key])  # type: ignore[literal-required]
-        return part
-    else:
-        return part
-
-
-async def as_chat_messages(messages: list[ChatMessage]) -> list[ContentDict]:
-    # google does not support system messages so filter them out to start with
-    system_messages = [message for message in messages if message.role == "system"]
+async def as_chat_messages(
+    client: Client, messages: list[ChatMessage]
+) -> list[Content]:
+    # There is no "system" role in the `google-genai` package. Instead, system messages
+    # are included in the `GenerateContentConfig` as a `system_instruction`. Strip any
+    # system messages out.
     supported_messages = [message for message in messages if message.role != "system"]
 
     # build google chat messages
-    chat_messages = [await content_dict(message) for message in supported_messages]
-
-    # we want the system messages to be prepended to the first user message
-    # (if there is no first user message then prepend one)
-    prepend_system_messages(chat_messages, system_messages)
+    chat_messages = [await content(client, message) for message in supported_messages]
 
     # combine consecutive tool messages
-    chat_messages = functools.reduce(consective_tool_message_reducer, chat_messages, [])
+    chat_messages = functools.reduce(
+        consecutive_tool_message_reducer, chat_messages, []
+    )
 
     # return messages
     return chat_messages
 
 
-def consective_tool_message_reducer(
-    messages: list[ContentDict],
-    message: ContentDict,
-) -> list[ContentDict]:
+def consecutive_tool_message_reducer(
+    messages: list[Content],
+    message: Content,
+) -> list[Content]:
     if (
-        message["role"] == "function"
+        message.role == "function"
         and len(messages) > 0
-        and messages[-1]["role"] == "function"
+        and messages[-1].role == "function"
     ):
-        messages[-1] = ContentDict(
-            role="function", parts=messages[-1]["parts"] + message["parts"]
+        messages[-1] = Content(
+            role="function", parts=messages[-1].parts + message.parts
         )
     else:
         messages.append(message)
     return messages
 
 
-async def content_dict(
+async def content(
+    client: Client,
     message: ChatMessageUser | ChatMessageAssistant | ChatMessageTool,
-) -> ContentDict:
+) -> Content:
     if isinstance(message, ChatMessageUser):
-        return ContentDict(
+        if isinstance(message.content, str):
+            return Content(
+                role="user", parts=[await content_part(client, message.content)]
+            )
+        return Content(
             role="user",
             parts=(
-                [message.content or NO_CONTENT]
-                if isinstance(message.content, str)
-                else [await content_part(content) for content in message.content]
+                [await content_part(client, content) for content in message.content]
             ),
         )
     elif isinstance(message, ChatMessageAssistant):
-        content_parts: list[PartType] = []
+        content_parts: list[Part] = []
         # tool call parts
         if message.tool_calls is not None:
             content_parts.extend(
                 [
-                    Part(
-                        function_call=FunctionCall(
-                            name=tool_call.function,
-                            args=dict_to_struct(tool_call.arguments),
-                        )
+                    Part.from_function_call(
+                        name=tool_call.function,
+                        args=tool_call.arguments,
                     )
                     for tool_call in message.tool_calls
                 ]
345
382
  content_parts.append(Part(text=message.content or NO_CONTENT))
346
383
  else:
347
384
  content_parts.extend(
348
- [await content_part(content) for content in message.content]
385
+ [await content_part(client, content) for content in message.content]
349
386
  )
350
387
 
351
388
  # return parts
352
- return ContentDict(role="model", parts=content_parts)
389
+ return Content(role="model", parts=content_parts)
353
390
 
354
391
  elif isinstance(message, ChatMessageTool):
355
392
  response = FunctionResponse(
356
393
  name=message.tool_call_id,
357
- response=ParseDict(
358
- js_dict={
359
- "content": (
360
- message.error.message
361
- if message.error is not None
362
- else message.text
363
- )
364
- },
365
- message=Struct(),
366
- ),
394
+ response={
395
+ "content": (
396
+ message.error.message if message.error is not None else message.text
397
+ )
398
+ },
367
399
  )
368
- return ContentDict(role="function", parts=[Part(function_response=response)])
369
-
400
+ return Content(role="function", parts=[Part(function_response=response)])
370
401
 
371
- def dict_to_struct(x: dict[str, Any]) -> Struct:
372
- struct = Struct()
373
- struct.update(x)
374
- return struct
375
402
 
376
-
377
- async def content_part(content: Content | str) -> PartType:
403
+ async def content_part(client: Client, content: InspectContent | str) -> Part:
378
404
  if isinstance(content, str):
379
- return content or NO_CONTENT
405
+ return Part.from_text(text=content or NO_CONTENT)
380
406
  elif isinstance(content, ContentText):
381
- return content.text or NO_CONTENT
407
+ return Part.from_text(text=content.text or NO_CONTENT)
382
408
  else:
383
- return await chat_content_to_part(content)
409
+ return await chat_content_to_part(client, content)
384
410
 
385
411
 
386
412
  async def chat_content_to_part(
413
+ client: Client,
387
414
  content: ContentImage | ContentAudio | ContentVideo,
388
- ) -> PartType:
415
+ ) -> Part:
389
416
  if isinstance(content, ContentImage):
390
417
  content_bytes, mime_type = await file_as_data(content.image)
391
- return Blob(mime_type=mime_type, data=content_bytes)
392
- else:
393
- return await file_for_content(content)
394
-
395
-
396
- def prepend_system_messages(
397
- messages: list[ContentDict], system_messages: list[ChatMessageSystem]
398
- ) -> None:
399
- # create system_parts
400
- system_parts: list[PartType] = [
401
- Part(text=message.text) for message in system_messages
402
- ]
403
-
404
- # we want the system messages to be prepended to the first user message
405
- # (if there is no first user message then prepend one)
406
- if len(messages) > 0 and messages[0].get("role") == "user":
407
- messages[0]["parts"] = system_parts + messages[0].get("parts", [])
418
+ return Part.from_bytes(mime_type=mime_type, data=content_bytes)
408
419
  else:
409
- messages.insert(0, ContentDict(role="user", parts=system_parts))
420
+ return await file_for_content(client, content)
421
+
422
+
423
+ async def extract_system_message_as_parts(
424
+ client: Client,
425
+ messages: list[ChatMessage],
426
+ ) -> list[Part] | None:
427
+ system_parts: list[Part] = []
428
+ for message in messages:
429
+ if message.role == "system":
430
+ content = message.content
431
+ if isinstance(content, str):
432
+ system_parts.append(Part.from_text(text=content))
433
+ elif isinstance(content, list): # list[InspectContent]
434
+ system_parts.extend(
435
+ [await content_part(client, content) for content in content]
436
+ )
437
+ else:
438
+ raise ValueError(f"Unsupported system message content: {content}")
439
+ # google-genai raises "ValueError: content is required." if the list is empty.
440
+ return system_parts or None
410
441
 
411
442
 
412
443
  def chat_tools(tools: list[ToolInfo]) -> list[Tool]:
@@ -424,8 +455,6 @@ def chat_tools(tools: list[ToolInfo]) -> list[Tool]:
 
 
 # https://ai.google.dev/gemini-api/tutorials/extract_structured_data#define_the_schema
-
-
 def schema_from_param(param: ToolParam | ToolParams, nullable: bool = False) -> Schema:
     if isinstance(param, ToolParams):
         param = ToolParam(
@@ -461,7 +490,7 @@ def schema_from_param(param: ToolParam | ToolParams, nullable: bool = False) -> Schema:
             description=param.description,
             properties={k: schema_from_param(v) for k, v in param.properties.items()}
             if param.properties is not None
-            else None,
+            else {},
             required=param.required,
             nullable=nullable,
         )
478
507
 
479
508
 
480
509
  def chat_tool_config(tool_choice: ToolChoice) -> ToolConfig:
481
- # NOTE: Google seems to sporadically return errors when being
482
- # passed a FunctionCallingConfig with mode="ANY". therefore,
483
- # we 'correct' this to "AUTO" to prevent the errors
484
- mode = "AUTO"
485
- if tool_choice == "none":
486
- mode = "NONE"
487
- return ToolConfig(function_calling_config=FunctionCallingConfig(mode=mode))
488
-
489
- # This is the 'correct' implementation if Google wasn't returning
490
- # errors for mode="ANY". we can test whether this is working properly
491
- # by commenting this back in and running pytest -k google_tools
492
- #
493
- # if isinstance(tool_choice, ToolFunction):
494
- # return ToolConfig(
495
- # function_calling_config=FunctionCallingConfig(
496
- # mode="ANY", allowed_function_names=[tool_choice.name]
497
- # )
498
- # )
499
- # else:
500
- # return ToolConfig(
501
- # function_calling_config=FunctionCallingConfig(mode=tool_choice.upper())
502
- # )
510
+ if isinstance(tool_choice, ToolFunction):
511
+ return ToolConfig(
512
+ function_calling_config=FunctionCallingConfig(
513
+ mode="ANY", allowed_function_names=[tool_choice.name]
514
+ )
515
+ )
516
+ else:
517
+ return ToolConfig(
518
+ function_calling_config=FunctionCallingConfig(mode=tool_choice.upper())
519
+ )
503
520
 
504
521
 
505
522
  def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoice:
506
523
  # check for completion text
507
- content = " ".join(
508
- [part.text for part in candidate.content.parts if part.text is not None]
509
- )
524
+ content = ""
525
+ # content can be None when the finish_reason is SAFETY
526
+ if candidate.content is not None:
527
+ content = " ".join(
528
+ [
529
+ part.text
530
+ for part in candidate.content.parts
531
+ if part.text is not None and candidate.content is not None
532
+ ]
533
+ )
534
+
535
+ # split reasoning
536
+ reasoning, content = split_reasoning(content)
510
537
 
511
538
  # now tool calls
512
539
  tool_calls: list[ToolCall] = []
513
- for part in candidate.content.parts:
514
- if part.function_call:
515
- function_call = MessageToDict(getattr(part.function_call, "_pb"))
516
- tool_calls.append(
517
- ToolCall(
518
- type="function",
519
- id=function_call["name"],
520
- function=function_call["name"],
521
- arguments=function_call["args"],
540
+ if candidate.content is not None and candidate.content.parts is not None:
541
+ for part in candidate.content.parts:
542
+ if part.function_call:
543
+ tool_calls.append(
544
+ ToolCall(
545
+ type="function",
546
+ id=part.function_call.name,
547
+ function=part.function_call.name,
548
+ arguments=part.function_call.args,
549
+ )
522
550
  )
523
- )
524
551
 
525
552
  # stop reason
526
- stop_reason = candidate_stop_reason(candidate.finish_reason)
553
+ stop_reason = finish_reason_to_stop_reason(candidate.finish_reason)
527
554
 
528
- # build choide
555
+ # build choice
529
556
  choice = ChatCompletionChoice(
530
557
  message=ChatMessageAssistant(
531
558
  content=content,
559
+ reasoning=reasoning,
532
560
  tool_calls=tool_calls if len(tool_calls) > 0 else None,
533
561
  source="generate",
534
562
  ),
@@ -558,111 +586,144 @@ def completion_choice_from_candidate(candidate: Candidate) -> ChatCompletionChoi
558
586
 
559
587
 
560
588
  def completion_choices_from_candidates(
561
- candidates: MutableSequence[Candidate],
589
+ response: GenerateContentResponse,
562
590
  ) -> list[ChatCompletionChoice]:
591
+ candidates = response.candidates
563
592
  if candidates:
564
593
  candidates_list = sorted(candidates, key=lambda c: c.index)
565
594
  return [
566
595
  completion_choice_from_candidate(candidate) for candidate in candidates_list
567
596
  ]
568
- else:
597
+ elif response.prompt_feedback:
569
598
  return [
570
599
  ChatCompletionChoice(
571
600
  message=ChatMessageAssistant(
572
- content="I was unable to generate a response.",
601
+ content=prompt_feedback_to_content(response.prompt_feedback),
573
602
  source="generate",
574
603
  ),
575
- stop_reason="unknown",
604
+ stop_reason="content_filter",
576
605
  )
577
606
  ]
607
+ else:
608
+ raise RuntimeError(
609
+ "Google response includes no completion candidates and no block reason: "
610
+ + f"{response.model_dump_json(indent=2)}"
611
+ )
578
612
 
579
613
 
580
- # google doesn't export FinishReason (it's in a sub-namespace with a beta
581
- # designation that seems destined to change, so we vendor the enum here)
582
- class FinishReason:
583
- FINISH_REASON_UNSPECIFIED = 0
584
- STOP = 1
585
- MAX_TOKENS = 2
586
- SAFETY = 3
587
- RECITATION = 4
588
- OTHER = 5
614
+ def split_reasoning(content: str) -> tuple[str | None, str]:
615
+ separator = "\nFinal Answer: "
616
+ if separator in content:
617
+ parts = content.split(separator, 1) # dplit only on first occurrence
618
+ return parts[0].strip(), separator.lstrip() + parts[1].strip()
619
+ else:
620
+ return None, content.strip()
621
+
589
622
 
623
+ def prompt_feedback_to_content(
624
+ feedback: GenerateContentResponsePromptFeedback,
625
+ ) -> str:
626
+ content: list[str] = []
627
+ block_reason = str(feedback.block_reason) if feedback.block_reason else "UNKNOWN"
628
+ content.append(f"BLOCKED: {block_reason}")
590
629
 
591
- def candidate_stop_reason(finish_reason: FinishReason) -> StopReason:
630
+ if feedback.block_reason_message is not None:
631
+ content.append(feedback.block_reason_message)
632
+ if feedback.safety_ratings is not None:
633
+ content.extend(
634
+ [rating.model_dump_json(indent=2) for rating in feedback.safety_ratings]
635
+ )
636
+ return "\n".join(content)
637
+
638
+
639
+ def usage_metadata_to_model_usage(
640
+ metadata: GenerateContentResponseUsageMetadata,
641
+ ) -> ModelUsage | None:
642
+ if metadata is None:
643
+ return None
644
+ return ModelUsage(
645
+ input_tokens=metadata.prompt_token_count or 0,
646
+ output_tokens=metadata.candidates_token_count or 0,
647
+ total_tokens=metadata.total_token_count or 0,
648
+ )
649
+
650
+
651
+ def finish_reason_to_stop_reason(finish_reason: FinishReason) -> StopReason:
592
652
  match finish_reason:
593
653
  case FinishReason.STOP:
594
654
  return "stop"
595
655
  case FinishReason.MAX_TOKENS:
596
656
  return "max_tokens"
597
- case FinishReason.SAFETY | FinishReason.RECITATION:
657
+ case (
658
+ FinishReason.SAFETY
659
+ | FinishReason.RECITATION
660
+ | FinishReason.BLOCKLIST
661
+ | FinishReason.PROHIBITED_CONTENT
662
+ | FinishReason.SPII
663
+ ):
598
664
  return "content_filter"
599
665
  case _:
600
666
  return "unknown"
601
667
 
602
668
 
603
- def gapi_should_retry(ex: BaseException) -> bool:
604
- if isinstance(ex, Exception):
605
- return if_transient_error(ex)
606
- else:
607
- return False
608
-
609
-
610
669
  def parse_safety_settings(
611
670
  safety_settings: Any,
612
- ) -> EasySafetySettingDict:
671
+ ) -> dict[HarmCategory, HarmBlockThreshold]:
613
672
  # ensure we have a dict
614
673
  if isinstance(safety_settings, str):
615
674
  safety_settings = json.loads(safety_settings)
616
675
  if not isinstance(safety_settings, dict):
617
676
  raise ValueError(f"{SAFETY_SETTINGS} must be dictionary.")
618
677
 
619
- parsed_settings: EasySafetySettingDict = {}
678
+ parsed_settings: dict[HarmCategory, HarmBlockThreshold] = {}
620
679
  for key, value in safety_settings.items():
621
- if isinstance(key, str):
622
- key = str_to_harm_category(key)
623
- if not isinstance(key, HarmCategory):
680
+ if not isinstance(key, str):
624
681
  raise ValueError(f"Unexpected type for harm category: {key}")
625
- if isinstance(value, str):
626
- value = str_to_harm_block_threshold(value)
627
- if not isinstance(value, HarmBlockThreshold):
682
+ if not isinstance(value, str):
628
683
  raise ValueError(f"Unexpected type for harm block threshold: {value}")
629
-
684
+ key = str_to_harm_category(key)
685
+ value = str_to_harm_block_threshold(value)
630
686
  parsed_settings[key] = value
631
-
632
687
  return parsed_settings
633
688
 
634
689
 
635
- def str_to_harm_category(category: str) -> int:
690
+ def str_to_harm_category(category: str) -> HarmCategory:
636
691
  category = category.upper()
692
+ # `in` instead of `==` to allow users to pass in short version e.g. "HARASSMENT" or
693
+ # long version e.g. "HARM_CATEGORY_HARASSMENT" strings.
694
+ if "CIVIC_INTEGRITY" in category:
695
+ return HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY
696
+ if "DANGEROUS_CONTENT" in category:
697
+ return HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT
698
+ if "HATE_SPEECH" in category:
699
+ return HarmCategory.HARM_CATEGORY_HATE_SPEECH
637
700
  if "HARASSMENT" in category:
638
- return cast(int, HarmCategory.HARM_CATEGORY_HARASSMENT)
639
- elif "HATE_SPEECH" in category:
640
- return cast(int, HarmCategory.HARM_CATEGORY_HATE_SPEECH)
641
- elif "SEXUALLY_EXPLICIT" in category:
642
- return cast(int, HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT)
643
- elif "DANGEROUS_CONTENT" in category:
644
- return cast(int, HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT)
645
- else:
646
- # NOTE: Although there is an "UNSPECIFIED" category, in the
647
- # documentation, the API does not accept it.
648
- raise ValueError(f"Unknown HarmCategory: {category}")
701
+ return HarmCategory.HARM_CATEGORY_HARASSMENT
702
+ if "SEXUALLY_EXPLICIT" in category:
703
+ return HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT
704
+ if "UNSPECIFIED" in category:
705
+ return HarmCategory.HARM_CATEGORY_UNSPECIFIED
706
+ raise ValueError(f"Unknown HarmCategory: {category}")
649
707
 
650
708
 
651
- def str_to_harm_block_threshold(threshold: str) -> int:
709
+ def str_to_harm_block_threshold(threshold: str) -> HarmBlockThreshold:
652
710
  threshold = threshold.upper()
653
711
  if "LOW" in threshold:
654
712
  return HarmBlockThreshold.BLOCK_LOW_AND_ABOVE
655
- elif "MEDIUM" in threshold:
713
+ if "MEDIUM" in threshold:
656
714
  return HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
657
- elif "HIGH" in threshold:
715
+ if "HIGH" in threshold:
658
716
  return HarmBlockThreshold.BLOCK_ONLY_HIGH
659
- elif "NONE" in threshold:
717
+ if "NONE" in threshold:
660
718
  return HarmBlockThreshold.BLOCK_NONE
661
- else:
662
- raise ValueError(f"Unknown HarmBlockThreshold: {threshold}")
719
+ if "OFF" in threshold:
720
+ return HarmBlockThreshold.OFF
721
+ raise ValueError(f"Unknown HarmBlockThreshold: {threshold}")
663
722
 
664
723
 
665
- async def file_for_content(content: ContentAudio | ContentVideo) -> File:
724
+ async def file_for_content(
725
+ client: Client, content: ContentAudio | ContentVideo
726
+ ) -> File:
666
727
  # helper to write trace messages
667
728
  def trace(message: str) -> None:
668
729
  trace_message(logger, "Google Files", message)
@@ -674,7 +735,6 @@ async def file_for_content(content: ContentAudio | ContentVideo) -> File:
674
735
  file = content.video
675
736
  content_bytes, mime_type = await file_as_data(file)
676
737
  content_sha256 = hashlib.sha256(content_bytes).hexdigest()
677
-
678
738
  # we cache uploads for re-use, open the db where we track that
679
739
  # (track up to 1 million previous uploads)
680
740
  with inspect_kvstore("google_files", 1000000) as files_db:
@@ -682,7 +742,7 @@ async def file_for_content(content: ContentAudio | ContentVideo) -> File:
682
742
  uploaded_file = files_db.get(content_sha256)
683
743
  if uploaded_file:
684
744
  try:
685
- upload = get_file(uploaded_file)
745
+ upload: File = client.files.get(uploaded_file)
686
746
  if upload.state.name == "ACTIVE":
687
747
  trace(f"Using uploaded file: {uploaded_file}")
688
748
  return upload
@@ -693,20 +753,16 @@ async def file_for_content(content: ContentAudio | ContentVideo) -> File:
693
753
  except Exception as ex:
694
754
  trace(f"Error attempting to access uploaded file: {ex}")
695
755
  files_db.delete(content_sha256)
696
-
697
756
  # do the upload (and record it)
698
- upload = upload_file(BytesIO(content_bytes), mime_type=mime_type)
757
+ upload = client.files.upload(BytesIO(content_bytes), mime_type=mime_type)
699
758
  while upload.state.name == "PROCESSING":
700
759
  await asyncio.sleep(3)
701
- upload = get_file(upload.name)
702
-
760
+ upload = client.files.get(upload.name)
703
761
  if upload.state.name == "FAILED":
704
762
  trace(f"Failed to upload file '{upload.name}: {upload.error}")
705
763
  raise ValueError(f"Google file upload failed: {upload.error}")
706
-
707
764
  # trace and record it
708
765
  trace(f"Uploaded file: {upload.name}")
709
766
  files_db.put(content_sha256, upload.name)
710
-
711
767
  # return the file
712
768
  return upload
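
The rewritten provider is exercised through inspect_ai's public model API rather than called directly. A minimal sketch follows (not part of the diff): the model names and the project/location values are placeholders, while GOOGLE_API_KEY, the 'vertex' service prefix, and the project/location model args come from the provider code above.

    import asyncio

    from inspect_ai.model import get_model


    async def main() -> None:
        # Gemini API endpoint: the provider reads GOOGLE_API_KEY from the
        # environment (see the auth handling added in __init__ above).
        model = get_model("google/gemini-1.5-flash")
        output = await model.generate("Say hello.")
        print(output.completion)

        # Vertex AI via the new 'vertex' service prefix. project/location can
        # come from GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION env vars or be
        # passed as custom model args (-M on the CLI maps to kwargs here).
        vertex = get_model(
            "google/vertex/gemini-1.5-flash",
            project="my-project",  # placeholder
            location="us-central1",  # placeholder
        )
        print((await vertex.generate("Say hello.")).completion)


    asyncio.run(main())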