PyPI - inspect-ai - Versions diffs - 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl - Mend

inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161)

inspect_ai/__init__.py +2 -1
inspect_ai/_cli/common.py +7 -3
inspect_ai/_cli/eval.py +17 -2
inspect_ai/_cli/trace.py +21 -2
inspect_ai/_display/core/active.py +4 -3
inspect_ai/_display/core/config.py +3 -3
inspect_ai/_display/core/panel.py +7 -3
inspect_ai/_display/plain/__init__.py +0 -0
inspect_ai/_display/plain/display.py +203 -0
inspect_ai/_display/rich/display.py +4 -9
inspect_ai/_display/textual/app.py +4 -1
inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
inspect_ai/_display/textual/widgets/samples.py +119 -16
inspect_ai/_display/textual/widgets/sandbox.py +37 -0
inspect_ai/_eval/eval.py +32 -20
inspect_ai/_eval/evalset.py +7 -5
inspect_ai/_eval/score.py +1 -0
inspect_ai/_eval/task/__init__.py +2 -2
inspect_ai/_eval/task/images.py +40 -25
inspect_ai/_eval/task/results.py +50 -22
inspect_ai/_eval/task/run.py +180 -124
inspect_ai/_eval/task/sandbox.py +10 -5
inspect_ai/_eval/task/task.py +140 -25
inspect_ai/_util/constants.py +2 -0
inspect_ai/_util/content.py +23 -1
inspect_ai/_util/images.py +20 -17
inspect_ai/_util/kvstore.py +73 -0
inspect_ai/_util/notgiven.py +18 -0
inspect_ai/_util/port_names.py +61 -0
inspect_ai/_util/text.py +23 -0
inspect_ai/_util/thread.py +5 -0
inspect_ai/_view/www/App.css +31 -1
inspect_ai/_view/www/dist/assets/index.css +31 -1
inspect_ai/_view/www/dist/assets/index.js +25375 -1846
inspect_ai/_view/www/log-schema.json +129 -15
inspect_ai/_view/www/package.json +2 -0
inspect_ai/_view/www/src/App.mjs +8 -10
inspect_ai/_view/www/src/Types.mjs +0 -1
inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
inspect_ai/_view/www/src/index.js +75 -2
inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
inspect_ai/_view/www/src/types/log.d.ts +62 -27
inspect_ai/_view/www/src/utils/Format.mjs +10 -3
inspect_ai/_view/www/src/utils/Json.mjs +12 -6
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
inspect_ai/_view/www/vite.config.js +7 -0
inspect_ai/_view/www/yarn.lock +116 -0
inspect_ai/approval/_human/__init__.py +0 -0
inspect_ai/approval/_human/util.py +2 -2
inspect_ai/approval/_policy.py +12 -6
inspect_ai/dataset/_sources/csv.py +2 -1
inspect_ai/dataset/_sources/json.py +2 -1
inspect_ai/dataset/_sources/util.py +15 -7
inspect_ai/log/_condense.py +11 -1
inspect_ai/log/_log.py +3 -6
inspect_ai/log/_recorders/eval.py +19 -8
inspect_ai/log/_samples.py +26 -5
inspect_ai/log/_transcript.py +32 -2
inspect_ai/model/__init__.py +10 -2
inspect_ai/model/_call_tools.py +59 -12
inspect_ai/model/_chat_message.py +2 -4
inspect_ai/model/_conversation.py +61 -0
inspect_ai/model/_generate_config.py +10 -4
inspect_ai/model/_model.py +117 -18
inspect_ai/model/_model_output.py +7 -2
inspect_ai/model/_providers/anthropic.py +109 -51
inspect_ai/model/_providers/azureai.py +26 -24
inspect_ai/model/_providers/bedrock.py +43 -44
inspect_ai/model/_providers/google.py +121 -58
inspect_ai/model/_providers/groq.py +7 -5
inspect_ai/model/_providers/hf.py +11 -6
inspect_ai/model/_providers/mistral.py +17 -20
inspect_ai/model/_providers/openai.py +32 -21
inspect_ai/model/_providers/openai_o1.py +9 -8
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/model/_providers/together.py +8 -8
inspect_ai/model/_providers/vertex.py +18 -8
inspect_ai/scorer/__init__.py +13 -2
inspect_ai/scorer/_metrics/__init__.py +2 -2
inspect_ai/scorer/_metrics/std.py +3 -3
inspect_ai/scorer/_reducer/reducer.py +1 -1
inspect_ai/scorer/_scorer.py +2 -2
inspect_ai/solver/__init__.py +2 -5
inspect_ai/solver/_prompt.py +35 -5
inspect_ai/solver/_task_state.py +80 -38
inspect_ai/tool/__init__.py +11 -1
inspect_ai/tool/_tool.py +21 -3
inspect_ai/tool/_tool_call.py +10 -0
inspect_ai/tool/_tool_def.py +16 -5
inspect_ai/tool/_tool_with.py +21 -4
inspect_ai/tool/beta/__init__.py +5 -0
inspect_ai/tool/beta/_computer/__init__.py +3 -0
inspect_ai/tool/beta/_computer/_common.py +133 -0
inspect_ai/tool/beta/_computer/_computer.py +155 -0
inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
inspect_ai/util/__init__.py +2 -3
inspect_ai/util/{_trace.py → _conversation.py} +3 -17
inspect_ai/util/_display.py +14 -4
inspect_ai/util/_limit.py +26 -0
inspect_ai/util/_sandbox/context.py +12 -13
inspect_ai/util/_sandbox/docker/compose.py +24 -11
inspect_ai/util/_sandbox/docker/docker.py +84 -14
inspect_ai/util/_sandbox/docker/internal.py +3 -1
inspect_ai/util/_sandbox/environment.py +27 -1
inspect_ai/util/_sandbox/local.py +1 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
inspect_ai/model/_trace.py +0 -48
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0

inspect_ai/model/_providers/vertex.py CHANGED Viewed

@@ -23,9 +23,15 @@ from vertexai.generative_models import (  # type: ignore
 )
 from vertexai.generative_models import Content as VertexContent
-from inspect_ai._util.constants import BASE_64_DATA_REMOVED
-from inspect_ai._util.content import Content, ContentText
-from inspect_ai._util.images import image_as_data
+from inspect_ai._util.constants import BASE_64_DATA_REMOVED, NO_CONTENT
+from inspect_ai._util.content import (
+    Content,
+    ContentAudio,
+    ContentImage,
+    ContentText,
+    ContentVideo,
+)
+from inspect_ai._util.images import file_as_data
 from inspect_ai.tool import ToolCall, ToolChoice, ToolInfo
 from .._chat_message import (
@@ -244,9 +250,6 @@ def consective_tool_message_reducer(
     return messages
-NO_CONTENT = "(no content)"
 async def content_dict(
     message: ChatMessageUser | ChatMessageAssistant | ChatMessageTool,
 ) -> VertexContent:
@@ -308,9 +311,16 @@ async def content_part(content: Content | str) -> Part:
         return Part.from_text(content or NO_CONTENT)
     elif isinstance(content, ContentText):
         return Part.from_text(content.text or NO_CONTENT)
-    else:
-        image_bytes, mime_type = await image_as_data(content.image)
+    elif isinstance(content, ContentImage):
+        image_bytes, mime_type = await file_as_data(content.image)
         return Part.from_image(image=Image.from_bytes(data=image_bytes))
+    else:
+        if isinstance(content, ContentAudio):
+            file = content.audio
+        elif isinstance(content, ContentVideo):
+            file = content.video
+        file_bytes, mime_type = await file_as_data(file)
+        return Part.from_data(file_bytes, mime_type)
 def prepend_system_messages(

inspect_ai/scorer/__init__.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from inspect_ai._util.deprecation import relocated_module_attribute
 from ._answer import AnswerPattern, answer
 from ._choice import choice
 from ._classification import exact, f1
@@ -16,7 +18,7 @@ from ._metric import (
 )
 from ._metrics.accuracy import accuracy
 from ._metrics.mean import mean
-from ._metrics.std import bootstrap_std, std, stderr
+from ._metrics.std import bootstrap_stderr, std, stderr
 from ._model import model_graded_fact, model_graded_qa
 from ._multi import multi_scorer
 from ._pattern import pattern
@@ -50,7 +52,7 @@ __all__ = [
     "Target",
     "scorer",
     "accuracy",
-    "bootstrap_std",
+    "bootstrap_stderr",
     "std",
     "stderr",
     "mean",
@@ -76,3 +78,12 @@ __all__ = [
     "at_least",
     "pass_at",
 ]
+_BOOTSTRAP_RENAME_VERSION = "0.3.58"
+_REMOVED_IN = "0.4"
+relocated_module_attribute(
+    "bootstrap_std",
+    "inspect_ai.scorer.bootstrap_stderr",
+    _BOOTSTRAP_RENAME_VERSION,
+    _REMOVED_IN,
+)

inspect_ai/scorer/_metrics/__init__.py CHANGED Viewed

@@ -1,12 +1,12 @@
 from .accuracy import accuracy
 from .mean import mean, var
-from .std import bootstrap_std, std, stderr
+from .std import bootstrap_stderr, std, stderr
 __all__ = [
     "accuracy",
     "mean",
     "var",
-    "bootstrap_std",
+    "bootstrap_stderr",
     "std",
     "stderr",
 ]

inspect_ai/scorer/_metrics/std.py CHANGED Viewed

@@ -15,10 +15,10 @@ logger = getLogger(__name__)
 @metric
-def bootstrap_std(
+def bootstrap_stderr(
     num_samples: int = 1000, to_float: ValueToFloat = value_to_float()
 ) -> Metric:
-    """Standard deviation of a bootstrapped estimate of the mean.
+    """Standard error of the mean using bootstrap.
     Args:
        num_samples (int): Number of bootstrap samples to take.
@@ -31,7 +31,7 @@ def bootstrap_std(
          0 if the Value is a complex object (list or dict).
     Returns:
-       bootstrap_std metric
+       bootstrap_stderr metric
     """
     def metric(scores: list[Score]) -> float:

inspect_ai/scorer/_reducer/reducer.py CHANGED Viewed

@@ -111,7 +111,7 @@ def pass_at(
             if total - correct < k:
                 return 1.0
             else:
-                return 1.0 - cast(
+                return 1.0 - cast(  # type: ignore[redundant-cast]
                     float,
                     np.prod(1.0 - k / np.arange(total - correct + 1, total + 1)).item(),
                 )

inspect_ai/scorer/_scorer.py CHANGED Viewed

@@ -151,8 +151,8 @@ def scorer_metrics(
         return cast(list[Metric | dict[str, list[Metric]]], metrics_raw)
-def unique_scorer_name(scorer: Scorer, already_used_names: list[str]) -> str:
-    base_name = registry_unqualified_name(scorer)
+def unique_scorer_name(scorer: Scorer | str, already_used_names: list[str]) -> str:
+    base_name = scorer if isinstance(scorer, str) else registry_unqualified_name(scorer)
     scorer_name = base_name
     count = 1
     while scorer_name in already_used_names:

inspect_ai/solver/__init__.py CHANGED Viewed

@@ -7,11 +7,7 @@ from ._fork import fork
 from ._human_agent.agent import human_agent
 from ._multiple_choice import MultipleChoiceTemplate, multiple_choice
 from ._plan import Plan, plan
-from ._prompt import (
-    chain_of_thought,
-    prompt_template,
-    system_message,
-)
+from ._prompt import chain_of_thought, prompt_template, system_message, user_message
 from ._solver import Generate, Solver, SolverSpec, generate, solver
 from ._task_state import Choice, Choices, TaskState
 from ._use_tools import use_tools
@@ -26,6 +22,7 @@ __all__ = [
     "chain_of_thought",
     "multiple_choice",
     "system_message",
+    "user_message",
     "self_critique",
     "use_tools",
     "plan",

inspect_ai/solver/_prompt.py CHANGED Viewed

@@ -2,6 +2,7 @@ from typing import Any
 from inspect_ai._util.dict import omit
 from inspect_ai.model import ChatMessageSystem
+from inspect_ai.model._chat_message import ChatMessageUser
 from inspect_ai.util import resource
 from ._solver import Generate, Solver, solver
@@ -15,7 +16,8 @@ def prompt_template(template: str, **params: Any) -> Solver:
     Prompt template containing a `{prompt}` placeholder and any
     number of additional `params`. All values contained in sample
-    `metadata` are also automatically included in the `params`.
+    `metadata` and `store` are also automatically included in the
+    `params`.
     Args:
       template: (str): Template for prompt.
@@ -29,7 +31,7 @@ def prompt_template(template: str, **params: Any) -> Solver:
     async def solve(state: TaskState, generate: Generate) -> TaskState:
         prompt = state.user_prompt
-        kwargs = omit(state.metadata, ["prompt"]) | params
+        kwargs = omit(state.metadata | state.store._data, ["prompt"]) | params
         prompt.text = prompt_template.format(prompt=prompt.text, **kwargs)
         return state
@@ -41,8 +43,9 @@ def system_message(template: str, **params: Any) -> Solver:
     """Solver which inserts a system message into the conversation.
     System message template containing any number of optional `params`.
-    for substitution. All values contained in sample `metadata` are also
-    automatically included in the `params`.
+    for substitution using the `str.format()` method. All values
+    contained in sample `metadata` and `store` are also automatically
+    included in the `params`.
     The new message will go after other system messages (if there
     are none it will be inserted at the beginning of the conversation).
@@ -58,7 +61,7 @@ def system_message(template: str, **params: Any) -> Solver:
     content = resource(template)
     async def solve(state: TaskState, generate: Generate) -> TaskState:
-        kwargs = state.metadata | params
+        kwargs = state.metadata | state.store._data | params
         append_system_message(
             state.messages, ChatMessageSystem(content=content.format(**kwargs))
         )
@@ -67,6 +70,33 @@ def system_message(template: str, **params: Any) -> Solver:
     return solve
+@solver
+def user_message(template: str, **params: Any) -> Solver:
+    """Solver which inserts a user message into the conversation.
+    User message template containing any number of optional `params`.
+    for substitution using the `str.format()` method. All values
+    contained in sample `metadata` and `store` are also automatically
+    included in the `params`.
+    Args:
+      template (str): Template for user message.
+      **params (dict[str,Any]): Parameters to fill into the template.
+    Returns:
+      A solver that inserts the parameterised user message.
+    """
+    # read template
+    content = resource(template)
+    async def solve(state: TaskState, generate: Generate) -> TaskState:
+        kwargs = state.metadata | state.store._data | params
+        state.messages.append(ChatMessageUser(content=content.format(**kwargs)))
+        return state
+    return solve
 DEFAULT_COT_TEMPLATE = r"""
 {prompt}

inspect_ai/solver/_task_state.py CHANGED Viewed

@@ -2,8 +2,9 @@ from collections.abc import Sequence
 from contextvars import ContextVar
 from copy import deepcopy
 from dataclasses import dataclass
+from itertools import tee
 from random import Random
-from typing import Any, Type, Union, cast, overload
+from typing import Any, Iterable, SupportsIndex, Type, Union, cast, overload
 from pydantic_core import to_jsonable_python
@@ -15,9 +16,13 @@ from inspect_ai.model import (
     ModelOutput,
 )
 from inspect_ai.model._call_tools import tools_info
+from inspect_ai.model._chat_message import ChatMessageBase
 from inspect_ai.model._model import sample_total_tokens
+from inspect_ai.scorer._metric import Score
+from inspect_ai.scorer._target import Target
 from inspect_ai.tool import Tool, ToolChoice
 from inspect_ai.tool._tool_def import ToolDef
+from inspect_ai.util._limit import SampleLimitExceededError
 from inspect_ai.util._store import Store, store_jsonable
 from inspect_ai.util._store_model import SMT
@@ -136,6 +141,7 @@ class TaskState:
         epoch: int,
         input: str | list[ChatMessage],
         messages: list[ChatMessage],
+        target: Target = Target(""),
         choices: list[str] | None = [],
         output: ModelOutput | None = None,
         message_limit: int | None = None,
@@ -161,10 +167,13 @@ class TaskState:
         or `input_text` only
         """
+        self.target = target
+        """The scoring target for this `Sample`."""
         self.metadata = metadata
         """Metadata from the `Sample` for this `TaskState`"""
-        self.messages = messages
+        self._messages: list[ChatMessage] = ChatMessageList(messages)
         """
         Chat conversation history for sample.
@@ -189,9 +198,7 @@ class TaskState:
         """
         self._message_limit = message_limit
-        self._message_limit_exceeded = False
         self._token_limit = token_limit
-        self._token_limit_exceeded = False
         self._completed = completed
         """Store for shared data"""
@@ -202,6 +209,9 @@ class TaskState:
         else:
             self.choices = Choices([])
+        self.scores: dict[str, Score] | None = None
+        """Scores yielded by running task."""
     @property
     def model(self) -> ModelName:
         """Name of model being evaluated."""
@@ -254,6 +264,16 @@ class TaskState:
         else:
             raise ValueError("user_prompt requested from TaskState but none available")
+    @property
+    def messages(self) -> list[ChatMessage]:
+        """Messages in chat history"""
+        return self._messages
+    @messages.setter
+    def messages(self, messages: list[ChatMessage]) -> None:
+        """Set messages in chat history."""
+        self._messages = ChatMessageList(messages)
     @property
     def max_messages(self) -> int | None:
         """Deprecated (use message_limit)."""
@@ -300,40 +320,7 @@ class TaskState:
     @property
     def completed(self) -> bool:
         """Is the task completed."""
-        # update messages
-        from inspect_ai.log._samples import set_active_sample_total_messages
-        from inspect_ai.log._transcript import SampleLimitEvent, transcript
-        set_active_sample_total_messages(len(self.messages))
-        if self._completed:
-            return True
-        elif self.message_limit and len(self.messages) >= self.message_limit:
-            # log if this is the first time we hit this
-            if not self._message_limit_exceeded:
-                self._message_limit_exceeded = True
-                transcript()._event(
-                    SampleLimitEvent(
-                        type="message",
-                        message=f"Sample completed: exceeded message limit ({self.message_limit})",
-                        limit=self.message_limit,
-                    )
-                )
-            return True
-        elif self.token_limit and self.token_usage >= self.token_limit:
-            # log if this is the first time we hit this
-            if not self._token_limit_exceeded:
-                self._token_limit_exceeded = True
-                transcript()._event(
-                    SampleLimitEvent(
-                        type="token",
-                        message=f"Sample completed: exceeded token limit ({self.token_limit:,})",
-                        limit=self.token_limit,
-                    )
-                )
-            return True
-        else:
-            return False
+        return self._completed
     @completed.setter
     def completed(self, completed: bool) -> None:
@@ -413,3 +400,58 @@ def state_jsonable(state: TaskState | None = None) -> dict[str, Any]:
 def sample_jsonable(sample: Sample) -> dict[str, Any]:
     jsonable = to_jsonable_python(sample, exclude_none=True, fallback=lambda _x: None)
     return cast(dict[str, Any], deepcopy(jsonable))
+class ChatMessageList(list[ChatMessage]):
+    def __init__(self, iterable: Iterable[ChatMessage]):
+        items, length = self._iterable_length(iterable)
+        self._check_size(length)
+        super().__init__(items)
+    def _check_size(self, additional_items: int = 1) -> None:
+        from inspect_ai.log._samples import active_sample_message_limit
+        messages_limit = active_sample_message_limit()
+        if messages_limit is not None:
+            messages = len(self) + additional_items
+            if messages > messages_limit:
+                raise SampleLimitExceededError(
+                    "message", value=messages, limit=messages_limit
+                )
+    def append(self, item: ChatMessage) -> None:
+        self._check_size()
+        super().append(item)
+    def extend(self, items: Iterable[ChatMessage]) -> None:
+        items, length = self._iterable_length(items)
+        self._check_size(length)
+        super().extend(items)
+    def insert(self, index: SupportsIndex, item: ChatMessage) -> None:
+        self._check_size()
+        super().insert(index, item)
+    @overload
+    def __setitem__(self, index: SupportsIndex, item: ChatMessage) -> None: ...
+    @overload
+    def __setitem__(self, index: slice, item: Iterable[ChatMessage]) -> None: ...
+    def __setitem__(
+        self, index: SupportsIndex | slice, item: ChatMessage | Iterable[ChatMessage]
+    ) -> None:
+        if isinstance(index, slice) and not isinstance(item, ChatMessageBase):
+            item, length = self._iterable_length(item)
+            size_change = length - len(self[index])
+            if size_change > 0:
+                self._check_size(size_change)
+        super().__setitem__(index, item)  # type: ignore[assignment,index]
+    def _iterable_length(
+        self, items: Iterable[ChatMessage]
+    ) -> tuple[Iterable[ChatMessage], int]:
+        items, counter = tee(items)
+        length = sum(1 for _ in counter)
+        return items, length

inspect_ai/tool/__init__.py CHANGED Viewed

@@ -1,4 +1,10 @@
-from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai._util.content import (
+    Content,
+    ContentAudio,
+    ContentImage,
+    ContentText,
+    ContentVideo,
+)
 from inspect_ai._util.deprecation import relocated_module_attribute
 from ._tool import Tool, ToolError, ToolResult, tool
@@ -6,6 +12,7 @@ from ._tool_call import (
     ToolCall,
     ToolCallContent,
     ToolCallError,
+    ToolCallModelInput,
     ToolCallView,
     ToolCallViewer,
 )
@@ -30,10 +37,13 @@ __all__ = [
     "ToolError",
     "ToolResult",
     "Content",
+    "ContentAudio",
     "ContentImage",
     "ContentText",
+    "ContentVideo",
     "ToolCall",
     "ToolCallContent",
+    "ToolCallModelInput",
     "ToolCallView",
     "ToolCallViewer",
     "ToolChoice",

inspect_ai/tool/_tool.py CHANGED Viewed

@@ -11,7 +11,12 @@ from typing import (
     runtime_checkable,
 )
-from inspect_ai._util.content import ContentImage, ContentText
+from inspect_ai._util.content import (
+    ContentAudio,
+    ContentImage,
+    ContentText,
+    ContentVideo,
+)
 from inspect_ai._util.registry import (
     RegistryInfo,
     registry_add,
@@ -19,7 +24,7 @@ from inspect_ai._util.registry import (
     registry_tag,
 )
-from ._tool_call import ToolCallViewer
+from ._tool_call import ToolCallModelInput, ToolCallViewer
 logger = getLogger(__name__)
@@ -31,7 +36,9 @@ ToolResult = (
     | bool
     | ContentText
     | ContentImage
-    | list[ContentText | ContentImage]
+    | ContentAudio
+    | ContentVideo
+    | list[ContentText | ContentImage | ContentAudio | ContentVideo]
 )
@@ -105,6 +112,7 @@ def tool(
     *,
     name: str | None = None,
     viewer: ToolCallViewer | None = None,
+    model_input: ToolCallModelInput | None = None,
     parallel: bool = True,
     prompt: str | None = None,
 ) -> Callable[[Callable[P, Tool]], Callable[P, Tool]]: ...
@@ -115,6 +123,7 @@ def tool(
     *,
     name: str | None = None,
     viewer: ToolCallViewer | None = None,
+    model_input: ToolCallModelInput | None = None,
     parallel: bool = True,
     prompt: str | None = None,
 ) -> Callable[P, Tool] | Callable[[Callable[P, Tool]], Callable[P, Tool]]:
@@ -128,6 +137,8 @@ def tool(
             will be used as the name of the tool.
         viewer (ToolCallViewer | None): Provide a custom view
             of tool call and context.
+        model_input (ToolCallModelInput | None): Provide a custom
+            function for playing back tool results as model input.
         parallel (bool):
             Does this tool support parallel execution?
             (defaults to True).
@@ -169,6 +180,9 @@ def tool(
                         TOOL_PROMPT: prompt,
                         TOOL_PARALLEL: parallel,
                         TOOL_VIEWER: viewer,
+                        TOOL_MODEL_INPUT: (
+                            model_input or getattr(tool, TOOL_INIT_MODEL_INPUT, None)
+                        ),
                     },
                 ),
                 *args,
@@ -188,3 +202,7 @@ def tool(
 TOOL_PROMPT = "prompt"
 TOOL_PARALLEL = "parallel"
 TOOL_VIEWER = "viewer"
+TOOL_MODEL_INPUT = "model_input"
+TOOL_INIT_MODEL_INPUT = "__TOOL_INIT_MODEL_INPUT__"

inspect_ai/tool/_tool_call.py CHANGED Viewed

@@ -3,6 +3,8 @@ from typing import Any, Callable, Literal
 from pydantic import BaseModel, Field
+from inspect_ai._util.content import Content
 class ToolCallContent(BaseModel):
     """Content to include in tool call view."""
@@ -71,3 +73,11 @@ class ToolCallError:
 ToolCallViewer = Callable[[ToolCall], ToolCallView]
 """Custom view renderer for tool calls."""
+ToolCallModelInput = Callable[[int, int, str | list[Content]], str | list[Content]]
+"""Determine how tool call results are played back as model input.
+The first argument is an index into the total number of tool results
+for this tool in the message history, the second is the total number.
+"""

inspect_ai/tool/_tool_def.py CHANGED Viewed

@@ -13,8 +13,8 @@ from inspect_ai._util.registry import (
     set_registry_params,
 )
-from ._tool import TOOL_PARALLEL, TOOL_PROMPT, TOOL_VIEWER, Tool
-from ._tool_call import ToolCallViewer
+from ._tool import TOOL_MODEL_INPUT, TOOL_PARALLEL, TOOL_PROMPT, TOOL_VIEWER, Tool
+from ._tool_call import ToolCallModelInput, ToolCallViewer
 from ._tool_description import (
     ToolDescription,
     set_tool_description,
@@ -33,6 +33,7 @@ class ToolDef:
         parameters: dict[str, str] | ToolParams | None = None,
         parallel: bool | None = None,
         viewer: ToolCallViewer | None = None,
+        model_input: ToolCallModelInput | None = None,
     ) -> None:
         """Tool definition.
@@ -46,6 +47,8 @@ class ToolDef:
           parallel (bool | None): Does the tool support parallel execution
              (defaults to True if not specified)
           viewer (ToolCallViewer | None): Optional tool call viewer implementation.
+          model_input (ToolCallModelInput | None): Optional function that determines how
+              tool call results are played back as model input.
         Returns:
           Tool definition.
@@ -68,6 +71,7 @@ class ToolDef:
             parameters = parameters if parameters is not None else tdef.parameters
             self.parallel = parallel if parallel is not None else tdef.parallel
             self.viewer = viewer or tdef.viewer
+            self.model_input = model_input or tdef.model_input
         # if its not a tool then extract tool_info if all fields have not
         # been provided explicitly
@@ -97,6 +101,7 @@ class ToolDef:
             # behavioral attributes
             self.parallel = parallel is not False
             self.viewer = viewer
+            self.model_input = model_input
     tool: Callable[..., Any]
     """Callable to execute tool."""
@@ -116,6 +121,9 @@ class ToolDef:
     viewer: ToolCallViewer | None
     """Custom viewer for tool call"""
+    model_input: ToolCallModelInput | None
+    """Custom model input presenter for tool calls."""
     def as_tool(self) -> Tool:
         """Convert a ToolDef to a Tool."""
         tool = self.tool
@@ -159,11 +167,12 @@ class ToolDefFields(NamedTuple):
     parameters: ToolParams
     parallel: bool
     viewer: ToolCallViewer | None
+    model_input: ToolCallModelInput | None
 def tool_def_fields(tool: Tool) -> ToolDefFields:
     # get tool_info
-    name, prompt, parallel, viewer = tool_registry_info(tool)
+    name, prompt, parallel, viewer, model_input = tool_registry_info(tool)
     tool_info = parse_tool_info(tool)
     # if there is a description then append any prompt to the
@@ -213,15 +222,17 @@ def tool_def_fields(tool: Tool) -> ToolDefFields:
         parameters=tool_info.parameters,
         parallel=parallel,
         viewer=viewer,
+        model_input=model_input,
     )
 def tool_registry_info(
     tool: Tool,
-) -> tuple[str, str | None, bool, ToolCallViewer | None]:
+) -> tuple[str, str | None, bool, ToolCallViewer | None, ToolCallModelInput | None]:
     info = registry_info(tool)
     name = info.name.split("/")[-1]
     prompt = info.metadata.get(TOOL_PROMPT, None)
     parallel = info.metadata.get(TOOL_PARALLEL, True)
     viewer = info.metadata.get(TOOL_VIEWER, None)
-    return name, prompt, parallel, viewer
+    model_input = info.metadata.get(TOOL_MODEL_INPUT, None)
+    return name, prompt, parallel, viewer, model_input

inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl