inspect-ai 0.3.94__py3-none-any.whl → 0.3.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/task/run.py +12 -6
- inspect_ai/_util/exception.py +4 -0
- inspect_ai/_util/hash.py +39 -0
- inspect_ai/_util/path.py +22 -0
- inspect_ai/_util/trace.py +1 -1
- inspect_ai/_util/working.py +4 -0
- inspect_ai/_view/www/dist/assets/index.css +9 -9
- inspect_ai/_view/www/dist/assets/index.js +117 -120
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
- inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
- inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
- inspect_ai/_view/www/src/app/types.ts +12 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
- inspect_ai/_view/www/src/state/hooks.ts +19 -3
- inspect_ai/_view/www/src/state/logSlice.ts +23 -5
- inspect_ai/_view/www/yarn.lock +9 -9
- inspect_ai/agent/_bridge/patch.py +1 -3
- inspect_ai/analysis/__init__.py +0 -0
- inspect_ai/analysis/beta/__init__.py +57 -0
- inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
- inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
- inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
- inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
- inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
- inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
- inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
- inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
- inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
- inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
- inspect_ai/analysis/beta/_dataframe/record.py +377 -0
- inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
- inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
- inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
- inspect_ai/analysis/beta/_dataframe/util.py +157 -0
- inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
- inspect_ai/log/_file.py +1 -1
- inspect_ai/log/_log.py +21 -1
- inspect_ai/model/_call_tools.py +2 -1
- inspect_ai/model/_model.py +6 -4
- inspect_ai/model/_openai_responses.py +17 -18
- inspect_ai/model/_providers/anthropic.py +30 -5
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/solver/_multiple_choice.py +4 -1
- inspect_ai/solver/_task_state.py +7 -3
- inspect_ai/tool/_mcp/_context.py +3 -5
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
- inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
- inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
- inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
- inspect_ai/util/_sandbox/events.py +3 -2
- {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
- {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +70 -43
- {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.94.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from logging import getLogger
|
4
|
+
from typing import Any, Iterator, Mapping, Type
|
5
|
+
|
6
|
+
import jsonref # type: ignore
|
7
|
+
from jsonpath_ng import Fields, Index, JSONPath, Slice, Where, WhereNot # type: ignore
|
8
|
+
from jsonpath_ng.ext.filter import Filter # type: ignore
|
9
|
+
from pydantic import BaseModel
|
10
|
+
|
11
|
+
logger = getLogger(__name__)
|
12
|
+
|
13
|
+
Schema = Mapping[str, Any]
|
14
|
+
|
15
|
+
|
16
|
+
def resolved_schema(model: Type[BaseModel]) -> Schema:
    """Return the JSON schema for `model` with all `$ref` pointers inlined.

    Uses `jsonref.replace_refs` so downstream path validation can walk a
    self-contained schema without chasing references.

    Args:
        model: Pydantic model class to generate the schema for.

    Returns:
        Mapping representing the fully de-referenced JSON schema.
    """
    raw_schema = model.model_json_schema()
    # synthetic base URI; only needed so relative $refs resolve consistently
    base_uri = "file:///memory/inspect_schema.json"
    resolved: Schema = jsonref.replace_refs(
        raw_schema, base_uri=base_uri, jsonschema=True, proxies=False
    )
    return resolved
|
23
|
+
|
24
|
+
|
25
|
+
def jsonpath_in_schema(expr: JSONPath, schema: Schema) -> bool:
    """Check whether `expr` can match at least one location in `schema`.

    Linearises the path into tokens and walks them through the schema,
    tracking the set of schema nodes still reachable after each segment.
    Expressions using constructs we do not validate (filters, slices, ...)
    are optimistically accepted.
    """
    # don't validate unsupported constructs
    if find_unsupported(expr):
        return True

    def declared_types(node: Schema) -> set[str]:
        # normalize the "type" keyword (absent / string / list) to a set
        declared = node.get("type")
        if isinstance(declared, list):
            return set(declared)
        return {declared} if declared else set()

    def items_schemas(items: Any, index: int | None = None) -> list[Schema]:
        # normalize "items" (tuple-form list or single mapping) to a list
        if isinstance(items, list):
            if index is None:  # wildcard/slice
                return items
            return [items[index]] if 0 <= index < len(items) else []
        return [items] if isinstance(items, Mapping) else []

    def step_concrete(node: Schema, token: str | int | None) -> list[Schema]:
        # a totally open object accepts any child
        if node == {}:
            return [{}]  # stay alive, accept any key

        reached: list[Schema] = []

        def admit_extra_keys(obj: Schema) -> None:
            """Append the schema that governs unknown keys.

            - None / missing -> open object -> {}
            - True -> open object -> {}
            - Mapping -> that mapping (could be {} or a real subschema)
            - False -> closed object -> (do nothing)
            """
            if "additionalProperties" not in obj:
                if not obj.get("properties"):
                    reached.append({})
            else:
                extra = obj["additionalProperties"]
                if extra is True:
                    reached.append({})
                elif isinstance(extra, Mapping):  # {} or {...}
                    reached.append(extra)
                # extra is False -> closed dict -> ignore

        # Wildcard -----------------------------------------------------------
        if token is None:
            if "properties" in node:
                reached.extend(node["properties"].values())
            if "object" in declared_types(node):
                admit_extra_keys(node)
            if "array" in declared_types(node) and "items" in node:
                reached.extend(items_schemas(node["items"]))
            return reached

        # Property access ----------------------------------------------------
        if isinstance(token, str):
            if "properties" in node and token in node["properties"]:
                reached.append(node["properties"][token])
            elif "additionalProperties" in node:  # PRESENCE, not truthiness
                admit_extra_keys(node)
            elif "object" in declared_types(node):
                admit_extra_keys(node)

        # Array index --------------------------------------------------------
        else:  # token is an int from an Index node
            if "array" in declared_types(node) and "items" in node:
                reached.extend(items_schemas(node["items"], index=token))

        return reached

    def step(node: Schema, token: str | int | None) -> list[Schema]:
        # branch through anyOf/oneOf/allOf, then descend each branch
        reached: list[Schema] = []
        for candidate in _expand_union(node):
            reached.extend(step_concrete(candidate, token))
        return reached

    alive = [schema]
    for token in iter_tokens(expr):
        survivors: list[Schema] = []
        for state in alive:
            survivors.extend(step(state, token))
        if not survivors:  # nothing matched this segment
            return False
        alive = survivors
    return True  # every segment found at least one schema
|
113
|
+
|
114
|
+
|
115
|
+
def iter_tokens(node: JSONPath) -> Iterator[str | int | None]:
    """Linearise a jsonpath-ng AST into a stream of tokens we care about."""
    # binary nodes (Child, Descendants, etc.) expose .left/.right — recurse
    if hasattr(node, "left"):
        yield from iter_tokens(node.left)
        yield from iter_tokens(node.right)
        return
    if isinstance(node, Fields):
        # field names, e.g. ["foo"]
        yield from node.fields
    elif isinstance(node, Index):
        # 0 / -1 / None for wildcard
        yield node.index
    elif isinstance(node, Slice):
        # treat any slice as a wildcard
        yield None
|
126
|
+
|
127
|
+
|
128
|
+
COMBINATORS = ("anyOf", "oneOf", "allOf")
|
129
|
+
|
130
|
+
|
131
|
+
def _expand_union(sch: Schema) -> list[Schema]:
|
132
|
+
"""Return sch itself or the list of subschemas if it is a combinator."""
|
133
|
+
for key in COMBINATORS:
|
134
|
+
if key in sch:
|
135
|
+
subs: list[Schema] = []
|
136
|
+
for sub in sch[key]:
|
137
|
+
# a sub-schema might itself be an anyOf/oneOf/allOf
|
138
|
+
subs.extend(_expand_union(sub))
|
139
|
+
return subs
|
140
|
+
return [sch]
|
141
|
+
|
142
|
+
|
143
|
+
UNSUPPORTED: tuple[type[JSONPath], ...] = (
    Filter,  # [?foo > 0]
    Where,  # .foo[(@.bar < 42)]
    WhereNot,
    Slice,  # [1:5] (wildcard “[*]” is Index/None, not Slice)
)


def find_unsupported(node: JSONPath) -> list[type[JSONPath]]:
    """Return a list of node types present in `node` that we do not validate."""
    found: list[type[JSONPath]] = []
    pending: list[JSONPath] = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, UNSUPPORTED):
            found.append(type(current))
        # drill into structural children (jsonpath-ng uses .left / .right / .child attributes)
        for attr in ("left", "right", "child", "expression"):
            child = getattr(current, attr, None)
            if isinstance(child, JSONPath):
                pending.append(child)
        # handle list-valued containers like Fields(fields=[...]) and Index(index=[...])
        if hasattr(current, "__dict__"):
            for value in vars(current).values():
                if isinstance(value, list):
                    pending.extend(item for item in value if isinstance(item, JSONPath))
    return found
|
inspect_ai/log/_file.py
CHANGED
@@ -524,7 +524,7 @@ def manifest_eval_log_name(info: EvalLogInfo, log_dir: str, sep: str) -> str:
|
|
524
524
|
|
525
525
|
def log_files_from_ls(
|
526
526
|
ls: list[FileInfo],
|
527
|
-
formats: list[Literal["eval", "json"]] | None,
|
527
|
+
formats: list[Literal["eval", "json"]] | None = None,
|
528
528
|
descending: bool = True,
|
529
529
|
) -> list[EvalLogInfo]:
|
530
530
|
extensions = [f".{format}" for format in (formats or ALL_LOG_FORMATS)]
|
inspect_ai/log/_log.py
CHANGED
@@ -17,9 +17,11 @@ from pydantic import (
|
|
17
17
|
)
|
18
18
|
from rich.console import Console, RenderableType
|
19
19
|
from rich.traceback import Traceback
|
20
|
+
from shortuuid import uuid
|
20
21
|
|
21
|
-
from inspect_ai._util.constants import CONSOLE_DISPLAY_WIDTH, PKG_NAME
|
22
|
+
from inspect_ai._util.constants import CONSOLE_DISPLAY_WIDTH, DESERIALIZING, PKG_NAME
|
22
23
|
from inspect_ai._util.error import EvalError, exception_message
|
24
|
+
from inspect_ai._util.hash import base57_id_hash
|
23
25
|
from inspect_ai._util.logger import warn_once
|
24
26
|
from inspect_ai.approval._policy import ApprovalPolicyConfig
|
25
27
|
from inspect_ai.dataset._dataset import MT, metadata_as
|
@@ -677,6 +679,9 @@ class EvalModelConfig(BaseModel):
|
|
677
679
|
class EvalSpec(BaseModel):
|
678
680
|
"""Eval target and configuration."""
|
679
681
|
|
682
|
+
eval_id: str = Field(default_factory=str)
|
683
|
+
"""Globally unique id for eval."""
|
684
|
+
|
680
685
|
run_id: str = Field(default_factory=str)
|
681
686
|
"""Unique run id"""
|
682
687
|
|
@@ -757,6 +762,21 @@ class EvalSpec(BaseModel):
|
|
757
762
|
# allow field model_args
|
758
763
|
model_config = ConfigDict(protected_namespaces=())
|
759
764
|
|
765
|
+
def model_post_init(self, __context: Any) -> None:
|
766
|
+
# check if deserializing
|
767
|
+
is_deserializing = isinstance(__context, dict) and __context.get(
|
768
|
+
DESERIALIZING, False
|
769
|
+
)
|
770
|
+
|
771
|
+
# Generate eval_id if needed
|
772
|
+
if self.eval_id == "":
|
773
|
+
if is_deserializing:
|
774
|
+
# we want the eval_id to be stable across reads of the eval log so we compose it
|
775
|
+
# as a hash that matches the size/apperance of shortuuid-based uuids
|
776
|
+
self.eval_id = base57_id_hash(self.run_id + self.task_id + self.created)
|
777
|
+
else:
|
778
|
+
self.eval_id = uuid()
|
779
|
+
|
760
780
|
@model_validator(mode="before")
|
761
781
|
@classmethod
|
762
782
|
def read_sandbox_spec(
|
inspect_ai/model/_call_tools.py
CHANGED
@@ -39,6 +39,7 @@ from inspect_ai._util.content import (
|
|
39
39
|
ContentText,
|
40
40
|
ContentVideo,
|
41
41
|
)
|
42
|
+
from inspect_ai._util.exception import TerminateSampleError
|
42
43
|
from inspect_ai._util.format import format_function_call
|
43
44
|
from inspect_ai._util.logger import warn_once
|
44
45
|
from inspect_ai._util.registry import registry_unqualified_name
|
@@ -376,7 +377,7 @@ async def call_tool(
|
|
376
377
|
transcript()._event(
|
377
378
|
SampleLimitEvent(type="operator", limit=1, message=message)
|
378
379
|
)
|
379
|
-
raise
|
380
|
+
raise TerminateSampleError(message)
|
380
381
|
else:
|
381
382
|
raise ToolApprovalError(approval.explanation if approval else None)
|
382
383
|
if approval and approval.modified:
|
inspect_ai/model/_model.py
CHANGED
@@ -1237,9 +1237,10 @@ def tool_result_images_as_user_message(
|
|
1237
1237
|
|
1238
1238
|
Tool responses will have images replaced with "Image content is included below.", and the new user message will contain the images.
|
1239
1239
|
"""
|
1240
|
-
init_accum: ImagesAccumulator = ([], [], [])
|
1241
1240
|
chat_messages, user_message_content, tool_call_ids = functools.reduce(
|
1242
|
-
tool_result_images_reducer,
|
1241
|
+
tool_result_images_reducer,
|
1242
|
+
messages,
|
1243
|
+
(list[ChatMessage](), list[Content](), list[str]()),
|
1243
1244
|
)
|
1244
1245
|
# if the last message was a tool result, we may need to flush the pending stuff here
|
1245
1246
|
return maybe_adding_user_message(chat_messages, user_message_content, tool_call_ids)
|
@@ -1265,9 +1266,10 @@ def tool_result_images_reducer(
|
|
1265
1266
|
and isinstance(message.content, list)
|
1266
1267
|
and any([isinstance(c, ContentImage) for c in message.content])
|
1267
1268
|
):
|
1268
|
-
init_accum: ImageContentAccumulator = ([], [])
|
1269
1269
|
new_user_message_content, edited_tool_message_content = functools.reduce(
|
1270
|
-
tool_result_image_content_reducer,
|
1270
|
+
tool_result_image_content_reducer,
|
1271
|
+
message.content,
|
1272
|
+
(list[Content](), list[Content]()),
|
1271
1273
|
)
|
1272
1274
|
|
1273
1275
|
return (
|
@@ -184,24 +184,23 @@ def openai_responses_chat_choices(
|
|
184
184
|
# │ │ ┌───────────────────┐ │ │ │ │ ┌───────────────────┐ │ │ │ │ ┌───────────────────┐ │ │
|
185
185
|
# │ │ │ type: "reasoning" │ │ │ │ │ │ ContentText │ │ │ │ │ │ type: "reasoning" │ │ │
|
186
186
|
# │ │ │ id: "rs_bbbbbb" │ │ │ │ │ │ text: "" │ │ │ │ │ │ id: "rs_bbbbbb" │ │ │
|
187
|
-
# │ │ │ summary: [] │ │ │ │ │
|
188
|
-
# │ │
|
189
|
-
# │ │
|
190
|
-
# │ │ │
|
191
|
-
# │ │ │
|
192
|
-
# │ │ │
|
193
|
-
# │ │ │
|
194
|
-
# │ │ │ │
|
195
|
-
# │ │ │ │
|
196
|
-
# │ │ │ │
|
197
|
-
# │ │ │ │
|
198
|
-
# │ │ │ │
|
199
|
-
# │ │ │
|
200
|
-
# │ │
|
201
|
-
# │
|
202
|
-
# │ │
|
203
|
-
#
|
204
|
-
# └───────────────────────────┘ │ │ │ "msg_ccccccc" │ │ │
|
187
|
+
# │ │ │ summary: [] │ │ │ │ │ ├───────────────────┤ │ │ │ │ │ summary: [] │ │ │
|
188
|
+
# │ │ ├───────────────────┤ │ │ │ │ │ ContentText │ │ │ │ │ ├───────────────────┤ │ │
|
189
|
+
# │ │ │ type: "message" │ │ │ │ │ │ text: "text1" │ │ │ │ │ │ type: "message" │ │ │
|
190
|
+
# │ │ │ id: "msg_ccccccc" │ │ │ │ │ ├───────────────────┤ │ │ │ │ │ id: "msg_ccccccc" │ │ │
|
191
|
+
# │ │ │ role: "assistant" │ │ │ │ │ │ ContentText │ │ │ │ │ │ role: "assistant" │ │ │
|
192
|
+
# │ │ │ ┌───────────────┐ │ │ │ -> │ │ │ text: "text2" │ │ │ -> │ │ │ ┌───────────────┐ │ │ │
|
193
|
+
# │ │ │ │ Content │ │ │ │ │ │ └───────────────────┘ │ │ │ │ │ │ Content │ │ │ │
|
194
|
+
# │ │ │ │ ┌───────────┐ │ │ │ │ │ └───────────────────────┘ │ │ │ │ │ ┌───────────┐ │ │ │ │
|
195
|
+
# │ │ │ │ │"text1" │ │ │ │ │ │ ┌───────────────────────┐ │ │ │ │ │ │"text1" │ │ │ │ │
|
196
|
+
# │ │ │ │ ├───────────┤ │ │ │ │ │ │ internal │ │ │ │ │ │ ├───────────┤ │ │ │ │
|
197
|
+
# │ │ │ │ │"text2" │ │ │ │ │ │ │ ┌───────────────────┐ │ │ │ │ │ │ │"text2" │ │ │ │ │
|
198
|
+
# │ │ │ │ └───────────┘ │ │ │ │ │ │ │ reasoning_id: │ │ │ │ │ │ │ └───────────┘ │ │ │ │
|
199
|
+
# │ │ │ └───────────────┘ │ │ │ │ │ │ "rs_bbbbbb" │ │ │ │ │ │ └───────────────┘ │ │ │
|
200
|
+
# │ │ └───────────────────┘ │ │ │ │ └───────────────────┘ │ │ │ │ └───────────────────┘ │ │
|
201
|
+
# │ └───────────────────────┘ │ │ │ ┌───────────────────┐ │ │ │ └───────────────────────┘ │
|
202
|
+
# └───────────────────────────┘ │ │ │ output_msg_id: │ │ │ └───────────────────────────┘
|
203
|
+
# │ │ │ "msg_ccccccc" │ │ │
|
205
204
|
# │ │ └───────────────────┘ │ │
|
206
205
|
# │ └───────────────────────┘ │
|
207
206
|
# └───────────────────────────┘
|
@@ -33,7 +33,10 @@ from anthropic.types import (
|
|
33
33
|
ToolUseBlockParam,
|
34
34
|
message_create_params,
|
35
35
|
)
|
36
|
-
from anthropic.types.beta import
|
36
|
+
from anthropic.types.beta import (
|
37
|
+
BetaToolComputerUse20250124Param,
|
38
|
+
BetaToolTextEditor20241022Param,
|
39
|
+
)
|
37
40
|
from pydantic import JsonValue
|
38
41
|
from typing_extensions import override
|
39
42
|
|
@@ -218,6 +221,8 @@ class AnthropicAPI(ModelAPI):
|
|
218
221
|
# tools are generally available for Claude 3.5 Sonnet (new) as well and
|
219
222
|
# can be used without the computer use beta header.
|
220
223
|
betas.append("computer-use-2025-01-24")
|
224
|
+
if any("20241022" in str(tool.get("type", "")) for tool in tools_param):
|
225
|
+
betas.append("computer-use-2024-10-22")
|
221
226
|
if len(betas) > 0:
|
222
227
|
extra_headers["anthropic-beta"] = ",".join(betas)
|
223
228
|
|
@@ -337,6 +342,15 @@ class AnthropicAPI(ModelAPI):
|
|
337
342
|
@override
|
338
343
|
def should_retry(self, ex: Exception) -> bool:
|
339
344
|
if isinstance(ex, APIStatusError):
|
345
|
+
# for unknown reasons, anthropic does not always set status_code == 529
|
346
|
+
# for "overloaded_error" so we check for it explicitly
|
347
|
+
if (
|
348
|
+
isinstance(ex.body, dict)
|
349
|
+
and ex.body.get("error", {}).get("type", "") == "overloaded_error"
|
350
|
+
):
|
351
|
+
return True
|
352
|
+
|
353
|
+
# standard http status code checking
|
340
354
|
return is_retryable_http_status(ex.status_code)
|
341
355
|
elif httpx_should_retry(ex):
|
342
356
|
return True
|
@@ -545,7 +559,7 @@ class AnthropicAPI(ModelAPI):
|
|
545
559
|
|
546
560
|
def text_editor_tool_param(
|
547
561
|
self, tool: ToolInfo
|
548
|
-
) ->
|
562
|
+
) -> ToolTextEditor20250124Param | BetaToolTextEditor20241022Param | None:
|
549
563
|
# check for compatible 'text editor' tool
|
550
564
|
if tool.name == "text_editor" and (
|
551
565
|
sorted(tool.parameters.properties.keys())
|
@@ -561,8 +575,14 @@ class AnthropicAPI(ModelAPI):
|
|
561
575
|
]
|
562
576
|
)
|
563
577
|
):
|
564
|
-
return
|
565
|
-
|
578
|
+
return (
|
579
|
+
BetaToolTextEditor20241022Param(
|
580
|
+
type="text_editor_20241022", name="str_replace_editor"
|
581
|
+
)
|
582
|
+
if self.is_claude_3_5()
|
583
|
+
else ToolTextEditor20250124Param(
|
584
|
+
type="text_editor_20250124", name="str_replace_editor"
|
585
|
+
)
|
566
586
|
)
|
567
587
|
# not a text_editor tool
|
568
588
|
else:
|
@@ -571,7 +591,10 @@ class AnthropicAPI(ModelAPI):
|
|
571
591
|
|
572
592
|
# tools can be either a stock tool param or a special Anthropic native use tool param
|
573
593
|
ToolParamDef = (
|
574
|
-
ToolParam
|
594
|
+
ToolParam
|
595
|
+
| BetaToolComputerUse20250124Param
|
596
|
+
| ToolTextEditor20250124Param
|
597
|
+
| BetaToolTextEditor20241022Param
|
575
598
|
)
|
576
599
|
|
577
600
|
|
@@ -580,6 +603,7 @@ def add_cache_control(
|
|
580
603
|
| ToolParam
|
581
604
|
| BetaToolComputerUse20250124Param
|
582
605
|
| ToolTextEditor20250124Param
|
606
|
+
| BetaToolTextEditor20241022Param
|
583
607
|
| dict[str, Any],
|
584
608
|
) -> None:
|
585
609
|
cast(dict[str, Any], param)["cache_control"] = {"type": "ephemeral"}
|
@@ -844,6 +868,7 @@ def _names_for_tool_call(
|
|
844
868
|
"""
|
845
869
|
mappings = (
|
846
870
|
(INTERNAL_COMPUTER_TOOL_NAME, "computer_20250124", "computer"),
|
871
|
+
("str_replace_editor", "text_editor_20241022", "text_editor"),
|
847
872
|
("str_replace_editor", "text_editor_20250124", "text_editor"),
|
848
873
|
("bash", "bash_20250124", "bash_session"),
|
849
874
|
)
|
@@ -200,6 +200,7 @@ def multiple_choice(
|
|
200
200
|
template: str | None = None,
|
201
201
|
cot: bool = False,
|
202
202
|
multiple_correct: bool = False,
|
203
|
+
max_tokens: int | None = None,
|
203
204
|
**kwargs: Unpack[DeprecatedArgs],
|
204
205
|
) -> Solver:
|
205
206
|
"""Multiple choice question solver. Formats a multiple choice question prompt, then calls `generate()`.
|
@@ -226,6 +227,8 @@ def multiple_choice(
|
|
226
227
|
squares? A) 3, B) 4, C) 9" has multiple correct answers, B and C. Leave
|
227
228
|
as `False` if there's exactly one correct answer from the choices
|
228
229
|
available. NOTE: this has no effect if you provide a custom template.
|
230
|
+
max_tokens: Default `None`. Controls the number of tokens generated through the call
|
231
|
+
to generate().
|
229
232
|
**kwargs (Any): Deprecated arguments for backward compatibility.
|
230
233
|
|
231
234
|
#### Shuffling
|
@@ -282,7 +285,7 @@ def multiple_choice(
|
|
282
285
|
template=str(template),
|
283
286
|
)
|
284
287
|
|
285
|
-
state = await generate(state)
|
288
|
+
state = await generate(state, max_tokens=max_tokens)
|
286
289
|
|
287
290
|
answers = parse_answers(state)
|
288
291
|
if answers and answers.group(1):
|
inspect_ai/solver/_task_state.py
CHANGED
@@ -204,13 +204,17 @@ class TaskState:
|
|
204
204
|
Convenience function for accessing the initial input from the `Sample` as a string.
|
205
205
|
|
206
206
|
If the `input` is a `list[ChatMessage]`, this will return the text from
|
207
|
-
the
|
207
|
+
the last chat message
|
208
208
|
"""
|
209
209
|
if isinstance(self._input, str):
|
210
210
|
return self._input
|
211
211
|
else:
|
212
212
|
input = next(
|
213
|
-
(
|
213
|
+
(
|
214
|
+
message.text
|
215
|
+
for message in reversed(self._input)
|
216
|
+
if message.role == "user"
|
217
|
+
),
|
214
218
|
None,
|
215
219
|
)
|
216
220
|
if input:
|
@@ -231,7 +235,7 @@ class TaskState:
|
|
231
235
|
write access to the user chat prompt. Raises an
|
232
236
|
exception if there is no user prompt
|
233
237
|
"""
|
234
|
-
prompt = next((m for m in self.messages if m.role == "user"), None)
|
238
|
+
prompt = next((m for m in reversed(self.messages) if m.role == "user"), None)
|
235
239
|
if prompt:
|
236
240
|
return prompt
|
237
241
|
else:
|
inspect_ai/tool/_mcp/_context.py
CHANGED
@@ -2,13 +2,11 @@ from contextlib import _AsyncGeneratorContextManager
|
|
2
2
|
from typing import TypeAlias
|
3
3
|
|
4
4
|
from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
|
5
|
-
from mcp.
|
6
|
-
JSONRPCMessage,
|
7
|
-
)
|
5
|
+
from mcp.shared.message import SessionMessage
|
8
6
|
|
9
7
|
MCPServerContext: TypeAlias = _AsyncGeneratorContextManager[
|
10
8
|
tuple[
|
11
|
-
MemoryObjectReceiveStream[
|
12
|
-
MemoryObjectSendStream[
|
9
|
+
MemoryObjectReceiveStream[SessionMessage | Exception],
|
10
|
+
MemoryObjectSendStream[SessionMessage],
|
13
11
|
],
|
14
12
|
]
|
inspect_ai/tool/_mcp/server.py
CHANGED
inspect_ai/tool/_tools/_think.py
CHANGED
@@ -41,7 +41,7 @@ def think(
|
|
41
41
|
def think_tool_viewer() -> ToolCallViewer:
|
42
42
|
def viewer(tool_call: ToolCall) -> ToolCallView:
|
43
43
|
call = ToolCallContent(
|
44
|
-
format="markdown", content=tool_call.arguments
|
44
|
+
format="markdown", content=tool_call.arguments.get("thought", "")
|
45
45
|
)
|
46
46
|
return ToolCallView(call=call)
|
47
47
|
|