PyPI - inspect-ai - Versions diffs - 0.3.71__py3-none-any.whl → 0.3.72__py3-none-any.whl - Mend

inspect-ai 0.3.71__py3-none-any.whl → 0.3.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

inspect_ai/_view/www/node_modules/flatted/python/flatted.py ADDED Viewed

@@ -0,0 +1,149 @@
+# ISC License
+#
+# Copyright (c) 2018-2021, Andrea Giammarchi, @WebReflection
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+import json as _json
+# Bookkeeping used while serializing: two lists that _index() always appends
+# to together, so position i of `key` pairs with position i of `value`.
+class _Known:
+    def __init__(self):
+        # values that have already been given a slot in the output
+        self.key = []
+        # the slot number of the matching `key` entry, stored as a string
+        self.value = []
+# Marker wrapper that _wrap() puts around every str it encounters, so that
+# _loop() can recognize it with isinstance() and read `.value` as an index.
+class _String:
+    def __init__(self, value):
+        # the original str
+        self.value = value
+# Return the positions of `value` as a list of ints, [0, 1, ..., n - 1],
+# with one entry per item produced by iterating `value`.
+def _array_keys(value):
+    keys = []
+    i = 0
+    for _ in value:
+        keys.append(i)
+        i += 1
+    return keys
+# Return a new list holding every key produced by iterating `value`
+# (for a dict, its keys in iteration order).
+def _object_keys(value):
+    keys = []
+    for key in value:
+        keys.append(key)
+    return keys
+# True when `value` is a list or a tuple (including subclasses of either).
+def _is_array(value):
+    return isinstance(value, list) or isinstance(value, tuple)
+# True when `value` is a dict (or a dict subclass).
+def _is_object(value):
+    return isinstance(value, dict)
+# True when `value` is a str (or a str subclass).
+def _is_string(value):
+    return isinstance(value, str)
+# Append `value` to `input`, record it in `known`, and return its slot.
+#   known -- object with parallel `key` / `value` lists (see _Known); mutated
+#   input -- list of values queued for serialization; mutated
+#   value -- the value being registered
+# Returns the position of `value` in `input` as a string (e.g. "0").
+def _index(known, input, value):
+    input.append(value)
+    # the slot is the position just appended, kept as a string
+    index = str(len(input) - 1)
+    known.key.append(value)
+    known.value.append(index)
+    return index
+# Resolve the references held directly in `output`.
+#   keys   -- the keys/positions of `output` to visit
+#   input  -- list of all parsed slots; a reference is an index into it
+#   known  -- list of containers already visited (passed through to _ref)
+#   output -- the list or dict being resolved; modified in place
+# Returns `output`.
+def _loop(keys, input, known, output):
+    for key in keys:
+        value = output[key]
+        # only _String entries are references; everything else is left alone
+        if isinstance(value, _String):
+            _ref(key, input[int(value.value)], input, known, output)
+    return output
+# Store the referenced `value` at output[key], first resolving the
+# references inside it when it is a list/tuple or dict not yet in `known`.
+#   known -- a plain list of containers already visited (parse() seeds it
+#            with the root); containers are appended before descending so
+#            that cycles do not recurse forever
+# NOTE(review): `value in known` on a list matches by identity or ==, so a
+# container that merely compares equal to a visited one is also skipped.
+def _ref(key, value, input, known, output):
+    if _is_array(value) and not value in known:
+        known.append(value)
+        value = _loop(_array_keys(value), input, known, value)
+    elif _is_object(value) and not value in known:
+        known.append(value)
+        value = _loop(_object_keys(value), input, known, value)
+    output[key] = value
+# Map `value` to what should be written in its place in the output.
+# Strings, lists/tuples and dicts are replaced by their slot index (a
+# string): the one already recorded in `known` if there is a match,
+# otherwise a new one assigned through _index(). Any other value is
+# returned unchanged.
+# NOTE(review): list.index() matches by identity or ==, so distinct
+# containers that compare equal share a single slot.
+def _relate(known, input, value):
+    if _is_string(value) or _is_array(value) or _is_object(value):
+        try:
+            return known.value[known.key.index(value)]
+        # NOTE(review): list.index() raises ValueError when nothing matches;
+        # this bare except also catches every other exception.
+        except:
+            return _index(known, input, value)
+    return value
+# Build the serializable form of one slot.
+# For a list/tuple returns a new list, and for a dict a new dict, in which
+# every element/value has been passed through _relate(). Any other value
+# is returned unchanged. `known` and `input` may grow as a side effect of
+# the _relate() calls.
+def _transform(known, input, value):
+    if _is_array(value):
+        output = []
+        for val in value:
+            output.append(_relate(known, input, val))
+        return output
+    if _is_object(value):
+        obj = {}
+        for key in value:
+            obj[key] = _relate(known, input, value[key])
+        return obj
+    return value
+# Recursively wrap every str inside `value` in a _String marker.
+# A str is returned as a new _String. Lists and dicts are modified in
+# place (each element/value is replaced by its wrapped form) and the same
+# container is returned. Any other value is returned unchanged.
+# NOTE(review): _is_array() also accepts tuples, which do not support item
+# assignment; presumably the json.loads() output fed in by parse() never
+# contains tuples -- confirm.
+def _wrap(value):
+    if _is_string(value):
+        return _String(value)
+    if _is_array(value):
+        i = 0
+        for val in value:
+            value[i] = _wrap(val)
+            i += 1
+    elif _is_object(value):
+        for key in value:
+            value[key] = _wrap(value[key])
+    return value
+# Rebuild a value from text produced by stringify().
+#   value           -- the JSON text; it must decode to a sequence whose
+#                      element 0 is the root value
+#   *args, **kwargs -- forwarded unchanged to json.loads()
+# Returns the root value. When the root is a list or dict, the string
+# references inside it have been replaced by the slots they point to.
+def parse(value, *args, **kwargs):
+    json = _json.loads(value, *args, **kwargs)
+    # mark every string (at any depth) so references can be recognized later
+    wrapped = []
+    for value in json:
+        wrapped.append(_wrap(value))
+    # top-level strings are plain string slots, not references: unwrap them
+    input = []
+    for value in wrapped:
+        if isinstance(value, _String):
+            input.append(value.value)
+        else:
+            input.append(value)
+    value = input[0]
+    # the visited list is seeded with the root so a reference back to it
+    # is not descended into again
+    if _is_array(value):
+        return _loop(_array_keys(value), input, [value], value)
+    if _is_object(value):
+        return _loop(_object_keys(value), input, [value], value)
+    return value
+# Serialize `value` to JSON text as a flat list of slots.
+#   value           -- the root value; it always takes slot 0
+#   *args, **kwargs -- forwarded unchanged to json.dumps()
+# Returns the JSON text of the list of transformed slots.
+def stringify(value, *args, **kwargs):
+    known = _Known()
+    input = []
+    output = []
+    # registers the root; i starts at its slot number
+    i = int(_index(known, input, value))
+    # `input` grows while it is walked: _transform() registers every new
+    # string/list/dict it meets, so the loop ends once nothing new is added
+    while i < len(input):
+        output.append(_transform(known, input, input[i]))
+        i += 1
+    return _json.dumps(output, *args, **kwargs)

inspect_ai/_view/www/node_modules/flatted/python/test.py ADDED Viewed

@@ -0,0 +1,63 @@
+# Assertion script for flatted.py: checks stringify() output against exact
+# strings, then checks that parse() restores self-references. Prints 'OK'
+# when every assertion has passed.
+from flatted import stringify as _stringify, parse
+# local wrapper: compact separators so the expected strings contain no spaces
+def stringify(value):
+    return _stringify(value, separators=(',', ':'))
+assert stringify([None, None]) == '[[null,null]]'
+a = []
+o = {}
+assert stringify(a) == '[[]]'
+assert stringify(o) == '[{}]'
+# make both containers refer to themselves; "0" is the root's own slot
+a.append(a)
+o['o'] = o
+assert stringify(a) == '[["0"]]'
+assert stringify(o) == '[{"o":"0"}]'
+b = parse(stringify(a))
+assert isinstance(b, list) and b[0] == b
+# numbers and booleans stay inline; the string 'two' gets its own slot
+a.append(1)
+a.append('two')
+a.append(True)
+o['one'] = 1
+o['two'] = 'two'
+o['three'] = True
+assert stringify(a) == '[["0",1,"1",true],"two"]'
+assert stringify(o) == '[{"o":"0","one":1,"two":"1","three":true},"two"]'
+# cross-reference the list and the dict
+a.append(o)
+o['a'] = a
+assert stringify(a) == '[["0",1,"1",true,"2"],"two",{"o":"2","one":1,"two":"1","three":true,"a":"0"}]'
+assert stringify(o) == '[{"o":"0","one":1,"two":"1","three":true,"a":"2"},"two",["2",1,"1",true,"0"]]'
+# add nested containers that are not part of any cycle
+a.append({'test': 'OK'})
+a.append([1, 2, 3])
+o['test'] = {'test': 'OK'}
+o['array'] = [1, 2, 3]
+assert stringify(a) == '[["0",1,"1",true,"2","3","4"],"two",{"o":"2","one":1,"two":"1","three":true,"a":"0","test":"3","array":"4"},{"test":"5"},[1,2,3],"OK"]'
+assert stringify(o) == '[{"o":"0","one":1,"two":"1","three":true,"a":"2","test":"3","array":"4"},"two",["2",1,"1",true,"0","3","4"],{"test":"5"},[1,2,3],"OK"]'
+# round trip: the parsed copies must contain the same cycles
+a2 = parse(stringify(a));
+o2 = parse(stringify(o));
+assert a2[0] == a2
+assert o2['o'] == o2
+assert a2[1] == 1 and a2[2] == 'two' and a2[3] == True and isinstance(a2[4], dict)
+assert a2[4] == a2[4]['o'] and a2 == a2[4]['o']['a']
+# parse hand-written flatted payloads
+# NOTE(review): the name `str` below shadows the builtin for the rest of the script
+str = parse('[{"prop":"1","a":"2","b":"3"},{"value":123},["4","5"],{"e":"6","t":"7","p":4},{},{"b":"8"},"f",{"a":"9"},["10"],"sup",{"a":1,"d":2,"c":"7","z":"11","h":1},{"g":2,"a":"7","b":"12","f":6},{"r":4,"u":"7","c":5}]')
+assert str['b']['t']['a'] == 'sup' and str['a'][1]['b'][0]['c'] == str['b']['t']
+oo = parse('[{"a":"1","b":"0","c":"2"},{"aa":"3"},{"ca":"4","cb":"5","cc":"6","cd":"7","ce":"8","cf":"9"},{"aaa":"10"},{"caa":"4"},{"cba":"5"},{"cca":"2"},{"cda":"4"},"value2","value3","value1"]');
+assert oo['a']['aa']['aaa'] == 'value1' and oo == oo['b'] and oo['c']['ca']['caa'] == oo['c']['ca']
+print('OK')

inspect_ai/model/_providers/anthropic.py CHANGED Viewed

@@ -4,7 +4,7 @@ import re
 import sys
 from copy import copy
 from logging import getLogger
-from typing import Any, Literal, Tuple, TypedDict, cast
+from typing import Any, Literal, Optional, Tuple, TypedDict, cast
 from .util.tracker import HttpxTimeTracker
@@ -204,7 +204,7 @@ class AnthropicAPI(ModelAPI):
                 tools_param,
                 messages,
                 computer_use,
-            ) = await resolve_chat_input(self.model_name, input, tools, config)
+            ) = await self.resolve_chat_input(input, tools, config)
             # prepare request params (assembled this way so we can log the raw model call)
             request = dict(messages=messages)
@@ -225,7 +225,7 @@ class AnthropicAPI(ModelAPI):
             # extra headers (for time tracker and computer use)
             extra_headers = headers | {HttpxTimeTracker.REQUEST_ID_HEADER: request_id}
             if computer_use:
-                betas.append("computer-use-2024-10-22")
+                betas.append("computer-use-2025-01-24")
             if len(betas) > 0:
                 extra_headers["anthropic-beta"] = ",".join(betas)
@@ -326,6 +326,9 @@ class AnthropicAPI(ModelAPI):
     # True when the configured model name contains the substring "claude-3-5-".
     def is_claude_3_5(self) -> bool:
         return "claude-3-5-" in self.model_name
+    # True when the configured model name contains the substring "claude-3-7-".
+    def is_claude_3_7(self) -> bool:
+        return "claude-3-7-" in self.model_name
     # Returns the API key converted to a string as this API's connection key.
     @override
     def connection_key(self) -> str:
         return str(self.api_key)
@@ -397,6 +400,148 @@ class AnthropicAPI(ModelAPI):
         else:
             return ex
+    # Convert the chat input into the pieces of an Anthropic request.
+    #   input  -- chat messages, system messages included
+    #   tools  -- tool definitions made available to the model
+    #   config -- generation config; `cache_prompt` is read here, and the
+    #             config is also passed to split_system_messages() and
+    #             tool_params_for_tools()
+    # Returns a 4-tuple:
+    #   (system blocks or None, tool params, message params, computer_use flag)
+    async def resolve_chat_input(
+        self,
+        input: list[ChatMessage],
+        tools: list[ToolInfo],
+        config: GenerateConfig,
+    ) -> Tuple[
+        list[TextBlockParam] | None, list["ToolParamDef"], list[MessageParam], bool
+    ]:
+        # extract system message
+        system_messages, messages = split_system_messages(input, config)
+        # messages
+        message_params = [(await message_param(message)) for message in messages]
+        # collapse user messages (as Inspect 'tool' messages become Claude 'user' messages)
+        message_params = functools.reduce(
+            consecutive_user_message_reducer, message_params, []
+        )
+        # tools
+        tools_params, computer_use = self.tool_params_for_tools(tools, config)
+        # system messages
+        if len(system_messages) > 0:
+            system_param: list[TextBlockParam] | None = [
+                TextBlockParam(type="text", text=message.text)
+                for message in system_messages
+            ]
+        else:
+            system_param = None
+        # add caching directives if necessary
+        # (an explicit bool in config.cache_prompt wins; otherwise caching is
+        # turned on only when there is at least one tool param)
+        cache_prompt = (
+            config.cache_prompt
+            if isinstance(config.cache_prompt, bool)
+            else True
+            if len(tools_params)
+            else False
+        )
+        # only certain claude models qualify
+        # (caching is switched off when the model name contains any of these)
+        if cache_prompt:
+            if (
+                "claude-3-sonnet" in self.model_name
+                or "claude-2" in self.model_name
+                or "claude-instant" in self.model_name
+            ):
+                cache_prompt = False
+        if cache_prompt:
+            # system
+            if system_param:
+                add_cache_control(system_param[-1])
+            # tools
+            if tools_params:
+                add_cache_control(tools_params[-1])
+            # last 2 user messages
+            # (the list is reversed first, so [:2] picks the two most recent)
+            user_message_params = list(
+                filter(lambda m: m["role"] == "user", reversed(message_params))
+            )
+            for message in user_message_params[:2]:
+                if isinstance(message["content"], str):
+                    # plain-string content is replaced by a one-element list
+                    # holding the text block passed to add_cache_control()
+                    text_param = TextBlockParam(type="text", text=message["content"])
+                    add_cache_control(text_param)
+                    message["content"] = [text_param]
+                else:
+                    # list() makes a shallow copy, so content[-1] is the same
+                    # object as the message's last block
+                    # NOTE(review): add_cache_control's body is not visible
+                    # here; presumably it mutates that block in place -- confirm
+                    content = list(message["content"])
+                    add_cache_control(cast(dict[str, Any], content[-1]))
+        # return chat input
+        return system_param, tools_params, message_params, computer_use
+    # Build the request tool params for `tools`.
+    #   tools  -- tool definitions to convert
+    #   config -- only `internal_tools` is read here
+    # Returns (tool params in the same order as `tools`, computer_use), where
+    # computer_use is True when at least one tool was mapped to the native
+    # computer use param.
+    def tool_params_for_tools(
+        self, tools: list[ToolInfo], config: GenerateConfig
+    ) -> tuple[list["ToolParamDef"], bool]:
+        # tool params and computer_use bit to return
+        tool_params: list["ToolParamDef"] = []
+        computer_use = False
+        # for each tool, check if it has a native computer use implementation and use that
+        # when available (noting that we need to set the computer use request header)
+        for tool in tools:
+            # the native lookup is skipped only when internal_tools is exactly
+            # False; any other value (including None) allows it
+            computer_use_tool = (
+                self.computer_use_tool_param(tool)
+                if config.internal_tools is not False
+                else None
+            )
+            if computer_use_tool:
+                tool_params.append(computer_use_tool)
+                computer_use = True
+            else:
+                # regular tool: name, description and the parameters model
+                # dumped with None-valued fields excluded
+                tool_params.append(
+                    ToolParam(
+                        name=tool.name,
+                        description=tool.description,
+                        input_schema=tool.parameters.model_dump(exclude_none=True),
+                    )
+                )
+        return tool_params, computer_use
+    # Return the native computer use tool param for `tool`, or None.
+    # A tool qualifies when it is named "computer" and its parameter names
+    # are exactly the seven listed below (order does not matter). For a
+    # qualifying tool on a Claude 3.5 model a one-time warning is logged and
+    # None is returned.
+    def computer_use_tool_param(
+        self, tool: ToolInfo
+    ) -> Optional["ComputerUseToolParam"]:
+        # check for compatible 'computer' tool
+        if tool.name == "computer" and (
+            sorted(tool.parameters.properties.keys())
+            == sorted(
+                [
+                    "action",
+                    "coordinate",
+                    "duration",
+                    "scroll_amount",
+                    "scroll_direction",
+                    "start_coordinate",
+                    "text",
+                ]
+            )
+        ):
+            if self.is_claude_3_5():
+                warn_once(
+                    logger,
+                    "Use of Anthropic's native computer use support is not enabled in Claude 3.5. Please use 3.7 or later to leverage the native support.",
+                )
+                return None
+            return ComputerUseToolParam(
+                type="computer_20250124",
+                name="computer",
+                # Note: The dimensions passed here for display_width_px and display_height_px should
+                # match the dimensions of screenshots returned by the tool.
+                # Those dimensions will always be one of the values in MAX_SCALING_TARGETS
+                # in _x11_client.py.
+                # TODO: enhance this code to calculate the dimensions based on the scaled screen
+                # size used by the container.
+                display_width_px=1366,
+                display_height_px=768,
+                display_number=1,
+            )
+        # not a computer_use tool
+        else:
+            return None
 # native anthropic tool definitions for computer use beta
 # https://docs.anthropic.com/en/docs/build-with-claude/computer-use
@@ -412,131 +557,6 @@ class ComputerUseToolParam(TypedDict):
 ToolParamDef = ToolParam | ComputerUseToolParam
-async def resolve_chat_input(
-    model: str,
-    input: list[ChatMessage],
-    tools: list[ToolInfo],
-    config: GenerateConfig,
-) -> Tuple[list[TextBlockParam] | None, list[ToolParamDef], list[MessageParam], bool]:
-    # extract system message
-    system_messages, messages = split_system_messages(input, config)
-    # messages
-    message_params = [(await message_param(message)) for message in messages]
-    # collapse user messages (as Inspect 'tool' messages become Claude 'user' messages)
-    message_params = functools.reduce(
-        consecutive_user_message_reducer, message_params, []
-    )
-    # tools
-    tools_params, computer_use = tool_params_for_tools(tools, config)
-    # system messages
-    if len(system_messages) > 0:
-        system_param: list[TextBlockParam] | None = [
-            TextBlockParam(type="text", text=message.text)
-            for message in system_messages
-        ]
-    else:
-        system_param = None
-    # add caching directives if necessary
-    cache_prompt = (
-        config.cache_prompt
-        if isinstance(config.cache_prompt, bool)
-        else True
-        if len(tools_params)
-        else False
-    )
-    # only certain claude models qualify
-    if cache_prompt:
-        if (
-            "claude-3-sonnet" in model
-            or "claude-2" in model
-            or "claude-instant" in model
-        ):
-            cache_prompt = False
-    if cache_prompt:
-        # system
-        if system_param:
-            add_cache_control(system_param[-1])
-        # tools
-        if tools_params:
-            add_cache_control(tools_params[-1])
-        # last 2 user messages
-        user_message_params = list(
-            filter(lambda m: m["role"] == "user", reversed(message_params))
-        )
-        for message in user_message_params[:2]:
-            if isinstance(message["content"], str):
-                text_param = TextBlockParam(type="text", text=message["content"])
-                add_cache_control(text_param)
-                message["content"] = [text_param]
-            else:
-                content = list(message["content"])
-                add_cache_control(cast(dict[str, Any], content[-1]))
-    # return chat input
-    return system_param, tools_params, message_params, computer_use
-def tool_params_for_tools(
-    tools: list[ToolInfo], config: GenerateConfig
-) -> tuple[list[ToolParamDef], bool]:
-    # tool params and computer_use bit to return
-    tool_params: list[ToolParamDef] = []
-    computer_use = False
-    # for each tool, check if it has a native computer use implementation and use that
-    # when available (noting that we need to set the computer use request header)
-    for tool in tools:
-        computer_use_tool = (
-            computer_use_tool_param(tool)
-            if config.internal_tools is not False
-            else None
-        )
-        if computer_use_tool:
-            tool_params.append(computer_use_tool)
-            computer_use = True
-        else:
-            tool_params.append(
-                ToolParam(
-                    name=tool.name,
-                    description=tool.description,
-                    input_schema=tool.parameters.model_dump(exclude_none=True),
-                )
-            )
-    return tool_params, computer_use
-def computer_use_tool_param(tool: ToolInfo) -> ComputerUseToolParam | None:
-    # check for compatible 'computer' tool
-    if tool.name == "computer" and (
-        sorted(tool.parameters.properties.keys())
-        == sorted(["action", "coordinate", "text"])
-    ):
-        return ComputerUseToolParam(
-            type="computer_20241022",
-            name="computer",
-            # Note: The dimensions passed here for display_width_px and display_height_px should
-            # match the dimensions of screenshots returned by the tool.
-            # Those dimensions will always be one of the values in MAX_SCALING_TARGETS
-            # in _x11_client.py.
-            # TODO: enhance this code to calculate the dimensions based on the scaled screen
-            # size used by the container.
-            display_width_px=1366,
-            display_height_px=768,
-            display_number=1,
-        )
-    # not a computer_use tool
-    else:
-        return None
 def add_cache_control(
     param: TextBlockParam | ToolParam | ComputerUseToolParam | dict[str, Any],
 ) -> None:

inspect-ai 0.3.71__py3-none-any.whl → 0.3.72__py3-none-any.whl

inspect-ai 0.3.71__py3-none-any.whl → 0.3.72__py3-none-any.whl