inspect-ai 0.3.71__py3-none-any.whl → 0.3.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +14 -3
- inspect_ai/_cli/sandbox.py +3 -3
- inspect_ai/_cli/score.py +6 -4
- inspect_ai/_cli/trace.py +53 -6
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +2 -1
- inspect_ai/_display/core/footer.py +6 -6
- inspect_ai/_display/plain/display.py +11 -6
- inspect_ai/_display/rich/display.py +23 -13
- inspect_ai/_display/textual/app.py +10 -9
- inspect_ai/_display/textual/display.py +2 -2
- inspect_ai/_display/textual/widgets/footer.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +14 -5
- inspect_ai/_eval/context.py +1 -2
- inspect_ai/_eval/eval.py +54 -41
- inspect_ai/_eval/loader.py +9 -2
- inspect_ai/_eval/run.py +148 -81
- inspect_ai/_eval/score.py +13 -8
- inspect_ai/_eval/task/images.py +31 -21
- inspect_ai/_eval/task/run.py +62 -59
- inspect_ai/_eval/task/rundir.py +16 -9
- inspect_ai/_eval/task/sandbox.py +7 -8
- inspect_ai/_eval/task/util.py +7 -0
- inspect_ai/_util/_async.py +118 -10
- inspect_ai/_util/constants.py +0 -2
- inspect_ai/_util/file.py +15 -29
- inspect_ai/_util/future.py +37 -0
- inspect_ai/_util/http.py +3 -99
- inspect_ai/_util/httpx.py +60 -0
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/json.py +5 -52
- inspect_ai/_util/logger.py +30 -86
- inspect_ai/_util/retry.py +10 -61
- inspect_ai/_util/trace.py +2 -2
- inspect_ai/_view/server.py +86 -3
- inspect_ai/_view/www/dist/assets/index.js +25837 -13269
- inspect_ai/_view/www/log-schema.json +253 -186
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
- inspect_ai/_view/www/src/types/log.d.ts +122 -94
- inspect_ai/approval/_human/manager.py +6 -10
- inspect_ai/approval/_human/panel.py +2 -2
- inspect_ai/dataset/_sources/util.py +7 -6
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +35 -61
- inspect_ai/log/_log.py +18 -1
- inspect_ai/log/_recorders/eval.py +14 -23
- inspect_ai/log/_recorders/json.py +3 -18
- inspect_ai/log/_samples.py +27 -2
- inspect_ai/log/_transcript.py +8 -8
- inspect_ai/model/__init__.py +2 -1
- inspect_ai/model/_call_tools.py +60 -40
- inspect_ai/model/_chat_message.py +3 -2
- inspect_ai/model/_generate_config.py +25 -0
- inspect_ai/model/_model.py +74 -36
- inspect_ai/model/_openai.py +9 -1
- inspect_ai/model/_providers/anthropic.py +172 -154
- inspect_ai/model/_providers/azureai.py +11 -9
- inspect_ai/model/_providers/bedrock.py +33 -24
- inspect_ai/model/_providers/cloudflare.py +8 -9
- inspect_ai/model/_providers/goodfire.py +7 -3
- inspect_ai/model/_providers/google.py +47 -13
- inspect_ai/model/_providers/groq.py +15 -15
- inspect_ai/model/_providers/hf.py +24 -17
- inspect_ai/model/_providers/mistral.py +36 -20
- inspect_ai/model/_providers/openai.py +30 -25
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +3 -4
- inspect_ai/model/_providers/util/__init__.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +6 -19
- inspect_ai/model/_providers/util/hooks.py +165 -0
- inspect_ai/model/_providers/vertex.py +20 -3
- inspect_ai/model/_providers/vllm.py +16 -19
- inspect_ai/scorer/_multi.py +5 -2
- inspect_ai/solver/_bridge/patch.py +31 -1
- inspect_ai/solver/_fork.py +5 -3
- inspect_ai/solver/_human_agent/agent.py +3 -2
- inspect_ai/tool/__init__.py +8 -2
- inspect_ai/tool/_tool_info.py +4 -90
- inspect_ai/tool/_tool_params.py +4 -34
- inspect_ai/tool/_tools/_computer/_common.py +117 -58
- inspect_ai/tool/_tools/_computer/_computer.py +80 -57
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_computer/test_args.py +151 -0
- inspect_ai/tool/_tools/_web_search.py +30 -24
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_concurrency.py +5 -6
- inspect_ai/util/_display.py +6 -0
- inspect_ai/util/_json.py +170 -0
- inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
- inspect_ai/util/_sandbox/docker/docker.py +5 -0
- inspect_ai/util/_sandbox/environment.py +56 -9
- inspect_ai/util/_sandbox/service.py +12 -5
- inspect_ai/util/_subprocess.py +94 -113
- inspect_ai/util/_subtask.py +2 -4
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +111 -103
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
- inspect_ai/_util/timeouts.py +0 -160
- inspect_ai/model/_providers/util/tracker.py +0 -92
- inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
inspect_ai/scorer/_multi.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
import
|
1
|
+
import functools
|
2
2
|
|
3
|
+
from inspect_ai._util._async import tg_collect
|
3
4
|
from inspect_ai.scorer._reducer.registry import create_reducers
|
4
5
|
from inspect_ai.solver._task_state import TaskState
|
5
6
|
|
@@ -19,7 +20,9 @@ def multi_scorer(scorers: list[Scorer], reducer: str | ScoreReducer) -> Scorer:
|
|
19
20
|
reducer = create_reducers(reducer)[0]
|
20
21
|
|
21
22
|
async def score(state: TaskState, target: Target) -> Score:
|
22
|
-
scores = await
|
23
|
+
scores = await tg_collect(
|
24
|
+
[functools.partial(_scorer, state, target) for _scorer in scorers]
|
25
|
+
)
|
23
26
|
return reducer(scores)
|
24
27
|
|
25
28
|
return score
|
@@ -11,11 +11,12 @@ from openai._types import ResponseT
|
|
11
11
|
from openai.types.chat import (
|
12
12
|
ChatCompletion,
|
13
13
|
ChatCompletionMessageParam,
|
14
|
+
ChatCompletionToolChoiceOptionParam,
|
14
15
|
ChatCompletionToolParam,
|
15
16
|
)
|
16
17
|
from shortuuid import uuid
|
17
18
|
|
18
|
-
from inspect_ai.model._generate_config import GenerateConfig
|
19
|
+
from inspect_ai.model._generate_config import GenerateConfig, ResponseSchema
|
19
20
|
from inspect_ai.model._model import get_model
|
20
21
|
from inspect_ai.model._openai import (
|
21
22
|
chat_messages_from_openai,
|
@@ -23,8 +24,10 @@ from inspect_ai.model._openai import (
|
|
23
24
|
openai_completion_usage,
|
24
25
|
)
|
25
26
|
from inspect_ai.solver._task_state import sample_state
|
27
|
+
from inspect_ai.tool._tool_choice import ToolChoice, ToolFunction
|
26
28
|
from inspect_ai.tool._tool_info import ToolInfo
|
27
29
|
from inspect_ai.tool._tool_params import ToolParams
|
30
|
+
from inspect_ai.util._json import JSONSchema
|
28
31
|
|
29
32
|
|
30
33
|
@contextlib.asynccontextmanager
|
@@ -113,6 +116,20 @@ async def inspect_model_request(
|
|
113
116
|
)
|
114
117
|
)
|
115
118
|
|
119
|
+
# convert openai tool choice to inspect tool_choice
|
120
|
+
inspect_tool_choice: ToolChoice | None = None
|
121
|
+
tool_choice: ChatCompletionToolChoiceOptionParam | None = json_data.get(
|
122
|
+
"tool_choice", None
|
123
|
+
)
|
124
|
+
if tool_choice is not None:
|
125
|
+
match tool_choice:
|
126
|
+
case "auto" | "none":
|
127
|
+
inspect_tool_choice = tool_choice
|
128
|
+
case "required":
|
129
|
+
inspect_tool_choice = "any"
|
130
|
+
case _:
|
131
|
+
inspect_tool_choice = ToolFunction(name=tool_choice["function"]["name"])
|
132
|
+
|
116
133
|
# resolve model
|
117
134
|
if model_name == "inspect":
|
118
135
|
model = get_model()
|
@@ -122,6 +139,7 @@ async def inspect_model_request(
|
|
122
139
|
output = await model.generate(
|
123
140
|
input=input,
|
124
141
|
tools=inspect_tools,
|
142
|
+
tool_choice=inspect_tool_choice,
|
125
143
|
config=generate_config_from_openai(options),
|
126
144
|
)
|
127
145
|
|
@@ -165,4 +183,16 @@ def generate_config_from_openai(options: FinalRequestOptions) -> GenerateConfig:
|
|
165
183
|
config.parallel_tool_calls = json_data.get("parallel_tool_calls", None)
|
166
184
|
config.reasoning_effort = json_data.get("reasoning_effort", None)
|
167
185
|
|
186
|
+
# response format
|
187
|
+
response_format: dict[str, Any] | None = json_data.get("response_format", None)
|
188
|
+
if response_format is not None:
|
189
|
+
json_schema: dict[str, Any] | None = response_format.get("json_schema", None)
|
190
|
+
if json_schema is not None:
|
191
|
+
config.response_schema = ResponseSchema(
|
192
|
+
name=json_schema.get("name", "schema"),
|
193
|
+
description=json_schema.get("description", None),
|
194
|
+
json_schema=JSONSchema.model_validate(json_schema.get("schema", {})),
|
195
|
+
strict=json_schema.get("strict", None),
|
196
|
+
)
|
197
|
+
|
168
198
|
return config
|
inspect_ai/solver/_fork.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
-
import
|
1
|
+
import functools
|
2
2
|
from contextvars import ContextVar
|
3
3
|
from copy import deepcopy
|
4
4
|
from typing import Any, cast
|
5
5
|
|
6
6
|
from typing_extensions import overload
|
7
7
|
|
8
|
+
from inspect_ai._util._async import tg_collect
|
8
9
|
from inspect_ai._util.registry import registry_log_name, registry_params
|
9
10
|
from inspect_ai.util._subtask import subtask
|
10
11
|
|
@@ -44,8 +45,9 @@ async def fork(
|
|
44
45
|
if isinstance(solvers, Solver):
|
45
46
|
return await solver_subtask(state, solvers)
|
46
47
|
else:
|
47
|
-
|
48
|
-
|
48
|
+
return await tg_collect(
|
49
|
+
[functools.partial(solver_subtask, state, solver) for solver in solvers]
|
50
|
+
)
|
49
51
|
|
50
52
|
|
51
53
|
async def solver_subtask(state: TaskState, solver: Solver) -> TaskState:
|
@@ -1,6 +1,7 @@
|
|
1
|
-
import asyncio
|
2
1
|
from typing import cast
|
3
2
|
|
3
|
+
import anyio
|
4
|
+
|
4
5
|
from inspect_ai.util import display_type, input_panel, sandbox
|
5
6
|
from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
|
6
7
|
|
@@ -42,7 +43,7 @@ def human_agent(
|
|
42
43
|
Solver: Human agent solver.
|
43
44
|
"""
|
44
45
|
# we can only run one human agent interaction at a time (use lock to enforce)
|
45
|
-
agent_lock =
|
46
|
+
agent_lock = anyio.Lock()
|
46
47
|
|
47
48
|
async def solve(state: TaskState, generate: Generate) -> TaskState:
|
48
49
|
async with agent_lock:
|
inspect_ai/tool/__init__.py
CHANGED
@@ -20,7 +20,7 @@ from ._tool_call import (
|
|
20
20
|
from ._tool_choice import ToolChoice, ToolFunction
|
21
21
|
from ._tool_def import ToolDef
|
22
22
|
from ._tool_info import ToolInfo
|
23
|
-
from ._tool_params import
|
23
|
+
from ._tool_params import ToolParam, ToolParams
|
24
24
|
from ._tool_with import tool_with
|
25
25
|
from ._tools._computer import computer
|
26
26
|
from ._tools._execute import bash, python
|
@@ -56,12 +56,18 @@ __all__ = [
|
|
56
56
|
"ToolInfo",
|
57
57
|
"ToolParam",
|
58
58
|
"ToolParams",
|
59
|
-
"JSONType",
|
60
59
|
]
|
61
60
|
|
62
61
|
_UTIL_MODULE_VERSION = "0.3.19"
|
62
|
+
_JSON_MODULE_VERSION = "0.3.73"
|
63
63
|
_REMOVED_IN = "0.4"
|
64
64
|
|
65
|
+
relocated_module_attribute(
|
66
|
+
"JSONType",
|
67
|
+
"inspect_ai.util.JSONType",
|
68
|
+
_JSON_MODULE_VERSION,
|
69
|
+
_REMOVED_IN,
|
70
|
+
)
|
65
71
|
|
66
72
|
relocated_module_attribute(
|
67
73
|
"ToolEnvironment",
|
inspect_ai/tool/_tool_info.py
CHANGED
@@ -1,27 +1,19 @@
|
|
1
1
|
import inspect
|
2
|
-
import types
|
3
|
-
import typing
|
4
|
-
from dataclasses import is_dataclass
|
5
2
|
from typing import (
|
6
3
|
Any,
|
7
4
|
Callable,
|
8
5
|
Dict,
|
9
|
-
List,
|
10
|
-
Optional,
|
11
|
-
Tuple,
|
12
|
-
Type,
|
13
|
-
Union,
|
14
6
|
get_args,
|
15
|
-
get_origin,
|
16
7
|
get_type_hints,
|
17
|
-
is_typeddict,
|
18
8
|
)
|
19
9
|
|
20
10
|
from docstring_parser import Docstring, parse
|
21
11
|
from pydantic import BaseModel, Field
|
22
12
|
|
13
|
+
from inspect_ai.util._json import JSONType, json_schema
|
14
|
+
|
23
15
|
from ._tool_description import tool_description
|
24
|
-
from ._tool_params import
|
16
|
+
from ._tool_params import ToolParam, ToolParams
|
25
17
|
|
26
18
|
|
27
19
|
class ToolInfo(BaseModel):
|
@@ -88,7 +80,7 @@ def parse_tool_info(func: Callable[..., Any]) -> ToolInfo:
|
|
88
80
|
|
89
81
|
# Get type information from type annotations
|
90
82
|
if param_name in type_hints:
|
91
|
-
tool_param =
|
83
|
+
tool_param = json_schema(type_hints[param_name])
|
92
84
|
# as a fallback try to parse it from the docstring
|
93
85
|
# (this is minimally necessary for backwards compatibility
|
94
86
|
# with tools gen1 type parsing, which only used docstrings)
|
@@ -129,84 +121,6 @@ def parse_tool_info(func: Callable[..., Any]) -> ToolInfo:
|
|
129
121
|
return info
|
130
122
|
|
131
123
|
|
132
|
-
def parse_type(type_hint: Type[Any]) -> ToolParam:
|
133
|
-
origin = get_origin(type_hint)
|
134
|
-
args = get_args(type_hint)
|
135
|
-
|
136
|
-
if origin is None:
|
137
|
-
if type_hint is int:
|
138
|
-
return ToolParam(type="integer")
|
139
|
-
elif type_hint is float:
|
140
|
-
return ToolParam(type="number")
|
141
|
-
elif type_hint is str:
|
142
|
-
return ToolParam(type="string")
|
143
|
-
elif type_hint is bool:
|
144
|
-
return ToolParam(type="boolean")
|
145
|
-
elif type_hint is list:
|
146
|
-
return ToolParam(type="array", items=ToolParam())
|
147
|
-
elif type_hint is dict:
|
148
|
-
return ToolParam(type="object", additionalProperties=ToolParam())
|
149
|
-
elif (
|
150
|
-
is_dataclass(type_hint)
|
151
|
-
or is_typeddict(type_hint)
|
152
|
-
or (isinstance(type_hint, type) and issubclass(type_hint, BaseModel))
|
153
|
-
):
|
154
|
-
return parse_object(type_hint)
|
155
|
-
elif type_hint is type(None):
|
156
|
-
return ToolParam(type="null")
|
157
|
-
else:
|
158
|
-
return ToolParam()
|
159
|
-
elif origin is list or origin is List or origin is tuple or origin is Tuple:
|
160
|
-
return ToolParam(
|
161
|
-
type="array", items=parse_type(args[0]) if args else ToolParam()
|
162
|
-
)
|
163
|
-
elif origin is dict or origin is Dict:
|
164
|
-
return ToolParam(
|
165
|
-
type="object",
|
166
|
-
additionalProperties=parse_type(args[1]) if len(args) > 1 else ToolParam(),
|
167
|
-
)
|
168
|
-
elif origin is Union or origin is types.UnionType:
|
169
|
-
return ToolParam(anyOf=[parse_type(arg) for arg in args])
|
170
|
-
elif origin is Optional:
|
171
|
-
return ToolParam(
|
172
|
-
anyOf=[parse_type(arg) for arg in args] + [ToolParam(type="null")]
|
173
|
-
)
|
174
|
-
elif origin is typing.Literal:
|
175
|
-
return ToolParam(enum=list(args))
|
176
|
-
|
177
|
-
return ToolParam() # Default case if we can't determine the type
|
178
|
-
|
179
|
-
|
180
|
-
def parse_object(cls: Type[Any]) -> ToolParam:
|
181
|
-
properties: Dict[str, ToolParam] = {}
|
182
|
-
required: List[str] = []
|
183
|
-
|
184
|
-
if is_dataclass(cls):
|
185
|
-
fields = cls.__dataclass_fields__ # type: ignore
|
186
|
-
for name, field in fields.items():
|
187
|
-
properties[name] = parse_type(field.type) # type: ignore
|
188
|
-
if field.default == field.default_factory:
|
189
|
-
required.append(name)
|
190
|
-
elif isinstance(cls, type) and issubclass(cls, BaseModel):
|
191
|
-
schema = cls.model_json_schema()
|
192
|
-
for name, prop in schema.get("properties", {}).items():
|
193
|
-
properties[name] = ToolParam(**prop)
|
194
|
-
required = schema.get("required", [])
|
195
|
-
elif is_typeddict(cls):
|
196
|
-
annotations = get_type_hints(cls)
|
197
|
-
for name, type_hint in annotations.items():
|
198
|
-
properties[name] = parse_type(type_hint)
|
199
|
-
if name in cls.__required_keys__:
|
200
|
-
required.append(name)
|
201
|
-
|
202
|
-
return ToolParam(
|
203
|
-
type="object",
|
204
|
-
properties=properties,
|
205
|
-
required=required if required else None,
|
206
|
-
additionalProperties=False,
|
207
|
-
)
|
208
|
-
|
209
|
-
|
210
124
|
def parse_docstring(docstring: str | None, param_name: str) -> Dict[str, str]:
|
211
125
|
if not docstring:
|
212
126
|
return {}
|
inspect_ai/tool/_tool_params.py
CHANGED
@@ -1,44 +1,14 @@
|
|
1
1
|
from typing import (
|
2
|
-
Any,
|
3
2
|
Literal,
|
4
|
-
|
3
|
+
TypeAlias,
|
5
4
|
)
|
6
5
|
|
7
6
|
from pydantic import BaseModel, Field
|
8
7
|
|
9
|
-
|
10
|
-
"""Validate types within JSON schema."""
|
8
|
+
from inspect_ai.util._json import JSONSchema
|
11
9
|
|
12
|
-
|
13
|
-
|
14
|
-
"""Description of tool parameter in JSON Schema format."""
|
15
|
-
|
16
|
-
type: JSONType | None = Field(default=None)
|
17
|
-
"""JSON type of tool parameter."""
|
18
|
-
|
19
|
-
description: str | None = Field(default=None)
|
20
|
-
"""Parameter description."""
|
21
|
-
|
22
|
-
default: Any = Field(default=None)
|
23
|
-
"""Default value for parameter."""
|
24
|
-
|
25
|
-
enum: list[Any] | None = Field(default=None)
|
26
|
-
"""Valid values for enum parameters."""
|
27
|
-
|
28
|
-
items: Optional["ToolParam"] = Field(default=None)
|
29
|
-
"""Valid type for array parameters."""
|
30
|
-
|
31
|
-
properties: dict[str, "ToolParam"] | None = Field(default=None)
|
32
|
-
"""Valid fields for object parametrs."""
|
33
|
-
|
34
|
-
additionalProperties: Optional["ToolParam"] | bool | None = Field(default=None)
|
35
|
-
"""Are additional properties allowed?"""
|
36
|
-
|
37
|
-
anyOf: list["ToolParam"] | None = Field(default=None)
|
38
|
-
"""Valid types for union parameters."""
|
39
|
-
|
40
|
-
required: list[str] | None = Field(default=None)
|
41
|
-
"""Required fields for object parameters."""
|
10
|
+
ToolParam: TypeAlias = JSONSchema
|
11
|
+
"""Description of tool parameter in JSON Schema format."""
|
42
12
|
|
43
13
|
|
44
14
|
class ToolParams(BaseModel):
|
@@ -11,19 +11,6 @@ from inspect_ai.tool import ToolError, ToolResult
|
|
11
11
|
from inspect_ai.util._sandbox.context import sandbox_with
|
12
12
|
from inspect_ai.util._sandbox.environment import SandboxEnvironment
|
13
13
|
|
14
|
-
Action = Literal[
|
15
|
-
"key",
|
16
|
-
"type",
|
17
|
-
"mouse_move",
|
18
|
-
"left_click",
|
19
|
-
"left_click_drag",
|
20
|
-
"right_click",
|
21
|
-
"middle_click",
|
22
|
-
"double_click",
|
23
|
-
"screenshot",
|
24
|
-
"cursor_position",
|
25
|
-
]
|
26
|
-
|
27
14
|
|
28
15
|
class ToolExecResult(BaseModel):
|
29
16
|
output: str | None = Field(default=None)
|
@@ -31,6 +18,122 @@ class ToolExecResult(BaseModel):
|
|
31
18
|
base64_image: str | None = Field(default=None)
|
32
19
|
|
33
20
|
|
21
|
+
async def cursor_position(timeout: int | None = None) -> ToolResult:
|
22
|
+
return await _send_cmd(["cursor_position"], timeout=timeout)
|
23
|
+
|
24
|
+
|
25
|
+
async def screenshot(timeout: int | None = None) -> ToolResult:
|
26
|
+
return await _send_cmd(["screenshot"], timeout=timeout)
|
27
|
+
|
28
|
+
|
29
|
+
async def wait(duration: int, timeout: int | None = None) -> ToolResult:
|
30
|
+
return await _send_cmd(["wait", "--duration", f"{duration}"], timeout=timeout)
|
31
|
+
|
32
|
+
|
33
|
+
async def mouse_move(coordinate: list[int], timeout: int | None = None) -> ToolResult:
|
34
|
+
return await _send_cmd(
|
35
|
+
["mouse_move", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
|
36
|
+
timeout=timeout,
|
37
|
+
)
|
38
|
+
|
39
|
+
|
40
|
+
async def left_mouse_down(timeout: int | None = None) -> ToolResult:
|
41
|
+
return await _send_cmd(["left_mouse_down"], timeout=timeout)
|
42
|
+
|
43
|
+
|
44
|
+
async def left_mouse_up(timeout: int | None = None) -> ToolResult:
|
45
|
+
return await _send_cmd(["left_mouse_up"], timeout=timeout)
|
46
|
+
|
47
|
+
|
48
|
+
async def left_click(coordinate: list[int], timeout: int | None = None) -> ToolResult:
|
49
|
+
return await _send_cmd(
|
50
|
+
["left_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
|
51
|
+
timeout=timeout,
|
52
|
+
)
|
53
|
+
|
54
|
+
|
55
|
+
async def left_click_drag(
|
56
|
+
start_coordinate: list[int], coordinate: list[int], timeout: int | None = None
|
57
|
+
) -> ToolResult:
|
58
|
+
return await _send_cmd(
|
59
|
+
[
|
60
|
+
"left_click_drag",
|
61
|
+
"--start_coordinate",
|
62
|
+
f"{start_coordinate[0]}",
|
63
|
+
f"{start_coordinate[1]}",
|
64
|
+
"--coordinate",
|
65
|
+
f"{coordinate[0]}",
|
66
|
+
f"{coordinate[1]}",
|
67
|
+
],
|
68
|
+
timeout=timeout,
|
69
|
+
)
|
70
|
+
|
71
|
+
|
72
|
+
async def right_click(coordinate: list[int], timeout: int | None = None) -> ToolResult:
|
73
|
+
return await _send_cmd(
|
74
|
+
["right_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
|
75
|
+
timeout=timeout,
|
76
|
+
)
|
77
|
+
|
78
|
+
|
79
|
+
async def middle_click(coordinate: list[int], timeout: int | None = None) -> ToolResult:
|
80
|
+
return await _send_cmd(
|
81
|
+
["middle_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
|
82
|
+
timeout=timeout,
|
83
|
+
)
|
84
|
+
|
85
|
+
|
86
|
+
async def double_click(coordinate: list[int], timeout: int | None = None) -> ToolResult:
|
87
|
+
return await _send_cmd(
|
88
|
+
["double_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
|
89
|
+
timeout=timeout,
|
90
|
+
)
|
91
|
+
|
92
|
+
|
93
|
+
async def triple_click(coordinate: list[int], timeout: int | None = None) -> ToolResult:
|
94
|
+
return await _send_cmd(
|
95
|
+
["triple_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
|
96
|
+
timeout=timeout,
|
97
|
+
)
|
98
|
+
|
99
|
+
|
100
|
+
async def scroll(
|
101
|
+
scroll_amount: int,
|
102
|
+
scroll_direction: Literal["up", "down", "left", "right"],
|
103
|
+
coordinate: list[int] | None,
|
104
|
+
timeout: int | None = None,
|
105
|
+
) -> ToolResult:
|
106
|
+
return await _send_cmd(
|
107
|
+
[
|
108
|
+
"scroll",
|
109
|
+
"--scroll_amount",
|
110
|
+
f"{scroll_amount}",
|
111
|
+
"--scroll_direction",
|
112
|
+
f"{scroll_direction}",
|
113
|
+
]
|
114
|
+
+ (
|
115
|
+
["--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"]
|
116
|
+
if coordinate
|
117
|
+
else []
|
118
|
+
),
|
119
|
+
timeout=timeout,
|
120
|
+
)
|
121
|
+
|
122
|
+
|
123
|
+
async def press_key(key: str, timeout: int | None = None) -> ToolResult:
|
124
|
+
return await _send_cmd(["key", "--text", key], timeout=timeout)
|
125
|
+
|
126
|
+
|
127
|
+
async def hold_key(key: str, duration: int, timeout: int | None = None) -> ToolResult:
|
128
|
+
return await _send_cmd(
|
129
|
+
["hold_key", "--text", key, "--duration", f"{duration}"], timeout=timeout
|
130
|
+
)
|
131
|
+
|
132
|
+
|
133
|
+
async def type(text: str, timeout: int | None = None) -> ToolResult:
|
134
|
+
return await _send_cmd(["type", "--text", text], timeout=timeout)
|
135
|
+
|
136
|
+
|
34
137
|
async def _send_cmd(cmdTail: list[str], timeout: int | None = None) -> ToolResult:
|
35
138
|
from inspect_ai.log._samples import sample_active
|
36
139
|
|
@@ -39,7 +142,7 @@ async def _send_cmd(cmdTail: list[str], timeout: int | None = None) -> ToolResul
|
|
39
142
|
sample_id = sample.sample.id
|
40
143
|
assert sample_id
|
41
144
|
|
42
|
-
cmd = ["python3", "/opt/inspect/tool/computer_tool.py"
|
145
|
+
cmd = ["python3", "/opt/inspect/tool/computer_tool.py"] + cmdTail
|
43
146
|
|
44
147
|
raw_exec_result = await (await computer_sandbox()).exec(cmd, timeout=timeout)
|
45
148
|
|
@@ -72,50 +175,6 @@ async def _send_cmd(cmdTail: list[str], timeout: int | None = None) -> ToolResul
|
|
72
175
|
return "OK"
|
73
176
|
|
74
177
|
|
75
|
-
async def cursor_position(timeout: int | None = None) -> ToolResult:
|
76
|
-
return await _send_cmd(["cursor_position"], timeout=timeout)
|
77
|
-
|
78
|
-
|
79
|
-
async def screenshot(timeout: int | None = None) -> ToolResult:
|
80
|
-
return await _send_cmd(["screenshot"], timeout=timeout)
|
81
|
-
|
82
|
-
|
83
|
-
async def mouse_move(x: int, y: int, timeout: int | None = None) -> ToolResult:
|
84
|
-
return await _send_cmd(
|
85
|
-
["mouse_move", "--coordinate", f"{x}", f"{y}"], timeout=timeout
|
86
|
-
)
|
87
|
-
|
88
|
-
|
89
|
-
async def left_click(timeout: int | None = None) -> ToolResult:
|
90
|
-
return await _send_cmd(["left_click"], timeout=timeout)
|
91
|
-
|
92
|
-
|
93
|
-
async def left_click_drag(x: int, y: int, timeout: int | None = None) -> ToolResult:
|
94
|
-
return await _send_cmd(
|
95
|
-
["left_click_drag", "--coordinate", f"{x}", f"{y}"], timeout=timeout
|
96
|
-
)
|
97
|
-
|
98
|
-
|
99
|
-
async def right_click(timeout: int | None = None) -> ToolResult:
|
100
|
-
return await _send_cmd(["right_click"], timeout=timeout)
|
101
|
-
|
102
|
-
|
103
|
-
async def middle_click(timeout: int | None = None) -> ToolResult:
|
104
|
-
return await _send_cmd(["middle_click"], timeout=timeout)
|
105
|
-
|
106
|
-
|
107
|
-
async def double_click(timeout: int | None = None) -> ToolResult:
|
108
|
-
return await _send_cmd(["double_click"], timeout=timeout)
|
109
|
-
|
110
|
-
|
111
|
-
async def press_key(key: str, timeout: int | None = None) -> ToolResult:
|
112
|
-
return await _send_cmd(["key", "--text", key], timeout=timeout)
|
113
|
-
|
114
|
-
|
115
|
-
async def type(text: str, timeout: int | None = None) -> ToolResult:
|
116
|
-
return await _send_cmd(["type", "--text", text], timeout=timeout)
|
117
|
-
|
118
|
-
|
119
178
|
async def computer_sandbox() -> SandboxEnvironment:
|
120
179
|
sb = await sandbox_with("/opt/inspect/tool/computer_tool.py")
|
121
180
|
if sb:
|