inspect-ai 0.3.71__py3-none-any.whl → 0.3.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. inspect_ai/_cli/eval.py +14 -3
  2. inspect_ai/_cli/sandbox.py +3 -3
  3. inspect_ai/_cli/score.py +6 -4
  4. inspect_ai/_cli/trace.py +53 -6
  5. inspect_ai/_display/core/config.py +1 -1
  6. inspect_ai/_display/core/display.py +2 -1
  7. inspect_ai/_display/core/footer.py +6 -6
  8. inspect_ai/_display/plain/display.py +11 -6
  9. inspect_ai/_display/rich/display.py +23 -13
  10. inspect_ai/_display/textual/app.py +10 -9
  11. inspect_ai/_display/textual/display.py +2 -2
  12. inspect_ai/_display/textual/widgets/footer.py +4 -0
  13. inspect_ai/_display/textual/widgets/samples.py +14 -5
  14. inspect_ai/_eval/context.py +1 -2
  15. inspect_ai/_eval/eval.py +54 -41
  16. inspect_ai/_eval/loader.py +9 -2
  17. inspect_ai/_eval/run.py +148 -81
  18. inspect_ai/_eval/score.py +13 -8
  19. inspect_ai/_eval/task/images.py +31 -21
  20. inspect_ai/_eval/task/run.py +62 -59
  21. inspect_ai/_eval/task/rundir.py +16 -9
  22. inspect_ai/_eval/task/sandbox.py +7 -8
  23. inspect_ai/_eval/task/util.py +7 -0
  24. inspect_ai/_util/_async.py +118 -10
  25. inspect_ai/_util/constants.py +0 -2
  26. inspect_ai/_util/file.py +15 -29
  27. inspect_ai/_util/future.py +37 -0
  28. inspect_ai/_util/http.py +3 -99
  29. inspect_ai/_util/httpx.py +60 -0
  30. inspect_ai/_util/interrupt.py +2 -2
  31. inspect_ai/_util/json.py +5 -52
  32. inspect_ai/_util/logger.py +30 -86
  33. inspect_ai/_util/retry.py +10 -61
  34. inspect_ai/_util/trace.py +2 -2
  35. inspect_ai/_view/server.py +86 -3
  36. inspect_ai/_view/www/dist/assets/index.js +25837 -13269
  37. inspect_ai/_view/www/log-schema.json +253 -186
  38. inspect_ai/_view/www/package.json +2 -2
  39. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
  40. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
  41. inspect_ai/_view/www/src/types/log.d.ts +122 -94
  42. inspect_ai/approval/_human/manager.py +6 -10
  43. inspect_ai/approval/_human/panel.py +2 -2
  44. inspect_ai/dataset/_sources/util.py +7 -6
  45. inspect_ai/log/__init__.py +4 -0
  46. inspect_ai/log/_file.py +35 -61
  47. inspect_ai/log/_log.py +18 -1
  48. inspect_ai/log/_recorders/eval.py +14 -23
  49. inspect_ai/log/_recorders/json.py +3 -18
  50. inspect_ai/log/_samples.py +27 -2
  51. inspect_ai/log/_transcript.py +8 -8
  52. inspect_ai/model/__init__.py +2 -1
  53. inspect_ai/model/_call_tools.py +60 -40
  54. inspect_ai/model/_chat_message.py +3 -2
  55. inspect_ai/model/_generate_config.py +25 -0
  56. inspect_ai/model/_model.py +74 -36
  57. inspect_ai/model/_openai.py +9 -1
  58. inspect_ai/model/_providers/anthropic.py +172 -154
  59. inspect_ai/model/_providers/azureai.py +11 -9
  60. inspect_ai/model/_providers/bedrock.py +33 -24
  61. inspect_ai/model/_providers/cloudflare.py +8 -9
  62. inspect_ai/model/_providers/goodfire.py +7 -3
  63. inspect_ai/model/_providers/google.py +47 -13
  64. inspect_ai/model/_providers/groq.py +15 -15
  65. inspect_ai/model/_providers/hf.py +24 -17
  66. inspect_ai/model/_providers/mistral.py +36 -20
  67. inspect_ai/model/_providers/openai.py +30 -25
  68. inspect_ai/model/_providers/openai_o1.py +1 -1
  69. inspect_ai/model/_providers/providers.py +1 -1
  70. inspect_ai/model/_providers/together.py +3 -4
  71. inspect_ai/model/_providers/util/__init__.py +2 -2
  72. inspect_ai/model/_providers/util/chatapi.py +6 -19
  73. inspect_ai/model/_providers/util/hooks.py +165 -0
  74. inspect_ai/model/_providers/vertex.py +20 -3
  75. inspect_ai/model/_providers/vllm.py +16 -19
  76. inspect_ai/scorer/_multi.py +5 -2
  77. inspect_ai/solver/_bridge/patch.py +31 -1
  78. inspect_ai/solver/_fork.py +5 -3
  79. inspect_ai/solver/_human_agent/agent.py +3 -2
  80. inspect_ai/tool/__init__.py +8 -2
  81. inspect_ai/tool/_tool_info.py +4 -90
  82. inspect_ai/tool/_tool_params.py +4 -34
  83. inspect_ai/tool/_tools/_computer/_common.py +117 -58
  84. inspect_ai/tool/_tools/_computer/_computer.py +80 -57
  85. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
  86. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
  87. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
  88. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
  89. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
  90. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
  91. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
  92. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
  93. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
  94. inspect_ai/tool/_tools/_computer/test_args.py +151 -0
  95. inspect_ai/tool/_tools/_web_search.py +30 -24
  96. inspect_ai/util/__init__.py +4 -0
  97. inspect_ai/util/_concurrency.py +5 -6
  98. inspect_ai/util/_display.py +6 -0
  99. inspect_ai/util/_json.py +170 -0
  100. inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
  101. inspect_ai/util/_sandbox/docker/docker.py +5 -0
  102. inspect_ai/util/_sandbox/environment.py +56 -9
  103. inspect_ai/util/_sandbox/service.py +12 -5
  104. inspect_ai/util/_subprocess.py +94 -113
  105. inspect_ai/util/_subtask.py +2 -4
  106. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
  107. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +111 -103
  108. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
  109. inspect_ai/_util/timeouts.py +0 -160
  110. inspect_ai/model/_providers/util/tracker.py +0 -92
  111. inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
  112. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
  113. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
  114. {inspect_ai-0.3.71.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Awaitable, Callable
1
+ from typing import Awaitable, Callable, Literal, TypeVar
2
2
 
3
3
  from inspect_ai._util.content import Content, ContentImage, ContentText
4
4
  from inspect_ai.tool import Tool, ToolResult, tool
@@ -6,7 +6,7 @@ from inspect_ai.tool._tool import TOOL_INIT_MODEL_INPUT, ToolParsingError
6
6
  from inspect_ai.tool._tool_call import ToolCallModelInput
7
7
 
8
8
  from . import _common as common
9
- from ._common import Action
9
+ from ._resources.tool._constants import Action
10
10
 
11
11
  ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
12
12
 
@@ -26,8 +26,12 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
26
26
 
27
27
  async def execute(
28
28
  action: Action,
29
- text: str | None = None,
30
29
  coordinate: list[int] | None = None,
30
+ duration: int | None = None,
31
+ scroll_amount: int | None = None,
32
+ scroll_direction: Literal["up", "down", "left", "right"] | None = None,
33
+ start_coordinate: list[int] | None = None,
34
+ text: str | None = None,
31
35
  ) -> ToolResult:
32
36
  """
33
37
  Use this tool to interact with a computer.
@@ -47,83 +51,93 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
47
51
  "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
48
52
  "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
49
53
  "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal",
54
+ - `hold_key`: Hold down a key or multiple keys for a specified duration (in seconds). Supports the same syntax as `key`.
50
55
  - `type`: Type a string of text on the keyboard. If the text contains spaces, enclose it in quotes.
51
56
  - Example: execute(action="type", text="The crux of the biscuit is the apostrophe!")
52
57
  - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
53
58
  - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
54
59
  - Example: execute(action="mouse_move", coordinate=(100, 200))
60
+ - `left_mouse_down`: Press the left mouse button.
61
+ - `left_mouse_up`: Release the left mouse button.
55
62
  - `left_click`: Click the left mouse button.
56
63
  - `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
57
64
  - Example: execute(action="left_click_drag", start_coordinate=(100, 200), coordinate=(150, 250))
58
65
  - `right_click`: Click the right mouse button.
59
66
  - `middle_click`: Click the middle mouse button.
60
67
  - `double_click`: Double-click the left mouse button.
68
+ - `triple_click`: Triple-click the left mouse button.
69
+ - `wait`: Wait for a specified duration (in seconds).
61
70
  - `screenshot`: Take a screenshot.
71
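+ coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen to which to move, click, or drag. Required by `action=mouse_move`, `action=left_click_drag`, and the click actions (`left_click`, `right_click`, `middle_click`, `double_click`, `triple_click`); optional for `action=scroll`.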
72
+ duration (int | None): The duration to wait or hold the key down for. Required only by `action=hold_key` and `action=wait`.
73
+ scroll_amount (int | None): The number of 'clicks' to scroll. Required only by `action=scroll`.
74
+ scroll_direction (Literal["up", "down", "left", "right"] | None): The direction to scroll the screen. Required only by `action=scroll`.
75
+ start_coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen from which to initiate a drag. Required only by `action=left_click_drag`.
62
76
  text (str | None): The text to type or the key to press. Required when action is "key" or "type".
63
- coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen to which to move or drag. Required when action is "mouse_move" or "left_click_drag".
64
77
 
65
78
  Returns:
66
79
  The output of the command. Many commands will include a screenshot reflecting the result of the command in their output.
67
80
  """
68
- if action in ("mouse_move", "left_click_drag"):
69
- if coordinate is None:
70
- raise ToolParsingError(f"coordinate is required for {action}")
71
- if text is not None:
72
- raise ToolParsingError(f"text is not accepted for {action}")
73
- if not isinstance(coordinate, list) or len(coordinate) != 2:
74
- raise ToolParsingError(f"{coordinate} must be a tuple of length 2")
75
- if not all(isinstance(i, int) and i >= 0 for i in coordinate):
76
- raise ToolParsingError(
77
- f"{coordinate} must be a tuple of non-negative ints"
81
+ match action:
82
+ case "key":
83
+ return await common.press_key(not_none(text, "text"), timeout=timeout)
84
+ case "hold_key":
85
+ return await common.hold_key(
86
+ not_none(text, "text"),
87
+ not_none(duration, "duration"),
88
+ timeout=timeout,
78
89
  )
79
-
80
- if action == "mouse_move":
90
+ case "type":
91
+ return await common.type(not_none(text, "text"), timeout=timeout)
92
+ case "cursor_position":
93
+ return await common.cursor_position(timeout=timeout)
94
+ case "mouse_move":
81
95
  return await common.mouse_move(
82
- coordinate[0], coordinate[1], timeout=timeout
96
+ not_none(coordinate, "coordinate"), timeout=timeout
97
+ )
98
+ case "left_mouse_down":
99
+ return await common.left_mouse_down(timeout=timeout)
100
+ case "left_mouse_up":
101
+ return await common.left_mouse_up(timeout=timeout)
102
+ case "left_click":
103
+ return await common.left_click(
104
+ not_none(coordinate, "coordinate"), timeout=timeout
83
105
  )
84
- elif action == "left_click_drag":
106
+ case "left_click_drag":
85
107
  return await common.left_click_drag(
86
- coordinate[0], coordinate[1], timeout=timeout
108
+ not_none(start_coordinate, "start_coordinate"),
109
+ not_none(coordinate, "coordinate"),
110
+ timeout=timeout,
87
111
  )
88
-
89
- if action in ("key", "type"):
90
- if text is None:
91
- raise ToolParsingError(f"text is required for {action}")
92
- if coordinate is not None:
93
- raise ToolParsingError(f"coordinate is not accepted for {action}")
94
- if not isinstance(text, str):
95
- raise ToolParsingError(f"{text} must be a string")
96
-
97
- if action == "key":
98
- return await common.press_key(text, timeout=timeout)
99
- elif action == "type":
100
- return await common.type(text, timeout=timeout)
101
-
102
- if action in (
103
- "left_click",
104
- "right_click",
105
- "double_click",
106
- "middle_click",
107
- "screenshot",
108
- "cursor_position",
109
- ):
110
- if text is not None:
111
- raise ToolParsingError(f"text is not accepted for {action}")
112
- if coordinate is not None:
113
- raise ToolParsingError(f"coordinate is not accepted for {action}")
114
-
115
- if action == "screenshot":
112
+ case "right_click":
113
+ return await common.right_click(
114
+ not_none(coordinate, "coordinate"), timeout=timeout
115
+ )
116
+ case "middle_click":
117
+ return await common.middle_click(
118
+ not_none(coordinate, "coordinate"), timeout=timeout
119
+ )
120
+ case "double_click":
121
+ return await common.double_click(
122
+ not_none(coordinate, "coordinate"), timeout=timeout
123
+ )
124
+ case "triple_click":
125
+ return await common.triple_click(
126
+ not_none(coordinate, "coordinate"), timeout=timeout
127
+ )
128
+ case "scroll":
129
+ return await common.scroll(
130
+ not_none(scroll_amount, "scroll_amount"),
131
+ not_none(scroll_direction, "scroll_direction"),
132
+ coordinate,
133
+ timeout=timeout,
134
+ )
135
+ case "wait":
136
+ return await common.wait(
137
+ not_none(duration, "duration"), timeout=timeout
138
+ )
139
+ case "screenshot":
116
140
  return await common.screenshot(timeout=timeout)
117
- elif action == "cursor_position":
118
- return await common.cursor_position(timeout=timeout)
119
- elif action == "left_click":
120
- return await common.left_click(timeout=timeout)
121
- elif action == "right_click":
122
- return await common.right_click(timeout=timeout)
123
- elif action == "middle_click":
124
- return await common.middle_click(timeout=timeout)
125
- elif action == "double_click":
126
- return await common.double_click(timeout=timeout)
127
141
 
128
142
  raise ToolParsingError(f"Invalid action: {action}")
129
143
 
@@ -161,3 +175,12 @@ def _computer_model_input(max_screenshots: int) -> ToolCallModelInput:
161
175
  return input_content
162
176
 
163
177
  return model_input
178
+
179
+
180
+ T = TypeVar("T")
181
+
182
+
183
+ def not_none(value: T | None, name: str) -> T:
184
+ if value is None:
185
+ raise ToolParsingError(f"{name} must be provided")
186
+ return value
@@ -1,3 +1,9 @@
1
1
  {
2
- "security.workspace.trust.enabled": false
2
+ "security.workspace.trust.enabled": false,
3
+ "update.showReleaseNotes": false,
4
+ "editor.cursorBlinking": "solid",
5
+ "editor.cursorWidth": 3,
6
+ "workbench.colorCustomizations": {
7
+ "editorCursor.foreground": "#FF0000"
8
+ }
3
9
  }
@@ -0,0 +1,91 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+
3
+ <channel name="xfwm4" version="1.0">
4
+ <property name="general" type="empty">
5
+ <property name="activate_action" type="string" value="bring" />
6
+ <property name="borderless_maximize" type="bool" value="true" />
7
+ <property name="box_move" type="bool" value="false" />
8
+ <property name="box_resize" type="bool" value="false" />
9
+ <property name="button_layout" type="string" value="O|SHMC" />
10
+ <property name="button_offset" type="int" value="0" />
11
+ <property name="button_spacing" type="int" value="0" />
12
+ <property name="click_to_focus" type="bool" value="true" />
13
+ <property name="cycle_apps_only" type="bool" value="false" />
14
+ <property name="cycle_draw_frame" type="bool" value="true" />
15
+ <property name="cycle_raise" type="bool" value="false" />
16
+ <property name="cycle_hidden" type="bool" value="true" />
17
+ <property name="cycle_minimum" type="bool" value="true" />
18
+ <property name="cycle_minimized" type="bool" value="false" />
19
+ <property name="cycle_preview" type="bool" value="true" />
20
+ <property name="cycle_tabwin_mode" type="int" value="0" />
21
+ <property name="cycle_workspaces" type="bool" value="false" />
22
+ <property name="double_click_action" type="string" value="maximize" />
23
+ <property name="double_click_distance" type="int" value="5" />
24
+ <property name="double_click_time" type="int" value="250" />
25
+ <property name="easy_click" type="string" value="Alt" />
26
+ <property name="focus_delay" type="int" value="250" />
27
+ <property name="focus_hint" type="bool" value="true" />
28
+ <property name="focus_new" type="bool" value="true" />
29
+ <property name="frame_opacity" type="int" value="100" />
30
+ <property name="frame_border_top" type="int" value="0" />
31
+ <property name="full_width_title" type="bool" value="true" />
32
+ <property name="horiz_scroll_opacity" type="bool" value="false" />
33
+ <property name="inactive_opacity" type="int" value="100" />
34
+ <property name="maximized_offset" type="int" value="0" />
35
+ <property name="mousewheel_rollup" type="bool" value="true" />
36
+ <property name="move_opacity" type="int" value="100" />
37
+ <property name="placement_mode" type="string" value="center" />
38
+ <property name="placement_ratio" type="int" value="20" />
39
+ <property name="popup_opacity" type="int" value="100" />
40
+ <property name="prevent_focus_stealing" type="bool" value="false" />
41
+ <property name="raise_delay" type="int" value="250" />
42
+ <property name="raise_on_click" type="bool" value="true" />
43
+ <property name="raise_on_focus" type="bool" value="false" />
44
+ <property name="raise_with_any_button" type="bool" value="true" />
45
+ <property name="repeat_urgent_blink" type="bool" value="false" />
46
+ <property name="resize_opacity" type="int" value="100" />
47
+ <property name="scroll_workspaces" type="bool" value="true" />
48
+ <property name="shadow_delta_height" type="int" value="0" />
49
+ <property name="shadow_delta_width" type="int" value="0" />
50
+ <property name="shadow_delta_x" type="int" value="0" />
51
+ <property name="shadow_delta_y" type="int" value="-3" />
52
+ <property name="shadow_opacity" type="int" value="50" />
53
+ <property name="show_app_icon" type="bool" value="false" />
54
+ <property name="show_dock_shadow" type="bool" value="true" />
55
+ <property name="show_frame_shadow" type="bool" value="true" />
56
+ <property name="show_popup_shadow" type="bool" value="false" />
57
+ <property name="snap_resist" type="bool" value="false" />
58
+ <property name="snap_to_border" type="bool" value="true" />
59
+ <property name="snap_to_windows" type="bool" value="false" />
60
+ <property name="snap_width" type="int" value="10" />
61
+ <property name="vblank_mode" type="string" value="auto" />
62
+ <property name="theme" type="string" value="Default" />
63
+ <property name="tile_on_move" type="bool" value="true" />
64
+ <property name="title_alignment" type="string" value="center" />
65
+ <property name="title_font" type="string" value="Sans Bold 9" />
66
+ <property name="title_horizontal_offset" type="int" value="0" />
67
+ <property name="titleless_maximize" type="bool" value="false" />
68
+ <property name="title_shadow_active" type="string" value="false" />
69
+ <property name="title_shadow_inactive" type="string" value="false" />
70
+ <property name="title_vertical_offset_active" type="int" value="0" />
71
+ <property name="title_vertical_offset_inactive" type="int" value="0" />
72
+ <property name="toggle_workspaces" type="bool" value="false" />
73
+ <property name="unredirect_overlays" type="bool" value="true" />
74
+ <property name="urgent_blink" type="bool" value="false" />
75
+ <property name="use_compositing" type="bool" value="true" />
76
+ <property name="workspace_count" type="int" value="1" />
77
+ <property name="wrap_cycle" type="bool" value="true" />
78
+ <property name="wrap_layout" type="bool" value="true" />
79
+ <property name="wrap_resistance" type="int" value="10" />
80
+ <property name="wrap_windows" type="bool" value="true" />
81
+ <property name="wrap_workspaces" type="bool" value="false" />
82
+ <property name="zoom_desktop" type="bool" value="true" />
83
+ <property name="zoom_pointer" type="bool" value="true" />
84
+ <property name="workspace_names" type="array">
85
+ <value type="string" value="Workspace 1" />
86
+ <value type="string" value="Workspace 2" />
87
+ <value type="string" value="Workspace 3" />
88
+ <value type="string" value="Workspace 4" />
89
+ </property>
90
+ </property>
91
+ </channel>
@@ -0,0 +1,8 @@
1
+ [MASTER]
2
+ ; R - Refactorings
3
+ ; C - Convention
4
+ ; W - Warning
5
+ ; E - Error
6
+ enable=C,R,W,E
7
+ disable=R0903,C0114,C0115,C0116,C0301,C0411,C1804,C1805,W0120,W0511,W0718,W1203,E0401,E1101,E0611,E1128
8
+ score=no
@@ -0,0 +1,12 @@
1
+ {
2
+ "cSpell.words": [
3
+ "FWXGA",
4
+ "getmouselocation",
5
+ "keyup",
6
+ "mousedown",
7
+ "mousemove",
8
+ "mouseup",
9
+ "scrot",
10
+ "WXGA"
11
+ ]
12
+ }
@@ -0,0 +1,78 @@
1
+ from argparse import Action, ArgumentParser, Namespace
2
+ from typing import Sequence
3
+
4
+
5
+ def parse_arguments(args: Sequence[str] | None = None) -> Namespace:
6
+ return _create_parser().parse_args(args)
7
+
8
+
9
+ def _create_parser() -> ArgumentParser:
10
+ parser = ArgumentParser(prog="computer_tool")
11
+ subparsers = parser.add_subparsers(dest="action", required=True)
12
+
13
+ # these take no additional arguments
14
+ subparsers.add_parser(
15
+ "screenshot",
16
+ aliases=["cursor_position", "left_mouse_down", "left_mouse_up"],
17
+ )
18
+
19
+ key_and_type = subparsers.add_parser("type", aliases=["key"])
20
+ _add_text(key_and_type)
21
+
22
+ hold_key = subparsers.add_parser("hold_key")
23
+ _add_text(hold_key)
24
+ _add_duration(hold_key)
25
+
26
+ mouse_move = subparsers.add_parser("mouse_move")
27
+ _add_coordinate(mouse_move)
28
+
29
+ click = subparsers.add_parser(
30
+ "left_click",
31
+ aliases=["right_click", "middle_click", "double_click", "triple_click"],
32
+ )
33
+ _add_coordinate(click, False)
34
+ _add_text(click, False)
35
+
36
+ left_click_drag = subparsers.add_parser("left_click_drag")
37
+ _add_start_coordinate(left_click_drag)
38
+ _add_coordinate(left_click_drag)
39
+ _add_text(left_click_drag, False)
40
+
41
+ scroll = subparsers.add_parser("scroll")
42
+ _add_scroll_direction(scroll)
43
+ _add_scroll_amount(scroll)
44
+ # despite what the doc says, the model doesn't always provide a coordinate
45
+ _add_coordinate(scroll, False)
46
+
47
+ wait = subparsers.add_parser("wait")
48
+ _add_duration(wait)
49
+
50
+ return parser
51
+
52
+
53
+ def _add_scroll_direction(subparser: ArgumentParser) -> Action:
54
+ return subparser.add_argument(
55
+ "--scroll_direction", choices=["up", "down", "left", "right"], required=True
56
+ )
57
+
58
+
59
+ def _add_scroll_amount(subparser: ArgumentParser) -> Action:
60
+ return subparser.add_argument("--scroll_amount", type=int, required=True)
61
+
62
+
63
+ def _add_coordinate(subparser: ArgumentParser, required: bool = True) -> Action:
64
+ return subparser.add_argument("--coordinate", type=int, nargs=2, required=required)
65
+
66
+
67
+ def _add_start_coordinate(subparser: ArgumentParser) -> Action:
68
+ return subparser.add_argument(
69
+ "--start_coordinate", type=int, nargs=2, required=True
70
+ )
71
+
72
+
73
+ def _add_duration(subparser: ArgumentParser) -> Action:
74
+ return subparser.add_argument("--duration", type=int, required=True)
75
+
76
+
77
+ def _add_text(subparser: ArgumentParser, required: bool = True) -> Action:
78
+ return subparser.add_argument("--text", type=str, required=required)
@@ -0,0 +1,20 @@
1
+ from typing import Literal
2
+
3
+ Action = Literal[
4
+ "key",
5
+ "hold_key",
6
+ "type",
7
+ "cursor_position",
8
+ "mouse_move",
9
+ "left_mouse_down",
10
+ "left_mouse_up",
11
+ "left_click",
12
+ "left_click_drag",
13
+ "right_click",
14
+ "middle_click",
15
+ "double_click",
16
+ "triple_click",
17
+ "scroll",
18
+ "wait",
19
+ "screenshot",
20
+ ]