cua-agent 0.1.5__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +3 -4
- agent/core/__init__.py +3 -10
- agent/core/computer_agent.py +207 -32
- agent/core/experiment.py +20 -3
- agent/core/loop.py +78 -120
- agent/core/messages.py +279 -125
- agent/core/telemetry.py +44 -32
- agent/core/types.py +35 -0
- agent/core/visualization.py +197 -0
- agent/providers/anthropic/api/client.py +142 -1
- agent/providers/anthropic/api_handler.py +140 -0
- agent/providers/anthropic/callbacks/__init__.py +5 -0
- agent/providers/anthropic/loop.py +224 -209
- agent/providers/anthropic/messages/manager.py +3 -1
- agent/providers/anthropic/response_handler.py +229 -0
- agent/providers/anthropic/tools/base.py +1 -1
- agent/providers/anthropic/tools/bash.py +0 -97
- agent/providers/anthropic/tools/collection.py +2 -2
- agent/providers/anthropic/tools/computer.py +34 -24
- agent/providers/anthropic/tools/manager.py +2 -2
- agent/providers/anthropic/utils.py +370 -0
- agent/providers/omni/__init__.py +1 -20
- agent/providers/omni/api_handler.py +42 -0
- agent/providers/omni/clients/anthropic.py +4 -0
- agent/providers/omni/image_utils.py +0 -72
- agent/providers/omni/loop.py +497 -607
- agent/providers/omni/parser.py +60 -5
- agent/providers/omni/tools/__init__.py +25 -8
- agent/providers/omni/tools/base.py +29 -0
- agent/providers/omni/tools/bash.py +43 -38
- agent/providers/omni/tools/computer.py +144 -181
- agent/providers/omni/tools/manager.py +26 -48
- agent/providers/omni/types.py +0 -4
- agent/providers/omni/utils.py +225 -144
- {cua_agent-0.1.5.dist-info → cua_agent-0.1.17.dist-info}/METADATA +6 -36
- cua_agent-0.1.17.dist-info/RECORD +63 -0
- agent/core/agent.py +0 -252
- agent/core/base_agent.py +0 -164
- agent/core/factory.py +0 -102
- agent/providers/omni/callbacks.py +0 -78
- agent/providers/omni/clients/groq.py +0 -101
- agent/providers/omni/experiment.py +0 -273
- agent/providers/omni/messages.py +0 -171
- agent/providers/omni/tool_manager.py +0 -91
- agent/providers/omni/visualization.py +0 -130
- agent/types/__init__.py +0 -26
- agent/types/base.py +0 -53
- agent/types/messages.py +0 -36
- cua_agent-0.1.5.dist-info/RECORD +0 -67
- /agent/{types → core}/tools.py +0 -0
- {cua_agent-0.1.5.dist-info → cua_agent-0.1.17.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.5.dist-info → cua_agent-0.1.17.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""Response and tool handling for Anthropic provider."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple, cast
|
|
5
|
+
|
|
6
|
+
from anthropic.types.beta import (
|
|
7
|
+
BetaMessage,
|
|
8
|
+
BetaMessageParam,
|
|
9
|
+
BetaTextBlock,
|
|
10
|
+
BetaTextBlockParam,
|
|
11
|
+
BetaToolUseBlockParam,
|
|
12
|
+
BetaContentBlockParam,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from .tools import ToolResult
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AnthropicResponseHandler:
    """Handles Anthropic API responses and tool execution results.

    The handler turns a ``BetaMessage`` into plain content-block dicts, runs
    every ``tool_use`` block through the parent loop's tool manager, and
    packages the outcomes as ``tool_result`` blocks for the next request.
    """

    def __init__(self, loop):
        """Initialize the response handler.

        Args:
            loop: Reference to the parent loop instance that provides context.
                ``handle_response`` reads ``loop.callback_manager`` and
                ``loop.tool_manager`` from it.
        """
        self.loop = loop

    @staticmethod
    def _collect_tool_use_ids(
        messages: List[Dict[str, Any]], response_params: List[Dict[str, Any]]
    ) -> Tuple[set, set]:
        """Gather the ``tool_use`` IDs known to the conversation.

        Args:
            messages: Conversation history; only assistant messages whose
                content is a list of blocks are inspected.
            response_params: Content blocks of the response being handled.

        Returns:
            Tuple ``(known_ids, current_ids)``: ``current_ids`` holds the IDs
            found in ``response_params``; ``known_ids`` is those plus the IDs
            found in earlier assistant messages.
        """

        def ids_in(blocks) -> set:
            return {
                block["id"]
                for block in blocks
                if isinstance(block, dict)
                and block.get("type") == "tool_use"
                and "id" in block
            }

        current_ids = ids_in(response_params)
        known_ids = set(current_ids)
        for msg in messages:
            content = msg.get("content")
            if msg.get("role") == "assistant" and isinstance(content, list):
                known_ids |= ids_in(content)
        return known_ids, current_ids

    async def handle_response(
        self, response: BetaMessage, messages: List[Dict[str, Any]]
    ) -> Tuple[List[Dict[str, Any]], bool]:
        """Handle the Anthropic API response.

        Every content block is reported to the callback manager. Each
        ``tool_use`` block is executed through the tool manager and its
        outcome is collected as a ``tool_result`` block.

        Args:
            response: API response
            messages: List of messages for context

        Returns:
            Tuple containing:
            - List of new messages to be added
            - Boolean indicating if the loop should continue

            Any exception raised while processing is logged and reported as an
            extra assistant message ``"Error: ..."``; the boolean is then
            ``False``.
        """
        new_messages: List[Dict[str, Any]] = []
        try:
            # Convert response to parameter format
            response_params = self.response_to_params(response)

            existing_tool_use_ids, current_tool_use_ids = self._collect_tool_use_ids(
                messages, response_params
            )
            logger.info("Existing tool_use IDs in conversation: %s", existing_tool_use_ids)
            logger.info("New tool_use IDs in current response: %s", current_tool_use_ids)

            # Create assistant message
            new_messages.append({"role": "assistant", "content": response_params})

            callback_manager = self.loop.callback_manager
            if callback_manager is None:
                raise RuntimeError(
                    "Callback manager not initialized. Call initialize_client() first."
                )

            # Handle tool use blocks and collect results
            tool_result_content = []
            for content_block in response_params:
                # Notify callback of content
                callback_manager.on_content(cast(BetaContentBlockParam, content_block))

                if content_block.get("type") != "tool_use":
                    continue

                tool_manager = self.loop.tool_manager
                if tool_manager is None:
                    raise RuntimeError(
                        "Tool manager not initialized. Call initialize_client() first."
                    )

                # Validate the ID *before* running the tool: a result that
                # cannot be tied to a tool_use block can never be reported, so
                # executing the action first would only produce side effects.
                tool_use_id = content_block.get("id")
                if tool_use_id not in existing_tool_use_ids:
                    logger.warning(
                        "Tool use ID %s not found in previous messages. Skipping tool result.",
                        tool_use_id,
                    )
                    continue

                # Execute the tool
                result = cast(
                    ToolResult,
                    await tool_manager.execute_tool(
                        name=content_block["name"],
                        tool_input=cast(Dict[str, Any], content_block["input"]),
                    ),
                )

                # Create tool result and add to content
                tool_result_content.append(self.make_tool_result(result, tool_use_id))

                # Notify callback of tool result
                callback_manager.on_tool_result(result, tool_use_id)

            # If no tool results, we're done
            if not tool_result_content:
                # Signal completion
                callback_manager.on_content({"type": "text", "text": "<DONE>"})
                return new_messages, False

            # Add tool results as user message
            new_messages.append({"content": tool_result_content, "role": "user"})
            return new_messages, True

        except Exception as e:
            # Top-level boundary for one turn: keep the traceback in the log
            # and surface the failure to the caller as a message.
            logger.exception("Error handling response: %s", e)
            new_messages.append(
                {
                    "role": "assistant",
                    "content": f"Error: {str(e)}",
                }
            )
            return new_messages, False

    def response_to_params(
        self,
        response: BetaMessage,
    ) -> List[Dict[str, Any]]:
        """Convert API response to message parameters.

        Text blocks are reduced to ``{"type": "text", "text": ...}``; every
        other block is serialized with its ``model_dump()``.

        Args:
            response: API response message

        Returns:
            List of content blocks
        """
        return [
            {"type": "text", "text": block.text}
            if isinstance(block, BetaTextBlock)
            else cast(Dict[str, Any], block.model_dump())
            for block in response.content
        ]

    def make_tool_result(self, result: ToolResult, tool_use_id: str) -> Dict[str, Any]:
        """Convert a tool result to API format.

        Args:
            result: Tool execution result
            tool_use_id: ID of the tool use

        Returns:
            Formatted tool result: a ``tool_result`` dict with ``content``,
            ``tool_use_id`` and ``is_error`` keys.
        """
        # Pre-built content is forwarded untouched.
        if result.content:
            return {
                "type": "tool_result",
                "content": result.content,
                "tool_use_id": tool_use_id,
                "is_error": bool(result.error),
            }

        tool_result_content: List[Dict[str, Any]] = []
        is_error = bool(result.error)

        if is_error:
            # On failure only the error text is reported; output and
            # screenshot are left out.
            tool_result_content.append(
                {
                    "type": "text",
                    "text": self.maybe_prepend_system_tool_result(result, result.error),
                }
            )
        else:
            if result.output:
                tool_result_content.append(
                    {
                        "type": "text",
                        "text": self.maybe_prepend_system_tool_result(result, result.output),
                    }
                )
            if result.base64_image:
                tool_result_content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": result.base64_image,
                        },
                    }
                )

        return {
            "type": "tool_result",
            "content": tool_result_content,
            "tool_use_id": tool_use_id,
            "is_error": is_error,
        }

    def maybe_prepend_system_tool_result(self, result: ToolResult, result_text: str) -> str:
        """Prepend system information to tool result if available.

        Args:
            result: Tool execution result
            result_text: Text to prepend to

        Returns:
            ``result_text`` preceded by ``<s>{result.system}</s>`` and a
            newline when ``result.system`` is truthy, otherwise
            ``result_text`` unchanged.
        """
        if result.system:
            result_text = f"<s>{result.system}</s>\n{result_text}"
        return result_text
|
|
@@ -6,7 +6,7 @@ from typing import Any, Dict
|
|
|
6
6
|
|
|
7
7
|
from anthropic.types.beta import BetaToolUnionParam
|
|
8
8
|
|
|
9
|
-
from ....core.tools.base import BaseTool
|
|
9
|
+
from ....core.tools.base import BaseTool
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class BaseAnthropicTool(BaseTool, metaclass=ABCMeta):
|
|
@@ -7,102 +7,6 @@ from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
|
|
|
7
7
|
from ....core.tools.bash import BaseBashTool
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
class _BashSession:
    """A session of a bash shell.

    Wraps one long-lived shell subprocess. Commands are written to its stdin
    followed by an ``echo`` of a sentinel string, and output is collected by
    polling the stdout buffer until that sentinel shows up.
    """

    _started: bool
    _process: asyncio.subprocess.Process

    command: str = "/bin/bash"
    _output_delay: float = 0.2  # seconds
    _timeout: float = 120.0  # seconds
    _sentinel: str = "<<exit>>"

    def __init__(self):
        """Create a session that has not been started yet."""
        self._started = False
        # Set to True by run() on timeout; nothing in this class resets it.
        self._timed_out = False

    async def start(self):
        """Spawn the shell subprocess; does nothing if already started."""
        if self._started:
            return

        # preexec_fn=os.setsid calls setsid() in the child before the shell
        # starts. All three standard streams are pipes so run() can drive them.
        self._process = await asyncio.create_subprocess_shell(
            self.command,
            preexec_fn=os.setsid,
            shell=True,
            bufsize=0,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        self._started = True

    def stop(self):
        """Terminate the bash shell.

        Raises:
            ToolError: If the session has not been started.
        """
        if not self._started:
            raise ToolError("Session has not started.")
        # Process already exited: nothing to terminate.
        if self._process.returncode is not None:
            return
        self._process.terminate()

    async def run(self, command: str):
        """Execute a command in the bash shell.

        Args:
            command: Shell command text; it is written to the shell's stdin
                followed by ``; echo '<sentinel>'``.

        Returns:
            ``CLIResult`` with the captured stdout (up to the sentinel) and
            stderr, each with one trailing newline removed. If the shell has
            already exited, a ``ToolResult`` carrying an error and the system
            note "tool must be restarted" is returned instead.

        Raises:
            ToolError: If the session has not been started, if a previous
                call timed out, or if the sentinel does not appear within
                ``_timeout`` seconds.
        """
        if not self._started:
            raise ToolError("Session has not started.")
        if self._process.returncode is not None:
            return ToolResult(
                system="tool must be restarted",
                error=f"bash has exited with returncode {self._process.returncode}",
            )
        # A timed-out session is refused from here on.
        if self._timed_out:
            raise ToolError(
                f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
            )

        # we know these are not None because we created the process with PIPEs
        assert self._process.stdin
        assert self._process.stdout
        assert self._process.stderr

        # send command to the process
        self._process.stdin.write(command.encode() + f"; echo '{self._sentinel}'\n".encode())
        await self._process.stdin.drain()

        # read output from the process, until the sentinel is found
        try:
            async with asyncio.timeout(self._timeout):
                while True:
                    # Poll interval between looks at the buffer.
                    await asyncio.sleep(self._output_delay)
                    # if we read directly from stdout/stderr, it will wait forever for
                    # EOF. use the StreamReader buffer directly instead.
                    output = (
                        self._process.stdout._buffer.decode()
                    )  # pyright: ignore[reportAttributeAccessIssue]
                    if self._sentinel in output:
                        # strip the sentinel and break
                        output = output[: output.index(self._sentinel)]
                        break
        except asyncio.TimeoutError:
            self._timed_out = True
            raise ToolError(
                f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
            ) from None

        if output.endswith("\n"):
            output = output[:-1]

        # stderr is read once, after the sentinel was seen on stdout.
        error = self._process.stderr._buffer.decode()  # pyright: ignore[reportAttributeAccessIssue]
        if error.endswith("\n"):
            error = error[:-1]

        # clear the buffers so that the next output can be read correctly
        self._process.stdout._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
        self._process.stderr._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]

        return CLIResult(output=output, error=error)
|
|
104
|
-
|
|
105
|
-
|
|
106
10
|
class BashTool(BaseBashTool, BaseAnthropicTool):
|
|
107
11
|
"""
|
|
108
12
|
A tool that allows the agent to run bash commands.
|
|
@@ -124,7 +28,6 @@ class BashTool(BaseBashTool, BaseAnthropicTool):
|
|
|
124
28
|
# Then initialize the Anthropic tool
|
|
125
29
|
BaseAnthropicTool.__init__(self)
|
|
126
30
|
# Initialize bash session
|
|
127
|
-
self._session = _BashSession()
|
|
128
31
|
|
|
129
32
|
async def __call__(self, command: str | None = None, restart: bool = False, **kwargs):
|
|
130
33
|
"""Execute a bash command.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Collection classes for managing multiple tools."""
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any, cast
|
|
4
4
|
|
|
5
5
|
from anthropic.types.beta import BetaToolUnionParam
|
|
6
6
|
|
|
@@ -22,7 +22,7 @@ class ToolCollection:
|
|
|
22
22
|
def to_params(
|
|
23
23
|
self,
|
|
24
24
|
) -> list[BetaToolUnionParam]:
|
|
25
|
-
return [tool.to_params() for tool in self.tools]
|
|
25
|
+
return cast(list[BetaToolUnionParam], [tool.to_params() for tool in self.tools])
|
|
26
26
|
|
|
27
27
|
async def run(self, *, name: str, tool_input: dict[str, Any]) -> ToolResult:
|
|
28
28
|
tool = self.tool_map.get(name)
|
|
@@ -61,9 +61,9 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
61
61
|
|
|
62
62
|
name: Literal["computer"] = "computer"
|
|
63
63
|
api_type: Literal["computer_20250124"] = "computer_20250124"
|
|
64
|
-
width: int | None
|
|
65
|
-
height: int | None
|
|
66
|
-
display_num: int | None
|
|
64
|
+
width: int | None = None
|
|
65
|
+
height: int | None = None
|
|
66
|
+
display_num: int | None = None
|
|
67
67
|
computer: Computer # The CUA Computer instance
|
|
68
68
|
logger = logging.getLogger(__name__)
|
|
69
69
|
|
|
@@ -106,6 +106,7 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
106
106
|
display_size = await self.computer.interface.get_screen_size()
|
|
107
107
|
self.width = display_size["width"]
|
|
108
108
|
self.height = display_size["height"]
|
|
109
|
+
assert isinstance(self.width, int) and isinstance(self.height, int)
|
|
109
110
|
self.logger.info(f"Initialized screen dimensions to {self.width}x{self.height}")
|
|
110
111
|
|
|
111
112
|
async def __call__(
|
|
@@ -120,6 +121,8 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
120
121
|
# Ensure dimensions are initialized
|
|
121
122
|
if self.width is None or self.height is None:
|
|
122
123
|
await self.initialize_dimensions()
|
|
124
|
+
if self.width is None or self.height is None:
|
|
125
|
+
raise ToolError("Failed to initialize screen dimensions")
|
|
123
126
|
except Exception as e:
|
|
124
127
|
raise ToolError(f"Failed to initialize dimensions: {e}")
|
|
125
128
|
|
|
@@ -147,7 +150,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
147
150
|
self.logger.info(
|
|
148
151
|
f"Scaling image from {pre_img.size} to {self.width}x{self.height} to match screen dimensions"
|
|
149
152
|
)
|
|
150
|
-
|
|
153
|
+
if not isinstance(self.width, int) or not isinstance(self.height, int):
|
|
154
|
+
raise ToolError("Screen dimensions must be integers")
|
|
155
|
+
size = (int(self.width), int(self.height))
|
|
156
|
+
pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
|
|
151
157
|
|
|
152
158
|
self.logger.info(f" Current dimensions: {pre_img.width}x{pre_img.height}")
|
|
153
159
|
|
|
@@ -160,15 +166,7 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
160
166
|
await self.computer.interface.move_cursor(x, y)
|
|
161
167
|
# Then perform drag operation - check if drag_to exists or we need to use other methods
|
|
162
168
|
try:
|
|
163
|
-
|
|
164
|
-
await self.computer.interface.drag_to(x, y)
|
|
165
|
-
else:
|
|
166
|
-
# Alternative approach: press mouse down, move, release
|
|
167
|
-
await self.computer.interface.mouse_down()
|
|
168
|
-
await asyncio.sleep(0.2)
|
|
169
|
-
await self.computer.interface.move_cursor(x, y)
|
|
170
|
-
await asyncio.sleep(0.2)
|
|
171
|
-
await self.computer.interface.mouse_up()
|
|
169
|
+
await self.computer.interface.drag_to(x, y)
|
|
172
170
|
except Exception as e:
|
|
173
171
|
self.logger.error(f"Error during drag operation: {str(e)}")
|
|
174
172
|
raise ToolError(f"Failed to perform drag: {str(e)}")
|
|
@@ -214,9 +212,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
214
212
|
self.logger.info(
|
|
215
213
|
f"Scaling image from {pre_img.size} to {self.width}x{self.height} to match screen dimensions"
|
|
216
214
|
)
|
|
217
|
-
|
|
218
|
-
(
|
|
219
|
-
)
|
|
215
|
+
if not isinstance(self.width, int) or not isinstance(self.height, int):
|
|
216
|
+
raise ToolError("Screen dimensions must be integers")
|
|
217
|
+
size = (int(self.width), int(self.height))
|
|
218
|
+
pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
|
|
220
219
|
# Save the scaled image back to bytes
|
|
221
220
|
buffer = io.BytesIO()
|
|
222
221
|
pre_img.save(buffer, format="PNG")
|
|
@@ -275,9 +274,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
275
274
|
self.logger.info(
|
|
276
275
|
f"Scaling image from {pre_img.size} to {self.width}x{self.height}"
|
|
277
276
|
)
|
|
278
|
-
|
|
279
|
-
(
|
|
280
|
-
)
|
|
277
|
+
if not isinstance(self.width, int) or not isinstance(self.height, int):
|
|
278
|
+
raise ToolError("Screen dimensions must be integers")
|
|
279
|
+
size = (int(self.width), int(self.height))
|
|
280
|
+
pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
|
|
281
281
|
|
|
282
282
|
# Perform the click action
|
|
283
283
|
if action == "left_click":
|
|
@@ -335,7 +335,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
335
335
|
self.logger.info(
|
|
336
336
|
f"Scaling image from {pre_img.size} to {self.width}x{self.height}"
|
|
337
337
|
)
|
|
338
|
-
|
|
338
|
+
if not isinstance(self.width, int) or not isinstance(self.height, int):
|
|
339
|
+
raise ToolError("Screen dimensions must be integers")
|
|
340
|
+
size = (int(self.width), int(self.height))
|
|
341
|
+
pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
|
|
339
342
|
|
|
340
343
|
if action == "key":
|
|
341
344
|
# Special handling for page up/down on macOS
|
|
@@ -365,7 +368,7 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
365
368
|
# Handle single key press
|
|
366
369
|
self.logger.info(f"Pressing key: {text}")
|
|
367
370
|
try:
|
|
368
|
-
await self.computer.interface.
|
|
371
|
+
await self.computer.interface.press_key(text)
|
|
369
372
|
output_text = text
|
|
370
373
|
except ValueError as e:
|
|
371
374
|
raise ToolError(f"Invalid key: {text}. {str(e)}")
|
|
@@ -442,7 +445,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
442
445
|
self.logger.info(
|
|
443
446
|
f"Scaling image from {img.size} to {self.width}x{self.height}"
|
|
444
447
|
)
|
|
445
|
-
|
|
448
|
+
if not isinstance(self.width, int) or not isinstance(self.height, int):
|
|
449
|
+
raise ToolError("Screen dimensions must be integers")
|
|
450
|
+
size = (int(self.width), int(self.height))
|
|
451
|
+
img = img.resize(size, Image.Resampling.LANCZOS)
|
|
446
452
|
buffer = io.BytesIO()
|
|
447
453
|
img.save(buffer, format="PNG")
|
|
448
454
|
screenshot = buffer.getvalue()
|
|
@@ -451,7 +457,8 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
451
457
|
|
|
452
458
|
elif action == "cursor_position":
|
|
453
459
|
pos = await self.computer.interface.get_cursor_position()
|
|
454
|
-
|
|
460
|
+
x, y = pos # Unpack the tuple
|
|
461
|
+
return ToolResult(output=f"X={int(x)},Y={int(y)}")
|
|
455
462
|
|
|
456
463
|
except Exception as e:
|
|
457
464
|
self.logger.error(f"Error during {action} action: {str(e)}")
|
|
@@ -517,7 +524,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
|
|
|
517
524
|
# Scale image if needed
|
|
518
525
|
if img.size != (self.width, self.height):
|
|
519
526
|
self.logger.info(f"Scaling image from {img.size} to {self.width}x{self.height}")
|
|
520
|
-
|
|
527
|
+
if not isinstance(self.width, int) or not isinstance(self.height, int):
|
|
528
|
+
raise ToolError("Screen dimensions must be integers")
|
|
529
|
+
size = (int(self.width), int(self.height))
|
|
530
|
+
img = img.resize(size, Image.Resampling.LANCZOS)
|
|
521
531
|
buffer = io.BytesIO()
|
|
522
532
|
img.save(buffer, format="PNG")
|
|
523
533
|
screenshot = buffer.getvalue()
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Dict, List
|
|
1
|
+
from typing import Any, Dict, List, cast
|
|
2
2
|
from anthropic.types.beta import BetaToolUnionParam
|
|
3
3
|
from computer.computer import Computer
|
|
4
4
|
|
|
@@ -37,7 +37,7 @@ class ToolManager(BaseToolManager):
|
|
|
37
37
|
"""Get tool parameters for Anthropic API calls."""
|
|
38
38
|
if self.tools is None:
|
|
39
39
|
raise RuntimeError("Tools not initialized. Call initialize() first.")
|
|
40
|
-
return self.tools.to_params()
|
|
40
|
+
return cast(List[BetaToolUnionParam], self.tools.to_params())
|
|
41
41
|
|
|
42
42
|
async def execute_tool(self, name: str, tool_input: dict[str, Any]) -> ToolResult:
|
|
43
43
|
"""Execute a tool with the given input.
|