cua-agent 0.1.6__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (42) hide show
  1. agent/__init__.py +3 -2
  2. agent/core/__init__.py +0 -5
  3. agent/core/computer_agent.py +21 -28
  4. agent/core/loop.py +78 -124
  5. agent/core/messages.py +279 -125
  6. agent/core/types.py +35 -0
  7. agent/core/visualization.py +197 -0
  8. agent/providers/anthropic/api/client.py +142 -1
  9. agent/providers/anthropic/api_handler.py +140 -0
  10. agent/providers/anthropic/callbacks/__init__.py +5 -0
  11. agent/providers/anthropic/loop.py +206 -220
  12. agent/providers/anthropic/response_handler.py +229 -0
  13. agent/providers/anthropic/tools/bash.py +0 -97
  14. agent/providers/anthropic/utils.py +370 -0
  15. agent/providers/omni/__init__.py +1 -20
  16. agent/providers/omni/api_handler.py +42 -0
  17. agent/providers/omni/clients/anthropic.py +4 -0
  18. agent/providers/omni/image_utils.py +0 -72
  19. agent/providers/omni/loop.py +490 -606
  20. agent/providers/omni/parser.py +58 -4
  21. agent/providers/omni/tools/__init__.py +25 -7
  22. agent/providers/omni/tools/base.py +29 -0
  23. agent/providers/omni/tools/bash.py +43 -38
  24. agent/providers/omni/tools/computer.py +144 -182
  25. agent/providers/omni/tools/manager.py +25 -45
  26. agent/providers/omni/types.py +0 -4
  27. agent/providers/omni/utils.py +224 -145
  28. {cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/METADATA +6 -36
  29. cua_agent-0.1.17.dist-info/RECORD +63 -0
  30. agent/providers/omni/callbacks.py +0 -78
  31. agent/providers/omni/clients/groq.py +0 -101
  32. agent/providers/omni/experiment.py +0 -276
  33. agent/providers/omni/messages.py +0 -171
  34. agent/providers/omni/tool_manager.py +0 -91
  35. agent/providers/omni/visualization.py +0 -130
  36. agent/types/__init__.py +0 -23
  37. agent/types/base.py +0 -41
  38. agent/types/messages.py +0 -36
  39. cua_agent-0.1.6.dist-info/RECORD +0 -64
  40. /agent/{types → core}/tools.py +0 -0
  41. {cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/WHEEL +0 -0
  42. {cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,229 @@
1
+ """Response and tool handling for Anthropic provider."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional, Tuple, cast
5
+
6
+ from anthropic.types.beta import (
7
+ BetaMessage,
8
+ BetaMessageParam,
9
+ BetaTextBlock,
10
+ BetaTextBlockParam,
11
+ BetaToolUseBlockParam,
12
+ BetaContentBlockParam,
13
+ )
14
+
15
+ from .tools import ToolResult
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class AnthropicResponseHandler:
    """Handles Anthropic API responses and tool execution results."""

    def __init__(self, loop):
        """Initialize the response handler.

        Args:
            loop: Reference to the parent loop instance that provides context.
                This class reads ``loop.callback_manager`` and
                ``loop.tool_manager`` from it.
        """
        self.loop = loop

    @staticmethod
    def _tool_use_ids(blocks: List[Any]) -> set:
        """Return the IDs of every ``tool_use`` dict block in *blocks*.

        Non-dict entries and ``tool_use`` blocks without an ``"id"`` key are
        ignored.
        """
        return {
            block["id"]
            for block in blocks
            if isinstance(block, dict) and block.get("type") == "tool_use" and "id" in block
        }

    async def handle_response(
        self, response: "BetaMessage", messages: List[Dict[str, Any]]
    ) -> Tuple[List[Dict[str, Any]], bool]:
        """Handle the Anthropic API response.

        Converts the response into an assistant message, notifies the callback
        manager of each content block, executes every ``tool_use`` block via
        the tool manager, and bundles the tool results into a user message.

        Args:
            response: API response
            messages: List of messages for context

        Returns:
            Tuple containing:
                - List of new messages to be added
                - Boolean indicating if the loop should continue

            Any exception raised while processing is logged and reported as an
            extra assistant message of the form ``"Error: ..."``; the boolean
            is then ``False``.
        """
        new_messages: List[Dict[str, Any]] = []
        try:
            # Convert response to parameter format
            response_params = self.response_to_params(response)

            # tool_use IDs already present in earlier assistant messages
            known_tool_use_ids = self._tool_use_ids(
                [
                    block
                    for msg in messages
                    if msg.get("role") == "assistant" and isinstance(msg.get("content"), list)
                    for block in msg["content"]
                ]
            )

            # The current response becomes part of the conversation too, so its
            # tool_use IDs are valid targets for tool results.
            current_tool_use_ids = self._tool_use_ids(response_params)
            known_tool_use_ids |= current_tool_use_ids

            logger.info("Existing tool_use IDs in conversation: %s", known_tool_use_ids)
            logger.info("New tool_use IDs in current response: %s", current_tool_use_ids)

            # Create assistant message
            new_messages.append({"role": "assistant", "content": response_params})

            callback_manager = self.loop.callback_manager
            if callback_manager is None:
                raise RuntimeError(
                    "Callback manager not initialized. Call initialize_client() first."
                )

            # Handle tool use blocks and collect results
            tool_result_content = []
            for content_block in response_params:
                # Notify callback of content
                callback_manager.on_content(cast("BetaContentBlockParam", content_block))

                if content_block.get("type") != "tool_use":
                    continue

                if self.loop.tool_manager is None:
                    raise RuntimeError(
                        "Tool manager not initialized. Call initialize_client() first."
                    )

                # Validate the ID *before* running the tool: a result that
                # cannot be attached to a tool_use block must not cost a tool
                # execution (and its side effects).
                tool_use_id = content_block.get("id")
                if tool_use_id not in known_tool_use_ids:
                    logger.warning(
                        "Tool use ID %s not found in previous messages. Skipping tool result.",
                        tool_use_id,
                    )
                    continue

                # Execute the tool
                result = cast(
                    "ToolResult",
                    await self.loop.tool_manager.execute_tool(
                        name=content_block["name"],
                        tool_input=cast(Dict[str, Any], content_block["input"]),
                    ),
                )

                # Create tool result and add to content
                tool_result_content.append(self.make_tool_result(result, tool_use_id))

                # Notify callback of tool result
                callback_manager.on_tool_result(result, tool_use_id)

            # If no tool results, we're done
            if not tool_result_content:
                # Signal completion
                callback_manager.on_content({"type": "text", "text": "<DONE>"})
                return new_messages, False

            # Add tool results as user message
            new_messages.append({"content": tool_result_content, "role": "user"})
            return new_messages, True

        except Exception as e:
            # logger.exception keeps the traceback, which a plain error() call drops.
            logger.exception("Error handling response: %s", e)
            new_messages.append(
                {
                    "role": "assistant",
                    "content": f"Error: {str(e)}",
                }
            )
            return new_messages, False

    def response_to_params(
        self,
        response: "BetaMessage",
    ) -> List[Dict[str, Any]]:
        """Convert API response to message parameters.

        Text blocks are reduced to ``{"type": "text", "text": ...}``; every
        other block is serialized with its ``model_dump()``.

        Args:
            response: API response message

        Returns:
            List of content blocks
        """
        return [
            {"type": "text", "text": block.text}
            if isinstance(block, BetaTextBlock)
            else cast(Dict[str, Any], block.model_dump())
            for block in response.content
        ]

    def make_tool_result(self, result: "ToolResult", tool_use_id: str) -> Dict[str, Any]:
        """Convert a tool result to API format.

        If ``result.content`` is truthy it is passed through unchanged.
        Otherwise the content is built from ``result.error`` alone when an
        error is set, or from ``result.output`` and ``result.base64_image``
        (in that order) when it is not.

        Args:
            result: Tool execution result
            tool_use_id: ID of the tool use

        Returns:
            Formatted tool result
        """
        is_error = bool(result.error)

        if result.content:
            return {
                "type": "tool_result",
                "content": result.content,
                "tool_use_id": tool_use_id,
                "is_error": is_error,
            }

        tool_result_content: List[Dict[str, Any]] = []
        if result.error:
            tool_result_content.append(
                {
                    "type": "text",
                    "text": self.maybe_prepend_system_tool_result(result, result.error),
                }
            )
        else:
            if result.output:
                tool_result_content.append(
                    {
                        "type": "text",
                        "text": self.maybe_prepend_system_tool_result(result, result.output),
                    }
                )
            if result.base64_image:
                tool_result_content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": result.base64_image,
                        },
                    }
                )

        return {
            "type": "tool_result",
            "content": tool_result_content,
            "tool_use_id": tool_use_id,
            "is_error": is_error,
        }

    def maybe_prepend_system_tool_result(self, result: "ToolResult", result_text: str) -> str:
        """Prepend system information to tool result if available.

        Args:
            result: Tool execution result
            result_text: Text to prepend to

        Returns:
            ``result_text`` preceded by ``<s>{result.system}</s>`` and a
            newline when ``result.system`` is truthy, otherwise ``result_text``
            unchanged.
        """
        if result.system:
            result_text = f"<s>{result.system}</s>\n{result_text}"
        return result_text
@@ -7,102 +7,6 @@ from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
7
7
  from ....core.tools.bash import BaseBashTool
8
8
 
9
9
 
10
class _BashSession:
    """One bash subprocess that commands are written to and read back from."""

    _started: bool
    _process: asyncio.subprocess.Process

    command: str = "/bin/bash"
    _output_delay: float = 0.2  # seconds
    _timeout: float = 120.0  # seconds
    _sentinel: str = "<<exit>>"

    def __init__(self):
        self._timed_out = False
        self._started = False

    async def start(self):
        """Spawn the shell process unless the session is already started."""
        if not self._started:
            pipe = asyncio.subprocess.PIPE
            self._process = await asyncio.create_subprocess_shell(
                self.command,
                preexec_fn=os.setsid,
                shell=True,
                bufsize=0,
                stdin=pipe,
                stdout=pipe,
                stderr=pipe,
            )
            self._started = True

    def stop(self):
        """Terminate the bash shell.

        Raises:
            ToolError: If the session was never started.
        """
        if not self._started:
            raise ToolError("Session has not started.")
        # Only a process without a return code still needs terminating.
        if self._process.returncode is None:
            self._process.terminate()

    async def run(self, command: str):
        """Execute a command in the bash shell.

        Returns a ``ToolResult`` describing the exit when the shell has
        already exited, otherwise a ``CLIResult`` with the captured stdout
        (up to the sentinel) and stderr.

        Raises:
            ToolError: If the session was never started, or if this or a
                previous command exceeded the timeout.
        """
        if not self._started:
            raise ToolError("Session has not started.")

        exit_code = self._process.returncode
        if exit_code is not None:
            return ToolResult(
                system="tool must be restarted",
                error=f"bash has exited with returncode {exit_code}",
            )

        timeout_message = (
            f"timed out: bash has not returned in {self._timeout} seconds and must be restarted"
        )
        if self._timed_out:
            raise ToolError(timeout_message)

        # we know these are not None because we created the process with PIPEs
        stdin = self._process.stdin
        stdout = self._process.stdout
        stderr = self._process.stderr
        assert stdin
        assert stdout
        assert stderr

        # send the command followed by an echo of the sentinel, so the end of
        # its output can be recognised
        payload = command.encode() + f"; echo '{self._sentinel}'\n".encode()
        stdin.write(payload)
        await stdin.drain()

        # poll the output until the sentinel shows up
        try:
            async with asyncio.timeout(self._timeout):
                while True:
                    await asyncio.sleep(self._output_delay)
                    # if we read directly from stdout/stderr, it will wait forever for
                    # EOF. use the StreamReader buffer directly instead.
                    captured = stdout._buffer.decode()  # pyright: ignore[reportAttributeAccessIssue]
                    cut = captured.find(self._sentinel)
                    if cut != -1:
                        # keep only what precedes the sentinel
                        output = captured[:cut]
                        break
        except asyncio.TimeoutError:
            self._timed_out = True
            raise ToolError(timeout_message) from None

        # drop a single trailing newline from each stream
        output = output.removesuffix("\n")
        error = stderr._buffer.decode().removesuffix("\n")  # pyright: ignore[reportAttributeAccessIssue]

        # clear the buffers so that the next output can be read correctly
        stdout._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
        stderr._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]

        return CLIResult(output=output, error=error)
104
-
105
-
106
10
  class BashTool(BaseBashTool, BaseAnthropicTool):
107
11
  """
108
12
  A tool that allows the agent to run bash commands.
@@ -124,7 +28,6 @@ class BashTool(BaseBashTool, BaseAnthropicTool):
124
28
  # Then initialize the Anthropic tool
125
29
  BaseAnthropicTool.__init__(self)
126
30
  # Initialize bash session
127
- self._session = _BashSession()
128
31
 
129
32
  async def __call__(self, command: str | None = None, restart: bool = False, **kwargs):
130
33
  """Execute a bash command.