cua-agent 0.1.6__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (57)
  1. agent/__init__.py +3 -2
  2. agent/core/__init__.py +1 -6
  3. agent/core/{computer_agent.py → agent.py} +31 -76
  4. agent/core/{loop.py → base.py} +68 -127
  5. agent/core/factory.py +104 -0
  6. agent/core/messages.py +279 -125
  7. agent/core/provider_config.py +15 -0
  8. agent/core/types.py +45 -0
  9. agent/core/visualization.py +197 -0
  10. agent/providers/anthropic/api/client.py +142 -1
  11. agent/providers/anthropic/api_handler.py +140 -0
  12. agent/providers/anthropic/callbacks/__init__.py +5 -0
  13. agent/providers/anthropic/loop.py +207 -221
  14. agent/providers/anthropic/response_handler.py +226 -0
  15. agent/providers/anthropic/tools/bash.py +0 -97
  16. agent/providers/anthropic/utils.py +368 -0
  17. agent/providers/omni/__init__.py +1 -20
  18. agent/providers/omni/api_handler.py +42 -0
  19. agent/providers/omni/clients/anthropic.py +4 -0
  20. agent/providers/omni/image_utils.py +0 -72
  21. agent/providers/omni/loop.py +491 -607
  22. agent/providers/omni/parser.py +58 -4
  23. agent/providers/omni/tools/__init__.py +25 -7
  24. agent/providers/omni/tools/base.py +29 -0
  25. agent/providers/omni/tools/bash.py +43 -38
  26. agent/providers/omni/tools/computer.py +144 -182
  27. agent/providers/omni/tools/manager.py +25 -45
  28. agent/providers/omni/types.py +1 -3
  29. agent/providers/omni/utils.py +224 -145
  30. agent/providers/openai/__init__.py +6 -0
  31. agent/providers/openai/api_handler.py +453 -0
  32. agent/providers/openai/loop.py +440 -0
  33. agent/providers/openai/response_handler.py +205 -0
  34. agent/providers/openai/tools/__init__.py +15 -0
  35. agent/providers/openai/tools/base.py +79 -0
  36. agent/providers/openai/tools/computer.py +319 -0
  37. agent/providers/openai/tools/manager.py +106 -0
  38. agent/providers/openai/types.py +36 -0
  39. agent/providers/openai/utils.py +98 -0
  40. cua_agent-0.1.18.dist-info/METADATA +165 -0
  41. cua_agent-0.1.18.dist-info/RECORD +73 -0
  42. agent/README.md +0 -63
  43. agent/providers/anthropic/messages/manager.py +0 -112
  44. agent/providers/omni/callbacks.py +0 -78
  45. agent/providers/omni/clients/groq.py +0 -101
  46. agent/providers/omni/experiment.py +0 -276
  47. agent/providers/omni/messages.py +0 -171
  48. agent/providers/omni/tool_manager.py +0 -91
  49. agent/providers/omni/visualization.py +0 -130
  50. agent/types/__init__.py +0 -23
  51. agent/types/base.py +0 -41
  52. agent/types/messages.py +0 -36
  53. cua_agent-0.1.6.dist-info/METADATA +0 -120
  54. cua_agent-0.1.6.dist-info/RECORD +0 -64
  55. /agent/{types → core}/tools.py +0 -0
  56. {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/WHEEL +0 -0
  57. {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,226 @@
1
+ """Response and tool handling for Anthropic provider."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Tuple, cast
5
+
6
+ from anthropic.types.beta import (
7
+ BetaMessage,
8
+ BetaTextBlock,
9
+ BetaContentBlockParam,
10
+ )
11
+
12
+ from .tools import ToolResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class AnthropicResponseHandler:
    """Handles Anthropic API responses and tool execution results.

    The handler converts a ``BetaMessage`` into plain content-block dicts,
    runs every ``tool_use`` block through the parent loop's tool manager and
    packages the outcomes as ``tool_result`` blocks for the next user turn.
    """

    def __init__(self, loop):
        """Initialize the response handler.

        Args:
            loop: Reference to the parent loop instance that provides context.
                This class reads ``loop.callback_manager`` and
                ``loop.tool_manager`` from it.
        """
        self.loop = loop

    @staticmethod
    def _collect_tool_use_ids(messages: List[Dict[str, Any]]) -> set:
        """Return the IDs of all ``tool_use`` blocks in assistant messages.

        Only messages whose role is ``"assistant"`` and whose content is a
        list are inspected; blocks that are not dicts, are not of type
        ``tool_use`` or carry no ``"id"`` key are ignored.
        """
        return {
            block["id"]
            for msg in messages
            if msg.get("role") == "assistant" and isinstance(msg.get("content"), list)
            for block in msg["content"]
            if isinstance(block, dict) and block.get("type") == "tool_use" and "id" in block
        }

    async def handle_response(
        self, response: BetaMessage, messages: List[Dict[str, Any]]
    ) -> Tuple[List[Dict[str, Any]], bool]:
        """Handle the Anthropic API response.

        Args:
            response: API response
            messages: List of messages for context

        Returns:
            Tuple containing:
            - List of new messages to be added
            - Boolean indicating if the loop should continue

        Any exception raised while processing is logged and reported as a
        trailing assistant message of the form ``"Error: ..."``; the loop is
        then told to stop.
        """
        # Created outside the try block so the error handler below can always
        # append to it.
        new_messages: List[Dict[str, Any]] = []
        try:
            # Convert response to parameter format
            response_params = self.response_to_params(response)

            # tool_use IDs introduced by this response, and the union of those
            # with every ID already present in the conversation.
            current_tool_use_ids = self._collect_tool_use_ids(
                [{"role": "assistant", "content": response_params}]
            )
            existing_tool_use_ids = self._collect_tool_use_ids(messages) | current_tool_use_ids

            logger.info(f"Existing tool_use IDs in conversation: {existing_tool_use_ids}")
            logger.info(f"New tool_use IDs in current response: {current_tool_use_ids}")

            # Create assistant message
            new_messages.append(
                {
                    "role": "assistant",
                    "content": response_params,
                }
            )

            if self.loop.callback_manager is None:
                raise RuntimeError(
                    "Callback manager not initialized. Call initialize_client() first."
                )

            # Handle tool use blocks and collect results
            tool_result_content = []
            for content_block in response_params:
                # Notify callback of content
                self.loop.callback_manager.on_content(cast(BetaContentBlockParam, content_block))

                if content_block.get("type") != "tool_use":
                    continue

                if self.loop.tool_manager is None:
                    raise RuntimeError(
                        "Tool manager not initialized. Call initialize_client() first."
                    )

                # Validate the ID *before* running the tool: a block without a
                # usable ID cannot be answered with a tool_result, so running
                # it would only produce side effects nobody can report back.
                tool_use_id = content_block.get("id")
                if tool_use_id not in existing_tool_use_ids:
                    logger.warning(
                        f"Tool use ID {tool_use_id} not found in previous messages. Skipping tool result."
                    )
                    continue

                # Execute the tool
                result = await self.loop.tool_manager.execute_tool(
                    name=content_block["name"],
                    tool_input=cast(Dict[str, Any], content_block["input"]),
                )

                # Create tool result and add to content
                tool_result_content.append(
                    self.make_tool_result(cast(ToolResult, result), tool_use_id)
                )

                # Notify callback of tool result
                self.loop.callback_manager.on_tool_result(cast(ToolResult, result), tool_use_id)

            # If no tool results, we're done
            if not tool_result_content:
                # Signal completion
                self.loop.callback_manager.on_content({"type": "text", "text": "<DONE>"})
                return new_messages, False

            # Add tool results as user message
            new_messages.append({"content": tool_result_content, "role": "user"})
            return new_messages, True

        except Exception as e:
            # logger.exception keeps the traceback, which the plain message
            # alone does not provide.
            logger.exception("Error handling response: %s", e)
            new_messages.append(
                {
                    "role": "assistant",
                    "content": f"Error: {str(e)}",
                }
            )
            return new_messages, False

    def response_to_params(
        self,
        response: BetaMessage,
    ) -> List[Dict[str, Any]]:
        """Convert API response to message parameters.

        Text blocks are reduced to ``{"type": "text", "text": ...}``; every
        other block is serialised with its ``model_dump()``.

        Args:
            response: API response message

        Returns:
            List of content blocks
        """
        return [
            {"type": "text", "text": block.text}
            if isinstance(block, BetaTextBlock)
            else cast(Dict[str, Any], block.model_dump())
            for block in response.content
        ]

    def make_tool_result(self, result: ToolResult, tool_use_id: str) -> Dict[str, Any]:
        """Convert a tool result to API format.

        If ``result.content`` is set it is passed through unchanged.
        Otherwise the content is built from ``result.error`` alone when an
        error is present, or from ``result.output`` and/or
        ``result.base64_image`` when it is not.

        Args:
            result: Tool execution result
            tool_use_id: ID of the tool use

        Returns:
            Formatted tool result
        """
        if result.content:
            return {
                "type": "tool_result",
                "content": result.content,
                "tool_use_id": tool_use_id,
                "is_error": bool(result.error),
            }

        tool_result_content = []
        is_error = False

        if result.error:
            # An error replaces any output/image the tool may also have set.
            is_error = True
            tool_result_content = [
                {
                    "type": "text",
                    "text": self.maybe_prepend_system_tool_result(result, result.error),
                }
            ]
        else:
            if result.output:
                tool_result_content.append(
                    {
                        "type": "text",
                        "text": self.maybe_prepend_system_tool_result(result, result.output),
                    }
                )
            if result.base64_image:
                tool_result_content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": result.base64_image,
                        },
                    }
                )

        return {
            "type": "tool_result",
            "content": tool_result_content,
            "tool_use_id": tool_use_id,
            "is_error": is_error,
        }

    def maybe_prepend_system_tool_result(self, result: ToolResult, result_text: str) -> str:
        """Prepend system information to tool result if available.

        Args:
            result: Tool execution result
            result_text: Text to prepend to

        Returns:
            Text with system information prepended if available
        """
        if result.system:
            result_text = f"<s>{result.system}</s>\n{result_text}"
        return result_text
@@ -7,102 +7,6 @@ from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult
7
7
  from ....core.tools.bash import BaseBashTool
8
8
 
9
9
 
10
class _BashSession:
    """A bash subprocess that is fed commands over stdin and polled for output."""

    _started: bool
    _process: asyncio.subprocess.Process

    command: str = "/bin/bash"
    _output_delay: float = 0.2  # seconds
    _timeout: float = 120.0  # seconds
    _sentinel: str = "<<exit>>"

    def __init__(self):
        self._started = False
        self._timed_out = False

    async def start(self):
        """Spawn the shell process unless it has already been started."""
        if not self._started:
            pipe = asyncio.subprocess.PIPE
            self._process = await asyncio.create_subprocess_shell(
                self.command,
                preexec_fn=os.setsid,
                shell=True,
                bufsize=0,
                stdin=pipe,
                stdout=pipe,
                stderr=pipe,
            )
            self._started = True

    def stop(self):
        """Terminate the bash shell."""
        if not self._started:
            raise ToolError("Session has not started.")
        # Nothing to terminate once the process has reported a return code.
        if self._process.returncode is None:
            self._process.terminate()

    async def run(self, command: str):
        """Execute a command in the bash shell."""
        if not self._started:
            raise ToolError("Session has not started.")

        proc = self._process
        if proc.returncode is not None:
            return ToolResult(
                system="tool must be restarted",
                error=f"bash has exited with returncode {proc.returncode}",
            )

        timeout_message = (
            f"timed out: bash has not returned in {self._timeout} seconds and must be restarted"
        )
        if self._timed_out:
            raise ToolError(timeout_message)

        # The process was created with PIPEs, so none of these can be None.
        stdin, stdout, stderr = proc.stdin, proc.stdout, proc.stderr
        assert stdin
        assert stdout
        assert stderr

        # Send the command followed by an echo of the sentinel, so the end of
        # its output can be recognised in the stream.
        payload = command.encode() + f"; echo '{self._sentinel}'\n".encode()
        stdin.write(payload)
        await stdin.drain()

        # Poll until the sentinel shows up or the timeout fires.
        try:
            async with asyncio.timeout(self._timeout):
                while True:
                    await asyncio.sleep(self._output_delay)
                    # Reading stdout/stderr directly would wait forever for
                    # EOF, so the StreamReader buffer is inspected instead.
                    buffered = stdout._buffer.decode()  # pyright: ignore[reportAttributeAccessIssue]
                    output, found, _ = buffered.partition(self._sentinel)
                    if found:
                        # Everything before the sentinel is the command output.
                        break
        except asyncio.TimeoutError:
            self._timed_out = True
            raise ToolError(timeout_message) from None

        output = output.removesuffix("\n")

        error = stderr._buffer.decode()  # pyright: ignore[reportAttributeAccessIssue]
        error = error.removesuffix("\n")

        # Clear the buffers so that the next output can be read correctly.
        stdout._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
        stderr._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]

        return CLIResult(output=output, error=error)
104
-
105
-
106
10
  class BashTool(BaseBashTool, BaseAnthropicTool):
107
11
  """
108
12
  A tool that allows the agent to run bash commands.
@@ -124,7 +28,6 @@ class BashTool(BaseBashTool, BaseAnthropicTool):
124
28
  # Then initialize the Anthropic tool
125
29
  BaseAnthropicTool.__init__(self)
126
30
  # Initialize bash session
127
- self._session = _BashSession()
128
31
 
129
32
  async def __call__(self, command: str | None = None, restart: bool = False, **kwargs):
130
33
  """Execute a bash command.