cua-agent 0.2.7__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

agent/core/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  from .factory import BaseLoop
4
4
  from .messages import (
5
- BaseMessageManager,
5
+ StandardMessageManager,
6
6
  ImageRetentionConfig,
7
7
  )
8
8
  from .callbacks import (
@@ -18,7 +18,7 @@ __all__ = [
18
18
  "BaseLoop",
19
19
  "CallbackManager",
20
20
  "CallbackHandler",
21
- "BaseMessageManager",
21
+ "StandardMessageManager",
22
22
  "ImageRetentionConfig",
23
23
  "BaseCallbackManager",
24
24
  "ContentCallback",
agent/core/base.py CHANGED
@@ -5,7 +5,6 @@ import asyncio
5
5
  from abc import ABC, abstractmethod
6
6
  from typing import Any, AsyncGenerator, Dict, List, Optional
7
7
 
8
- from agent.providers.omni.parser import ParseResult
9
8
  from computer import Computer
10
9
  from .messages import StandardMessageManager, ImageRetentionConfig
11
10
  from .types import AgentResponse
@@ -207,7 +206,7 @@ class BaseLoop(ABC):
207
206
  # EVENT HOOKS / CALLBACKS
208
207
  ###########################################
209
208
 
210
- async def handle_screenshot(self, screenshot_base64: str, action_type: str = "", parsed_screen: Optional[ParseResult] = None) -> None:
209
+ async def handle_screenshot(self, screenshot_base64: str, action_type: str = "", parsed_screen: Optional[dict] = None) -> None:
211
210
  """Process a screenshot through callback managers
212
211
 
213
212
  Args:
agent/core/callbacks.py CHANGED
@@ -6,8 +6,6 @@ from abc import ABC, abstractmethod
6
6
  from datetime import datetime
7
7
  from typing import Any, Dict, List, Optional, Protocol
8
8
 
9
- from agent.providers.omni.parser import ParseResult
10
-
11
9
  logger = logging.getLogger(__name__)
12
10
 
13
11
  class ContentCallback(Protocol):
@@ -117,7 +115,7 @@ class CallbackManager:
117
115
  for handler in self.handlers:
118
116
  await handler.on_error(error, **kwargs)
119
117
 
120
- async def on_screenshot(self, screenshot_base64: str, action_type: str = "", parsed_screen: Optional[ParseResult] = None) -> None:
118
+ async def on_screenshot(self, screenshot_base64: str, action_type: str = "", parsed_screen: Optional[dict] = None) -> None:
121
119
  """Called when a screenshot is taken.
122
120
 
123
121
  Args:
@@ -166,7 +164,7 @@ class CallbackHandler(ABC):
166
164
  pass
167
165
 
168
166
  @abstractmethod
169
- async def on_screenshot(self, screenshot_base64: str, action_type: str = "", parsed_screen: Optional[ParseResult] = None) -> None:
167
+ async def on_screenshot(self, screenshot_base64: str, action_type: str = "", parsed_screen: Optional[dict] = None) -> None:
170
168
  """Called when a screenshot is taken.
171
169
 
172
170
  Args:
agent/core/messages.py CHANGED
@@ -5,7 +5,6 @@ import json
5
5
  from typing import Any, Dict, List, Optional, Union, Tuple
6
6
  from dataclasses import dataclass
7
7
  import re
8
- from ..providers.omni.parser import ParseResult
9
8
 
10
9
  logger = logging.getLogger(__name__)
11
10
 
@@ -22,106 +21,6 @@ class ImageRetentionConfig:
22
21
  """Check if image retention is enabled."""
23
22
  return self.num_images_to_keep is not None and self.num_images_to_keep > 0
24
23
 
25
-
26
- class BaseMessageManager:
27
- """Base class for message preparation and management."""
28
-
29
- def __init__(self, image_retention_config: Optional[ImageRetentionConfig] = None):
30
- """Initialize the message manager.
31
-
32
- Args:
33
- image_retention_config: Configuration for image retention
34
- """
35
- self.image_retention_config = image_retention_config or ImageRetentionConfig()
36
- if self.image_retention_config.min_removal_threshold < 1:
37
- raise ValueError("min_removal_threshold must be at least 1")
38
-
39
- # Track provider for message formatting
40
- self.provider = "openai" # Default provider
41
-
42
- def set_provider(self, provider: str) -> None:
43
- """Set the current provider to format messages for.
44
-
45
- Args:
46
- provider: Provider name (e.g., 'openai', 'anthropic')
47
- """
48
- self.provider = provider.lower()
49
-
50
- def prepare_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
51
- """Prepare messages by applying image retention and caching as configured.
52
-
53
- Args:
54
- messages: List of messages to prepare
55
-
56
- Returns:
57
- Prepared messages
58
- """
59
- if self.image_retention_config.should_retain_images():
60
- self._filter_images(messages)
61
- if self.image_retention_config.enable_caching:
62
- self._inject_caching(messages)
63
- return messages
64
-
65
- def _filter_images(self, messages: List[Dict[str, Any]]) -> None:
66
- """Filter messages to retain only the specified number of most recent images.
67
-
68
- Args:
69
- messages: Messages to filter
70
- """
71
- # Find all tool result blocks that contain images
72
- tool_results = [
73
- item
74
- for message in messages
75
- for item in (message["content"] if isinstance(message["content"], list) else [])
76
- if isinstance(item, dict) and item.get("type") == "tool_result"
77
- ]
78
-
79
- # Count total images
80
- total_images = sum(
81
- 1
82
- for result in tool_results
83
- for content in result.get("content", [])
84
- if isinstance(content, dict) and content.get("type") == "image"
85
- )
86
-
87
- # Calculate how many images to remove
88
- images_to_remove = total_images - (self.image_retention_config.num_images_to_keep or 0)
89
- images_to_remove -= images_to_remove % self.image_retention_config.min_removal_threshold
90
-
91
- # Remove oldest images first
92
- for result in tool_results:
93
- if isinstance(result.get("content"), list):
94
- new_content = []
95
- for content in result["content"]:
96
- if isinstance(content, dict) and content.get("type") == "image":
97
- if images_to_remove > 0:
98
- images_to_remove -= 1
99
- continue
100
- new_content.append(content)
101
- result["content"] = new_content
102
-
103
- def _inject_caching(self, messages: List[Dict[str, Any]]) -> None:
104
- """Inject caching control for recent message turns.
105
-
106
- Args:
107
- messages: Messages to inject caching into
108
- """
109
- # Only apply cache_control for Anthropic API, not OpenAI
110
- if self.provider != "anthropic":
111
- return
112
-
113
- # Default to caching last 3 turns
114
- turns_to_cache = 3
115
- for message in reversed(messages):
116
- if message["role"] == "user" and isinstance(content := message["content"], list):
117
- if turns_to_cache:
118
- turns_to_cache -= 1
119
- content[-1]["cache_control"] = {"type": "ephemeral"}
120
- else:
121
- content[-1].pop("cache_control", None)
122
- break
123
-
124
-
125
24
  class StandardMessageManager:
126
25
  """Manages messages in a standardized OpenAI format across different providers."""
127
26
 
@@ -160,6 +59,7 @@ class StandardMessageManager:
160
59
 
161
60
  def get_messages(self) -> List[Dict[str, Any]]:
162
61
  """Get all messages in standard format.
62
+ This method applies image retention policy if configured.
163
63
 
164
64
  Returns:
165
65
  List of messages
agent/providers/anthropic/loop.py CHANGED
@@ -283,8 +283,12 @@ class AnthropicLoop(BaseLoop):
283
283
  # Create new turn directory for this API call
284
284
  self._create_turn_dir()
285
285
 
286
+
287
+ # Apply image retention policy
288
+ self.message_manager.messages = messages.copy()
289
+ prepared_messages = self.message_manager.get_messages()
286
290
  # Convert standard messages to Anthropic format using utility function
287
- anthropic_messages, system_content = to_anthropic_format(messages.copy())
291
+ anthropic_messages, system_content = to_anthropic_format(prepared_messages)
288
292
 
289
293
  # Use API handler to make API call with Anthropic format
290
294
  response = await self.api_handler.make_api_call(
agent/providers/anthropic/utils.py CHANGED
@@ -4,7 +4,6 @@ import logging
4
4
  import re
5
5
  from typing import Any, Dict, List, Optional, Tuple, cast
6
6
  from anthropic.types.beta import BetaMessage
7
- from ..omni.parser import ParseResult
8
7
  from ...core.types import AgentResponse
9
8
  from datetime import datetime
10
9
 
@@ -188,7 +187,7 @@ def from_anthropic_format(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]
188
187
  async def to_agent_response_format(
189
188
  response: BetaMessage,
190
189
  messages: List[Dict[str, Any]],
191
- parsed_screen: Optional[ParseResult] = None,
190
+ parsed_screen: Optional[dict] = None,
192
191
  parser: Optional[Any] = None,
193
192
  model: Optional[str] = None,
194
193
  ) -> AgentResponse:
agent/providers/openai/loop.py CHANGED
@@ -276,7 +276,7 @@ class OpenAILoop(BaseLoop):
276
276
  # Call API
277
277
  screen_size = await self.computer.interface.get_screen_size()
278
278
  response = await self.api_handler.send_initial_request(
279
- messages=messages,
279
+ messages=self.message_manager.get_messages(), # Apply image retention policy
280
280
  display_width=str(screen_size["width"]),
281
281
  display_height=str(screen_size["height"]),
282
282
  previous_response_id=self.last_response_id,
@@ -397,7 +397,7 @@ class OpenAILoop(BaseLoop):
397
397
  # The API handler will extract this from the message history
398
398
  if isinstance(self.last_response_id, str):
399
399
  response = await self.api_handler.send_computer_call_request(
400
- messages=self.message_manager.messages,
400
+ messages=self.message_manager.get_messages(), # Apply image retention policy
401
401
  display_width=str(screen_size["width"]),
402
402
  display_height=str(screen_size["height"]),
403
403
  previous_response_id=self.last_response_id, # Use instance variable
agent/providers/uitars/clients/oaicompat.py CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Any
6
6
  import aiohttp
7
7
  import re
8
8
  from .base import BaseUITarsClient
9
+ import asyncio
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
@@ -144,7 +145,7 @@ class OAICompatClient(BaseUITarsClient):
144
145
  else:
145
146
  message = {"role": "user", "content": [{"type": "text", "text": item}]}
146
147
  final_messages.append(message)
147
-
148
+
148
149
  payload = {
149
150
  "model": self.model,
150
151
  "messages": final_messages,
@@ -192,7 +193,8 @@ class OAICompatClient(BaseUITarsClient):
192
193
 
193
194
  # if 503, then the endpoint is still warming up
194
195
  if response.status == 503:
195
- logger.error(f"Endpoint is still warming up, please try again later")
196
+ logger.error(f"Endpoint is still warming up, trying again in 30 seconds...")
197
+ await asyncio.sleep(30)
196
198
  raise Exception(f"Endpoint is still warming up: {response_text}")
197
199
 
198
200
  # Try to parse as JSON if the content type is appropriate
agent/ui/gradio/app.py CHANGED
@@ -41,7 +41,6 @@ from typing import cast
41
41
  # Import from agent package
42
42
  from agent.core.types import AgentResponse
43
43
  from agent.core.callbacks import DefaultCallbackHandler
44
- from agent.providers.omni.parser import ParseResult
45
44
  from computer import Computer
46
45
 
47
46
  from agent import ComputerAgent, AgentLoop, LLM, LLMProvider
@@ -103,7 +102,7 @@ class GradioChatScreenshotHandler(DefaultCallbackHandler):
103
102
  self,
104
103
  screenshot_base64: str,
105
104
  action_type: str = "",
106
- parsed_screen: Optional[ParseResult] = None,
105
+ parsed_screen: Optional[dict] = None,
107
106
  ) -> None:
108
107
  """Add screenshot to chatbot when a screenshot is taken and update the annotated image.
109
108
 
cua_agent-0.2.7.dist-info/METADATA → cua_agent-0.2.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.2.7
3
+ Version: 0.2.10
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.11
cua_agent-0.2.7.dist-info/RECORD → cua_agent-0.2.10.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
1
1
  agent/__init__.py,sha256=guFGtorDBF6R5hVep0Bvci3_sUJfBlcsq9ss5Kwrej8,1484
2
- agent/core/__init__.py,sha256=7DhJ_6KKooM6uTmDIlumCnd7OFcU67BYIIR1dpIYUB0,506
2
+ agent/core/__init__.py,sha256=3x4XmLSj40-sjUMOtxOuM82RnOQl0I5AwURk5wW_9GE,514
3
3
  agent/core/agent.py,sha256=HUfBe7Uam3TObAmf6KH0GDKuNCNunNmmMcuxS7aZg0Q,8332
4
- agent/core/base.py,sha256=AiSjnBAcHhZIca4KWBP1vQRE3HyikAPkr4Ij9WDevZQ,8374
5
- agent/core/callbacks.py,sha256=FKAxyajJ-ZJ5SxNXoupNcrm0GYBgjOjJEsStqst0EAk,6453
4
+ agent/core/base.py,sha256=7hD1rosM-JjyruwSplD4-5YO6BaO1a1bD7bjFYGGUrg,8315
5
+ agent/core/callbacks.py,sha256=uAoJo4rHpVf1d8rzEBFdtSud9jRndPLwDoC4U4uYZlw,6386
6
6
  agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
7
7
  agent/core/factory.py,sha256=zzlCdibctqhf8Uta-SrvE-G7h59wAw-7SGhHiGvS9GY,4608
8
- agent/core/messages.py,sha256=-OVMDqcxK5MUHPEkHliK29XFJYMRAc1keFvzrUyrOmM,16231
8
+ agent/core/messages.py,sha256=OfwelngzxBxwbbWQWcyZ4ViRdhSQ2YbqqWixeqtbePk,12473
9
9
  agent/core/provider_config.py,sha256=jB3fLsEsf806HQZ8jtzfSq4bCYGYONBeuCOoog_Nv_Y,768
10
10
  agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
11
11
  agent/core/tools.py,sha256=53aPme3O8U91n122Smu3TGbyGjQQe2zDimaZgKkFNi0,878
@@ -25,7 +25,7 @@ agent/providers/anthropic/api/logging.py,sha256=vHpwkIyOZdkSTVIH4ycbBPd4a_rzhP7O
25
25
  agent/providers/anthropic/api_handler.py,sha256=pWXcqDs0ruviDhRNRrz5Ac9ZH4yDv6ZlwpeG3a42cDg,5206
26
26
  agent/providers/anthropic/callbacks/__init__.py,sha256=PciBb6Z6MKSwfXqDjU3pV_0FS4MOn_Np_A7_skD-6dA,104
27
27
  agent/providers/anthropic/callbacks/manager.py,sha256=euIah5yiM8nhisN-RWXewo6v0WQr0c-FbMBO04r6dJk,1865
28
- agent/providers/anthropic/loop.py,sha256=Sepfo0b0oQT98xd3Sv2S7Xc81bfU7L4_Zv3VTiapKkg,21661
28
+ agent/providers/anthropic/loop.py,sha256=T2Ip6Nixsmk67uO-tHIsayrZsMksMp951lnP0QZ6VcM,21847
29
29
  agent/providers/anthropic/prompts.py,sha256=EaqyvUb90yybv75VsBYzu4sroga7eMACys0uH9mIVWM,1993
30
30
  agent/providers/anthropic/response_handler.py,sha256=ZTprV4NTP9Eb9jQ7QgEKZBX0L6rMj5nqBRiE3Zfws8I,8008
31
31
  agent/providers/anthropic/tools/__init__.py,sha256=JyZwuVtPUnZwRSZBSCdQv9yxbLCsygm3l8Ywjjt9qTQ,661
@@ -37,7 +37,7 @@ agent/providers/anthropic/tools/edit.py,sha256=EGRP61MDA4Oue1D7Q-_vLpd6LdGbdBA1Z
37
37
  agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2PipFEH-PUXpoY,2020
38
38
  agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
39
39
  agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
40
- agent/providers/anthropic/utils.py,sha256=qDp0bFGQhK1dG9U461iaeCiyoVUsksXmD43g9cedRW8,14367
40
+ agent/providers/anthropic/utils.py,sha256=6-lANH2-PjnYcZ_n8uGPbkbk9pqIUad5wh07zzslz3Q,14322
41
41
  agent/providers/omni/__init__.py,sha256=5ix67iJdtQNGuGJEjEOF65PwFWO7vdo1QlXD28bRbW4,179
42
42
  agent/providers/omni/api_handler.py,sha256=7CpD43lYAqTyNKWfrD8XcM9ekbajqKCTH9p0TWtEQyg,1163
43
43
  agent/providers/omni/clients/anthropic.py,sha256=nC_lj3UwrLqx9TIew58yxLqKwrH1_LwJD6EqVSEfp3g,3670
@@ -58,7 +58,7 @@ agent/providers/omni/tools/manager.py,sha256=UhtasaxGcmkxtz-bP1UJ1a4xdYnD3Cv8Pbt
58
58
  agent/providers/omni/utils.py,sha256=Ikp6ONL1HO637o3KDtv5yv6q-4uIWAzMSQDvGetWXC8,8724
59
59
  agent/providers/openai/__init__.py,sha256=8DS6YNZp42NLCacwXsfRaghyczaOCVovX8TgzXUZf_o,165
60
60
  agent/providers/openai/api_handler.py,sha256=L1K56dR1j4JsX1sX4OFYeKoCUMM25Fwj2y9nqv8oOhw,17736
61
- agent/providers/openai/loop.py,sha256=KWN1I8_t7UGsMiz8Jn9oifPp06aFMFcYaoxBlHb63FA,20622
61
+ agent/providers/openai/loop.py,sha256=_MyjPu4rpHpTxS2nTSRLHrCbSDkZPK5WEG1APKGP-1U,20717
62
62
  agent/providers/openai/response_handler.py,sha256=K8v_92uSr9R74Y5INY4naeEZZZm35CLIl4h74MBZhsw,7953
63
63
  agent/providers/openai/tools/__init__.py,sha256=-KbHMWcd2OVTk5RYQ3ACBEMygwbH-VW6n_98p0lwM4A,344
64
64
  agent/providers/openai/tools/base.py,sha256=Np_BC9Cm6TslK99etE9hVTtsBlcEaGhoNCK3NXdB_Lw,2474
@@ -69,7 +69,7 @@ agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgP
69
69
  agent/providers/uitars/__init__.py,sha256=sq5OMVJP9E_sok9tIiKJreGkjmNWXPMObjPTClYv1es,38
70
70
  agent/providers/uitars/clients/base.py,sha256=5w8Ajmq1JiPyUQJUAq1lSkfpA8_Ts80NQiDxPMTtQrI,948
71
71
  agent/providers/uitars/clients/mlxvlm.py,sha256=lMnN6ecMmWHf_l7khJ2iJHHvT7PE4XagUjrWhB0zEhc,10893
72
- agent/providers/uitars/clients/oaicompat.py,sha256=uYjwrGCVpFi8wj4kcaJ905ABiY6ksJZXaLlM61B2DUA,8907
72
+ agent/providers/uitars/clients/oaicompat.py,sha256=Aw-HMVqRmgNDw-8UxpXU8td4tvTN4ASqGVaNDGPKhSc,8993
73
73
  agent/providers/uitars/loop.py,sha256=m2T7OKHN4HgUO0CLEKF-DD4lCULk_L1MSL4DA8Unt8o,26663
74
74
  agent/providers/uitars/prompts.py,sha256=_pQNd438mFpZKZT0aMl6Bd0_GgQxuy9y08kQAMPi9UM,2536
75
75
  agent/providers/uitars/tools/__init__.py,sha256=0hc3W6u5TvcXYztYKIyve_C2G3XMfwt_y7grmH0ZHC0,29
@@ -79,8 +79,8 @@ agent/providers/uitars/utils.py,sha256=493STTEEJcVhVbQgR0e8rNTI1DjkxUx8IgIv3wkJ1
79
79
  agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
80
80
  agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
81
81
  agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
82
- agent/ui/gradio/app.py,sha256=StBehGfPJhE6ywnxU3CHDPkZrOm_2XMT1Npepf89G5c,70675
83
- cua_agent-0.2.7.dist-info/METADATA,sha256=Jdz7v8P_JvHbN1vEcIyDzf2a51FUJQ5D4WJqL9cLbyA,12688
84
- cua_agent-0.2.7.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
85
- cua_agent-0.2.7.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
86
- cua_agent-0.2.7.dist-info/RECORD,,
82
+ agent/ui/gradio/app.py,sha256=8NGwdng57sAQ2i2vD2THvpWKdOD-Y7BNYeQGAcj3xtA,70616
83
+ cua_agent-0.2.10.dist-info/METADATA,sha256=UVZuW6ZdH47YKe0Cx5Hd_-RdINufw-HnvsDGw6uXe1A,12689
84
+ cua_agent-0.2.10.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
85
+ cua_agent-0.2.10.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
86
+ cua_agent-0.2.10.dist-info/RECORD,,