cua-agent 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. See the package's registry page for more details.

@@ -17,6 +17,7 @@ from anthropic.types.beta import (
17
17
  BetaTextBlock,
18
18
  BetaTextBlockParam,
19
19
  BetaToolUseBlockParam,
20
+ BetaContentBlockParam,
20
21
  )
21
22
 
22
23
  # Computer
@@ -24,12 +25,12 @@ from computer import Computer
24
25
 
25
26
  # Base imports
26
27
  from ...core.loop import BaseLoop
27
- from ...core.messages import ImageRetentionConfig
28
+ from ...core.messages import ImageRetentionConfig as CoreImageRetentionConfig
28
29
 
29
30
  # Anthropic provider-specific imports
30
31
  from .api.client import AnthropicClientFactory, BaseAnthropicClient
31
32
  from .tools.manager import ToolManager
32
- from .messages.manager import MessageManager
33
+ from .messages.manager import MessageManager, ImageRetentionConfig
33
34
  from .callbacks.manager import CallbackManager
34
35
  from .prompts import SYSTEM_PROMPT
35
36
  from .types import LLMProvider
@@ -48,8 +49,8 @@ class AnthropicLoop(BaseLoop):
48
49
  def __init__(
49
50
  self,
50
51
  api_key: str,
52
+ computer: Computer,
51
53
  model: str = "claude-3-7-sonnet-20250219", # Fixed model
52
- computer: Optional[Computer] = None,
53
54
  only_n_most_recent_images: Optional[int] = 2,
54
55
  base_dir: Optional[str] = "trajectories",
55
56
  max_retries: int = 3,
@@ -69,7 +70,7 @@ class AnthropicLoop(BaseLoop):
69
70
  retry_delay: Delay between retries in seconds
70
71
  save_trajectory: Whether to save trajectory data
71
72
  """
72
- # Initialize base class
73
+ # Initialize base class with core config
73
74
  super().__init__(
74
75
  computer=computer,
75
76
  model=model,
@@ -93,8 +94,8 @@ class AnthropicLoop(BaseLoop):
93
94
  self.message_manager = None
94
95
  self.callback_manager = None
95
96
 
96
- # Configure image retention
97
- self.image_retention_config = ImageRetentionConfig(
97
+ # Configure image retention with core config
98
+ self.image_retention_config = CoreImageRetentionConfig(
98
99
  num_images_to_keep=only_n_most_recent_images
99
100
  )
100
101
 
@@ -113,7 +114,7 @@ class AnthropicLoop(BaseLoop):
113
114
 
114
115
  # Initialize message manager
115
116
  self.message_manager = MessageManager(
116
- ImageRetentionConfig(
117
+ image_retention_config=ImageRetentionConfig(
117
118
  num_images_to_keep=self.only_n_most_recent_images, enable_caching=True
118
119
  )
119
120
  )
@@ -250,6 +251,10 @@ class AnthropicLoop(BaseLoop):
250
251
  await self._process_screen(parsed_screen, self.message_history)
251
252
 
252
253
  # Prepare messages and make API call
254
+ if self.message_manager is None:
255
+ raise RuntimeError(
256
+ "Message manager not initialized. Call initialize_client() first."
257
+ )
253
258
  prepared_messages = self.message_manager.prepare_messages(
254
259
  cast(List[BetaMessageParam], self.message_history.copy())
255
260
  )
@@ -257,7 +262,7 @@ class AnthropicLoop(BaseLoop):
257
262
  # Create new turn directory for this API call
258
263
  self._create_turn_dir()
259
264
 
260
- # Make API call
265
+ # Use _make_api_call instead of direct client call to ensure logging
261
266
  response = await self._make_api_call(prepared_messages)
262
267
 
263
268
  # Handle the response
@@ -287,6 +292,11 @@ class AnthropicLoop(BaseLoop):
287
292
  Returns:
288
293
  API response
289
294
  """
295
+ if self.client is None:
296
+ raise RuntimeError("Client not initialized. Call initialize_client() first.")
297
+ if self.tool_manager is None:
298
+ raise RuntimeError("Tool manager not initialized. Call initialize_client() first.")
299
+
290
300
  last_error = None
291
301
 
292
302
  for attempt in range(self.max_retries):
@@ -297,6 +307,7 @@ class AnthropicLoop(BaseLoop):
297
307
  "max_tokens": self.max_tokens,
298
308
  "system": SYSTEM_PROMPT,
299
309
  }
310
+ # Let ExperimentManager handle sanitization
300
311
  self._log_api_call("request", request_data)
301
312
 
302
313
  # Setup betas and system
@@ -320,7 +331,7 @@ class AnthropicLoop(BaseLoop):
320
331
  betas=betas,
321
332
  )
322
333
 
323
- # Log success response
334
+ # Let ExperimentManager handle sanitization
324
335
  self._log_api_call("response", request_data, response)
325
336
 
326
337
  return response
@@ -365,25 +376,38 @@ class AnthropicLoop(BaseLoop):
365
376
  }
366
377
  )
367
378
 
379
+ if self.callback_manager is None:
380
+ raise RuntimeError(
381
+ "Callback manager not initialized. Call initialize_client() first."
382
+ )
383
+
368
384
  # Handle tool use blocks and collect results
369
385
  tool_result_content = []
370
386
  for content_block in response_params:
371
387
  # Notify callback of content
372
- self.callback_manager.on_content(content_block)
388
+ self.callback_manager.on_content(cast(BetaContentBlockParam, content_block))
373
389
 
374
390
  # Handle tool use
375
391
  if content_block.get("type") == "tool_use":
392
+ if self.tool_manager is None:
393
+ raise RuntimeError(
394
+ "Tool manager not initialized. Call initialize_client() first."
395
+ )
376
396
  result = await self.tool_manager.execute_tool(
377
397
  name=content_block["name"],
378
398
  tool_input=cast(Dict[str, Any], content_block["input"]),
379
399
  )
380
400
 
381
401
  # Create tool result and add to content
382
- tool_result = self._make_tool_result(result, content_block["id"])
402
+ tool_result = self._make_tool_result(
403
+ cast(ToolResult, result), content_block["id"]
404
+ )
383
405
  tool_result_content.append(tool_result)
384
406
 
385
407
  # Notify callback of tool result
386
- self.callback_manager.on_tool_result(result, content_block["id"])
408
+ self.callback_manager.on_tool_result(
409
+ cast(ToolResult, result), content_block["id"]
410
+ )
387
411
 
388
412
  # If no tool results, we're done
389
413
  if not tool_result_content:
@@ -495,13 +519,13 @@ class AnthropicLoop(BaseLoop):
495
519
  result_text = f"<s>{result.system}</s>\n{result_text}"
496
520
  return result_text
497
521
 
498
- def _handle_content(self, content: Dict[str, Any]) -> None:
522
+ def _handle_content(self, content: BetaContentBlockParam) -> None:
499
523
  """Handle content updates from the assistant."""
500
524
  if content.get("type") == "text":
501
- text = content.get("text", "")
525
+ text_content = cast(BetaTextBlockParam, content)
526
+ text = text_content["text"]
502
527
  if text == "<DONE>":
503
528
  return
504
-
505
529
  logger.info(f"Assistant: {text}")
506
530
 
507
531
  def _handle_tool_result(self, result: ToolResult, tool_id: str) -> None:
@@ -517,5 +541,10 @@ class AnthropicLoop(BaseLoop):
517
541
  """Handle API interactions."""
518
542
  if error:
519
543
  logger.error(f"API error: {error}")
544
+ self._log_api_call("error", request, error=error)
520
545
  else:
521
546
  logger.debug(f"API request: {request}")
547
+ if response:
548
+ self._log_api_call("response", request, response)
549
+ else:
550
+ self._log_api_call("request", request)
@@ -90,7 +90,9 @@ class MessageManager:
90
90
  blocks_with_cache_control += 1
91
91
  # Add cache control to the last content block only
92
92
  if content and len(content) > 0:
93
- content[-1]["cache_control"] = {"type": "ephemeral"}
93
+ content[-1]["cache_control"] = BetaCacheControlEphemeralParam(
94
+ type="ephemeral"
95
+ )
94
96
  else:
95
97
  # Remove any existing cache control
96
98
  if content and len(content) > 0:
@@ -6,7 +6,7 @@ from typing import Any, Dict
6
6
 
7
7
  from anthropic.types.beta import BetaToolUnionParam
8
8
 
9
- from ....core.tools.base import BaseTool, ToolError, ToolResult, ToolFailure, CLIResult
9
+ from ....core.tools.base import BaseTool
10
10
 
11
11
 
12
12
  class BaseAnthropicTool(BaseTool, metaclass=ABCMeta):
@@ -1,6 +1,6 @@
1
1
  """Collection classes for managing multiple tools."""
2
2
 
3
- from typing import Any
3
+ from typing import Any, cast
4
4
 
5
5
  from anthropic.types.beta import BetaToolUnionParam
6
6
 
@@ -22,7 +22,7 @@ class ToolCollection:
22
22
  def to_params(
23
23
  self,
24
24
  ) -> list[BetaToolUnionParam]:
25
- return [tool.to_params() for tool in self.tools]
25
+ return cast(list[BetaToolUnionParam], [tool.to_params() for tool in self.tools])
26
26
 
27
27
  async def run(self, *, name: str, tool_input: dict[str, Any]) -> ToolResult:
28
28
  tool = self.tool_map.get(name)
@@ -61,9 +61,9 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
61
61
 
62
62
  name: Literal["computer"] = "computer"
63
63
  api_type: Literal["computer_20250124"] = "computer_20250124"
64
- width: int | None
65
- height: int | None
66
- display_num: int | None
64
+ width: int | None = None
65
+ height: int | None = None
66
+ display_num: int | None = None
67
67
  computer: Computer # The CUA Computer instance
68
68
  logger = logging.getLogger(__name__)
69
69
 
@@ -106,6 +106,7 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
106
106
  display_size = await self.computer.interface.get_screen_size()
107
107
  self.width = display_size["width"]
108
108
  self.height = display_size["height"]
109
+ assert isinstance(self.width, int) and isinstance(self.height, int)
109
110
  self.logger.info(f"Initialized screen dimensions to {self.width}x{self.height}")
110
111
 
111
112
  async def __call__(
@@ -120,6 +121,8 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
120
121
  # Ensure dimensions are initialized
121
122
  if self.width is None or self.height is None:
122
123
  await self.initialize_dimensions()
124
+ if self.width is None or self.height is None:
125
+ raise ToolError("Failed to initialize screen dimensions")
123
126
  except Exception as e:
124
127
  raise ToolError(f"Failed to initialize dimensions: {e}")
125
128
 
@@ -147,7 +150,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
147
150
  self.logger.info(
148
151
  f"Scaling image from {pre_img.size} to {self.width}x{self.height} to match screen dimensions"
149
152
  )
150
- pre_img = pre_img.resize((self.width, self.height), Image.Resampling.LANCZOS)
153
+ if not isinstance(self.width, int) or not isinstance(self.height, int):
154
+ raise ToolError("Screen dimensions must be integers")
155
+ size = (int(self.width), int(self.height))
156
+ pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
151
157
 
152
158
  self.logger.info(f" Current dimensions: {pre_img.width}x{pre_img.height}")
153
159
 
@@ -160,15 +166,7 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
160
166
  await self.computer.interface.move_cursor(x, y)
161
167
  # Then perform drag operation - check if drag_to exists or we need to use other methods
162
168
  try:
163
- if hasattr(self.computer.interface, "drag_to"):
164
- await self.computer.interface.drag_to(x, y)
165
- else:
166
- # Alternative approach: press mouse down, move, release
167
- await self.computer.interface.mouse_down()
168
- await asyncio.sleep(0.2)
169
- await self.computer.interface.move_cursor(x, y)
170
- await asyncio.sleep(0.2)
171
- await self.computer.interface.mouse_up()
169
+ await self.computer.interface.drag_to(x, y)
172
170
  except Exception as e:
173
171
  self.logger.error(f"Error during drag operation: {str(e)}")
174
172
  raise ToolError(f"Failed to perform drag: {str(e)}")
@@ -214,9 +212,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
214
212
  self.logger.info(
215
213
  f"Scaling image from {pre_img.size} to {self.width}x{self.height} to match screen dimensions"
216
214
  )
217
- pre_img = pre_img.resize(
218
- (self.width, self.height), Image.Resampling.LANCZOS
219
- )
215
+ if not isinstance(self.width, int) or not isinstance(self.height, int):
216
+ raise ToolError("Screen dimensions must be integers")
217
+ size = (int(self.width), int(self.height))
218
+ pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
220
219
  # Save the scaled image back to bytes
221
220
  buffer = io.BytesIO()
222
221
  pre_img.save(buffer, format="PNG")
@@ -275,9 +274,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
275
274
  self.logger.info(
276
275
  f"Scaling image from {pre_img.size} to {self.width}x{self.height}"
277
276
  )
278
- pre_img = pre_img.resize(
279
- (self.width, self.height), Image.Resampling.LANCZOS
280
- )
277
+ if not isinstance(self.width, int) or not isinstance(self.height, int):
278
+ raise ToolError("Screen dimensions must be integers")
279
+ size = (int(self.width), int(self.height))
280
+ pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
281
281
 
282
282
  # Perform the click action
283
283
  if action == "left_click":
@@ -335,7 +335,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
335
335
  self.logger.info(
336
336
  f"Scaling image from {pre_img.size} to {self.width}x{self.height}"
337
337
  )
338
- pre_img = pre_img.resize((self.width, self.height), Image.Resampling.LANCZOS)
338
+ if not isinstance(self.width, int) or not isinstance(self.height, int):
339
+ raise ToolError("Screen dimensions must be integers")
340
+ size = (int(self.width), int(self.height))
341
+ pre_img = pre_img.resize(size, Image.Resampling.LANCZOS)
339
342
 
340
343
  if action == "key":
341
344
  # Special handling for page up/down on macOS
@@ -365,7 +368,7 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
365
368
  # Handle single key press
366
369
  self.logger.info(f"Pressing key: {text}")
367
370
  try:
368
- await self.computer.interface.press(text)
371
+ await self.computer.interface.press_key(text)
369
372
  output_text = text
370
373
  except ValueError as e:
371
374
  raise ToolError(f"Invalid key: {text}. {str(e)}")
@@ -442,7 +445,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
442
445
  self.logger.info(
443
446
  f"Scaling image from {img.size} to {self.width}x{self.height}"
444
447
  )
445
- img = img.resize((self.width, self.height), Image.Resampling.LANCZOS)
448
+ if not isinstance(self.width, int) or not isinstance(self.height, int):
449
+ raise ToolError("Screen dimensions must be integers")
450
+ size = (int(self.width), int(self.height))
451
+ img = img.resize(size, Image.Resampling.LANCZOS)
446
452
  buffer = io.BytesIO()
447
453
  img.save(buffer, format="PNG")
448
454
  screenshot = buffer.getvalue()
@@ -451,7 +457,8 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
451
457
 
452
458
  elif action == "cursor_position":
453
459
  pos = await self.computer.interface.get_cursor_position()
454
- return ToolResult(output=f"X={int(pos[0])},Y={int(pos[1])}")
460
+ x, y = pos # Unpack the tuple
461
+ return ToolResult(output=f"X={int(x)},Y={int(y)}")
455
462
 
456
463
  except Exception as e:
457
464
  self.logger.error(f"Error during {action} action: {str(e)}")
@@ -517,7 +524,10 @@ class ComputerTool(BaseComputerTool, BaseAnthropicTool):
517
524
  # Scale image if needed
518
525
  if img.size != (self.width, self.height):
519
526
  self.logger.info(f"Scaling image from {img.size} to {self.width}x{self.height}")
520
- img = img.resize((self.width, self.height), Image.Resampling.LANCZOS)
527
+ if not isinstance(self.width, int) or not isinstance(self.height, int):
528
+ raise ToolError("Screen dimensions must be integers")
529
+ size = (int(self.width), int(self.height))
530
+ img = img.resize(size, Image.Resampling.LANCZOS)
521
531
  buffer = io.BytesIO()
522
532
  img.save(buffer, format="PNG")
523
533
  screenshot = buffer.getvalue()
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict, List
1
+ from typing import Any, Dict, List, cast
2
2
  from anthropic.types.beta import BetaToolUnionParam
3
3
  from computer.computer import Computer
4
4
 
@@ -37,7 +37,7 @@ class ToolManager(BaseToolManager):
37
37
  """Get tool parameters for Anthropic API calls."""
38
38
  if self.tools is None:
39
39
  raise RuntimeError("Tools not initialized. Call initialize() first.")
40
- return self.tools.to_params()
40
+ return cast(List[BetaToolUnionParam], self.tools.to_params())
41
41
 
42
42
  async def execute_tool(self, name: str, tool_input: dict[str, Any]) -> ToolResult:
43
43
  """Execute a tool with the given input.
@@ -126,15 +126,18 @@ class ExperimentManager:
126
126
  # Since we no longer want to use the images/ folder, we'll skip this functionality
127
127
  return
128
128
 
129
- def save_screenshot(self, img_base64: str, action_type: str = "") -> None:
129
+ def save_screenshot(self, img_base64: str, action_type: str = "") -> Optional[str]:
130
130
  """Save a screenshot to the experiment directory.
131
131
 
132
132
  Args:
133
133
  img_base64: Base64 encoded screenshot
134
134
  action_type: Type of action that triggered the screenshot
135
+
136
+ Returns:
137
+ Optional[str]: Path to the saved screenshot, or None if saving failed
135
138
  """
136
139
  if not self.current_turn_dir:
137
- return
140
+ return None
138
141
 
139
142
  try:
140
143
  # Increment screenshot counter
@@ -13,6 +13,7 @@ import asyncio
13
13
  from httpx import ConnectError, ReadTimeout
14
14
  import shutil
15
15
  import copy
16
+ from typing import cast
16
17
 
17
18
  from .parser import OmniParser, ParseResult, ParserMetadata, UIElement
18
19
  from ...core.loop import BaseLoop
@@ -182,8 +183,6 @@ class OmniLoop(BaseLoop):
182
183
 
183
184
  if self.provider == LLMProvider.OPENAI:
184
185
  self.client = OpenAIClient(api_key=self.api_key, model=self.model)
185
- elif self.provider == LLMProvider.GROQ:
186
- self.client = GroqClient(api_key=self.api_key, model=self.model)
187
186
  elif self.provider == LLMProvider.ANTHROPIC:
188
187
  self.client = AnthropicClient(
189
188
  api_key=self.api_key,
@@ -329,10 +328,15 @@ class OmniLoop(BaseLoop):
329
328
  raise RuntimeError(error_message)
330
329
 
331
330
  async def _handle_response(
332
- self, response: Any, messages: List[Dict[str, Any]], parsed_screen: Dict[str, Any]
331
+ self, response: Any, messages: List[Dict[str, Any]], parsed_screen: ParseResult
333
332
  ) -> Tuple[bool, bool]:
334
333
  """Handle API response.
335
334
 
335
+ Args:
336
+ response: API response
337
+ messages: List of messages to update
338
+ parsed_screen: Current parsed screen information
339
+
336
340
  Returns:
337
341
  Tuple of (should_continue, action_screenshot_saved)
338
342
  """
@@ -394,7 +398,9 @@ class OmniLoop(BaseLoop):
394
398
 
395
399
  try:
396
400
  # Execute action with current parsed screen info
397
- await self._execute_action(parsed_content, parsed_screen)
401
+ await self._execute_action(
402
+ parsed_content, cast(ParseResult, parsed_screen)
403
+ )
398
404
  action_screenshot_saved = True
399
405
  except Exception as e:
400
406
  logger.error(f"Error executing action: {str(e)}")
@@ -463,7 +469,7 @@ class OmniLoop(BaseLoop):
463
469
 
464
470
  try:
465
471
  # Execute action with current parsed screen info
466
- await self._execute_action(parsed_content, parsed_screen)
472
+ await self._execute_action(parsed_content, cast(ParseResult, parsed_screen))
467
473
  action_screenshot_saved = True
468
474
  except Exception as e:
469
475
  logger.error(f"Error executing action: {str(e)}")
@@ -488,7 +494,7 @@ class OmniLoop(BaseLoop):
488
494
 
489
495
  try:
490
496
  # Execute action with current parsed screen info
491
- await self._execute_action(content, parsed_screen)
497
+ await self._execute_action(content, cast(ParseResult, parsed_screen))
492
498
  action_screenshot_saved = True
493
499
  except Exception as e:
494
500
  logger.error(f"Error executing action: {str(e)}")
@@ -122,8 +122,9 @@ class OmniParser:
122
122
  # Create a minimal valid result for error cases
123
123
  return ParseResult(
124
124
  elements=[],
125
+ screen_info=None,
125
126
  annotated_image_base64="",
126
- parsed_content_list=[f"Error: {str(e)}"],
127
+ parsed_content_list=[{"error": str(e)}],
127
128
  metadata=ParserMetadata(
128
129
  image_size=(0, 0),
129
130
  num_icons=0,
@@ -2,7 +2,6 @@
2
2
 
3
3
  from .bash import OmniBashTool
4
4
  from .computer import OmniComputerTool
5
- from .edit import OmniEditTool
6
5
  from .manager import OmniToolManager
7
6
 
8
7
  __all__ = [
@@ -177,7 +177,7 @@ class OmniComputerTool(BaseComputerTool):
177
177
  keys = text.split("+")
178
178
  await self.computer.interface.hotkey(*keys)
179
179
  else:
180
- await self.computer.interface.press(text)
180
+ await self.computer.interface.press_key(text)
181
181
 
182
182
  # Take screenshot after action
183
183
  screenshot = await self.computer.interface.screenshot()
@@ -188,7 +188,8 @@ class OmniComputerTool(BaseComputerTool):
188
188
  )
189
189
  elif action == "cursor_position":
190
190
  pos = await self.computer.interface.get_cursor_position()
191
- return ToolResult(output=f"X={int(pos[0])},Y={int(pos[1])}")
191
+ x, y = pos
192
+ return ToolResult(output=f"X={int(x)},Y={int(y)}")
192
193
  elif action == "scroll":
193
194
  if direction == "down":
194
195
  self.logger.info(f"Scrolling down, amount: {amount}")
@@ -10,7 +10,6 @@ from ....core.tools.collection import ToolCollection
10
10
 
11
11
  from .bash import OmniBashTool
12
12
  from .computer import OmniComputerTool
13
- from .edit import OmniEditTool
14
13
 
15
14
 
16
15
  class ProviderType(Enum):
@@ -35,11 +34,10 @@ class OmniToolManager(BaseToolManager):
35
34
  # Initialize tools
36
35
  self.computer_tool = OmniComputerTool(self.computer)
37
36
  self.bash_tool = OmniBashTool(self.computer)
38
- self.edit_tool = OmniEditTool(self.computer)
39
37
 
40
38
  def _initialize_tools(self) -> ToolCollection:
41
39
  """Initialize all available tools."""
42
- return ToolCollection(self.computer_tool, self.bash_tool, self.edit_tool)
40
+ return ToolCollection(self.computer_tool, self.bash_tool)
43
41
 
44
42
  async def _initialize_tools_specific(self) -> None:
45
43
  """Initialize provider-specific tool requirements."""
@@ -96,7 +96,7 @@ def compress_image_base64(
96
96
  # Resize image
97
97
  new_width = int(img.width * scale_factor)
98
98
  new_height = int(img.height * scale_factor)
99
- current_img = img.resize((new_width, new_height), Image.LANCZOS)
99
+ current_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
100
100
 
101
101
  # Try with reduced size and quality
102
102
  buffer = io.BytesIO()
@@ -130,7 +130,9 @@ def compress_image_base64(
130
130
 
131
131
  # Last resort: Use minimum quality and size
132
132
  buffer = io.BytesIO()
133
- smallest_img = img.resize((int(img.width * 0.5), int(img.height * 0.5)), Image.LANCZOS)
133
+ smallest_img = img.resize(
134
+ (int(img.width * 0.5), int(img.height * 0.5)), Image.Resampling.LANCZOS
135
+ )
134
136
  # Convert to RGB if necessary
135
137
  if smallest_img.mode in ("RGBA", "LA") or (
136
138
  smallest_img.mode == "P" and "transparency" in smallest_img.info
agent/types/__init__.py CHANGED
@@ -1,23 +1,20 @@
1
1
  """Type definitions for the agent package."""
2
2
 
3
- from .base import Provider, HostConfig, TaskResult, Annotation
3
+ from .base import HostConfig, TaskResult, Annotation
4
4
  from .messages import Message, Request, Response, StepMessage, DisengageMessage
5
5
  from .tools import ToolInvocation, ToolInvocationState, ClientAttachment, ToolResult
6
6
 
7
7
  __all__ = [
8
8
  # Base types
9
- "Provider",
10
9
  "HostConfig",
11
10
  "TaskResult",
12
11
  "Annotation",
13
-
14
12
  # Message types
15
13
  "Message",
16
14
  "Request",
17
15
  "Response",
18
16
  "StepMessage",
19
17
  "DisengageMessage",
20
-
21
18
  # Tool types
22
19
  "ToolInvocation",
23
20
  "ToolInvocationState",
agent/types/base.py CHANGED
@@ -5,17 +5,6 @@ from typing import Dict, Any
5
5
  from pydantic import BaseModel, ConfigDict
6
6
 
7
7
 
8
- class Provider(str, Enum):
9
- """Available AI providers."""
10
-
11
- UNKNOWN = "unknown" # Default provider for base class
12
- ANTHROPIC = "anthropic"
13
- OPENAI = "openai"
14
- OLLAMA = "ollama"
15
- OMNI = "omni"
16
- GROQ = "groq"
17
-
18
-
19
8
  class HostConfig(BaseModel):
20
9
  """Host configuration."""
21
10
 
@@ -48,6 +37,5 @@ class AgentLoop(Enum):
48
37
  """Enumeration of available loop types."""
49
38
 
50
39
  ANTHROPIC = auto() # Anthropic implementation
51
- OPENAI = auto() # OpenAI implementation
52
40
  OMNI = auto() # OmniLoop implementation
53
41
  # Add more loop types as needed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: <3.13,>=3.10