cua-agent: 0.4.5 (py3-none-any.whl) → 0.4.7 (py3-none-any.whl)

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

@@ -8,7 +8,7 @@ from litellm import completion, acompletion
8
8
  # Try to import HuggingFace dependencies
9
9
  try:
10
10
  import torch
11
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
11
+ from transformers import AutoModelForImageTextToText, AutoProcessor
12
12
  HF_AVAILABLE = True
13
13
  except ImportError:
14
14
  HF_AVAILABLE = False
@@ -40,7 +40,7 @@ class HuggingFaceLocalAdapter(CustomLLM):
40
40
  """
41
41
  if model_name not in self.models:
42
42
  # Load model
43
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
43
+ model = AutoModelForImageTextToText.from_pretrained(
44
44
  model_name,
45
45
  torch_dtype=torch.float16,
46
46
  device_map=self.device,
@@ -141,8 +141,7 @@ class HuggingFaceLocalAdapter(CustomLLM):
141
141
  )
142
142
 
143
143
  # Move inputs to the same device as model
144
- if torch.cuda.is_available() and self.device != "cpu":
145
- inputs = inputs.to("cuda")
144
+ inputs = inputs.to(model.device)
146
145
 
147
146
  # Generate response
148
147
  with torch.no_grad():
agent/agent.py CHANGED
@@ -411,6 +411,9 @@ class ComputerAgent:
411
411
  # Perform computer actions
412
412
  action = item.get("action")
413
413
  action_type = action.get("type")
414
+ if action_type is None:
415
+ print(f"Action type cannot be `None`: action={action}, action_type={action_type}")
416
+ return []
414
417
 
415
418
  # Extract action arguments (all fields except 'type')
416
419
  action_args = {k: v for k, v in action.items() if k != "type"}
@@ -9,10 +9,7 @@ import io
9
9
  import logging
10
10
 
11
11
  try:
12
- from presidio_analyzer import AnalyzerEngine
13
- from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine
14
- from presidio_anonymizer.entities import RecognizerResult, OperatorConfig
15
- from presidio_image_redactor import ImageRedactorEngine
12
+ # TODO: Add Presidio dependencies
16
13
  from PIL import Image
17
14
  PRESIDIO_AVAILABLE = True
18
15
  except ImportError:
@@ -32,11 +29,7 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
32
29
 
33
30
  def __init__(
34
31
  self,
35
- anonymize_text: bool = True,
36
- anonymize_images: bool = True,
37
- entities_to_anonymize: Optional[List[str]] = None,
38
- anonymization_operator: str = "replace",
39
- image_redaction_color: Tuple[int, int, int] = (255, 192, 203) # Pink
32
+ # TODO: Any extra kwargs if needed
40
33
  ):
41
34
  """
42
35
  Initialize the PII anonymization callback.
@@ -51,23 +44,10 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
51
44
  if not PRESIDIO_AVAILABLE:
52
45
  raise ImportError(
53
46
  "Presidio is not available. Install with: "
54
- "pip install presidio-analyzer presidio-anonymizer presidio-image-redactor"
47
+ "pip install cua-agent[pii-anonymization]"
55
48
  )
56
49
 
57
- self.anonymize_text = anonymize_text
58
- self.anonymize_images = anonymize_images
59
- self.entities_to_anonymize = entities_to_anonymize
60
- self.anonymization_operator = anonymization_operator
61
- self.image_redaction_color = image_redaction_color
62
-
63
- # Initialize Presidio engines
64
- self.analyzer = AnalyzerEngine()
65
- self.anonymizer = AnonymizerEngine()
66
- self.deanonymizer = DeanonymizeEngine()
67
- self.image_redactor = ImageRedactorEngine()
68
-
69
- # Store anonymization mappings for deanonymization
70
- self.anonymization_mappings: Dict[str, Any] = {}
50
+ # TODO: Implement __init__
71
51
 
72
52
  async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
73
53
  """
@@ -79,9 +59,6 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
79
59
  Returns:
80
60
  List of messages with PII anonymized
81
61
  """
82
- if not self.anonymize_text and not self.anonymize_images:
83
- return messages
84
-
85
62
  anonymized_messages = []
86
63
  for msg in messages:
87
64
  anonymized_msg = await self._anonymize_message(msg)
@@ -99,9 +76,6 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
99
76
  Returns:
100
77
  List of output with PII deanonymized for tool calls
101
78
  """
102
- if not self.anonymize_text:
103
- return output
104
-
105
79
  deanonymized_output = []
106
80
  for item in output:
107
81
  # Only deanonymize tool calls and computer_call messages
@@ -114,146 +88,9 @@ class PIIAnonymizationCallback(AsyncCallbackHandler):
114
88
  return deanonymized_output
115
89
 
116
90
  async def _anonymize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
117
- """Anonymize PII in a single message."""
118
- msg_copy = message.copy()
119
-
120
- # Anonymize text content
121
- if self.anonymize_text:
122
- msg_copy = await self._anonymize_text_content(msg_copy)
123
-
124
- # Redact images in computer_call_output
125
- if self.anonymize_images and msg_copy.get("type") == "computer_call_output":
126
- msg_copy = await self._redact_image_content(msg_copy)
127
-
128
- return msg_copy
129
-
130
- async def _anonymize_text_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
131
- """Anonymize text content in a message."""
132
- msg_copy = message.copy()
133
-
134
- # Handle content array
135
- content = msg_copy.get("content", [])
136
- if isinstance(content, str):
137
- anonymized_text, _ = await self._anonymize_text(content)
138
- msg_copy["content"] = anonymized_text
139
- elif isinstance(content, list):
140
- anonymized_content = []
141
- for item in content:
142
- if isinstance(item, dict) and item.get("type") == "text":
143
- text = item.get("text", "")
144
- anonymized_text, _ = await self._anonymize_text(text)
145
- item_copy = item.copy()
146
- item_copy["text"] = anonymized_text
147
- anonymized_content.append(item_copy)
148
- else:
149
- anonymized_content.append(item)
150
- msg_copy["content"] = anonymized_content
151
-
152
- return msg_copy
153
-
154
- async def _redact_image_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
155
- """Redact PII from images in computer_call_output messages."""
156
- msg_copy = message.copy()
157
- output = msg_copy.get("output", {})
158
-
159
- if isinstance(output, dict) and "image_url" in output:
160
- try:
161
- # Extract base64 image data
162
- image_url = output["image_url"]
163
- if image_url.startswith("data:image/"):
164
- # Parse data URL
165
- header, data = image_url.split(",", 1)
166
- image_data = base64.b64decode(data)
167
-
168
- # Load image with PIL
169
- image = Image.open(io.BytesIO(image_data))
170
-
171
- # Redact PII from image
172
- redacted_image = self.image_redactor.redact(image, self.image_redaction_color)
173
-
174
- # Convert back to base64
175
- buffer = io.BytesIO()
176
- redacted_image.save(buffer, format="PNG")
177
- redacted_data = base64.b64encode(buffer.getvalue()).decode()
178
-
179
- # Update image URL
180
- output_copy = output.copy()
181
- output_copy["image_url"] = f"data:image/png;base64,{redacted_data}"
182
- msg_copy["output"] = output_copy
183
-
184
- except Exception as e:
185
- logger.warning(f"Failed to redact image: {e}")
186
-
187
- return msg_copy
91
+ # TODO: Implement _anonymize_message
92
+ return message
188
93
 
189
94
  async def _deanonymize_item(self, item: Dict[str, Any]) -> Dict[str, Any]:
190
- """Deanonymize PII in tool calls and computer outputs."""
191
- item_copy = item.copy()
192
-
193
- # Handle computer_call arguments
194
- if item.get("type") == "computer_call":
195
- args = item_copy.get("args", {})
196
- if isinstance(args, dict):
197
- deanonymized_args = {}
198
- for key, value in args.items():
199
- if isinstance(value, str):
200
- deanonymized_value, _ = await self._deanonymize_text(value)
201
- deanonymized_args[key] = deanonymized_value
202
- else:
203
- deanonymized_args[key] = value
204
- item_copy["args"] = deanonymized_args
205
-
206
- return item_copy
207
-
208
- async def _anonymize_text(self, text: str) -> Tuple[str, List[RecognizerResult]]:
209
- """Anonymize PII in text and return the anonymized text and results."""
210
- if not text.strip():
211
- return text, []
212
-
213
- try:
214
- # Analyze text for PII
215
- analyzer_results = self.analyzer.analyze(
216
- text=text,
217
- entities=self.entities_to_anonymize,
218
- language="en"
219
- )
220
-
221
- if not analyzer_results:
222
- return text, []
223
-
224
- # Anonymize the text
225
- anonymized_result = self.anonymizer.anonymize(
226
- text=text,
227
- analyzer_results=analyzer_results,
228
- operators={entity_type: OperatorConfig(self.anonymization_operator)
229
- for entity_type in set(result.entity_type for result in analyzer_results)}
230
- )
231
-
232
- # Store mapping for deanonymization
233
- mapping_key = str(hash(text))
234
- self.anonymization_mappings[mapping_key] = {
235
- "original": text,
236
- "anonymized": anonymized_result.text,
237
- "results": analyzer_results
238
- }
239
-
240
- return anonymized_result.text, analyzer_results
241
-
242
- except Exception as e:
243
- logger.warning(f"Failed to anonymize text: {e}")
244
- return text, []
245
-
246
- async def _deanonymize_text(self, text: str) -> Tuple[str, bool]:
247
- """Attempt to deanonymize text using stored mappings."""
248
- try:
249
- # Look for matching anonymized text in mappings
250
- for mapping_key, mapping in self.anonymization_mappings.items():
251
- if mapping["anonymized"] == text:
252
- return mapping["original"], True
253
-
254
- # If no mapping found, return original text
255
- return text, False
256
-
257
- except Exception as e:
258
- logger.warning(f"Failed to deanonymize text: {e}")
259
- return text, False
95
+ # TODO: Implement _deanonymize_item
96
+ return item
agent/ui/gradio/app.py CHANGED
@@ -178,13 +178,20 @@ def create_computer_instance(
178
178
  """Create or get the global Computer instance."""
179
179
  global global_computer
180
180
  if global_computer is None:
181
- global_computer = Computer(
182
- verbosity=verbosity,
183
- os_type=os_type,
184
- provider_type=provider_type,
185
- name=name if name else "",
186
- api_key=api_key
187
- )
181
+ if provider_type == "localhost":
182
+ global_computer = Computer(
183
+ verbosity=verbosity,
184
+ os_type=os_type,
185
+ use_host_computer_server=True
186
+ )
187
+ else:
188
+ global_computer = Computer(
189
+ verbosity=verbosity,
190
+ os_type=os_type,
191
+ provider_type=provider_type,
192
+ name=name if name else "",
193
+ api_key=api_key
194
+ )
188
195
  return global_computer
189
196
 
190
197
 
@@ -211,7 +211,7 @@ if __name__ == "__main__":
211
211
  is_windows = platform.system().lower() == "windows"
212
212
  is_mac = platform.system().lower() == "darwin"
213
213
 
214
- providers = ["cloud"]
214
+ providers = ["cloud", "localhost"]
215
215
  if is_mac:
216
216
  providers += ["lume"]
217
217
  if is_windows:
@@ -403,6 +403,23 @@ if __name__ == "__main__":
403
403
  type="password",
404
404
  )
405
405
 
406
+ # Provider visibility update function
407
+ def update_provider_visibility(provider):
408
+ """Update visibility of container name and API key based on selected provider."""
409
+ is_localhost = provider == "localhost"
410
+ return [
411
+ gr.update(visible=not is_localhost), # container_name
412
+ gr.update(visible=not is_localhost and not has_cua_key) # cua_cloud_api_key
413
+ ]
414
+
415
+ # Connect provider change event
416
+ computer_provider.change(
417
+ fn=update_provider_visibility,
418
+ inputs=[computer_provider],
419
+ outputs=[container_name, cua_cloud_api_key],
420
+ queue=False
421
+ )
422
+
406
423
  # Connect UI update events
407
424
  for dropdown in [agent_loop, omni_model_choice, uitars_model_choice, openai_model_choice, anthropic_model_choice]:
408
425
  dropdown.change(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.4.5
3
+ Version: 0.4.7
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.11
@@ -12,7 +12,7 @@ Requires-Dist: typing-extensions>=4.12.2
12
12
  Requires-Dist: pydantic>=2.6.4
13
13
  Requires-Dist: rich>=13.7.1
14
14
  Requires-Dist: python-dotenv>=1.0.1
15
- Requires-Dist: cua-computer<0.5.0,>=0.3.0
15
+ Requires-Dist: cua-computer<0.5.0,>=0.4.0
16
16
  Requires-Dist: cua-core<0.2.0,>=0.1.8
17
17
  Requires-Dist: certifi>=2024.2.2
18
18
  Requires-Dist: litellm>=1.74.8
@@ -1,14 +1,14 @@
1
1
  agent/__init__.py,sha256=PfRgVa_aJQL9fK0D1g2r__Kdg3627EigNS31_M8Ivkk,1539
2
2
  agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
3
3
  agent/adapters/__init__.py,sha256=szM2HMten2WkcqXeRnan__-sXjpyS4eyvIW0LXSfj4U,178
4
- agent/adapters/huggingfacelocal_adapter.py,sha256=dnzzxYCvFiuDdNzsb_1uM-boWv1eS__dWMve_fAnlUc,8038
5
- agent/agent.py,sha256=Vn7ygehx19It5FarZJ2NwVwNTOtNYtD21x8LEBhlWcE,24609
4
+ agent/adapters/huggingfacelocal_adapter.py,sha256=o2IQI1wuZWDYgGPj92dkxTb3uk07XjJdvC19O2_aeak,7963
5
+ agent/agent.py,sha256=bSmc_5Jr4CTvTut8lgNwNpnk9w4sD9SACQb0GbT4zwg,24770
6
6
  agent/callbacks/__init__.py,sha256=yxxBXUqpXQ-jRi_ixJMtmQPxoNRy5Vz1PUBzNNa1Dwg,538
7
7
  agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
8
8
  agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
9
9
  agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
10
10
  agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
11
- agent/callbacks/pii_anonymization.py,sha256=UKAqNacHG3z92_6uocVzOIl8gJoqyofldCoCmB4UVIE,10268
11
+ agent/callbacks/pii_anonymization.py,sha256=NEkUTUjQBi82nqus7kT-1E4RaeQ2hQrY7YCnKndLhP8,3272
12
12
  agent/callbacks/telemetry.py,sha256=PU7pkK7W1v1xjDN-9gA30lGvn4-WhqK3BPHGW3HpTOc,7497
13
13
  agent/callbacks/trajectory_saver.py,sha256=POE8aPT-MBzfW873wr6C7iiVUHtp483KwvLPxC1S3EY,11626
14
14
  agent/cli.py,sha256=odI7cdl1psOGK-mEQzezsPzbRcLFwDbi7A2ukvYq8dk,12130
@@ -25,9 +25,9 @@ agent/types.py,sha256=GiLxIcF7s1XIh_WaY7tjdQPFpdTXb5MWVe_ZUPA0gkY,2364
25
25
  agent/ui/__init__.py,sha256=DTZpK85QXscXK2nM9HtpAhVBF13yAamUrtwrQSuV-kM,126
26
26
  agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
27
27
  agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
28
- agent/ui/gradio/app.py,sha256=X7he4jzyFqWJDP1y_M8yfZvfdy6GHNuclLn4k9iIwAw,8824
29
- agent/ui/gradio/ui_components.py,sha256=WxFE-4wvdEgj7FPLNXUrs118sXJ9vN3kLkZxtto-weo,34474
30
- cua_agent-0.4.5.dist-info/METADATA,sha256=VILpU50MidWIJQ3MP1ux45cuv8aOQL99XRbXIQcWu_g,12060
31
- cua_agent-0.4.5.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
32
- cua_agent-0.4.5.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
33
- cua_agent-0.4.5.dist-info/RECORD,,
28
+ agent/ui/gradio/app.py,sha256=9UOPwuwspLrnHGY91zdzuRqkMH4cmwOBH-f-BC0gVC4,9077
29
+ agent/ui/gradio/ui_components.py,sha256=hVMGZxAEq1LBHOqKj-RbDXJsj1j0Qw5dOV0ecWIHxmc,35397
30
+ cua_agent-0.4.7.dist-info/METADATA,sha256=wCahxHMvzKL-FkTFy4XlZZirBwl1v-RYWRcYbFcJBDk,12060
31
+ cua_agent-0.4.7.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
32
+ cua_agent-0.4.7.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
33
+ cua_agent-0.4.7.dist-info/RECORD,,