cua-agent 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cua-agent might be problematic.

Files changed (112)
  1. agent/__init__.py +21 -12
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +594 -0
  6. agent/callbacks/__init__.py +19 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/telemetry.py +210 -0
  13. agent/callbacks/trajectory_saver.py +305 -0
  14. agent/cli.py +297 -0
  15. agent/computer_handler.py +107 -0
  16. agent/decorators.py +90 -0
  17. agent/loops/__init__.py +11 -0
  18. agent/loops/anthropic.py +728 -0
  19. agent/loops/omniparser.py +339 -0
  20. agent/loops/openai.py +95 -0
  21. agent/loops/uitars.py +688 -0
  22. agent/responses.py +207 -0
  23. agent/telemetry.py +135 -14
  24. agent/types.py +79 -0
  25. agent/ui/__init__.py +7 -1
  26. agent/ui/__main__.py +2 -13
  27. agent/ui/gradio/__init__.py +6 -19
  28. agent/ui/gradio/app.py +94 -1313
  29. agent/ui/gradio/ui_components.py +721 -0
  30. cua_agent-0.4.0.dist-info/METADATA +424 -0
  31. cua_agent-0.4.0.dist-info/RECORD +33 -0
  32. agent/core/__init__.py +0 -27
  33. agent/core/agent.py +0 -210
  34. agent/core/base.py +0 -217
  35. agent/core/callbacks.py +0 -200
  36. agent/core/experiment.py +0 -249
  37. agent/core/factory.py +0 -122
  38. agent/core/messages.py +0 -332
  39. agent/core/provider_config.py +0 -21
  40. agent/core/telemetry.py +0 -142
  41. agent/core/tools/__init__.py +0 -21
  42. agent/core/tools/base.py +0 -74
  43. agent/core/tools/bash.py +0 -52
  44. agent/core/tools/collection.py +0 -46
  45. agent/core/tools/computer.py +0 -113
  46. agent/core/tools/edit.py +0 -67
  47. agent/core/tools/manager.py +0 -56
  48. agent/core/tools.py +0 -32
  49. agent/core/types.py +0 -88
  50. agent/core/visualization.py +0 -197
  51. agent/providers/__init__.py +0 -4
  52. agent/providers/anthropic/__init__.py +0 -6
  53. agent/providers/anthropic/api/client.py +0 -360
  54. agent/providers/anthropic/api/logging.py +0 -150
  55. agent/providers/anthropic/api_handler.py +0 -140
  56. agent/providers/anthropic/callbacks/__init__.py +0 -5
  57. agent/providers/anthropic/callbacks/manager.py +0 -65
  58. agent/providers/anthropic/loop.py +0 -568
  59. agent/providers/anthropic/prompts.py +0 -23
  60. agent/providers/anthropic/response_handler.py +0 -226
  61. agent/providers/anthropic/tools/__init__.py +0 -33
  62. agent/providers/anthropic/tools/base.py +0 -88
  63. agent/providers/anthropic/tools/bash.py +0 -66
  64. agent/providers/anthropic/tools/collection.py +0 -34
  65. agent/providers/anthropic/tools/computer.py +0 -396
  66. agent/providers/anthropic/tools/edit.py +0 -326
  67. agent/providers/anthropic/tools/manager.py +0 -54
  68. agent/providers/anthropic/tools/run.py +0 -42
  69. agent/providers/anthropic/types.py +0 -16
  70. agent/providers/anthropic/utils.py +0 -381
  71. agent/providers/omni/__init__.py +0 -8
  72. agent/providers/omni/api_handler.py +0 -42
  73. agent/providers/omni/clients/anthropic.py +0 -103
  74. agent/providers/omni/clients/base.py +0 -35
  75. agent/providers/omni/clients/oaicompat.py +0 -195
  76. agent/providers/omni/clients/ollama.py +0 -122
  77. agent/providers/omni/clients/openai.py +0 -155
  78. agent/providers/omni/clients/utils.py +0 -25
  79. agent/providers/omni/image_utils.py +0 -34
  80. agent/providers/omni/loop.py +0 -990
  81. agent/providers/omni/parser.py +0 -307
  82. agent/providers/omni/prompts.py +0 -64
  83. agent/providers/omni/tools/__init__.py +0 -30
  84. agent/providers/omni/tools/base.py +0 -29
  85. agent/providers/omni/tools/bash.py +0 -74
  86. agent/providers/omni/tools/computer.py +0 -179
  87. agent/providers/omni/tools/manager.py +0 -61
  88. agent/providers/omni/utils.py +0 -236
  89. agent/providers/openai/__init__.py +0 -6
  90. agent/providers/openai/api_handler.py +0 -456
  91. agent/providers/openai/loop.py +0 -472
  92. agent/providers/openai/response_handler.py +0 -205
  93. agent/providers/openai/tools/__init__.py +0 -15
  94. agent/providers/openai/tools/base.py +0 -79
  95. agent/providers/openai/tools/computer.py +0 -326
  96. agent/providers/openai/tools/manager.py +0 -106
  97. agent/providers/openai/types.py +0 -36
  98. agent/providers/openai/utils.py +0 -98
  99. agent/providers/uitars/__init__.py +0 -1
  100. agent/providers/uitars/clients/base.py +0 -35
  101. agent/providers/uitars/clients/mlxvlm.py +0 -263
  102. agent/providers/uitars/clients/oaicompat.py +0 -214
  103. agent/providers/uitars/loop.py +0 -660
  104. agent/providers/uitars/prompts.py +0 -63
  105. agent/providers/uitars/tools/__init__.py +0 -1
  106. agent/providers/uitars/tools/computer.py +0 -283
  107. agent/providers/uitars/tools/manager.py +0 -60
  108. agent/providers/uitars/utils.py +0 -264
  109. cua_agent-0.3.2.dist-info/METADATA +0 -295
  110. cua_agent-0.3.2.dist-info/RECORD +0 -87
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +0 -0
  112. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
agent/__init__.py CHANGED
@@ -1,12 +1,27 @@
-"""CUA (Computer Use) Agent for AI-driven computer interaction."""
+"""
+agent - Decorator-based Computer Use Agent with liteLLM integration
+"""
 
-import sys
 import logging
+import sys
+
+from .decorators import agent_loop
+from .agent import ComputerAgent
+from .types import Messages, AgentResponse
+
+# Import loops to register them
+from . import loops
 
-__version__ = "0.1.0"
+__all__ = [
+    "agent_loop",
+    "ComputerAgent",
+    "Messages",
+    "AgentResponse"
+]
 
-# Initialize logging
-logger = logging.getLogger("agent")
+__version__ = "0.4.0"
+
+logger = logging.getLogger(__name__)
 
 # Initialize telemetry when the package is imported
 try:
@@ -18,7 +33,7 @@ try:
     )
 
     # Import set_dimension from our own telemetry module
-    from .core.telemetry import set_dimension
+    from .telemetry import set_dimension
 
     # Check if telemetry is enabled
     if is_telemetry_enabled():
@@ -47,9 +62,3 @@ except ImportError as e:
 except Exception as e:
     # Other issues with telemetry
     logger.warning(f"Error initializing telemetry: {e}")
-
-from .core.types import LLMProvider, LLM
-from .core.factory import AgentLoop
-from .core.agent import ComputerAgent
-
-__all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
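For orientation, a minimal sketch of the import-surface change implied by the two `__all__` declarations in this hunk; the 0.4.0 constructor signature for `ComputerAgent` does not appear here, so only the imports are illustrated (the two lines are alternatives, one per installed version):

    # 0.3.2 exported loop/provider factories re-exported from agent.core:
    from agent import AgentLoop, LLMProvider, LLM, ComputerAgent

    # 0.4.0 replaces them with a decorator-based API and liteLLM-style types:
    from agent import agent_loop, ComputerAgent, Messages, AgentResponse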
agent/__main__.py ADDED
@@ -0,0 +1,21 @@
+"""
+Entry point for running agent CLI module.
+
+Usage:
+    python -m agent.cli <model_string>
+"""
+
+import sys
+import asyncio
+from .cli import main
+
+if __name__ == "__main__":
+    # Check if 'cli' is specified as the module
+    if len(sys.argv) > 1 and sys.argv[1] == "cli":
+        # Remove 'cli' from arguments and run CLI
+        sys.argv.pop(1)
+        asyncio.run(main())
+    else:
+        print("Usage: python -m agent.cli <model_string>")
+        print("Example: python -m agent.cli openai/computer-use-preview")
+        sys.exit(1)
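As written, this dispatcher runs the CLI only when invoked as `python -m agent cli <model_string>`: it pops the literal "cli" token from the arguments before calling `main()`. A hypothetical direct invocation, assuming `agent.cli.main()` reads the remaining arguments from `sys.argv`:

    import asyncio
    import sys

    from agent.cli import main

    # Hypothetical: mirror `python -m agent cli openai/computer-use-preview`
    # after the dispatcher has already removed the "cli" token.
    sys.argv = ["agent", "openai/computer-use-preview"]
    asyncio.run(main())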
agent/adapters/__init__.py ADDED
@@ -0,0 +1,9 @@
+"""
+Adapters package for agent - Custom LLM adapters for LiteLLM
+"""
+
+from .huggingfacelocal_adapter import HuggingFaceLocalAdapter
+
+__all__ = [
+    "HuggingFaceLocalAdapter",
+]
agent/adapters/huggingfacelocal_adapter.py ADDED
@@ -0,0 +1,229 @@
+import asyncio
+import warnings
+from typing import Iterator, AsyncIterator, Dict, List, Any, Optional
+from litellm.types.utils import GenericStreamingChunk, ModelResponse
+from litellm.llms.custom_llm import CustomLLM
+from litellm import completion, acompletion
+
+# Try to import HuggingFace dependencies
+try:
+    import torch
+    from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+    HF_AVAILABLE = True
+except ImportError:
+    HF_AVAILABLE = False
+
+
+class HuggingFaceLocalAdapter(CustomLLM):
+    """HuggingFace Local Adapter for running vision-language models locally."""
+
+    def __init__(self, device: str = "auto", **kwargs):
+        """Initialize the adapter.
+
+        Args:
+            device: Device to load model on ("auto", "cuda", "cpu", etc.)
+            **kwargs: Additional arguments
+        """
+        super().__init__()
+        self.device = device
+        self.models = {}  # Cache for loaded models
+        self.processors = {}  # Cache for loaded processors
+
+    def _load_model_and_processor(self, model_name: str):
+        """Load model and processor if not already cached.
+
+        Args:
+            model_name: Name of the model to load
+
+        Returns:
+            Tuple of (model, processor)
+        """
+        if model_name not in self.models:
+            # Load model
+            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,
+                device_map=self.device,
+                attn_implementation="sdpa"
+            )
+
+            # Load processor
+            processor = AutoProcessor.from_pretrained(model_name)
+
+            # Cache them
+            self.models[model_name] = model
+            self.processors[model_name] = processor
+
+        return self.models[model_name], self.processors[model_name]
+
+    def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Convert OpenAI format messages to HuggingFace format.
+
+        Args:
+            messages: Messages in OpenAI format
+
+        Returns:
+            Messages in HuggingFace format
+        """
+        converted_messages = []
+
+        for message in messages:
+            converted_message = {
+                "role": message["role"],
+                "content": []
+            }
+
+            content = message.get("content", [])
+            if isinstance(content, str):
+                # Simple text content
+                converted_message["content"].append({
+                    "type": "text",
+                    "text": content
+                })
+            elif isinstance(content, list):
+                # Multi-modal content
+                for item in content:
+                    if item.get("type") == "text":
+                        converted_message["content"].append({
+                            "type": "text",
+                            "text": item.get("text", "")
+                        })
+                    elif item.get("type") == "image_url":
+                        # Convert image_url format to image format
+                        image_url = item.get("image_url", {}).get("url", "")
+                        converted_message["content"].append({
+                            "type": "image",
+                            "image": image_url
+                        })
+
+            converted_messages.append(converted_message)
+
+        return converted_messages
+
+    def _generate(self, **kwargs) -> str:
+        """Generate response using the local HuggingFace model.
+
+        Args:
+            **kwargs: Keyword arguments containing messages and model info
+
+        Returns:
+            Generated text response
+        """
+        if not HF_AVAILABLE:
+            raise ImportError(
+                "HuggingFace transformers dependencies not found. "
+                "Please install with: pip install \"cua-agent[uitars-hf]\""
+            )
+
+        # Extract messages and model from kwargs
+        messages = kwargs.get('messages', [])
+        model_name = kwargs.get('model', 'ByteDance-Seed/UI-TARS-1.5-7B')
+        max_new_tokens = kwargs.get('max_tokens', 128)
+
+        # Warn about ignored kwargs
+        ignored_kwargs = set(kwargs.keys()) - {'messages', 'model', 'max_tokens'}
+        if ignored_kwargs:
+            warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}")
+
+        # Load model and processor
+        model, processor = self._load_model_and_processor(model_name)
+
+        # Convert messages to HuggingFace format
+        hf_messages = self._convert_messages(messages)
+
+        # Apply chat template and tokenize
+        inputs = processor.apply_chat_template(
+            hf_messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt"
+        )
+
+        # Move inputs to the same device as model
+        if torch.cuda.is_available() and self.device != "cpu":
+            inputs = inputs.to("cuda")
+
+        # Generate response
+        with torch.no_grad():
+            generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
+
+        # Trim input tokens from output
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+
+        # Decode output
+        output_text = processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
+
+        return output_text[0] if output_text else ""
+
+    def completion(self, *args, **kwargs) -> ModelResponse:
+        """Synchronous completion method.
+
+        Returns:
+            ModelResponse with generated text
+        """
+        generated_text = self._generate(**kwargs)
+
+        return completion(
+            model=f"huggingface-local/{kwargs['model']}",
+            mock_response=generated_text,
+        )
+
+    async def acompletion(self, *args, **kwargs) -> ModelResponse:
+        """Asynchronous completion method.
+
+        Returns:
+            ModelResponse with generated text
+        """
+        # Run _generate in thread pool to avoid blocking
+        generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+        return await acompletion(
+            model=f"huggingface-local/{kwargs['model']}",
+            mock_response=generated_text,
+        )
+
+    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
+        """Synchronous streaming method.
+
+        Returns:
+            Iterator of GenericStreamingChunk
+        """
+        generated_text = self._generate(**kwargs)
+
+        generic_streaming_chunk: GenericStreamingChunk = {
+            "finish_reason": "stop",
+            "index": 0,
+            "is_finished": True,
+            "text": generated_text,
+            "tool_use": None,
+            "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+        }
+
+        yield generic_streaming_chunk
+
+    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
+        """Asynchronous streaming method.
+
+        Returns:
+            AsyncIterator of GenericStreamingChunk
+        """
+        # Run _generate in thread pool to avoid blocking
+        generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+        generic_streaming_chunk: GenericStreamingChunk = {
+            "finish_reason": "stop",
+            "index": 0,
+            "is_finished": True,
+            "text": generated_text,
+            "tool_use": None,
+            "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+        }
+
+        yield generic_streaming_chunk
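Since `completion()` and `acompletion()` return their output through LiteLLM's `mock_response` path under a `huggingface-local/` model prefix, the adapter is evidently meant to be wired in as a custom LiteLLM provider. A minimal usage sketch, assuming registration through LiteLLM's documented `custom_provider_map` hook; the registration itself is not part of this file, and the "huggingface-local" provider key is inferred from the `completion()` calls above:

    import litellm

    from agent.adapters import HuggingFaceLocalAdapter

    # Assumed wiring: register the adapter under the "huggingface-local" key.
    adapter = HuggingFaceLocalAdapter(device="auto")
    litellm.custom_provider_map = [
        {"provider": "huggingface-local", "custom_handler": adapter},
    ]

    # Route a request through the locally loaded model via LiteLLM.
    response = litellm.completion(
        model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
        messages=[{"role": "user", "content": "Describe the screenshot."}],
        max_tokens=128,
    )
    print(response.choices[0].message.content)

Note that both streaming methods emit the entire completion as a single terminal chunk rather than token-by-token deltas, and the usage counters are hard-coded to zero.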