cua-agent 0.3.2__py3-none-any.whl → 0.4.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (111)
  1. agent/__init__.py +15 -51
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +577 -0
  6. agent/callbacks/__init__.py +17 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/trajectory_saver.py +305 -0
  13. agent/cli.py +290 -0
  14. agent/computer_handler.py +107 -0
  15. agent/decorators.py +90 -0
  16. agent/loops/__init__.py +11 -0
  17. agent/loops/anthropic.py +728 -0
  18. agent/loops/omniparser.py +339 -0
  19. agent/loops/openai.py +95 -0
  20. agent/loops/uitars.py +688 -0
  21. agent/responses.py +207 -0
  22. agent/types.py +79 -0
  23. agent/ui/__init__.py +7 -1
  24. agent/ui/gradio/__init__.py +6 -19
  25. agent/ui/gradio/app.py +80 -1299
  26. agent/ui/gradio/ui_components.py +703 -0
  27. cua_agent-0.4.0b2.dist-info/METADATA +424 -0
  28. cua_agent-0.4.0b2.dist-info/RECORD +30 -0
  29. agent/core/__init__.py +0 -27
  30. agent/core/agent.py +0 -210
  31. agent/core/base.py +0 -217
  32. agent/core/callbacks.py +0 -200
  33. agent/core/experiment.py +0 -249
  34. agent/core/factory.py +0 -122
  35. agent/core/messages.py +0 -332
  36. agent/core/provider_config.py +0 -21
  37. agent/core/telemetry.py +0 -142
  38. agent/core/tools/__init__.py +0 -21
  39. agent/core/tools/base.py +0 -74
  40. agent/core/tools/bash.py +0 -52
  41. agent/core/tools/collection.py +0 -46
  42. agent/core/tools/computer.py +0 -113
  43. agent/core/tools/edit.py +0 -67
  44. agent/core/tools/manager.py +0 -56
  45. agent/core/tools.py +0 -32
  46. agent/core/types.py +0 -88
  47. agent/core/visualization.py +0 -197
  48. agent/providers/__init__.py +0 -4
  49. agent/providers/anthropic/__init__.py +0 -6
  50. agent/providers/anthropic/api/client.py +0 -360
  51. agent/providers/anthropic/api/logging.py +0 -150
  52. agent/providers/anthropic/api_handler.py +0 -140
  53. agent/providers/anthropic/callbacks/__init__.py +0 -5
  54. agent/providers/anthropic/callbacks/manager.py +0 -65
  55. agent/providers/anthropic/loop.py +0 -568
  56. agent/providers/anthropic/prompts.py +0 -23
  57. agent/providers/anthropic/response_handler.py +0 -226
  58. agent/providers/anthropic/tools/__init__.py +0 -33
  59. agent/providers/anthropic/tools/base.py +0 -88
  60. agent/providers/anthropic/tools/bash.py +0 -66
  61. agent/providers/anthropic/tools/collection.py +0 -34
  62. agent/providers/anthropic/tools/computer.py +0 -396
  63. agent/providers/anthropic/tools/edit.py +0 -326
  64. agent/providers/anthropic/tools/manager.py +0 -54
  65. agent/providers/anthropic/tools/run.py +0 -42
  66. agent/providers/anthropic/types.py +0 -16
  67. agent/providers/anthropic/utils.py +0 -381
  68. agent/providers/omni/__init__.py +0 -8
  69. agent/providers/omni/api_handler.py +0 -42
  70. agent/providers/omni/clients/anthropic.py +0 -103
  71. agent/providers/omni/clients/base.py +0 -35
  72. agent/providers/omni/clients/oaicompat.py +0 -195
  73. agent/providers/omni/clients/ollama.py +0 -122
  74. agent/providers/omni/clients/openai.py +0 -155
  75. agent/providers/omni/clients/utils.py +0 -25
  76. agent/providers/omni/image_utils.py +0 -34
  77. agent/providers/omni/loop.py +0 -990
  78. agent/providers/omni/parser.py +0 -307
  79. agent/providers/omni/prompts.py +0 -64
  80. agent/providers/omni/tools/__init__.py +0 -30
  81. agent/providers/omni/tools/base.py +0 -29
  82. agent/providers/omni/tools/bash.py +0 -74
  83. agent/providers/omni/tools/computer.py +0 -179
  84. agent/providers/omni/tools/manager.py +0 -61
  85. agent/providers/omni/utils.py +0 -236
  86. agent/providers/openai/__init__.py +0 -6
  87. agent/providers/openai/api_handler.py +0 -456
  88. agent/providers/openai/loop.py +0 -472
  89. agent/providers/openai/response_handler.py +0 -205
  90. agent/providers/openai/tools/__init__.py +0 -15
  91. agent/providers/openai/tools/base.py +0 -79
  92. agent/providers/openai/tools/computer.py +0 -326
  93. agent/providers/openai/tools/manager.py +0 -106
  94. agent/providers/openai/types.py +0 -36
  95. agent/providers/openai/utils.py +0 -98
  96. agent/providers/uitars/__init__.py +0 -1
  97. agent/providers/uitars/clients/base.py +0 -35
  98. agent/providers/uitars/clients/mlxvlm.py +0 -263
  99. agent/providers/uitars/clients/oaicompat.py +0 -214
  100. agent/providers/uitars/loop.py +0 -660
  101. agent/providers/uitars/prompts.py +0 -63
  102. agent/providers/uitars/tools/__init__.py +0 -1
  103. agent/providers/uitars/tools/computer.py +0 -283
  104. agent/providers/uitars/tools/manager.py +0 -60
  105. agent/providers/uitars/utils.py +0 -264
  106. agent/telemetry.py +0 -21
  107. agent/ui/__main__.py +0 -15
  108. cua_agent-0.3.2.dist-info/METADATA +0 -295
  109. cua_agent-0.3.2.dist-info/RECORD +0 -87
  110. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/WHEEL +0 -0
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/entry_points.txt +0 -0
agent/__init__.py CHANGED
@@ -1,55 +1,19 @@
1
- """CUA (Computer Use) Agent for AI-driven computer interaction."""
1
+ """
2
+ agent - Decorator-based Computer Use Agent with liteLLM integration
3
+ """
2
4
 
3
- import sys
4
- import logging
5
+ from .decorators import agent_loop
6
+ from .agent import ComputerAgent
7
+ from .types import Messages, AgentResponse
5
8
 
6
- __version__ = "0.1.0"
9
+ # Import loops to register them
10
+ from . import loops
7
11
 
8
- # Initialize logging
9
- logger = logging.getLogger("agent")
12
+ __all__ = [
13
+ "agent_loop",
14
+ "ComputerAgent",
15
+ "Messages",
16
+ "AgentResponse"
17
+ ]
10
18
 
11
- # Initialize telemetry when the package is imported
12
- try:
13
- # Import from core telemetry for basic functions
14
- from core.telemetry import (
15
- is_telemetry_enabled,
16
- flush,
17
- record_event,
18
- )
19
-
20
- # Import set_dimension from our own telemetry module
21
- from .core.telemetry import set_dimension
22
-
23
- # Check if telemetry is enabled
24
- if is_telemetry_enabled():
25
- logger.info("Telemetry is enabled")
26
-
27
- # Record package initialization
28
- record_event(
29
- "module_init",
30
- {
31
- "module": "agent",
32
- "version": __version__,
33
- "python_version": sys.version,
34
- },
35
- )
36
-
37
- # Set the package version as a dimension
38
- set_dimension("agent_version", __version__)
39
-
40
- # Flush events to ensure they're sent
41
- flush()
42
- else:
43
- logger.info("Telemetry is disabled")
44
- except ImportError as e:
45
- # Telemetry not available
46
- logger.warning(f"Telemetry not available: {e}")
47
- except Exception as e:
48
- # Other issues with telemetry
49
- logger.warning(f"Error initializing telemetry: {e}")
50
-
51
- from .core.types import LLMProvider, LLM
52
- from .core.factory import AgentLoop
53
- from .core.agent import ComputerAgent
54
-
55
- __all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
19
+ __version__ = "0.4.0b2"
agent/__main__.py ADDED
@@ -0,0 +1,21 @@
"""
Command-line entry point for the ``agent`` package.

When this module is executed with ``cli`` as its first argument
(``python -m agent cli <model_string>``), the ``cli`` token is removed from
``sys.argv`` and ``agent.cli.main`` is run under ``asyncio.run``. Any other
invocation prints a usage hint and exits with status 1.
"""

import asyncio
import sys

from .cli import main

if __name__ == "__main__":
    # Guard clause: anything other than an explicit "cli" sub-command is
    # treated as a usage error.
    cli_requested = len(sys.argv) > 1 and sys.argv[1] == "cli"
    if not cli_requested:
        # NOTE(review): the hint points at ``python -m agent.cli`` rather than
        # the ``python -m agent cli`` form handled here -- confirm which
        # invocation is meant to be advertised.
        print("Usage: python -m agent.cli <model_string>")
        print("Example: python -m agent.cli openai/computer-use-preview")
        sys.exit(1)

    # Drop the "cli" token so main() only sees its own arguments.
    del sys.argv[1]
    asyncio.run(main())
agent/adapters/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """
2
+ Adapters package for agent - Custom LLM adapters for LiteLLM
3
+ """
4
+
5
+ from .huggingfacelocal_adapter import HuggingFaceLocalAdapter
6
+
7
+ __all__ = [
8
+ "HuggingFaceLocalAdapter",
9
+ ]
agent/adapters/huggingfacelocal_adapter.py ADDED
@@ -0,0 +1,229 @@
1
+ import asyncio
2
+ import warnings
3
+ from typing import Iterator, AsyncIterator, Dict, List, Any, Optional
4
+ from litellm.types.utils import GenericStreamingChunk, ModelResponse
5
+ from litellm.llms.custom_llm import CustomLLM
6
+ from litellm import completion, acompletion
7
+
8
+ # Try to import HuggingFace dependencies
9
+ try:
10
+ import torch
11
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
12
+ HF_AVAILABLE = True
13
+ except ImportError:
14
+ HF_AVAILABLE = False
15
+
16
+
17
+ class HuggingFaceLocalAdapter(CustomLLM):
18
+ """HuggingFace Local Adapter for running vision-language models locally."""
19
+
20
+ def __init__(self, device: str = "auto", **kwargs):
21
+ """Initialize the adapter.
22
+
23
+ Args:
24
+ device: Device to load model on ("auto", "cuda", "cpu", etc.)
25
+ **kwargs: Additional arguments
26
+ """
27
+ super().__init__()
28
+ self.device = device
29
+ self.models = {} # Cache for loaded models
30
+ self.processors = {} # Cache for loaded processors
31
+
32
+ def _load_model_and_processor(self, model_name: str):
33
+ """Load model and processor if not already cached.
34
+
35
+ Args:
36
+ model_name: Name of the model to load
37
+
38
+ Returns:
39
+ Tuple of (model, processor)
40
+ """
41
+ if model_name not in self.models:
42
+ # Load model
43
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
44
+ model_name,
45
+ torch_dtype=torch.float16,
46
+ device_map=self.device,
47
+ attn_implementation="sdpa"
48
+ )
49
+
50
+ # Load processor
51
+ processor = AutoProcessor.from_pretrained(model_name)
52
+
53
+ # Cache them
54
+ self.models[model_name] = model
55
+ self.processors[model_name] = processor
56
+
57
+ return self.models[model_name], self.processors[model_name]
58
+
59
+ def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
60
+ """Convert OpenAI format messages to HuggingFace format.
61
+
62
+ Args:
63
+ messages: Messages in OpenAI format
64
+
65
+ Returns:
66
+ Messages in HuggingFace format
67
+ """
68
+ converted_messages = []
69
+
70
+ for message in messages:
71
+ converted_message = {
72
+ "role": message["role"],
73
+ "content": []
74
+ }
75
+
76
+ content = message.get("content", [])
77
+ if isinstance(content, str):
78
+ # Simple text content
79
+ converted_message["content"].append({
80
+ "type": "text",
81
+ "text": content
82
+ })
83
+ elif isinstance(content, list):
84
+ # Multi-modal content
85
+ for item in content:
86
+ if item.get("type") == "text":
87
+ converted_message["content"].append({
88
+ "type": "text",
89
+ "text": item.get("text", "")
90
+ })
91
+ elif item.get("type") == "image_url":
92
+ # Convert image_url format to image format
93
+ image_url = item.get("image_url", {}).get("url", "")
94
+ converted_message["content"].append({
95
+ "type": "image",
96
+ "image": image_url
97
+ })
98
+
99
+ converted_messages.append(converted_message)
100
+
101
+ return converted_messages
102
+
103
+ def _generate(self, **kwargs) -> str:
104
+ """Generate response using the local HuggingFace model.
105
+
106
+ Args:
107
+ **kwargs: Keyword arguments containing messages and model info
108
+
109
+ Returns:
110
+ Generated text response
111
+ """
112
+ if not HF_AVAILABLE:
113
+ raise ImportError(
114
+ "HuggingFace transformers dependencies not found. "
115
+ "Please install with: pip install \"cua-agent[uitars-hf]\""
116
+ )
117
+
118
+ # Extract messages and model from kwargs
119
+ messages = kwargs.get('messages', [])
120
+ model_name = kwargs.get('model', 'ByteDance-Seed/UI-TARS-1.5-7B')
121
+ max_new_tokens = kwargs.get('max_tokens', 128)
122
+
123
+ # Warn about ignored kwargs
124
+ ignored_kwargs = set(kwargs.keys()) - {'messages', 'model', 'max_tokens'}
125
+ if ignored_kwargs:
126
+ warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}")
127
+
128
+ # Load model and processor
129
+ model, processor = self._load_model_and_processor(model_name)
130
+
131
+ # Convert messages to HuggingFace format
132
+ hf_messages = self._convert_messages(messages)
133
+
134
+ # Apply chat template and tokenize
135
+ inputs = processor.apply_chat_template(
136
+ hf_messages,
137
+ add_generation_prompt=True,
138
+ tokenize=True,
139
+ return_dict=True,
140
+ return_tensors="pt"
141
+ )
142
+
143
+ # Move inputs to the same device as model
144
+ if torch.cuda.is_available() and self.device != "cpu":
145
+ inputs = inputs.to("cuda")
146
+
147
+ # Generate response
148
+ with torch.no_grad():
149
+ generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
150
+
151
+ # Trim input tokens from output
152
+ generated_ids_trimmed = [
153
+ out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
154
+ ]
155
+
156
+ # Decode output
157
+ output_text = processor.batch_decode(
158
+ generated_ids_trimmed,
159
+ skip_special_tokens=True,
160
+ clean_up_tokenization_spaces=False
161
+ )
162
+
163
+ return output_text[0] if output_text else ""
164
+
165
+ def completion(self, *args, **kwargs) -> ModelResponse:
166
+ """Synchronous completion method.
167
+
168
+ Returns:
169
+ ModelResponse with generated text
170
+ """
171
+ generated_text = self._generate(**kwargs)
172
+
173
+ return completion(
174
+ model=f"huggingface-local/{kwargs['model']}",
175
+ mock_response=generated_text,
176
+ )
177
+
178
+ async def acompletion(self, *args, **kwargs) -> ModelResponse:
179
+ """Asynchronous completion method.
180
+
181
+ Returns:
182
+ ModelResponse with generated text
183
+ """
184
+ # Run _generate in thread pool to avoid blocking
185
+ generated_text = await asyncio.to_thread(self._generate, **kwargs)
186
+
187
+ return await acompletion(
188
+ model=f"huggingface-local/{kwargs['model']}",
189
+ mock_response=generated_text,
190
+ )
191
+
192
+ def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
193
+ """Synchronous streaming method.
194
+
195
+ Returns:
196
+ Iterator of GenericStreamingChunk
197
+ """
198
+ generated_text = self._generate(**kwargs)
199
+
200
+ generic_streaming_chunk: GenericStreamingChunk = {
201
+ "finish_reason": "stop",
202
+ "index": 0,
203
+ "is_finished": True,
204
+ "text": generated_text,
205
+ "tool_use": None,
206
+ "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
207
+ }
208
+
209
+ yield generic_streaming_chunk
210
+
211
+ async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
212
+ """Asynchronous streaming method.
213
+
214
+ Returns:
215
+ AsyncIterator of GenericStreamingChunk
216
+ """
217
+ # Run _generate in thread pool to avoid blocking
218
+ generated_text = await asyncio.to_thread(self._generate, **kwargs)
219
+
220
+ generic_streaming_chunk: GenericStreamingChunk = {
221
+ "finish_reason": "stop",
222
+ "index": 0,
223
+ "is_finished": True,
224
+ "text": generated_text,
225
+ "tool_use": None,
226
+ "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
227
+ }
228
+
229
+ yield generic_streaming_chunk