cua-agent 0.3.2__py3-none-any.whl → 0.4.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic.

Files changed (111)
  1. agent/__init__.py +15 -51
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +216 -0
  5. agent/agent.py +577 -0
  6. agent/callbacks/__init__.py +17 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/trajectory_saver.py +305 -0
  13. agent/cli.py +290 -0
  14. agent/computer_handler.py +107 -0
  15. agent/decorators.py +90 -0
  16. agent/loops/__init__.py +11 -0
  17. agent/loops/anthropic.py +728 -0
  18. agent/loops/omniparser.py +339 -0
  19. agent/loops/openai.py +95 -0
  20. agent/loops/uitars.py +688 -0
  21. agent/responses.py +207 -0
  22. agent/types.py +79 -0
  23. agent/ui/__init__.py +7 -1
  24. agent/ui/gradio/__init__.py +6 -19
  25. agent/ui/gradio/app.py +80 -1299
  26. agent/ui/gradio/ui_components.py +703 -0
  27. cua_agent-0.4.0b1.dist-info/METADATA +424 -0
  28. cua_agent-0.4.0b1.dist-info/RECORD +30 -0
  29. agent/core/__init__.py +0 -27
  30. agent/core/agent.py +0 -210
  31. agent/core/base.py +0 -217
  32. agent/core/callbacks.py +0 -200
  33. agent/core/experiment.py +0 -249
  34. agent/core/factory.py +0 -122
  35. agent/core/messages.py +0 -332
  36. agent/core/provider_config.py +0 -21
  37. agent/core/telemetry.py +0 -142
  38. agent/core/tools/__init__.py +0 -21
  39. agent/core/tools/base.py +0 -74
  40. agent/core/tools/bash.py +0 -52
  41. agent/core/tools/collection.py +0 -46
  42. agent/core/tools/computer.py +0 -113
  43. agent/core/tools/edit.py +0 -67
  44. agent/core/tools/manager.py +0 -56
  45. agent/core/tools.py +0 -32
  46. agent/core/types.py +0 -88
  47. agent/core/visualization.py +0 -197
  48. agent/providers/__init__.py +0 -4
  49. agent/providers/anthropic/__init__.py +0 -6
  50. agent/providers/anthropic/api/client.py +0 -360
  51. agent/providers/anthropic/api/logging.py +0 -150
  52. agent/providers/anthropic/api_handler.py +0 -140
  53. agent/providers/anthropic/callbacks/__init__.py +0 -5
  54. agent/providers/anthropic/callbacks/manager.py +0 -65
  55. agent/providers/anthropic/loop.py +0 -568
  56. agent/providers/anthropic/prompts.py +0 -23
  57. agent/providers/anthropic/response_handler.py +0 -226
  58. agent/providers/anthropic/tools/__init__.py +0 -33
  59. agent/providers/anthropic/tools/base.py +0 -88
  60. agent/providers/anthropic/tools/bash.py +0 -66
  61. agent/providers/anthropic/tools/collection.py +0 -34
  62. agent/providers/anthropic/tools/computer.py +0 -396
  63. agent/providers/anthropic/tools/edit.py +0 -326
  64. agent/providers/anthropic/tools/manager.py +0 -54
  65. agent/providers/anthropic/tools/run.py +0 -42
  66. agent/providers/anthropic/types.py +0 -16
  67. agent/providers/anthropic/utils.py +0 -381
  68. agent/providers/omni/__init__.py +0 -8
  69. agent/providers/omni/api_handler.py +0 -42
  70. agent/providers/omni/clients/anthropic.py +0 -103
  71. agent/providers/omni/clients/base.py +0 -35
  72. agent/providers/omni/clients/oaicompat.py +0 -195
  73. agent/providers/omni/clients/ollama.py +0 -122
  74. agent/providers/omni/clients/openai.py +0 -155
  75. agent/providers/omni/clients/utils.py +0 -25
  76. agent/providers/omni/image_utils.py +0 -34
  77. agent/providers/omni/loop.py +0 -990
  78. agent/providers/omni/parser.py +0 -307
  79. agent/providers/omni/prompts.py +0 -64
  80. agent/providers/omni/tools/__init__.py +0 -30
  81. agent/providers/omni/tools/base.py +0 -29
  82. agent/providers/omni/tools/bash.py +0 -74
  83. agent/providers/omni/tools/computer.py +0 -179
  84. agent/providers/omni/tools/manager.py +0 -61
  85. agent/providers/omni/utils.py +0 -236
  86. agent/providers/openai/__init__.py +0 -6
  87. agent/providers/openai/api_handler.py +0 -456
  88. agent/providers/openai/loop.py +0 -472
  89. agent/providers/openai/response_handler.py +0 -205
  90. agent/providers/openai/tools/__init__.py +0 -15
  91. agent/providers/openai/tools/base.py +0 -79
  92. agent/providers/openai/tools/computer.py +0 -326
  93. agent/providers/openai/tools/manager.py +0 -106
  94. agent/providers/openai/types.py +0 -36
  95. agent/providers/openai/utils.py +0 -98
  96. agent/providers/uitars/__init__.py +0 -1
  97. agent/providers/uitars/clients/base.py +0 -35
  98. agent/providers/uitars/clients/mlxvlm.py +0 -263
  99. agent/providers/uitars/clients/oaicompat.py +0 -214
  100. agent/providers/uitars/loop.py +0 -660
  101. agent/providers/uitars/prompts.py +0 -63
  102. agent/providers/uitars/tools/__init__.py +0 -1
  103. agent/providers/uitars/tools/computer.py +0 -283
  104. agent/providers/uitars/tools/manager.py +0 -60
  105. agent/providers/uitars/utils.py +0 -264
  106. agent/telemetry.py +0 -21
  107. agent/ui/__main__.py +0 -15
  108. cua_agent-0.3.2.dist-info/METADATA +0 -295
  109. cua_agent-0.3.2.dist-info/RECORD +0 -87
  110. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b1.dist-info}/WHEEL +0 -0
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b1.dist-info}/entry_points.txt +0 -0
agent/__init__.py CHANGED
@@ -1,55 +1,19 @@
- """CUA (Computer Use) Agent for AI-driven computer interaction."""
+ """
+ agent - Decorator-based Computer Use Agent with liteLLM integration
+ """

- import sys
- import logging
+ from .decorators import agent_loop
+ from .agent import ComputerAgent
+ from .types import Messages, AgentResponse

- __version__ = "0.1.0"
+ # Import loops to register them
+ from . import loops

- # Initialize logging
- logger = logging.getLogger("agent")
+ __all__ = [
+     "agent_loop",
+     "ComputerAgent",
+     "Messages",
+     "AgentResponse"
+ ]

- # Initialize telemetry when the package is imported
- try:
-     # Import from core telemetry for basic functions
-     from core.telemetry import (
-         is_telemetry_enabled,
-         flush,
-         record_event,
-     )
-
-     # Import set_dimension from our own telemetry module
-     from .core.telemetry import set_dimension
-
-     # Check if telemetry is enabled
-     if is_telemetry_enabled():
-         logger.info("Telemetry is enabled")
-
-         # Record package initialization
-         record_event(
-             "module_init",
-             {
-                 "module": "agent",
-                 "version": __version__,
-                 "python_version": sys.version,
-             },
-         )
-
-         # Set the package version as a dimension
-         set_dimension("agent_version", __version__)
-
-         # Flush events to ensure they're sent
-         flush()
-     else:
-         logger.info("Telemetry is disabled")
- except ImportError as e:
-     # Telemetry not available
-     logger.warning(f"Telemetry not available: {e}")
- except Exception as e:
-     # Other issues with telemetry
-     logger.warning(f"Error initializing telemetry: {e}")
-
- from .core.types import LLMProvider, LLM
- from .core.factory import AgentLoop
- from .core.agent import ComputerAgent
-
- __all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
+ __version__ = "0.4.0b1"
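
The rewritten package root drops the 0.3.x provider/factory exports and import-time telemetry in favor of a flat, decorator-based API. A minimal sketch of the new import surface, assuming cua-agent 0.4.0b1 is installed (the names are taken verbatim from the __all__ above; ComputerAgent's constructor arguments live in agent/agent.py, which this diff lists but does not show inline):

    # Sketch only: names are exactly what 0.4.0b1 exports at the top level.
    import agent
    from agent import ComputerAgent, agent_loop, Messages, AgentResponse

    print(agent.__version__)  # "0.4.0b1"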
agent/__main__.py ADDED
@@ -0,0 +1,21 @@
+ """
+ Entry point for running agent CLI module.
+
+ Usage:
+     python -m agent.cli <model_string>
+ """
+
+ import sys
+ import asyncio
+ from .cli import main
+
+ if __name__ == "__main__":
+     # Check if 'cli' is specified as the module
+     if len(sys.argv) > 1 and sys.argv[1] == "cli":
+         # Remove 'cli' from arguments and run CLI
+         sys.argv.pop(1)
+         asyncio.run(main())
+     else:
+         print("Usage: python -m agent.cli <model_string>")
+         print("Example: python -m agent.cli openai/computer-use-preview")
+         sys.exit(1)
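
Note that this guard runs when the package itself is executed as a module, so the invocation it actually dispatches is "python -m agent cli <model_string>" (the "cli" token is popped from argv before main() runs), even though the printed usage text refers to "python -m agent.cli".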
agent/adapters/__init__.py ADDED
@@ -0,0 +1,9 @@
+ """
+ Adapters package for agent - Custom LLM adapters for LiteLLM
+ """
+
+ from .huggingfacelocal_adapter import HuggingFaceLocalAdapter
+
+ __all__ = [
+     "HuggingFaceLocalAdapter",
+ ]
agent/adapters/huggingfacelocal_adapter.py ADDED
@@ -0,0 +1,216 @@
+ import asyncio
+ import warnings
+ from typing import Iterator, AsyncIterator, Dict, List, Any, Optional
+ import torch
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+ from litellm.types.utils import GenericStreamingChunk, ModelResponse
+ from litellm import CustomLLM, completion, acompletion
+
+
+ class HuggingFaceLocalAdapter(CustomLLM):
+     """HuggingFace Local Adapter for running vision-language models locally."""
+
+     def __init__(self, device: str = "auto", **kwargs):
+         """Initialize the adapter.
+
+         Args:
+             device: Device to load model on ("auto", "cuda", "cpu", etc.)
+             **kwargs: Additional arguments
+         """
+         super().__init__()
+         self.device = device
+         self.models = {}  # Cache for loaded models
+         self.processors = {}  # Cache for loaded processors
+
+     def _load_model_and_processor(self, model_name: str):
+         """Load model and processor if not already cached.
+
+         Args:
+             model_name: Name of the model to load
+
+         Returns:
+             Tuple of (model, processor)
+         """
+         if model_name not in self.models:
+             # Load model
+             model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                 model_name,
+                 torch_dtype=torch.float16,
+                 device_map=self.device,
+                 attn_implementation="sdpa"
+             )
+
+             # Load processor
+             processor = AutoProcessor.from_pretrained(model_name)
+
+             # Cache them
+             self.models[model_name] = model
+             self.processors[model_name] = processor
+
+         return self.models[model_name], self.processors[model_name]
+
+     def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Convert OpenAI format messages to HuggingFace format.
+
+         Args:
+             messages: Messages in OpenAI format
+
+         Returns:
+             Messages in HuggingFace format
+         """
+         converted_messages = []
+
+         for message in messages:
+             converted_message = {
+                 "role": message["role"],
+                 "content": []
+             }
+
+             content = message.get("content", [])
+             if isinstance(content, str):
+                 # Simple text content
+                 converted_message["content"].append({
+                     "type": "text",
+                     "text": content
+                 })
+             elif isinstance(content, list):
+                 # Multi-modal content
+                 for item in content:
+                     if item.get("type") == "text":
+                         converted_message["content"].append({
+                             "type": "text",
+                             "text": item.get("text", "")
+                         })
+                     elif item.get("type") == "image_url":
+                         # Convert image_url format to image format
+                         image_url = item.get("image_url", {}).get("url", "")
+                         converted_message["content"].append({
+                             "type": "image",
+                             "image": image_url
+                         })
+
+             converted_messages.append(converted_message)
+
+         return converted_messages
+
+     def _generate(self, **kwargs) -> str:
+         """Generate response using the local HuggingFace model.
+
+         Args:
+             **kwargs: Keyword arguments containing messages and model info
+
+         Returns:
+             Generated text response
+         """
+         # Extract messages and model from kwargs
+         messages = kwargs.get('messages', [])
+         model_name = kwargs.get('model', 'ByteDance-Seed/UI-TARS-1.5-7B')
+         max_new_tokens = kwargs.get('max_tokens', 128)
+
+         # Warn about ignored kwargs
+         ignored_kwargs = set(kwargs.keys()) - {'messages', 'model', 'max_tokens'}
+         if ignored_kwargs:
+             warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}")
+
+         # Load model and processor
+         model, processor = self._load_model_and_processor(model_name)
+
+         # Convert messages to HuggingFace format
+         hf_messages = self._convert_messages(messages)
+
+         # Apply chat template and tokenize
+         inputs = processor.apply_chat_template(
+             hf_messages,
+             add_generation_prompt=True,
+             tokenize=True,
+             return_dict=True,
+             return_tensors="pt"
+         )
+
+         # Move inputs to the same device as model
+         if torch.cuda.is_available() and self.device != "cpu":
+             inputs = inputs.to("cuda")
+
+         # Generate response
+         with torch.no_grad():
+             generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
+
+         # Trim input tokens from output
+         generated_ids_trimmed = [
+             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+         ]
+
+         # Decode output
+         output_text = processor.batch_decode(
+             generated_ids_trimmed,
+             skip_special_tokens=True,
+             clean_up_tokenization_spaces=False
+         )
+
+         return output_text[0] if output_text else ""
+
+     def completion(self, *args, **kwargs) -> ModelResponse:
+         """Synchronous completion method.
+
+         Returns:
+             ModelResponse with generated text
+         """
+         generated_text = self._generate(**kwargs)
+
+         return completion(
+             model=f"huggingface-local/{kwargs['model']}",
+             mock_response=generated_text,
+         )
+
+     async def acompletion(self, *args, **kwargs) -> ModelResponse:
+         """Asynchronous completion method.
+
+         Returns:
+             ModelResponse with generated text
+         """
+         # Run _generate in thread pool to avoid blocking
+         generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+         return await acompletion(
+             model=f"huggingface-local/{kwargs['model']}",
+             mock_response=generated_text,
+         )
+
+     def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
+         """Synchronous streaming method.
+
+         Returns:
+             Iterator of GenericStreamingChunk
+         """
+         generated_text = self._generate(**kwargs)
+
+         generic_streaming_chunk: GenericStreamingChunk = {
+             "finish_reason": "stop",
+             "index": 0,
+             "is_finished": True,
+             "text": generated_text,
+             "tool_use": None,
+             "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+         }
+
+         yield generic_streaming_chunk
+
+     async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
+         """Asynchronous streaming method.
+
+         Returns:
+             AsyncIterator of GenericStreamingChunk
+         """
+         # Run _generate in thread pool to avoid blocking
+         generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+         generic_streaming_chunk: GenericStreamingChunk = {
+             "finish_reason": "stop",
+             "index": 0,
+             "is_finished": True,
+             "text": generated_text,
+             "tool_use": None,
+             "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+         }
+
+         yield generic_streaming_chunk
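
The adapter subclasses LiteLLM's CustomLLM, so it plugs into litellm's custom-provider mechanism. A minimal sketch of that wiring, using the standard litellm.custom_provider_map registration (this diff does not show where cua-agent itself registers the adapter, so the registration and call below are illustrative assumptions):

    import litellm
    from agent.adapters import HuggingFaceLocalAdapter

    # Register the adapter under the "huggingface-local" prefix that the
    # class itself uses when relaying responses through litellm.completion().
    adapter = HuggingFaceLocalAdapter(device="auto")
    litellm.custom_provider_map = [
        {"provider": "huggingface-local", "custom_handler": adapter}
    ]

    # Any completion call with that prefix now runs the local model.
    response = litellm.completion(
        model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
        messages=[{"role": "user", "content": "Describe the screenshot."}],
        max_tokens=128,
    )
    print(response.choices[0].message.content)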