cua-agent 0.3.1__py3-none-any.whl → 0.4.0b1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cua-agent might be problematic.

Files changed (111)
  1. agent/__init__.py +15 -51
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +216 -0
  5. agent/agent.py +577 -0
  6. agent/callbacks/__init__.py +17 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/trajectory_saver.py +305 -0
  13. agent/cli.py +290 -0
  14. agent/computer_handler.py +107 -0
  15. agent/decorators.py +90 -0
  16. agent/loops/__init__.py +11 -0
  17. agent/loops/anthropic.py +728 -0
  18. agent/loops/omniparser.py +339 -0
  19. agent/loops/openai.py +95 -0
  20. agent/loops/uitars.py +688 -0
  21. agent/responses.py +207 -0
  22. agent/types.py +79 -0
  23. agent/ui/__init__.py +7 -1
  24. agent/ui/gradio/__init__.py +6 -19
  25. agent/ui/gradio/app.py +80 -1299
  26. agent/ui/gradio/ui_components.py +703 -0
  27. cua_agent-0.4.0b1.dist-info/METADATA +424 -0
  28. cua_agent-0.4.0b1.dist-info/RECORD +30 -0
  29. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0b1.dist-info}/WHEEL +1 -1
  30. agent/core/__init__.py +0 -27
  31. agent/core/agent.py +0 -210
  32. agent/core/base.py +0 -217
  33. agent/core/callbacks.py +0 -200
  34. agent/core/experiment.py +0 -249
  35. agent/core/factory.py +0 -122
  36. agent/core/messages.py +0 -332
  37. agent/core/provider_config.py +0 -21
  38. agent/core/telemetry.py +0 -142
  39. agent/core/tools/__init__.py +0 -21
  40. agent/core/tools/base.py +0 -74
  41. agent/core/tools/bash.py +0 -52
  42. agent/core/tools/collection.py +0 -46
  43. agent/core/tools/computer.py +0 -113
  44. agent/core/tools/edit.py +0 -67
  45. agent/core/tools/manager.py +0 -56
  46. agent/core/tools.py +0 -32
  47. agent/core/types.py +0 -88
  48. agent/core/visualization.py +0 -197
  49. agent/providers/__init__.py +0 -4
  50. agent/providers/anthropic/__init__.py +0 -6
  51. agent/providers/anthropic/api/client.py +0 -360
  52. agent/providers/anthropic/api/logging.py +0 -150
  53. agent/providers/anthropic/api_handler.py +0 -140
  54. agent/providers/anthropic/callbacks/__init__.py +0 -5
  55. agent/providers/anthropic/callbacks/manager.py +0 -65
  56. agent/providers/anthropic/loop.py +0 -568
  57. agent/providers/anthropic/prompts.py +0 -23
  58. agent/providers/anthropic/response_handler.py +0 -226
  59. agent/providers/anthropic/tools/__init__.py +0 -33
  60. agent/providers/anthropic/tools/base.py +0 -88
  61. agent/providers/anthropic/tools/bash.py +0 -66
  62. agent/providers/anthropic/tools/collection.py +0 -34
  63. agent/providers/anthropic/tools/computer.py +0 -396
  64. agent/providers/anthropic/tools/edit.py +0 -326
  65. agent/providers/anthropic/tools/manager.py +0 -54
  66. agent/providers/anthropic/tools/run.py +0 -42
  67. agent/providers/anthropic/types.py +0 -16
  68. agent/providers/anthropic/utils.py +0 -367
  69. agent/providers/omni/__init__.py +0 -8
  70. agent/providers/omni/api_handler.py +0 -42
  71. agent/providers/omni/clients/anthropic.py +0 -103
  72. agent/providers/omni/clients/base.py +0 -35
  73. agent/providers/omni/clients/oaicompat.py +0 -195
  74. agent/providers/omni/clients/ollama.py +0 -122
  75. agent/providers/omni/clients/openai.py +0 -155
  76. agent/providers/omni/clients/utils.py +0 -25
  77. agent/providers/omni/image_utils.py +0 -34
  78. agent/providers/omni/loop.py +0 -990
  79. agent/providers/omni/parser.py +0 -307
  80. agent/providers/omni/prompts.py +0 -64
  81. agent/providers/omni/tools/__init__.py +0 -30
  82. agent/providers/omni/tools/base.py +0 -29
  83. agent/providers/omni/tools/bash.py +0 -74
  84. agent/providers/omni/tools/computer.py +0 -179
  85. agent/providers/omni/tools/manager.py +0 -61
  86. agent/providers/omni/utils.py +0 -236
  87. agent/providers/openai/__init__.py +0 -6
  88. agent/providers/openai/api_handler.py +0 -456
  89. agent/providers/openai/loop.py +0 -472
  90. agent/providers/openai/response_handler.py +0 -205
  91. agent/providers/openai/tools/__init__.py +0 -15
  92. agent/providers/openai/tools/base.py +0 -79
  93. agent/providers/openai/tools/computer.py +0 -326
  94. agent/providers/openai/tools/manager.py +0 -106
  95. agent/providers/openai/types.py +0 -36
  96. agent/providers/openai/utils.py +0 -98
  97. agent/providers/uitars/__init__.py +0 -1
  98. agent/providers/uitars/clients/base.py +0 -35
  99. agent/providers/uitars/clients/mlxvlm.py +0 -263
  100. agent/providers/uitars/clients/oaicompat.py +0 -214
  101. agent/providers/uitars/loop.py +0 -660
  102. agent/providers/uitars/prompts.py +0 -63
  103. agent/providers/uitars/tools/__init__.py +0 -1
  104. agent/providers/uitars/tools/computer.py +0 -283
  105. agent/providers/uitars/tools/manager.py +0 -60
  106. agent/providers/uitars/utils.py +0 -264
  107. agent/telemetry.py +0 -21
  108. agent/ui/__main__.py +0 -15
  109. cua_agent-0.3.1.dist-info/METADATA +0 -295
  110. cua_agent-0.3.1.dist-info/RECORD +0 -87
  111. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0b1.dist-info}/entry_points.txt +0 -0
agent/__init__.py CHANGED
@@ -1,55 +1,19 @@
-"""CUA (Computer Use) Agent for AI-driven computer interaction."""
+"""
+agent - Decorator-based Computer Use Agent with liteLLM integration
+"""
 
-import sys
-import logging
+from .decorators import agent_loop
+from .agent import ComputerAgent
+from .types import Messages, AgentResponse
 
-__version__ = "0.1.0"
+# Import loops to register them
+from . import loops
 
-# Initialize logging
-logger = logging.getLogger("agent")
+__all__ = [
+    "agent_loop",
+    "ComputerAgent",
+    "Messages",
+    "AgentResponse"
+]
 
-# Initialize telemetry when the package is imported
-try:
-    # Import from core telemetry for basic functions
-    from core.telemetry import (
-        is_telemetry_enabled,
-        flush,
-        record_event,
-    )
-
-    # Import set_dimension from our own telemetry module
-    from .core.telemetry import set_dimension
-
-    # Check if telemetry is enabled
-    if is_telemetry_enabled():
-        logger.info("Telemetry is enabled")
-
-        # Record package initialization
-        record_event(
-            "module_init",
-            {
-                "module": "agent",
-                "version": __version__,
-                "python_version": sys.version,
-            },
-        )
-
-        # Set the package version as a dimension
-        set_dimension("agent_version", __version__)
-
-        # Flush events to ensure they're sent
-        flush()
-    else:
-        logger.info("Telemetry is disabled")
-except ImportError as e:
-    # Telemetry not available
-    logger.warning(f"Telemetry not available: {e}")
-except Exception as e:
-    # Other issues with telemetry
-    logger.warning(f"Error initializing telemetry: {e}")
-
-from .core.types import LLMProvider, LLM
-from .core.factory import AgentLoop
-from .core.agent import ComputerAgent
-
-__all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
+__version__ = "0.4.0b1"
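
The public import surface changes completely in this release. A before/after sketch of the import lines (the names come straight from the two __all__ lists above; everything else about the new ComputerAgent lives in agent/agent.py, which this diff does not show):

# 0.3.1 exports (no longer importable in 0.4.0b1):
#   from agent import AgentLoop, LLMProvider, LLM, ComputerAgent

# 0.4.0b1 exports, per the new __all__ above:
from agent import agent_loop, ComputerAgent, Messages, AgentResponse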
agent/__main__.py ADDED
@@ -0,0 +1,21 @@
+"""
+Entry point for running agent CLI module.
+
+Usage:
+    python -m agent.cli <model_string>
+"""
+
+import sys
+import asyncio
+from .cli import main
+
+if __name__ == "__main__":
+    # Check if 'cli' is specified as the module
+    if len(sys.argv) > 1 and sys.argv[1] == "cli":
+        # Remove 'cli' from arguments and run CLI
+        sys.argv.pop(1)
+        asyncio.run(main())
+    else:
+        print("Usage: python -m agent.cli <model_string>")
+        print("Example: python -m agent.cli openai/computer-use-preview")
+        sys.exit(1)
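
Because this file is agent/__main__.py, the "cli" branch only fires when the package itself is executed, i.e. python -m agent cli <model_string>; python -m agent.cli (the form the printed usage suggests) runs agent/cli.py directly and never reaches this dispatch. The same entry point can also be driven programmatically; a sketch, assuming main() reads the model string from sys.argv as the argument popping above implies (agent/cli.py itself is not shown in this diff):

import sys
import asyncio

from agent.cli import main

# Mirror `python -m agent cli openai/computer-use-preview` after the
# dispatch above has popped the "cli" argument. That main() consumes
# sys.argv this way is an assumption.
sys.argv = ["agent", "openai/computer-use-preview"]
asyncio.run(main())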
agent/adapters/__init__.py ADDED
@@ -0,0 +1,9 @@
+"""
+Adapters package for agent - Custom LLM adapters for LiteLLM
+"""
+
+from .huggingfacelocal_adapter import HuggingFaceLocalAdapter
+
+__all__ = [
+    "HuggingFaceLocalAdapter",
+]
agent/adapters/huggingfacelocal_adapter.py ADDED
@@ -0,0 +1,216 @@
+import asyncio
+import warnings
+from typing import Iterator, AsyncIterator, Dict, List, Any, Optional
+import torch
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from litellm.types.utils import GenericStreamingChunk, ModelResponse
+from litellm import CustomLLM, completion, acompletion
+
+
+class HuggingFaceLocalAdapter(CustomLLM):
+    """HuggingFace Local Adapter for running vision-language models locally."""
+
+    def __init__(self, device: str = "auto", **kwargs):
+        """Initialize the adapter.
+
+        Args:
+            device: Device to load model on ("auto", "cuda", "cpu", etc.)
+            **kwargs: Additional arguments
+        """
+        super().__init__()
+        self.device = device
+        self.models = {}  # Cache for loaded models
+        self.processors = {}  # Cache for loaded processors
+
+    def _load_model_and_processor(self, model_name: str):
+        """Load model and processor if not already cached.
+
+        Args:
+            model_name: Name of the model to load
+
+        Returns:
+            Tuple of (model, processor)
+        """
+        if model_name not in self.models:
+            # Load model
+            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,
+                device_map=self.device,
+                attn_implementation="sdpa"
+            )
+
+            # Load processor
+            processor = AutoProcessor.from_pretrained(model_name)
+
+            # Cache them
+            self.models[model_name] = model
+            self.processors[model_name] = processor
+
+        return self.models[model_name], self.processors[model_name]
+
+    def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Convert OpenAI format messages to HuggingFace format.
+
+        Args:
+            messages: Messages in OpenAI format
+
+        Returns:
+            Messages in HuggingFace format
+        """
+        converted_messages = []
+
+        for message in messages:
+            converted_message = {
+                "role": message["role"],
+                "content": []
+            }
+
+            content = message.get("content", [])
+            if isinstance(content, str):
+                # Simple text content
+                converted_message["content"].append({
+                    "type": "text",
+                    "text": content
+                })
+            elif isinstance(content, list):
+                # Multi-modal content
+                for item in content:
+                    if item.get("type") == "text":
+                        converted_message["content"].append({
+                            "type": "text",
+                            "text": item.get("text", "")
+                        })
+                    elif item.get("type") == "image_url":
+                        # Convert image_url format to image format
+                        image_url = item.get("image_url", {}).get("url", "")
+                        converted_message["content"].append({
+                            "type": "image",
+                            "image": image_url
+                        })
+
+            converted_messages.append(converted_message)
+
+        return converted_messages
+
+    def _generate(self, **kwargs) -> str:
+        """Generate response using the local HuggingFace model.
+
+        Args:
+            **kwargs: Keyword arguments containing messages and model info
+
+        Returns:
+            Generated text response
+        """
+        # Extract messages and model from kwargs
+        messages = kwargs.get('messages', [])
+        model_name = kwargs.get('model', 'ByteDance-Seed/UI-TARS-1.5-7B')
+        max_new_tokens = kwargs.get('max_tokens', 128)
+
+        # Warn about ignored kwargs
+        ignored_kwargs = set(kwargs.keys()) - {'messages', 'model', 'max_tokens'}
+        if ignored_kwargs:
+            warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}")
+
+        # Load model and processor
+        model, processor = self._load_model_and_processor(model_name)
+
+        # Convert messages to HuggingFace format
+        hf_messages = self._convert_messages(messages)
+
+        # Apply chat template and tokenize
+        inputs = processor.apply_chat_template(
+            hf_messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt"
+        )
+
+        # Move inputs to the same device as model
+        if torch.cuda.is_available() and self.device != "cpu":
+            inputs = inputs.to("cuda")
+
+        # Generate response
+        with torch.no_grad():
+            generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
+
+        # Trim input tokens from output
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+
+        # Decode output
+        output_text = processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
+
+        return output_text[0] if output_text else ""
+
+    def completion(self, *args, **kwargs) -> ModelResponse:
+        """Synchronous completion method.
+
+        Returns:
+            ModelResponse with generated text
+        """
+        generated_text = self._generate(**kwargs)
+
+        return completion(
+            model=f"huggingface-local/{kwargs['model']}",
+            mock_response=generated_text,
+        )
+
+    async def acompletion(self, *args, **kwargs) -> ModelResponse:
+        """Asynchronous completion method.
+
+        Returns:
+            ModelResponse with generated text
+        """
+        # Run _generate in thread pool to avoid blocking
+        generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+        return await acompletion(
+            model=f"huggingface-local/{kwargs['model']}",
+            mock_response=generated_text,
+        )
+
+    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
+        """Synchronous streaming method.
+
+        Returns:
+            Iterator of GenericStreamingChunk
+        """
+        generated_text = self._generate(**kwargs)
+
+        generic_streaming_chunk: GenericStreamingChunk = {
+            "finish_reason": "stop",
+            "index": 0,
+            "is_finished": True,
+            "text": generated_text,
+            "tool_use": None,
+            "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+        }
+
+        yield generic_streaming_chunk
+
+    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
+        """Asynchronous streaming method.
+
+        Returns:
+            AsyncIterator of GenericStreamingChunk
+        """
+        # Run _generate in thread pool to avoid blocking
+        generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+        generic_streaming_chunk: GenericStreamingChunk = {
+            "finish_reason": "stop",
+            "index": 0,
+            "is_finished": True,
+            "text": generated_text,
+            "tool_use": None,
+            "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+        }
+
+        yield generic_streaming_chunk
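
To reach this adapter through LiteLLM's routing, it has to be registered as a custom provider. A minimal sketch, assuming the standard litellm.custom_provider_map mechanism for CustomLLM subclasses and reusing the "huggingface-local/" prefix that completion() and acompletion() above construct:

import litellm

from agent.adapters import HuggingFaceLocalAdapter

# Register the adapter under the "huggingface-local" provider prefix
# (the registration shape follows LiteLLM's custom-handler convention;
# it is not shown anywhere in this diff).
adapter = HuggingFaceLocalAdapter(device="auto")
litellm.custom_provider_map = [
    {"provider": "huggingface-local", "custom_handler": adapter}
]

# The model segment after the prefix is what _load_model_and_processor()
# passes to from_pretrained() on first use, so the first call downloads
# and caches the weights.
response = litellm.completion(
    model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
    messages=[{"role": "user", "content": "Describe this screenshot."}],
    max_tokens=128,
)
print(response.choices[0].message.content)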