cua-agent 0.3.2__py3-none-any.whl → 0.4.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cua-agent might be problematic.
- agent/__init__.py +15 -51
- agent/__main__.py +21 -0
- agent/adapters/__init__.py +9 -0
- agent/adapters/huggingfacelocal_adapter.py +229 -0
- agent/agent.py +577 -0
- agent/callbacks/__init__.py +17 -0
- agent/callbacks/base.py +153 -0
- agent/callbacks/budget_manager.py +44 -0
- agent/callbacks/image_retention.py +139 -0
- agent/callbacks/logging.py +247 -0
- agent/callbacks/pii_anonymization.py +259 -0
- agent/callbacks/trajectory_saver.py +305 -0
- agent/cli.py +290 -0
- agent/computer_handler.py +107 -0
- agent/decorators.py +90 -0
- agent/loops/__init__.py +11 -0
- agent/loops/anthropic.py +728 -0
- agent/loops/omniparser.py +339 -0
- agent/loops/openai.py +95 -0
- agent/loops/uitars.py +688 -0
- agent/responses.py +207 -0
- agent/types.py +79 -0
- agent/ui/__init__.py +7 -1
- agent/ui/gradio/__init__.py +6 -19
- agent/ui/gradio/app.py +80 -1299
- agent/ui/gradio/ui_components.py +703 -0
- cua_agent-0.4.0b2.dist-info/METADATA +424 -0
- cua_agent-0.4.0b2.dist-info/RECORD +30 -0
- agent/core/__init__.py +0 -27
- agent/core/agent.py +0 -210
- agent/core/base.py +0 -217
- agent/core/callbacks.py +0 -200
- agent/core/experiment.py +0 -249
- agent/core/factory.py +0 -122
- agent/core/messages.py +0 -332
- agent/core/provider_config.py +0 -21
- agent/core/telemetry.py +0 -142
- agent/core/tools/__init__.py +0 -21
- agent/core/tools/base.py +0 -74
- agent/core/tools/bash.py +0 -52
- agent/core/tools/collection.py +0 -46
- agent/core/tools/computer.py +0 -113
- agent/core/tools/edit.py +0 -67
- agent/core/tools/manager.py +0 -56
- agent/core/tools.py +0 -32
- agent/core/types.py +0 -88
- agent/core/visualization.py +0 -197
- agent/providers/__init__.py +0 -4
- agent/providers/anthropic/__init__.py +0 -6
- agent/providers/anthropic/api/client.py +0 -360
- agent/providers/anthropic/api/logging.py +0 -150
- agent/providers/anthropic/api_handler.py +0 -140
- agent/providers/anthropic/callbacks/__init__.py +0 -5
- agent/providers/anthropic/callbacks/manager.py +0 -65
- agent/providers/anthropic/loop.py +0 -568
- agent/providers/anthropic/prompts.py +0 -23
- agent/providers/anthropic/response_handler.py +0 -226
- agent/providers/anthropic/tools/__init__.py +0 -33
- agent/providers/anthropic/tools/base.py +0 -88
- agent/providers/anthropic/tools/bash.py +0 -66
- agent/providers/anthropic/tools/collection.py +0 -34
- agent/providers/anthropic/tools/computer.py +0 -396
- agent/providers/anthropic/tools/edit.py +0 -326
- agent/providers/anthropic/tools/manager.py +0 -54
- agent/providers/anthropic/tools/run.py +0 -42
- agent/providers/anthropic/types.py +0 -16
- agent/providers/anthropic/utils.py +0 -381
- agent/providers/omni/__init__.py +0 -8
- agent/providers/omni/api_handler.py +0 -42
- agent/providers/omni/clients/anthropic.py +0 -103
- agent/providers/omni/clients/base.py +0 -35
- agent/providers/omni/clients/oaicompat.py +0 -195
- agent/providers/omni/clients/ollama.py +0 -122
- agent/providers/omni/clients/openai.py +0 -155
- agent/providers/omni/clients/utils.py +0 -25
- agent/providers/omni/image_utils.py +0 -34
- agent/providers/omni/loop.py +0 -990
- agent/providers/omni/parser.py +0 -307
- agent/providers/omni/prompts.py +0 -64
- agent/providers/omni/tools/__init__.py +0 -30
- agent/providers/omni/tools/base.py +0 -29
- agent/providers/omni/tools/bash.py +0 -74
- agent/providers/omni/tools/computer.py +0 -179
- agent/providers/omni/tools/manager.py +0 -61
- agent/providers/omni/utils.py +0 -236
- agent/providers/openai/__init__.py +0 -6
- agent/providers/openai/api_handler.py +0 -456
- agent/providers/openai/loop.py +0 -472
- agent/providers/openai/response_handler.py +0 -205
- agent/providers/openai/tools/__init__.py +0 -15
- agent/providers/openai/tools/base.py +0 -79
- agent/providers/openai/tools/computer.py +0 -326
- agent/providers/openai/tools/manager.py +0 -106
- agent/providers/openai/types.py +0 -36
- agent/providers/openai/utils.py +0 -98
- agent/providers/uitars/__init__.py +0 -1
- agent/providers/uitars/clients/base.py +0 -35
- agent/providers/uitars/clients/mlxvlm.py +0 -263
- agent/providers/uitars/clients/oaicompat.py +0 -214
- agent/providers/uitars/loop.py +0 -660
- agent/providers/uitars/prompts.py +0 -63
- agent/providers/uitars/tools/__init__.py +0 -1
- agent/providers/uitars/tools/computer.py +0 -283
- agent/providers/uitars/tools/manager.py +0 -60
- agent/providers/uitars/utils.py +0 -264
- agent/telemetry.py +0 -21
- agent/ui/__main__.py +0 -15
- cua_agent-0.3.2.dist-info/METADATA +0 -295
- cua_agent-0.3.2.dist-info/RECORD +0 -87
- {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/WHEEL +0 -0
- {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/entry_points.txt +0 -0
agent/__init__.py
CHANGED

@@ -1,55 +1,19 @@
-"""
+"""
+agent - Decorator-based Computer Use Agent with liteLLM integration
+"""

-import
-import
+from .decorators import agent_loop
+from .agent import ComputerAgent
+from .types import Messages, AgentResponse

-
+# Import loops to register them
+from . import loops

-
-
+__all__ = [
+    "agent_loop",
+    "ComputerAgent",
+    "Messages",
+    "AgentResponse"
+]

-
-try:
-    # Import from core telemetry for basic functions
-    from core.telemetry import (
-        is_telemetry_enabled,
-        flush,
-        record_event,
-    )
-
-    # Import set_dimension from our own telemetry module
-    from .core.telemetry import set_dimension
-
-    # Check if telemetry is enabled
-    if is_telemetry_enabled():
-        logger.info("Telemetry is enabled")
-
-        # Record package initialization
-        record_event(
-            "module_init",
-            {
-                "module": "agent",
-                "version": __version__,
-                "python_version": sys.version,
-            },
-        )
-
-        # Set the package version as a dimension
-        set_dimension("agent_version", __version__)
-
-        # Flush events to ensure they're sent
-        flush()
-    else:
-        logger.info("Telemetry is disabled")
-except ImportError as e:
-    # Telemetry not available
-    logger.warning(f"Telemetry not available: {e}")
-except Exception as e:
-    # Other issues with telemetry
-    logger.warning(f"Error initializing telemetry: {e}")
-
-from .core.types import LLMProvider, LLM
-from .core.factory import AgentLoop
-from .core.agent import ComputerAgent
-
-__all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
+__version__ = "0.4.0b2"
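The net effect on the public API: the 0.3.2 factory surface and the import-time telemetry block are gone, replaced by a decorator-based surface and a bare `__version__` string. A before/after import sketch, using only names visible in this diff:

```python
# 0.3.2 (removed): factory-style exports, with telemetry fired on import.
# from agent import AgentLoop, LLMProvider, LLM, ComputerAgent

# 0.4.0b2: the decorator-based surface declared in the new __all__.
from agent import agent_loop, ComputerAgent, Messages, AgentResponse
```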
agent/__main__.py
ADDED

@@ -0,0 +1,21 @@
+"""
+Entry point for running agent CLI module.
+
+Usage:
+    python -m agent.cli <model_string>
+"""
+
+import sys
+import asyncio
+from .cli import main
+
+if __name__ == "__main__":
+    # Check if 'cli' is specified as the module
+    if len(sys.argv) > 1 and sys.argv[1] == "cli":
+        # Remove 'cli' from arguments and run CLI
+        sys.argv.pop(1)
+        asyncio.run(main())
+    else:
+        print("Usage: python -m agent.cli <model_string>")
+        print("Example: python -m agent.cli openai/computer-use-preview")
+        sys.exit(1)
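Note that the dispatcher only reaches `asyncio.run(main())` when the package itself is executed with an explicit `cli` argument, i.e. `python -m agent cli <model_string>`; the `python -m agent.cli <model_string>` form shown in the docstring and usage text runs `agent/cli.py` directly and never enters this file. A hypothetical sketch of driving the same entry point programmatically, assuming `agent.cli.main` is an async function (as `asyncio.run(main())` implies) that reads the model string from `sys.argv`:

```python
# Hypothetical programmatic invocation; assumes agent.cli.main is an async
# entry point that parses the model string from sys.argv.
import asyncio
import sys

from agent.cli import main

sys.argv = [sys.argv[0], "openai/computer-use-preview"]
asyncio.run(main())
```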
agent/adapters/huggingfacelocal_adapter.py
ADDED

@@ -0,0 +1,229 @@
+import asyncio
+import warnings
+from typing import Iterator, AsyncIterator, Dict, List, Any, Optional
+from litellm.types.utils import GenericStreamingChunk, ModelResponse
+from litellm.llms.custom_llm import CustomLLM
+from litellm import completion, acompletion
+
+# Try to import HuggingFace dependencies
+try:
+    import torch
+    from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+    HF_AVAILABLE = True
+except ImportError:
+    HF_AVAILABLE = False
+
+
+class HuggingFaceLocalAdapter(CustomLLM):
+    """HuggingFace Local Adapter for running vision-language models locally."""
+
+    def __init__(self, device: str = "auto", **kwargs):
+        """Initialize the adapter.
+
+        Args:
+            device: Device to load model on ("auto", "cuda", "cpu", etc.)
+            **kwargs: Additional arguments
+        """
+        super().__init__()
+        self.device = device
+        self.models = {}  # Cache for loaded models
+        self.processors = {}  # Cache for loaded processors
+
+    def _load_model_and_processor(self, model_name: str):
+        """Load model and processor if not already cached.
+
+        Args:
+            model_name: Name of the model to load
+
+        Returns:
+            Tuple of (model, processor)
+        """
+        if model_name not in self.models:
+            # Load model
+            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,
+                device_map=self.device,
+                attn_implementation="sdpa"
+            )
+
+            # Load processor
+            processor = AutoProcessor.from_pretrained(model_name)
+
+            # Cache them
+            self.models[model_name] = model
+            self.processors[model_name] = processor
+
+        return self.models[model_name], self.processors[model_name]
+
+    def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Convert OpenAI format messages to HuggingFace format.
+
+        Args:
+            messages: Messages in OpenAI format
+
+        Returns:
+            Messages in HuggingFace format
+        """
+        converted_messages = []
+
+        for message in messages:
+            converted_message = {
+                "role": message["role"],
+                "content": []
+            }
+
+            content = message.get("content", [])
+            if isinstance(content, str):
+                # Simple text content
+                converted_message["content"].append({
+                    "type": "text",
+                    "text": content
+                })
+            elif isinstance(content, list):
+                # Multi-modal content
+                for item in content:
+                    if item.get("type") == "text":
+                        converted_message["content"].append({
+                            "type": "text",
+                            "text": item.get("text", "")
+                        })
+                    elif item.get("type") == "image_url":
+                        # Convert image_url format to image format
+                        image_url = item.get("image_url", {}).get("url", "")
+                        converted_message["content"].append({
+                            "type": "image",
+                            "image": image_url
+                        })
+
+            converted_messages.append(converted_message)
+
+        return converted_messages
+
+    def _generate(self, **kwargs) -> str:
+        """Generate response using the local HuggingFace model.
+
+        Args:
+            **kwargs: Keyword arguments containing messages and model info
+
+        Returns:
+            Generated text response
+        """
+        if not HF_AVAILABLE:
+            raise ImportError(
+                "HuggingFace transformers dependencies not found. "
+                "Please install with: pip install \"cua-agent[uitars-hf]\""
+            )
+
+        # Extract messages and model from kwargs
+        messages = kwargs.get('messages', [])
+        model_name = kwargs.get('model', 'ByteDance-Seed/UI-TARS-1.5-7B')
+        max_new_tokens = kwargs.get('max_tokens', 128)
+
+        # Warn about ignored kwargs
+        ignored_kwargs = set(kwargs.keys()) - {'messages', 'model', 'max_tokens'}
+        if ignored_kwargs:
+            warnings.warn(f"Ignoring unsupported kwargs: {ignored_kwargs}")
+
+        # Load model and processor
+        model, processor = self._load_model_and_processor(model_name)
+
+        # Convert messages to HuggingFace format
+        hf_messages = self._convert_messages(messages)
+
+        # Apply chat template and tokenize
+        inputs = processor.apply_chat_template(
+            hf_messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt"
+        )
+
+        # Move inputs to the same device as model
+        if torch.cuda.is_available() and self.device != "cpu":
+            inputs = inputs.to("cuda")
+
+        # Generate response
+        with torch.no_grad():
+            generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
+
+        # Trim input tokens from output
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+
+        # Decode output
+        output_text = processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
+
+        return output_text[0] if output_text else ""
+
+    def completion(self, *args, **kwargs) -> ModelResponse:
+        """Synchronous completion method.
+
+        Returns:
+            ModelResponse with generated text
+        """
+        generated_text = self._generate(**kwargs)
+
+        return completion(
+            model=f"huggingface-local/{kwargs['model']}",
+            mock_response=generated_text,
+        )
+
+    async def acompletion(self, *args, **kwargs) -> ModelResponse:
+        """Asynchronous completion method.
+
+        Returns:
+            ModelResponse with generated text
+        """
+        # Run _generate in thread pool to avoid blocking
+        generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+        return await acompletion(
+            model=f"huggingface-local/{kwargs['model']}",
+            mock_response=generated_text,
+        )
+
+    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
+        """Synchronous streaming method.
+
+        Returns:
+            Iterator of GenericStreamingChunk
+        """
+        generated_text = self._generate(**kwargs)
+
+        generic_streaming_chunk: GenericStreamingChunk = {
+            "finish_reason": "stop",
+            "index": 0,
+            "is_finished": True,
+            "text": generated_text,
+            "tool_use": None,
+            "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+        }
+
+        yield generic_streaming_chunk
+
+    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
+        """Asynchronous streaming method.
+
+        Returns:
+            AsyncIterator of GenericStreamingChunk
+        """
+        # Run _generate in thread pool to avoid blocking
+        generated_text = await asyncio.to_thread(self._generate, **kwargs)
+
+        generic_streaming_chunk: GenericStreamingChunk = {
+            "finish_reason": "stop",
+            "index": 0,
+            "is_finished": True,
+            "text": generated_text,
+            "tool_use": None,
+            "usage": {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0},
+        }
+
+        yield generic_streaming_chunk
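Because `HuggingFaceLocalAdapter` subclasses litellm's `CustomLLM` and returns `mock_response`-backed `ModelResponse` objects, the natural wiring is litellm's custom provider registry. A minimal sketch, assuming the `huggingface-local` prefix that `completion`/`acompletion` above already use:

```python
# Sketch only: registers the adapter as a litellm custom provider.
# "huggingface-local" matches the prefix the adapter builds when it
# re-enters litellm.completion()/acompletion() with mock_response.
import litellm
from agent.adapters.huggingfacelocal_adapter import HuggingFaceLocalAdapter

adapter = HuggingFaceLocalAdapter(device="auto")
litellm.custom_provider_map = [
    {"provider": "huggingface-local", "custom_handler": adapter},
]

response = litellm.completion(
    model="huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",  # default in _generate
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,
)
print(response.choices[0].message.content)
```

Note that both streaming methods emit a single terminal chunk with zeroed usage counts, so token accounting would have to come from the non-streaming paths.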