cua-agent 0.4.34__py3-none-any.whl → 0.4.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +4 -10
- agent/__main__.py +2 -1
- agent/adapters/huggingfacelocal_adapter.py +54 -61
- agent/adapters/human_adapter.py +116 -114
- agent/adapters/mlxvlm_adapter.py +110 -99
- agent/adapters/models/__init__.py +14 -6
- agent/adapters/models/generic.py +7 -4
- agent/adapters/models/internvl.py +66 -30
- agent/adapters/models/opencua.py +23 -8
- agent/adapters/models/qwen2_5_vl.py +7 -4
- agent/agent.py +184 -158
- agent/callbacks/__init__.py +4 -4
- agent/callbacks/base.py +45 -31
- agent/callbacks/budget_manager.py +22 -10
- agent/callbacks/image_retention.py +18 -13
- agent/callbacks/logging.py +55 -42
- agent/callbacks/operator_validator.py +3 -1
- agent/callbacks/pii_anonymization.py +19 -16
- agent/callbacks/telemetry.py +67 -61
- agent/callbacks/trajectory_saver.py +90 -70
- agent/cli.py +115 -110
- agent/computers/__init__.py +13 -8
- agent/computers/base.py +26 -17
- agent/computers/cua.py +27 -23
- agent/computers/custom.py +72 -69
- agent/decorators.py +23 -14
- agent/human_tool/__init__.py +2 -7
- agent/human_tool/__main__.py +6 -2
- agent/human_tool/server.py +48 -37
- agent/human_tool/ui.py +235 -185
- agent/integrations/hud/__init__.py +15 -21
- agent/integrations/hud/agent.py +101 -83
- agent/integrations/hud/proxy.py +90 -57
- agent/loops/__init__.py +25 -21
- agent/loops/anthropic.py +537 -483
- agent/loops/base.py +13 -14
- agent/loops/composed_grounded.py +135 -149
- agent/loops/gemini.py +31 -12
- agent/loops/glm45v.py +135 -133
- agent/loops/gta1.py +47 -50
- agent/loops/holo.py +4 -2
- agent/loops/internvl.py +6 -11
- agent/loops/moondream3.py +36 -12
- agent/loops/omniparser.py +212 -209
- agent/loops/openai.py +49 -50
- agent/loops/opencua.py +29 -41
- agent/loops/qwen.py +475 -0
- agent/loops/uitars.py +237 -202
- agent/proxy/examples.py +54 -50
- agent/proxy/handlers.py +27 -34
- agent/responses.py +330 -330
- agent/types.py +11 -5
- agent/ui/__init__.py +1 -1
- agent/ui/__main__.py +1 -1
- agent/ui/gradio/app.py +23 -18
- agent/ui/gradio/ui_components.py +310 -161
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/METADATA +18 -10
- cua_agent-0.4.35.dist-info/RECORD +64 -0
- cua_agent-0.4.34.dist-info/RECORD +0 -63
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.34.dist-info → cua_agent-0.4.35.dist-info}/entry_points.txt +0 -0
agent/computers/cua.py
CHANGED
|
@@ -3,24 +3,27 @@ Computer handler implementation for OpenAI computer-use-preview protocol.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import base64
|
|
6
|
-
from typing import Dict, List,
|
|
7
|
-
|
|
6
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
|
7
|
+
|
|
8
8
|
from computer import Computer
|
|
9
9
|
|
|
10
|
+
from .base import AsyncComputerHandler
|
|
11
|
+
|
|
12
|
+
|
|
10
13
|
class cuaComputerHandler(AsyncComputerHandler):
|
|
11
14
|
"""Computer handler that implements the Computer protocol using the computer interface."""
|
|
12
|
-
|
|
15
|
+
|
|
13
16
|
def __init__(self, cua_computer: Computer):
|
|
14
17
|
"""Initialize with a computer interface (from tool schema)."""
|
|
15
18
|
self.cua_computer = cua_computer
|
|
16
19
|
self.interface = None
|
|
17
20
|
|
|
18
21
|
async def _initialize(self):
|
|
19
|
-
if hasattr(self.cua_computer,
|
|
22
|
+
if hasattr(self.cua_computer, "_initialized") and not self.cua_computer._initialized:
|
|
20
23
|
await self.cua_computer.run()
|
|
21
24
|
self.interface = self.cua_computer.interface
|
|
22
|
-
|
|
23
|
-
# ==== Computer-Use-Preview Action Space ====
|
|
25
|
+
|
|
26
|
+
# ==== Computer-Use-Preview Action Space ====
|
|
24
27
|
|
|
25
28
|
async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
|
|
26
29
|
"""Get the current environment type."""
|
|
@@ -32,13 +35,13 @@ class cuaComputerHandler(AsyncComputerHandler):
|
|
|
32
35
|
assert self.interface is not None
|
|
33
36
|
screen_size = await self.interface.get_screen_size()
|
|
34
37
|
return screen_size["width"], screen_size["height"]
|
|
35
|
-
|
|
38
|
+
|
|
36
39
|
async def screenshot(self) -> str:
|
|
37
40
|
"""Take a screenshot and return as base64 string."""
|
|
38
41
|
assert self.interface is not None
|
|
39
42
|
screenshot_bytes = await self.interface.screenshot()
|
|
40
|
-
return base64.b64encode(screenshot_bytes).decode(
|
|
41
|
-
|
|
43
|
+
return base64.b64encode(screenshot_bytes).decode("utf-8")
|
|
44
|
+
|
|
42
45
|
async def click(self, x: int, y: int, button: str = "left") -> None:
|
|
43
46
|
"""Click at coordinates with specified button."""
|
|
44
47
|
assert self.interface is not None
|
|
@@ -49,34 +52,35 @@ class cuaComputerHandler(AsyncComputerHandler):
|
|
|
49
52
|
else:
|
|
50
53
|
# Default to left click for unknown buttons
|
|
51
54
|
await self.interface.left_click(x, y)
|
|
52
|
-
|
|
55
|
+
|
|
53
56
|
async def double_click(self, x: int, y: int) -> None:
|
|
54
57
|
"""Double click at coordinates."""
|
|
55
58
|
assert self.interface is not None
|
|
56
59
|
await self.interface.double_click(x, y)
|
|
57
|
-
|
|
60
|
+
|
|
58
61
|
async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
|
|
59
62
|
"""Scroll at coordinates with specified scroll amounts."""
|
|
60
63
|
assert self.interface is not None
|
|
61
64
|
await self.interface.move_cursor(x, y)
|
|
62
65
|
await self.interface.scroll(scroll_x, scroll_y)
|
|
63
|
-
|
|
66
|
+
|
|
64
67
|
async def type(self, text: str) -> None:
|
|
65
68
|
"""Type text."""
|
|
66
69
|
assert self.interface is not None
|
|
67
70
|
await self.interface.type_text(text)
|
|
68
|
-
|
|
71
|
+
|
|
69
72
|
async def wait(self, ms: int = 1000) -> None:
|
|
70
73
|
"""Wait for specified milliseconds."""
|
|
71
74
|
assert self.interface is not None
|
|
72
75
|
import asyncio
|
|
76
|
+
|
|
73
77
|
await asyncio.sleep(ms / 1000.0)
|
|
74
|
-
|
|
78
|
+
|
|
75
79
|
async def move(self, x: int, y: int) -> None:
|
|
76
80
|
"""Move cursor to coordinates."""
|
|
77
81
|
assert self.interface is not None
|
|
78
82
|
await self.interface.move_cursor(x, y)
|
|
79
|
-
|
|
83
|
+
|
|
80
84
|
async def keypress(self, keys: Union[List[str], str]) -> None:
|
|
81
85
|
"""Press key combination."""
|
|
82
86
|
assert self.interface is not None
|
|
@@ -87,38 +91,38 @@ class cuaComputerHandler(AsyncComputerHandler):
|
|
|
87
91
|
else:
|
|
88
92
|
# Handle key combinations
|
|
89
93
|
await self.interface.hotkey(*keys)
|
|
90
|
-
|
|
94
|
+
|
|
91
95
|
async def drag(self, path: List[Dict[str, int]]) -> None:
|
|
92
96
|
"""Drag along specified path."""
|
|
93
97
|
assert self.interface is not None
|
|
94
98
|
if not path:
|
|
95
99
|
return
|
|
96
|
-
|
|
100
|
+
|
|
97
101
|
# Start drag from first point
|
|
98
102
|
start = path[0]
|
|
99
103
|
await self.interface.mouse_down(start["x"], start["y"])
|
|
100
|
-
|
|
104
|
+
|
|
101
105
|
# Move through path
|
|
102
106
|
for point in path[1:]:
|
|
103
107
|
await self.interface.move_cursor(point["x"], point["y"])
|
|
104
|
-
|
|
108
|
+
|
|
105
109
|
# End drag at last point
|
|
106
110
|
end = path[-1]
|
|
107
111
|
await self.interface.mouse_up(end["x"], end["y"])
|
|
108
|
-
|
|
112
|
+
|
|
109
113
|
async def get_current_url(self) -> str:
|
|
110
114
|
"""Get current URL (for browser environments)."""
|
|
111
115
|
# This would need to be implemented based on the specific browser interface
|
|
112
116
|
# For now, return empty string
|
|
113
117
|
return ""
|
|
114
118
|
|
|
115
|
-
# ==== Anthropic Computer Action Space ====
|
|
119
|
+
# ==== Anthropic Computer Action Space ====
|
|
116
120
|
async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
|
117
121
|
"""Left mouse down at coordinates."""
|
|
118
122
|
assert self.interface is not None
|
|
119
123
|
await self.interface.mouse_down(x, y, button="left")
|
|
120
|
-
|
|
124
|
+
|
|
121
125
|
async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
|
122
126
|
"""Left mouse up at coordinates."""
|
|
123
127
|
assert self.interface is not None
|
|
124
|
-
await self.interface.mouse_up(x, y, button="left")
|
|
128
|
+
await self.interface.mouse_up(x, y, button="left")
|
agent/computers/custom.py
CHANGED
|
@@ -3,47 +3,49 @@ Custom computer handler implementation that accepts a dictionary of functions.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import base64
|
|
6
|
-
from typing import Dict, List, Any, Literal, Union, Optional, Callable
|
|
7
|
-
from PIL import Image
|
|
8
6
|
import io
|
|
7
|
+
from typing import Any, Callable, Dict, List, Literal, Optional, Union
|
|
8
|
+
|
|
9
|
+
from PIL import Image
|
|
10
|
+
|
|
9
11
|
from .base import AsyncComputerHandler
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
class CustomComputerHandler(AsyncComputerHandler):
|
|
13
15
|
"""Computer handler that implements the Computer protocol using a dictionary of custom functions."""
|
|
14
|
-
|
|
16
|
+
|
|
15
17
|
def __init__(self, functions: Dict[str, Callable]):
|
|
16
18
|
"""
|
|
17
19
|
Initialize with a dictionary of functions.
|
|
18
|
-
|
|
20
|
+
|
|
19
21
|
Args:
|
|
20
22
|
functions: Dictionary where keys are method names and values are callable functions.
|
|
21
23
|
Only 'screenshot' is required, all others are optional.
|
|
22
|
-
|
|
24
|
+
|
|
23
25
|
Raises:
|
|
24
26
|
ValueError: If required 'screenshot' function is not provided.
|
|
25
27
|
"""
|
|
26
|
-
if
|
|
28
|
+
if "screenshot" not in functions:
|
|
27
29
|
raise ValueError("'screenshot' function is required in functions dictionary")
|
|
28
|
-
|
|
30
|
+
|
|
29
31
|
self.functions = functions
|
|
30
32
|
self._last_screenshot_size: Optional[tuple[int, int]] = None
|
|
31
|
-
|
|
33
|
+
|
|
32
34
|
async def _call_function(self, func, *args, **kwargs):
|
|
33
35
|
"""
|
|
34
36
|
Call a function, handling both async and sync functions.
|
|
35
|
-
|
|
37
|
+
|
|
36
38
|
Args:
|
|
37
39
|
func: The function to call
|
|
38
40
|
*args: Positional arguments to pass to the function
|
|
39
41
|
**kwargs: Keyword arguments to pass to the function
|
|
40
|
-
|
|
42
|
+
|
|
41
43
|
Returns:
|
|
42
44
|
The result of the function call
|
|
43
45
|
"""
|
|
44
46
|
import asyncio
|
|
45
47
|
import inspect
|
|
46
|
-
|
|
48
|
+
|
|
47
49
|
if callable(func):
|
|
48
50
|
if inspect.iscoroutinefunction(func):
|
|
49
51
|
return await func(*args, **kwargs)
|
|
@@ -51,14 +53,14 @@ class CustomComputerHandler(AsyncComputerHandler):
|
|
|
51
53
|
return func(*args, **kwargs)
|
|
52
54
|
else:
|
|
53
55
|
return func
|
|
54
|
-
|
|
56
|
+
|
|
55
57
|
async def _get_value(self, attribute: str):
|
|
56
58
|
"""
|
|
57
59
|
Get value for an attribute, checking both 'get_{attribute}' and '{attribute}' keys.
|
|
58
|
-
|
|
60
|
+
|
|
59
61
|
Args:
|
|
60
62
|
attribute: The attribute name to look for
|
|
61
|
-
|
|
63
|
+
|
|
62
64
|
Returns:
|
|
63
65
|
The value from the functions dict, called if callable, returned directly if not
|
|
64
66
|
"""
|
|
@@ -66,20 +68,20 @@ class CustomComputerHandler(AsyncComputerHandler):
|
|
|
66
68
|
get_key = f"get_{attribute}"
|
|
67
69
|
if get_key in self.functions:
|
|
68
70
|
return await self._call_function(self.functions[get_key])
|
|
69
|
-
|
|
70
|
-
# Check for '{attribute}'
|
|
71
|
+
|
|
72
|
+
# Check for '{attribute}'
|
|
71
73
|
if attribute in self.functions:
|
|
72
74
|
return await self._call_function(self.functions[attribute])
|
|
73
|
-
|
|
75
|
+
|
|
74
76
|
return None
|
|
75
|
-
|
|
77
|
+
|
|
76
78
|
def _to_b64_str(self, img: Union[bytes, Image.Image, str]) -> str:
|
|
77
79
|
"""
|
|
78
80
|
Convert image to base64 string.
|
|
79
|
-
|
|
81
|
+
|
|
80
82
|
Args:
|
|
81
83
|
img: Image as bytes, PIL Image, or base64 string
|
|
82
|
-
|
|
84
|
+
|
|
83
85
|
Returns:
|
|
84
86
|
str: Base64 encoded image string
|
|
85
87
|
"""
|
|
@@ -88,43 +90,43 @@ class CustomComputerHandler(AsyncComputerHandler):
|
|
|
88
90
|
return img
|
|
89
91
|
elif isinstance(img, bytes):
|
|
90
92
|
# Raw bytes
|
|
91
|
-
return base64.b64encode(img).decode(
|
|
93
|
+
return base64.b64encode(img).decode("utf-8")
|
|
92
94
|
elif isinstance(img, Image.Image):
|
|
93
95
|
# PIL Image
|
|
94
96
|
buffer = io.BytesIO()
|
|
95
|
-
img.save(buffer, format=
|
|
96
|
-
return base64.b64encode(buffer.getvalue()).decode(
|
|
97
|
+
img.save(buffer, format="PNG")
|
|
98
|
+
return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
|
97
99
|
else:
|
|
98
100
|
raise ValueError(f"Unsupported image type: {type(img)}")
|
|
99
|
-
|
|
100
|
-
# ==== Computer-Use-Preview Action Space ====
|
|
101
|
+
|
|
102
|
+
# ==== Computer-Use-Preview Action Space ====
|
|
101
103
|
|
|
102
104
|
async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
|
|
103
105
|
"""Get the current environment type."""
|
|
104
|
-
result = await self._get_value(
|
|
106
|
+
result = await self._get_value("environment")
|
|
105
107
|
if result is None:
|
|
106
108
|
return "linux"
|
|
107
109
|
assert result in ["windows", "mac", "linux", "browser"]
|
|
108
|
-
return result
|
|
110
|
+
return result # type: ignore
|
|
109
111
|
|
|
110
112
|
async def get_dimensions(self) -> tuple[int, int]:
|
|
111
113
|
"""Get screen dimensions as (width, height)."""
|
|
112
|
-
result = await self._get_value(
|
|
114
|
+
result = await self._get_value("dimensions")
|
|
113
115
|
if result is not None:
|
|
114
|
-
return result
|
|
115
|
-
|
|
116
|
+
return result # type: ignore
|
|
117
|
+
|
|
116
118
|
# Fallback: use last screenshot size if available
|
|
117
119
|
if not self._last_screenshot_size:
|
|
118
120
|
await self.screenshot()
|
|
119
121
|
assert self._last_screenshot_size is not None, "Failed to get screenshot size"
|
|
120
|
-
|
|
122
|
+
|
|
121
123
|
return self._last_screenshot_size
|
|
122
|
-
|
|
124
|
+
|
|
123
125
|
async def screenshot(self) -> str:
|
|
124
126
|
"""Take a screenshot and return as base64 string."""
|
|
125
|
-
result = await self._call_function(self.functions[
|
|
126
|
-
b64_str = self._to_b64_str(result)
|
|
127
|
-
|
|
127
|
+
result = await self._call_function(self.functions["screenshot"])
|
|
128
|
+
b64_str = self._to_b64_str(result) # type: ignore
|
|
129
|
+
|
|
128
130
|
# Try to extract dimensions for fallback use
|
|
129
131
|
try:
|
|
130
132
|
if isinstance(result, Image.Image):
|
|
@@ -136,74 +138,75 @@ class CustomComputerHandler(AsyncComputerHandler):
|
|
|
136
138
|
except Exception:
|
|
137
139
|
# If we can't get dimensions, that's okay
|
|
138
140
|
pass
|
|
139
|
-
|
|
141
|
+
|
|
140
142
|
return b64_str
|
|
141
|
-
|
|
143
|
+
|
|
142
144
|
async def click(self, x: int, y: int, button: str = "left") -> None:
|
|
143
145
|
"""Click at coordinates with specified button."""
|
|
144
|
-
if
|
|
145
|
-
await self._call_function(self.functions[
|
|
146
|
+
if "click" in self.functions:
|
|
147
|
+
await self._call_function(self.functions["click"], x, y, button)
|
|
146
148
|
# No-op if not implemented
|
|
147
|
-
|
|
149
|
+
|
|
148
150
|
async def double_click(self, x: int, y: int) -> None:
|
|
149
151
|
"""Double click at coordinates."""
|
|
150
|
-
if
|
|
151
|
-
await self._call_function(self.functions[
|
|
152
|
+
if "double_click" in self.functions:
|
|
153
|
+
await self._call_function(self.functions["double_click"], x, y)
|
|
152
154
|
# No-op if not implemented
|
|
153
|
-
|
|
155
|
+
|
|
154
156
|
async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
|
|
155
157
|
"""Scroll at coordinates with specified scroll amounts."""
|
|
156
|
-
if
|
|
157
|
-
await self._call_function(self.functions[
|
|
158
|
+
if "scroll" in self.functions:
|
|
159
|
+
await self._call_function(self.functions["scroll"], x, y, scroll_x, scroll_y)
|
|
158
160
|
# No-op if not implemented
|
|
159
|
-
|
|
161
|
+
|
|
160
162
|
async def type(self, text: str) -> None:
|
|
161
163
|
"""Type text."""
|
|
162
|
-
if
|
|
163
|
-
await self._call_function(self.functions[
|
|
164
|
+
if "type" in self.functions:
|
|
165
|
+
await self._call_function(self.functions["type"], text)
|
|
164
166
|
# No-op if not implemented
|
|
165
|
-
|
|
167
|
+
|
|
166
168
|
async def wait(self, ms: int = 1000) -> None:
|
|
167
169
|
"""Wait for specified milliseconds."""
|
|
168
|
-
if
|
|
169
|
-
await self._call_function(self.functions[
|
|
170
|
+
if "wait" in self.functions:
|
|
171
|
+
await self._call_function(self.functions["wait"], ms)
|
|
170
172
|
else:
|
|
171
173
|
# Default implementation
|
|
172
174
|
import asyncio
|
|
175
|
+
|
|
173
176
|
await asyncio.sleep(ms / 1000.0)
|
|
174
|
-
|
|
177
|
+
|
|
175
178
|
async def move(self, x: int, y: int) -> None:
|
|
176
179
|
"""Move cursor to coordinates."""
|
|
177
|
-
if
|
|
178
|
-
await self._call_function(self.functions[
|
|
180
|
+
if "move" in self.functions:
|
|
181
|
+
await self._call_function(self.functions["move"], x, y)
|
|
179
182
|
# No-op if not implemented
|
|
180
|
-
|
|
183
|
+
|
|
181
184
|
async def keypress(self, keys: Union[List[str], str]) -> None:
|
|
182
185
|
"""Press key combination."""
|
|
183
|
-
if
|
|
184
|
-
await self._call_function(self.functions[
|
|
186
|
+
if "keypress" in self.functions:
|
|
187
|
+
await self._call_function(self.functions["keypress"], keys)
|
|
185
188
|
# No-op if not implemented
|
|
186
|
-
|
|
189
|
+
|
|
187
190
|
async def drag(self, path: List[Dict[str, int]]) -> None:
|
|
188
191
|
"""Drag along specified path."""
|
|
189
|
-
if
|
|
190
|
-
await self._call_function(self.functions[
|
|
192
|
+
if "drag" in self.functions:
|
|
193
|
+
await self._call_function(self.functions["drag"], path)
|
|
191
194
|
# No-op if not implemented
|
|
192
|
-
|
|
195
|
+
|
|
193
196
|
async def get_current_url(self) -> str:
|
|
194
197
|
"""Get current URL (for browser environments)."""
|
|
195
|
-
if
|
|
196
|
-
return await self._get_value(
|
|
198
|
+
if "get_current_url" in self.functions:
|
|
199
|
+
return await self._get_value("current_url") # type: ignore
|
|
197
200
|
return "" # Default fallback
|
|
198
|
-
|
|
201
|
+
|
|
199
202
|
async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
|
200
203
|
"""Left mouse down at coordinates."""
|
|
201
|
-
if
|
|
202
|
-
await self._call_function(self.functions[
|
|
204
|
+
if "left_mouse_down" in self.functions:
|
|
205
|
+
await self._call_function(self.functions["left_mouse_down"], x, y)
|
|
203
206
|
# No-op if not implemented
|
|
204
|
-
|
|
207
|
+
|
|
205
208
|
async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
|
206
209
|
"""Left mouse up at coordinates."""
|
|
207
|
-
if
|
|
208
|
-
await self._call_function(self.functions[
|
|
210
|
+
if "left_mouse_up" in self.functions:
|
|
211
|
+
await self._call_function(self.functions["left_mouse_up"], x, y)
|
|
209
212
|
# No-op if not implemented
|
agent/decorators.py
CHANGED
|
@@ -3,47 +3,56 @@ Decorators for agent - agent_loop decorator
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from typing import List, Optional
|
|
6
|
+
|
|
6
7
|
from .types import AgentConfigInfo
|
|
7
8
|
|
|
8
9
|
# Global registry
|
|
9
10
|
_agent_configs: List[AgentConfigInfo] = []
|
|
10
11
|
|
|
12
|
+
|
|
11
13
|
def register_agent(models: str, priority: int = 0):
|
|
12
14
|
"""
|
|
13
15
|
Decorator to register an AsyncAgentConfig class.
|
|
14
|
-
|
|
16
|
+
|
|
15
17
|
Args:
|
|
16
18
|
models: Regex pattern to match supported models
|
|
17
19
|
priority: Priority for agent selection (higher = more priority)
|
|
18
20
|
"""
|
|
21
|
+
|
|
19
22
|
def decorator(agent_class: type):
|
|
20
23
|
# Validate that the class implements AsyncAgentConfig protocol
|
|
21
|
-
if not hasattr(agent_class,
|
|
22
|
-
raise ValueError(
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
if not hasattr(agent_class,
|
|
26
|
-
raise ValueError(
|
|
27
|
-
|
|
24
|
+
if not hasattr(agent_class, "predict_step"):
|
|
25
|
+
raise ValueError(
|
|
26
|
+
f"Agent class {agent_class.__name__} must implement predict_step method"
|
|
27
|
+
)
|
|
28
|
+
if not hasattr(agent_class, "predict_click"):
|
|
29
|
+
raise ValueError(
|
|
30
|
+
f"Agent class {agent_class.__name__} must implement predict_click method"
|
|
31
|
+
)
|
|
32
|
+
if not hasattr(agent_class, "get_capabilities"):
|
|
33
|
+
raise ValueError(
|
|
34
|
+
f"Agent class {agent_class.__name__} must implement get_capabilities method"
|
|
35
|
+
)
|
|
36
|
+
|
|
28
37
|
# Register the agent config
|
|
29
38
|
config_info = AgentConfigInfo(
|
|
30
|
-
agent_class=agent_class,
|
|
31
|
-
models_regex=models,
|
|
32
|
-
priority=priority
|
|
39
|
+
agent_class=agent_class, models_regex=models, priority=priority
|
|
33
40
|
)
|
|
34
41
|
_agent_configs.append(config_info)
|
|
35
|
-
|
|
42
|
+
|
|
36
43
|
# Sort by priority (highest first)
|
|
37
44
|
_agent_configs.sort(key=lambda x: x.priority, reverse=True)
|
|
38
|
-
|
|
45
|
+
|
|
39
46
|
return agent_class
|
|
40
|
-
|
|
47
|
+
|
|
41
48
|
return decorator
|
|
42
49
|
|
|
50
|
+
|
|
43
51
|
def get_agent_configs() -> List[AgentConfigInfo]:
|
|
44
52
|
"""Get all registered agent configs"""
|
|
45
53
|
return _agent_configs.copy()
|
|
46
54
|
|
|
55
|
+
|
|
47
56
|
def find_agent_config(model: str) -> Optional[AgentConfigInfo]:
|
|
48
57
|
"""Find the best matching agent config for a model"""
|
|
49
58
|
for config_info in _agent_configs:
|
agent/human_tool/__init__.py
CHANGED
|
@@ -12,7 +12,7 @@ Components:
|
|
|
12
12
|
Usage:
|
|
13
13
|
# Run the server and UI
|
|
14
14
|
python -m agent.human_tool
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
# Or run components separately
|
|
17
17
|
python -m agent.human_tool.server # API server only
|
|
18
18
|
python -m agent.human_tool.ui # UI only
|
|
@@ -21,9 +21,4 @@ Usage:
|
|
|
21
21
|
from .server import CompletionQueue, completion_queue
|
|
22
22
|
from .ui import HumanCompletionUI, create_ui
|
|
23
23
|
|
|
24
|
-
__all__ = [
|
|
25
|
-
"CompletionQueue",
|
|
26
|
-
"completion_queue",
|
|
27
|
-
"HumanCompletionUI",
|
|
28
|
-
"create_ui"
|
|
29
|
-
]
|
|
24
|
+
__all__ = ["CompletionQueue", "completion_queue", "HumanCompletionUI", "create_ui"]
|
agent/human_tool/__main__.py
CHANGED
|
@@ -8,6 +8,7 @@ with a Gradio UI for human interaction.
|
|
|
8
8
|
|
|
9
9
|
import gradio as gr
|
|
10
10
|
from fastapi import FastAPI
|
|
11
|
+
|
|
11
12
|
from .server import app as fastapi_app
|
|
12
13
|
from .ui import create_ui
|
|
13
14
|
|
|
@@ -18,6 +19,7 @@ gradio_demo = create_ui()
|
|
|
18
19
|
CUSTOM_PATH = "/gradio"
|
|
19
20
|
app = gr.mount_gradio_app(fastapi_app, gradio_demo, path=CUSTOM_PATH)
|
|
20
21
|
|
|
22
|
+
|
|
21
23
|
# Add a redirect from root to Gradio UI
|
|
22
24
|
@fastapi_app.get("/")
|
|
23
25
|
async def redirect_to_ui():
|
|
@@ -25,14 +27,16 @@ async def redirect_to_ui():
|
|
|
25
27
|
return {
|
|
26
28
|
"message": "Human Completion Server is running",
|
|
27
29
|
"ui_url": "/gradio",
|
|
28
|
-
"api_docs": "/docs"
|
|
30
|
+
"api_docs": "/docs",
|
|
29
31
|
}
|
|
30
32
|
|
|
33
|
+
|
|
31
34
|
if __name__ == "__main__":
|
|
32
35
|
import uvicorn
|
|
36
|
+
|
|
33
37
|
print("🚀 Starting Human-in-the-Loop Completion Server...")
|
|
34
38
|
print("📊 API Server: http://localhost:8002")
|
|
35
39
|
print("🎨 Gradio UI: http://localhost:8002/gradio")
|
|
36
40
|
print("📚 API Docs: http://localhost:8002/docs")
|
|
37
|
-
|
|
41
|
+
|
|
38
42
|
uvicorn.run(app, host="0.0.0.0", port=8002)
|