cua-agent 0.4.22__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (79)
  1. agent/__init__.py +4 -10
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +4 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +110 -99
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +337 -185
  15. agent/callbacks/__init__.py +9 -4
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +35 -33
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +99 -61
  25. agent/callbacks/trajectory_saver.py +95 -69
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +38 -99
  37. agent/integrations/hud/agent.py +369 -0
  38. agent/integrations/hud/proxy.py +166 -52
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +579 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +136 -150
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +50 -51
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +247 -206
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +61 -57
  64. agent/proxy/handlers.py +46 -39
  65. agent/responses.py +447 -347
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +11 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. cua_agent-0.4.22.dist-info/METADATA +0 -436
  78. cua_agent-0.4.22.dist-info/RECORD +0 -51
  79. {cua_agent-0.4.22.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
agent/computers/base.py CHANGED
@@ -2,69 +2,82 @@
2
2
  Base computer interface protocol for agent interactions.
3
3
  """
4
4
 
5
- from typing import Protocol, Literal, List, Dict, Any, Union, Optional, runtime_checkable
5
+ from typing import (
6
+ Any,
7
+ Dict,
8
+ List,
9
+ Literal,
10
+ Optional,
11
+ Protocol,
12
+ Union,
13
+ runtime_checkable,
14
+ )
6
15
 
7
16
 
8
17
  @runtime_checkable
9
18
  class AsyncComputerHandler(Protocol):
10
19
  """Protocol defining the interface for computer interactions."""
11
-
12
- # ==== Computer-Use-Preview Action Space ====
20
+
21
+ # ==== Computer-Use-Preview Action Space ====
13
22
 
14
23
  async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
15
24
  """Get the current environment type."""
16
25
  ...
17
-
26
+
18
27
  async def get_dimensions(self) -> tuple[int, int]:
19
28
  """Get screen dimensions as (width, height)."""
20
29
  ...
21
-
22
- async def screenshot(self) -> str:
23
- """Take a screenshot and return as base64 string."""
30
+
31
+ async def screenshot(self, text: Optional[str] = None) -> str:
32
+ """Take a screenshot and return as base64 string.
33
+
34
+ Args:
35
+ text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
36
+ """
24
37
  ...
25
-
38
+
26
39
  async def click(self, x: int, y: int, button: str = "left") -> None:
27
40
  """Click at coordinates with specified button."""
28
41
  ...
29
-
42
+
30
43
  async def double_click(self, x: int, y: int) -> None:
31
44
  """Double click at coordinates."""
32
45
  ...
33
-
46
+
34
47
  async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
35
48
  """Scroll at coordinates with specified scroll amounts."""
36
49
  ...
37
-
50
+
38
51
  async def type(self, text: str) -> None:
39
52
  """Type text."""
40
53
  ...
41
-
54
+
42
55
  async def wait(self, ms: int = 1000) -> None:
43
56
  """Wait for specified milliseconds."""
44
57
  ...
45
-
58
+
46
59
  async def move(self, x: int, y: int) -> None:
47
60
  """Move cursor to coordinates."""
48
61
  ...
49
-
62
+
50
63
  async def keypress(self, keys: Union[List[str], str]) -> None:
51
64
  """Press key combination."""
52
65
  ...
53
-
66
+
54
67
  async def drag(self, path: List[Dict[str, int]]) -> None:
55
68
  """Drag along specified path."""
56
69
  ...
57
-
70
+
58
71
  async def get_current_url(self) -> str:
59
72
  """Get current URL (for browser environments)."""
60
73
  ...
61
-
62
- # ==== Anthropic Action Space ====
74
+
75
+ # ==== Anthropic Action Space ====
63
76
 
64
77
  async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
65
78
  """Left mouse down at coordinates."""
66
79
  ...
67
-
80
+
68
81
  async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
69
82
  """Left mouse up at coordinates."""
70
83
  ...
agent/computers/cua.py CHANGED
@@ -3,24 +3,27 @@ Computer handler implementation for OpenAI computer-use-preview protocol.
3
3
  """
4
4
 
5
5
  import base64
6
- from typing import Dict, List, Any, Literal, Union, Optional
7
- from .base import AsyncComputerHandler
6
+ from typing import Any, Dict, List, Literal, Optional, Union
7
+
8
8
  from computer import Computer
9
9
 
10
+ from .base import AsyncComputerHandler
11
+
12
+
10
13
  class cuaComputerHandler(AsyncComputerHandler):
11
14
  """Computer handler that implements the Computer protocol using the computer interface."""
12
-
15
+
13
16
  def __init__(self, cua_computer: Computer):
14
17
  """Initialize with a computer interface (from tool schema)."""
15
18
  self.cua_computer = cua_computer
16
19
  self.interface = None
17
20
 
18
21
  async def _initialize(self):
19
- if hasattr(self.cua_computer, '_initialized') and not self.cua_computer._initialized:
22
+ if hasattr(self.cua_computer, "_initialized") and not self.cua_computer._initialized:
20
23
  await self.cua_computer.run()
21
24
  self.interface = self.cua_computer.interface
22
-
23
- # ==== Computer-Use-Preview Action Space ====
25
+
26
+ # ==== Computer-Use-Preview Action Space ====
24
27
 
25
28
  async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
26
29
  """Get the current environment type."""
@@ -32,13 +35,17 @@ class cuaComputerHandler(AsyncComputerHandler):
32
35
  assert self.interface is not None
33
36
  screen_size = await self.interface.get_screen_size()
34
37
  return screen_size["width"], screen_size["height"]
35
-
36
- async def screenshot(self) -> str:
37
- """Take a screenshot and return as base64 string."""
38
+
39
+ async def screenshot(self, text: Optional[str] = None) -> str:
40
+ """Take a screenshot and return as base64 string.
41
+
42
+ Args:
43
+ text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
44
+ """
38
45
  assert self.interface is not None
39
46
  screenshot_bytes = await self.interface.screenshot()
40
- return base64.b64encode(screenshot_bytes).decode('utf-8')
41
-
47
+ return base64.b64encode(screenshot_bytes).decode("utf-8")
48
+
42
49
  async def click(self, x: int, y: int, button: str = "left") -> None:
43
50
  """Click at coordinates with specified button."""
44
51
  assert self.interface is not None
@@ -49,34 +56,35 @@ class cuaComputerHandler(AsyncComputerHandler):
49
56
  else:
50
57
  # Default to left click for unknown buttons
51
58
  await self.interface.left_click(x, y)
52
-
59
+
53
60
  async def double_click(self, x: int, y: int) -> None:
54
61
  """Double click at coordinates."""
55
62
  assert self.interface is not None
56
63
  await self.interface.double_click(x, y)
57
-
64
+
58
65
  async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
59
66
  """Scroll at coordinates with specified scroll amounts."""
60
67
  assert self.interface is not None
61
68
  await self.interface.move_cursor(x, y)
62
69
  await self.interface.scroll(scroll_x, scroll_y)
63
-
70
+
64
71
  async def type(self, text: str) -> None:
65
72
  """Type text."""
66
73
  assert self.interface is not None
67
74
  await self.interface.type_text(text)
68
-
75
+
69
76
  async def wait(self, ms: int = 1000) -> None:
70
77
  """Wait for specified milliseconds."""
71
78
  assert self.interface is not None
72
79
  import asyncio
80
+
73
81
  await asyncio.sleep(ms / 1000.0)
74
-
82
+
75
83
  async def move(self, x: int, y: int) -> None:
76
84
  """Move cursor to coordinates."""
77
85
  assert self.interface is not None
78
86
  await self.interface.move_cursor(x, y)
79
-
87
+
80
88
  async def keypress(self, keys: Union[List[str], str]) -> None:
81
89
  """Press key combination."""
82
90
  assert self.interface is not None
@@ -87,38 +95,57 @@ class cuaComputerHandler(AsyncComputerHandler):
87
95
  else:
88
96
  # Handle key combinations
89
97
  await self.interface.hotkey(*keys)
90
-
98
+
91
99
  async def drag(self, path: List[Dict[str, int]]) -> None:
92
100
  """Drag along specified path."""
93
101
  assert self.interface is not None
94
102
  if not path:
95
103
  return
96
-
104
+
97
105
  # Start drag from first point
98
106
  start = path[0]
99
107
  await self.interface.mouse_down(start["x"], start["y"])
100
-
108
+
101
109
  # Move through path
102
110
  for point in path[1:]:
103
111
  await self.interface.move_cursor(point["x"], point["y"])
104
-
112
+
105
113
  # End drag at last point
106
114
  end = path[-1]
107
115
  await self.interface.mouse_up(end["x"], end["y"])
108
-
116
+
109
117
  async def get_current_url(self) -> str:
110
118
  """Get current URL (for browser environments)."""
111
119
  # This would need to be implemented based on the specific browser interface
112
120
  # For now, return empty string
113
121
  return ""
114
122
 
115
- # ==== Anthropic Computer Action Space ====
123
+ # ==== Anthropic Computer Action Space ====
116
124
  async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
117
125
  """Left mouse down at coordinates."""
118
126
  assert self.interface is not None
119
127
  await self.interface.mouse_down(x, y, button="left")
120
-
128
+
121
129
  async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
122
130
  """Left mouse up at coordinates."""
123
131
  assert self.interface is not None
124
- await self.interface.mouse_up(x, y, button="left")
132
+ await self.interface.mouse_up(x, y, button="left")
133
+
134
+ # ==== Browser Control Methods (via Playwright) ====
135
+ async def playwright_exec(
136
+ self, command: str, params: Optional[Dict[str, Any]] = None
137
+ ) -> Dict[str, Any]:
138
+ """Execute a Playwright browser command.
139
+
140
+ Supports: visit_url, click, type, scroll, web_search, screenshot,
141
+ get_current_url, go_back, go_forward
142
+
143
+ Args:
144
+ command: The browser command to execute
145
+ params: Command parameters
146
+
147
+ Returns:
148
+ Dict containing the command result
149
+ """
150
+ assert self.interface is not None
151
+ return await self.interface.playwright_exec(command, params or {})
agent/computers/custom.py CHANGED
@@ -3,47 +3,49 @@ Custom computer handler implementation that accepts a dictionary of functions.
3
3
  """
4
4
 
5
5
  import base64
6
- from typing import Dict, List, Any, Literal, Union, Optional, Callable
7
- from PIL import Image
8
6
  import io
7
+ from typing import Any, Callable, Dict, List, Literal, Optional, Union
8
+
9
+ from PIL import Image
10
+
9
11
  from .base import AsyncComputerHandler
10
12
 
11
13
 
12
14
  class CustomComputerHandler(AsyncComputerHandler):
13
15
  """Computer handler that implements the Computer protocol using a dictionary of custom functions."""
14
-
16
+
15
17
  def __init__(self, functions: Dict[str, Callable]):
16
18
  """
17
19
  Initialize with a dictionary of functions.
18
-
20
+
19
21
  Args:
20
22
  functions: Dictionary where keys are method names and values are callable functions.
21
23
  Only 'screenshot' is required, all others are optional.
22
-
24
+
23
25
  Raises:
24
26
  ValueError: If required 'screenshot' function is not provided.
25
27
  """
26
- if 'screenshot' not in functions:
28
+ if "screenshot" not in functions:
27
29
  raise ValueError("'screenshot' function is required in functions dictionary")
28
-
30
+
29
31
  self.functions = functions
30
32
  self._last_screenshot_size: Optional[tuple[int, int]] = None
31
-
33
+
32
34
  async def _call_function(self, func, *args, **kwargs):
33
35
  """
34
36
  Call a function, handling both async and sync functions.
35
-
37
+
36
38
  Args:
37
39
  func: The function to call
38
40
  *args: Positional arguments to pass to the function
39
41
  **kwargs: Keyword arguments to pass to the function
40
-
42
+
41
43
  Returns:
42
44
  The result of the function call
43
45
  """
44
46
  import asyncio
45
47
  import inspect
46
-
48
+
47
49
  if callable(func):
48
50
  if inspect.iscoroutinefunction(func):
49
51
  return await func(*args, **kwargs)
@@ -51,14 +53,14 @@ class CustomComputerHandler(AsyncComputerHandler):
51
53
  return func(*args, **kwargs)
52
54
  else:
53
55
  return func
54
-
56
+
55
57
  async def _get_value(self, attribute: str):
56
58
  """
57
59
  Get value for an attribute, checking both 'get_{attribute}' and '{attribute}' keys.
58
-
60
+
59
61
  Args:
60
62
  attribute: The attribute name to look for
61
-
63
+
62
64
  Returns:
63
65
  The value from the functions dict, called if callable, returned directly if not
64
66
  """
@@ -66,20 +68,20 @@ class CustomComputerHandler(AsyncComputerHandler):
66
68
  get_key = f"get_{attribute}"
67
69
  if get_key in self.functions:
68
70
  return await self._call_function(self.functions[get_key])
69
-
70
- # Check for '{attribute}'
71
+
72
+ # Check for '{attribute}'
71
73
  if attribute in self.functions:
72
74
  return await self._call_function(self.functions[attribute])
73
-
75
+
74
76
  return None
75
-
77
+
76
78
  def _to_b64_str(self, img: Union[bytes, Image.Image, str]) -> str:
77
79
  """
78
80
  Convert image to base64 string.
79
-
81
+
80
82
  Args:
81
83
  img: Image as bytes, PIL Image, or base64 string
82
-
84
+
83
85
  Returns:
84
86
  str: Base64 encoded image string
85
87
  """
@@ -88,43 +90,47 @@ class CustomComputerHandler(AsyncComputerHandler):
88
90
  return img
89
91
  elif isinstance(img, bytes):
90
92
  # Raw bytes
91
- return base64.b64encode(img).decode('utf-8')
93
+ return base64.b64encode(img).decode("utf-8")
92
94
  elif isinstance(img, Image.Image):
93
95
  # PIL Image
94
96
  buffer = io.BytesIO()
95
- img.save(buffer, format='PNG')
96
- return base64.b64encode(buffer.getvalue()).decode('utf-8')
97
+ img.save(buffer, format="PNG")
98
+ return base64.b64encode(buffer.getvalue()).decode("utf-8")
97
99
  else:
98
100
  raise ValueError(f"Unsupported image type: {type(img)}")
99
-
100
- # ==== Computer-Use-Preview Action Space ====
101
+
102
+ # ==== Computer-Use-Preview Action Space ====
101
103
 
102
104
  async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
103
105
  """Get the current environment type."""
104
- result = await self._get_value('environment')
106
+ result = await self._get_value("environment")
105
107
  if result is None:
106
108
  return "linux"
107
109
  assert result in ["windows", "mac", "linux", "browser"]
108
- return result # type: ignore
110
+ return result # type: ignore
109
111
 
110
112
  async def get_dimensions(self) -> tuple[int, int]:
111
113
  """Get screen dimensions as (width, height)."""
112
- result = await self._get_value('dimensions')
114
+ result = await self._get_value("dimensions")
113
115
  if result is not None:
114
- return result # type: ignore
115
-
116
+ return result # type: ignore
117
+
116
118
  # Fallback: use last screenshot size if available
117
119
  if not self._last_screenshot_size:
118
120
  await self.screenshot()
119
121
  assert self._last_screenshot_size is not None, "Failed to get screenshot size"
120
-
122
+
121
123
  return self._last_screenshot_size
122
-
123
- async def screenshot(self) -> str:
124
- """Take a screenshot and return as base64 string."""
125
- result = await self._call_function(self.functions['screenshot'])
126
- b64_str = self._to_b64_str(result) # type: ignore
127
-
124
+
125
+ async def screenshot(self, text: Optional[str] = None) -> str:
126
+ """Take a screenshot and return as base64 string.
127
+
128
+ Args:
129
+ text: Optional descriptive text (for compatibility with GPT-4o models, ignored)
130
+ """
131
+ result = await self._call_function(self.functions["screenshot"])
132
+ b64_str = self._to_b64_str(result) # type: ignore
133
+
128
134
  # Try to extract dimensions for fallback use
129
135
  try:
130
136
  if isinstance(result, Image.Image):
@@ -136,74 +142,75 @@ class CustomComputerHandler(AsyncComputerHandler):
136
142
  except Exception:
137
143
  # If we can't get dimensions, that's okay
138
144
  pass
139
-
145
+
140
146
  return b64_str
141
-
147
+
142
148
  async def click(self, x: int, y: int, button: str = "left") -> None:
143
149
  """Click at coordinates with specified button."""
144
- if 'click' in self.functions:
145
- await self._call_function(self.functions['click'], x, y, button)
150
+ if "click" in self.functions:
151
+ await self._call_function(self.functions["click"], x, y, button)
146
152
  # No-op if not implemented
147
-
153
+
148
154
  async def double_click(self, x: int, y: int) -> None:
149
155
  """Double click at coordinates."""
150
- if 'double_click' in self.functions:
151
- await self._call_function(self.functions['double_click'], x, y)
156
+ if "double_click" in self.functions:
157
+ await self._call_function(self.functions["double_click"], x, y)
152
158
  # No-op if not implemented
153
-
159
+
154
160
  async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
155
161
  """Scroll at coordinates with specified scroll amounts."""
156
- if 'scroll' in self.functions:
157
- await self._call_function(self.functions['scroll'], x, y, scroll_x, scroll_y)
162
+ if "scroll" in self.functions:
163
+ await self._call_function(self.functions["scroll"], x, y, scroll_x, scroll_y)
158
164
  # No-op if not implemented
159
-
165
+
160
166
  async def type(self, text: str) -> None:
161
167
  """Type text."""
162
- if 'type' in self.functions:
163
- await self._call_function(self.functions['type'], text)
168
+ if "type" in self.functions:
169
+ await self._call_function(self.functions["type"], text)
164
170
  # No-op if not implemented
165
-
171
+
166
172
  async def wait(self, ms: int = 1000) -> None:
167
173
  """Wait for specified milliseconds."""
168
- if 'wait' in self.functions:
169
- await self._call_function(self.functions['wait'], ms)
174
+ if "wait" in self.functions:
175
+ await self._call_function(self.functions["wait"], ms)
170
176
  else:
171
177
  # Default implementation
172
178
  import asyncio
179
+
173
180
  await asyncio.sleep(ms / 1000.0)
174
-
181
+
175
182
  async def move(self, x: int, y: int) -> None:
176
183
  """Move cursor to coordinates."""
177
- if 'move' in self.functions:
178
- await self._call_function(self.functions['move'], x, y)
184
+ if "move" in self.functions:
185
+ await self._call_function(self.functions["move"], x, y)
179
186
  # No-op if not implemented
180
-
187
+
181
188
  async def keypress(self, keys: Union[List[str], str]) -> None:
182
189
  """Press key combination."""
183
- if 'keypress' in self.functions:
184
- await self._call_function(self.functions['keypress'], keys)
190
+ if "keypress" in self.functions:
191
+ await self._call_function(self.functions["keypress"], keys)
185
192
  # No-op if not implemented
186
-
193
+
187
194
  async def drag(self, path: List[Dict[str, int]]) -> None:
188
195
  """Drag along specified path."""
189
- if 'drag' in self.functions:
190
- await self._call_function(self.functions['drag'], path)
196
+ if "drag" in self.functions:
197
+ await self._call_function(self.functions["drag"], path)
191
198
  # No-op if not implemented
192
-
199
+
193
200
  async def get_current_url(self) -> str:
194
201
  """Get current URL (for browser environments)."""
195
- if 'get_current_url' in self.functions:
196
- return await self._get_value('current_url') # type: ignore
202
+ if "get_current_url" in self.functions:
203
+ return await self._get_value("current_url") # type: ignore
197
204
  return "" # Default fallback
198
-
205
+
199
206
  async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
200
207
  """Left mouse down at coordinates."""
201
- if 'left_mouse_down' in self.functions:
202
- await self._call_function(self.functions['left_mouse_down'], x, y)
208
+ if "left_mouse_down" in self.functions:
209
+ await self._call_function(self.functions["left_mouse_down"], x, y)
203
210
  # No-op if not implemented
204
-
211
+
205
212
  async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
206
213
  """Left mouse up at coordinates."""
207
- if 'left_mouse_up' in self.functions:
208
- await self._call_function(self.functions['left_mouse_up'], x, y)
214
+ if "left_mouse_up" in self.functions:
215
+ await self._call_function(self.functions["left_mouse_up"], x, y)
209
216
  # No-op if not implemented
agent/decorators.py CHANGED
@@ -3,47 +3,56 @@ Decorators for agent - agent_loop decorator
3
3
  """
4
4
 
5
5
  from typing import List, Optional
6
+
6
7
  from .types import AgentConfigInfo
7
8
 
8
9
  # Global registry
9
10
  _agent_configs: List[AgentConfigInfo] = []
10
11
 
12
+
11
13
  def register_agent(models: str, priority: int = 0):
12
14
  """
13
15
  Decorator to register an AsyncAgentConfig class.
14
-
16
+
15
17
  Args:
16
18
  models: Regex pattern to match supported models
17
19
  priority: Priority for agent selection (higher = more priority)
18
20
  """
21
+
19
22
  def decorator(agent_class: type):
20
23
  # Validate that the class implements AsyncAgentConfig protocol
21
- if not hasattr(agent_class, 'predict_step'):
22
- raise ValueError(f"Agent class {agent_class.__name__} must implement predict_step method")
23
- if not hasattr(agent_class, 'predict_click'):
24
- raise ValueError(f"Agent class {agent_class.__name__} must implement predict_click method")
25
- if not hasattr(agent_class, 'get_capabilities'):
26
- raise ValueError(f"Agent class {agent_class.__name__} must implement get_capabilities method")
27
-
24
+ if not hasattr(agent_class, "predict_step"):
25
+ raise ValueError(
26
+ f"Agent class {agent_class.__name__} must implement predict_step method"
27
+ )
28
+ if not hasattr(agent_class, "predict_click"):
29
+ raise ValueError(
30
+ f"Agent class {agent_class.__name__} must implement predict_click method"
31
+ )
32
+ if not hasattr(agent_class, "get_capabilities"):
33
+ raise ValueError(
34
+ f"Agent class {agent_class.__name__} must implement get_capabilities method"
35
+ )
36
+
28
37
  # Register the agent config
29
38
  config_info = AgentConfigInfo(
30
- agent_class=agent_class,
31
- models_regex=models,
32
- priority=priority
39
+ agent_class=agent_class, models_regex=models, priority=priority
33
40
  )
34
41
  _agent_configs.append(config_info)
35
-
42
+
36
43
  # Sort by priority (highest first)
37
44
  _agent_configs.sort(key=lambda x: x.priority, reverse=True)
38
-
45
+
39
46
  return agent_class
40
-
47
+
41
48
  return decorator
42
49
 
50
+
43
51
  def get_agent_configs() -> List[AgentConfigInfo]:
44
52
  """Get all registered agent configs"""
45
53
  return _agent_configs.copy()
46
54
 
55
+
47
56
  def find_agent_config(model: str) -> Optional[AgentConfigInfo]:
48
57
  """Find the best matching agent config for a model"""
49
58
  for config_info in _agent_configs:
agent/human_tool/__init__.py CHANGED
@@ -12,7 +12,7 @@ Components:
12
12
  Usage:
13
13
  # Run the server and UI
14
14
  python -m agent.human_tool
15
-
15
+
16
16
  # Or run components separately
17
17
  python -m agent.human_tool.server # API server only
18
18
  python -m agent.human_tool.ui # UI only
@@ -21,9 +21,4 @@ Usage:
21
21
  from .server import CompletionQueue, completion_queue
22
22
  from .ui import HumanCompletionUI, create_ui
23
23
 
24
- __all__ = [
25
- "CompletionQueue",
26
- "completion_queue",
27
- "HumanCompletionUI",
28
- "create_ui"
29
- ]
24
+ __all__ = ["CompletionQueue", "completion_queue", "HumanCompletionUI", "create_ui"]
agent/human_tool/__main__.py CHANGED
@@ -8,6 +8,7 @@ with a Gradio UI for human interaction.
8
8
 
9
9
  import gradio as gr
10
10
  from fastapi import FastAPI
11
+
11
12
  from .server import app as fastapi_app
12
13
  from .ui import create_ui
13
14
 
@@ -18,6 +19,7 @@ gradio_demo = create_ui()
18
19
  CUSTOM_PATH = "/gradio"
19
20
  app = gr.mount_gradio_app(fastapi_app, gradio_demo, path=CUSTOM_PATH)
20
21
 
22
+
21
23
  # Add a redirect from root to Gradio UI
22
24
  @fastapi_app.get("/")
23
25
  async def redirect_to_ui():
@@ -25,14 +27,16 @@ async def redirect_to_ui():
25
27
  return {
26
28
  "message": "Human Completion Server is running",
27
29
  "ui_url": "/gradio",
28
- "api_docs": "/docs"
30
+ "api_docs": "/docs",
29
31
  }
30
32
 
33
+
31
34
  if __name__ == "__main__":
32
35
  import uvicorn
36
+
33
37
  print("🚀 Starting Human-in-the-Loop Completion Server...")
34
38
  print("📊 API Server: http://localhost:8002")
35
39
  print("🎨 Gradio UI: http://localhost:8002/gradio")
36
40
  print("📚 API Docs: http://localhost:8002/docs")
37
-
41
+
38
42
  uvicorn.run(app, host="0.0.0.0", port=8002)