lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic. Click here for more details.

Files changed (108) hide show
  1. lm_deluge/__init__.py +1 -2
  2. lm_deluge/api_requests/anthropic.py +117 -22
  3. lm_deluge/api_requests/base.py +84 -11
  4. lm_deluge/api_requests/bedrock.py +30 -6
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +166 -20
  7. lm_deluge/api_requests/openai.py +145 -25
  8. lm_deluge/batches.py +15 -45
  9. lm_deluge/client.py +309 -50
  10. lm_deluge/config.py +15 -3
  11. lm_deluge/models/__init__.py +14 -1
  12. lm_deluge/models/anthropic.py +29 -14
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +42 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +18 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +133 -7
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +50 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +705 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +537 -88
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/server/__init__.py +24 -0
  39. lm_deluge/server/__main__.py +144 -0
  40. lm_deluge/server/adapters.py +369 -0
  41. lm_deluge/server/app.py +388 -0
  42. lm_deluge/server/auth.py +71 -0
  43. lm_deluge/server/model_policy.py +215 -0
  44. lm_deluge/server/models_anthropic.py +172 -0
  45. lm_deluge/server/models_openai.py +175 -0
  46. lm_deluge/tool/__init__.py +1130 -0
  47. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  48. lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  49. lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  50. lm_deluge/tool/builtin/gemini.py +59 -0
  51. lm_deluge/tool/builtin/openai.py +74 -0
  52. lm_deluge/tool/cua/__init__.py +173 -0
  53. lm_deluge/tool/cua/actions.py +148 -0
  54. lm_deluge/tool/cua/base.py +27 -0
  55. lm_deluge/tool/cua/batch.py +215 -0
  56. lm_deluge/tool/cua/converters.py +466 -0
  57. lm_deluge/tool/cua/kernel.py +702 -0
  58. lm_deluge/tool/cua/trycua.py +989 -0
  59. lm_deluge/tool/prefab/__init__.py +45 -0
  60. lm_deluge/tool/prefab/batch_tool.py +156 -0
  61. lm_deluge/tool/prefab/docs.py +1119 -0
  62. lm_deluge/tool/prefab/email.py +294 -0
  63. lm_deluge/tool/prefab/filesystem.py +1711 -0
  64. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  65. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  66. lm_deluge/tool/prefab/memory.py +458 -0
  67. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  68. lm_deluge/tool/prefab/otc/executor.py +281 -0
  69. lm_deluge/tool/prefab/otc/parse.py +188 -0
  70. lm_deluge/tool/prefab/random.py +212 -0
  71. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  72. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  73. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  74. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  75. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  76. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  77. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  78. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  79. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  80. lm_deluge/tool/prefab/sheets.py +385 -0
  81. lm_deluge/tool/prefab/skills.py +0 -0
  82. lm_deluge/tool/prefab/subagents.py +233 -0
  83. lm_deluge/tool/prefab/todos.py +342 -0
  84. lm_deluge/tool/prefab/tool_search.py +169 -0
  85. lm_deluge/tool/prefab/web_search.py +199 -0
  86. lm_deluge/tracker.py +16 -13
  87. lm_deluge/util/schema.py +412 -0
  88. lm_deluge/warnings.py +8 -0
  89. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
  90. lm_deluge-0.0.90.dist-info/RECORD +132 -0
  91. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  92. lm_deluge/built_in_tools/openai.py +0 -28
  93. lm_deluge/presets/cerebras.py +0 -17
  94. lm_deluge/presets/meta.py +0 -13
  95. lm_deluge/tool.py +0 -849
  96. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  97. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  98. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  99. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  100. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  101. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  102. /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
  103. /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
  104. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  105. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  106. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  107. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  108. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,989 @@
1
+ """
2
+ TryCUA (cua.ai) implementation of ComputerExecutor.
3
+
4
+ This module provides a ComputerExecutor that connects to a TryCUA computer-server
5
+ instance via WebSocket to execute computer use actions on a remote desktop.
6
+
7
+ The computer-server can be:
8
+ - A local instance: ws://localhost:8000/ws
9
+ - A cloud instance: wss://your-container.containers.cloud.trycua.com:8443/ws
10
+
11
+ No SDK required - communicates directly via WebSocket using JSON messages.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import base64
18
+ import json
19
+ import time
20
+ from typing import Any
21
+
22
+ from .actions import (
23
+ Bash,
24
+ Click,
25
+ CUAction,
26
+ DoubleClick,
27
+ Drag,
28
+ GoBack,
29
+ GoForward,
30
+ HoldKey,
31
+ Keypress,
32
+ MouseDown,
33
+ MouseUp,
34
+ Move,
35
+ Navigate,
36
+ Scroll,
37
+ Search,
38
+ TripleClick,
39
+ Type,
40
+ Wait,
41
+ )
42
+ from .base import ComputerExecutor, CUActionResult, Screenshot as ScreenshotResult
43
+
44
+
45
class TryCUAConnection:
    """
    Manages a synchronous WebSocket connection to a TryCUA computer-server.

    Every command is sent as a JSON object ``{"command": ..., "params": {...}}``
    and the next message received on the socket is decoded as its JSON response.

    Usage:
        with TryCUAConnection("ws://localhost:8000/ws") as conn:
            executor = TryCUAExecutor(conn)
            result = executor.execute(Screenshot(kind="screenshot"))

    For asyncio code use AsyncTryCUAConnection instead.
    """

    def __init__(
        self,
        ws_url: str = "ws://localhost:8000/ws",
        *,
        container_name: str | None = None,
        api_key: str | None = None,
    ):
        """
        Initialize a TryCUA connection configuration (no network I/O happens here).

        Args:
            ws_url: WebSocket URL to the computer-server
            container_name: Container name for cloud authentication (optional)
            api_key: API key for cloud authentication (optional)
        """
        self.ws_url = ws_url
        self.container_name = container_name
        self.api_key = api_key
        self._ws = None
        # Not used by this class; kept so existing attribute access keeps working.
        self._loop = None

    def connect(self) -> "TryCUAConnection":
        """
        Establish the WebSocket connection synchronously.

        An "authenticate" command is sent only when both ``container_name`` and
        ``api_key`` are set. If authentication raises or is rejected, the socket
        is closed again before the error propagates, so no connection is leaked.

        Returns:
            This connection, to allow ``conn = TryCUAConnection(...).connect()``.

        Raises:
            ImportError: If the 'websockets' package is not installed.
            RuntimeError: If the server answers the authenticate command with
                ``"success": false``.
        """
        try:
            import websockets.sync.client as ws_sync
        except ImportError as exc:
            raise ImportError(
                "The 'websockets' package is required for TryCUAConnection. "
                "Install it with: pip install websockets"
            ) from exc

        # Reconnecting must not orphan a socket that is still open.
        self.disconnect()
        self._ws = ws_sync.connect(self.ws_url)

        # Authenticate if credentials provided
        if self.container_name and self.api_key:
            try:
                response = self._send_command(
                    "authenticate",
                    {"container_name": self.container_name, "api_key": self.api_key},
                )
            except BaseException:
                self.disconnect()
                raise
            # NOTE(review): assumes rejected credentials are reported with
            # "success": false, like the other commands in this module - confirm.
            if isinstance(response, dict) and response.get("success") is False:
                self.disconnect()
                raise RuntimeError(f"Authentication failed: {response.get('error')}")

        return self

    def disconnect(self) -> None:
        """Close the WebSocket connection; does nothing when not connected."""
        # Drop the reference first so a failing close() cannot leave a dead
        # socket behind that later commands would try to reuse.
        ws, self._ws = self._ws, None
        if ws is not None:
            ws.close()

    def _send_command(self, command: str, params: dict[str, Any] | None = None) -> dict:
        """
        Send one command and return the decoded JSON response.

        Args:
            command: Command name understood by the computer-server.
            params: Command parameters; an empty object is sent when omitted.

        Raises:
            RuntimeError: If connect() has not been called.
        """
        if self._ws is None:
            raise RuntimeError("Not connected. Call connect() first.")

        message = {"command": command, "params": params or {}}
        self._ws.send(json.dumps(message))
        response = self._ws.recv()
        return json.loads(response)

    def send_command(self, command: str, params: dict[str, Any] | None = None) -> dict:
        """Public method to send a command and return the response."""
        return self._send_command(command, params)

    def __enter__(self) -> "TryCUAConnection":
        return self.connect()

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.disconnect()
128
+
129
+
130
class AsyncTryCUAConnection:
    """
    Async version of TryCUAConnection for use with asyncio.

    Every command is sent as a JSON object ``{"command": ..., "params": {...}}``
    and the next message received on the socket is decoded as its JSON response.
    NOTE(review): send/receive pairs are not serialized here, so concurrent
    callers sharing one connection could presumably read each other's
    responses - confirm callers issue one command at a time.

    Usage:
        async with AsyncTryCUAConnection("ws://localhost:8000/ws") as conn:
            executor = AsyncTryCUAExecutor(conn)
            result = await executor.execute(Screenshot(kind="screenshot"))
    """

    def __init__(
        self,
        ws_url: str = "ws://localhost:8000/ws",
        *,
        container_name: str | None = None,
        api_key: str | None = None,
    ):
        """
        Initialize a TryCUA connection configuration (no network I/O happens here).

        Args:
            ws_url: WebSocket URL to the computer-server
            container_name: Container name for cloud authentication (optional)
            api_key: API key for cloud authentication (optional)
        """
        self.ws_url = ws_url
        self.container_name = container_name
        self.api_key = api_key
        self._ws = None

    async def connect(self) -> "AsyncTryCUAConnection":
        """
        Establish the WebSocket connection asynchronously.

        An "authenticate" command is sent only when both ``container_name`` and
        ``api_key`` are set. If authentication raises or is rejected, the socket
        is closed again before the error propagates, so no connection is leaked.

        Returns:
            This connection.

        Raises:
            ImportError: If the 'websockets' package is not installed.
            RuntimeError: If the server answers the authenticate command with
                ``"success": false``.
        """
        try:
            import websockets
        except ImportError as exc:
            raise ImportError(
                "The 'websockets' package is required for AsyncTryCUAConnection. "
                "Install it with: pip install websockets"
            ) from exc

        # Reconnecting must not orphan a socket that is still open.
        await self.disconnect()
        self._ws = await websockets.connect(self.ws_url)

        # Authenticate if credentials provided
        if self.container_name and self.api_key:
            try:
                response = await self._send_command(
                    "authenticate",
                    {"container_name": self.container_name, "api_key": self.api_key},
                )
            except BaseException:
                await self.disconnect()
                raise
            # NOTE(review): assumes rejected credentials are reported with
            # "success": false, like the other commands in this module - confirm.
            if isinstance(response, dict) and response.get("success") is False:
                await self.disconnect()
                raise RuntimeError(f"Authentication failed: {response.get('error')}")

        return self

    async def disconnect(self) -> None:
        """Close the WebSocket connection; does nothing when not connected."""
        # Drop the reference first so a failing close() cannot leave a dead
        # socket behind that later commands would try to reuse.
        ws, self._ws = self._ws, None
        if ws is not None:
            await ws.close()

    async def _send_command(
        self, command: str, params: dict[str, Any] | None = None
    ) -> dict:
        """
        Send one command and return the decoded JSON response.

        Args:
            command: Command name understood by the computer-server.
            params: Command parameters; an empty object is sent when omitted.

        Raises:
            RuntimeError: If connect() has not been called.
        """
        if self._ws is None:
            raise RuntimeError("Not connected. Call connect() first.")

        message = {"command": command, "params": params or {}}
        await self._ws.send(json.dumps(message))
        response = await self._ws.recv()
        return json.loads(response)

    async def send_command(
        self, command: str, params: dict[str, Any] | None = None
    ) -> dict:
        """Public method to send a command and return the response."""
        return await self._send_command(command, params)

    async def __aenter__(self) -> "AsyncTryCUAConnection":
        return await self.connect()

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        await self.disconnect()
202
+
203
+
204
class TryCUAExecutor(ComputerExecutor):
    """
    Execute computer use actions on a TryCUA computer-server.

    This executor maps CUAction types to TryCUA's computer control commands,
    enabling vision-based LLM loops to control a remote desktop. Every command
    sent to the server is checked: a response without a truthy "success" field
    raises RuntimeError.

    Example:
        with TryCUAConnection("ws://localhost:8000/ws") as conn:
            executor = TryCUAExecutor(conn)

            # Take a screenshot
            result = executor.execute(Screenshot(kind="screenshot"))
            print(f"Got {len(result['screenshot']['content'])} bytes")

            # Click at coordinates
            executor.execute(Click(kind="click", x=100, y=200, button="left"))

            # Type text
            executor.execute(Type(kind="type", text="Hello, world!"))
    """

    # Handler method name for every action kind that receives the action payload.
    # "screenshot" and "cursor_position" take no payload and are handled separately.
    _HANDLER_NAMES = {
        "click": "_click",
        "double_click": "_double_click",
        "triple_click": "_triple_click",
        "move": "_move",
        "scroll": "_scroll",
        "type": "_type",
        "keypress": "_keypress",
        "drag": "_drag",
        "wait": "_wait",
        "mouse_down": "_mouse_down",
        "mouse_up": "_mouse_up",
        "hold_key": "_hold_key",
        "navigate": "_navigate",
        "go_back": "_go_back",
        "go_forward": "_go_forward",
        "search": "_search",
        "bash": "_bash",
    }

    def __init__(self, connection: TryCUAConnection):
        """
        Initialize the executor with an active TryCUA connection.

        Args:
            connection: An active TryCUAConnection instance
        """
        self.conn = connection

    def execute(self, action: CUAction) -> CUActionResult:
        """
        Execute a computer use action on the TryCUA desktop.

        Args:
            action: The action to execute (Click, Type, Screenshot, etc.)

        Returns:
            CUActionResult with screenshot (if applicable) and metadata

        Raises:
            ValueError: If the action's "kind" is not supported.
            RuntimeError: If the server reports a failed command.
        """
        kind = action["kind"]

        if kind == "screenshot":
            return self._screenshot()
        if kind == "cursor_position":
            return self._cursor_position()

        handler_name = self._HANDLER_NAMES.get(kind) if isinstance(kind, str) else None
        if handler_name is None:
            raise ValueError(f"Unsupported action kind: {kind}")
        return getattr(self, handler_name)(action)  # type: ignore

    # ------------------------------------------------------------------ helpers

    def _command(
        self, label: str, command: str, params: dict[str, Any] | None = None
    ) -> dict:
        """Send a command and raise ``RuntimeError("<label> failed: ...")`` on failure."""
        if params is None:
            response = self.conn.send_command(command)
        else:
            response = self.conn.send_command(command, params)
        if not response.get("success"):
            raise RuntimeError(f"{label} failed: {response.get('error')}")
        return response

    @staticmethod
    def _point_params(action: Any) -> dict[str, Any]:
        """Return the action's x/y coordinates, omitting any that are missing or None."""
        params: dict[str, Any] = {}
        if action.get("x") is not None:
            params["x"] = action["x"]
        if action.get("y") is not None:
            params["y"] = action["y"]
        return params

    def _cursor_xy_or_origin(self) -> tuple[Any, Any]:
        """Return the current cursor position, or (0, 0) when the server cannot report it."""
        reply = self.conn.send_command("get_cursor_position")
        if reply.get("success"):
            pos = reply.get("position", {})
            return pos.get("x", 0), pos.get("y", 0)
        return 0, 0

    def _mouse_button(self, command: str, label: str, action: Any) -> None:
        """Send mouse_down / mouse_up at the current cursor position."""
        x, y = self._cursor_xy_or_origin()
        self._command(
            label, command, {"x": x, "y": y, "button": action.get("button", "left")}
        )

    def _open_url(self, label: str, url: str) -> None:
        """Focus the address bar (Ctrl+L), type the URL and press Return."""
        self._command(label, "hotkey", {"keys": ["ctrl", "l"]})
        time.sleep(0.2)
        self._command(label, "type_text", {"text": url})
        time.sleep(0.1)
        self._command(label, "press_key", {"key": "Return"})
        # Fixed pause before the follow-up screenshot is taken.
        time.sleep(1.5)

    # ----------------------------------------------------------------- handlers

    def _screenshot(self) -> CUActionResult:
        """Capture a screenshot of the desktop."""
        return self._screenshot_with_data({})

    def _click(self, action: Click) -> CUActionResult:
        """Execute a click action.

        Only "right" maps to a right click; every other button (including
        "middle", "back" and "forward") falls back to a left click.
        """
        button = action.get("button", "left")
        cmd = "right_click" if button == "right" else "left_click"
        self._command("Click", cmd, self._point_params(action))
        return CUActionResult(screenshot=None, data={"action": "click"})

    def _double_click(self, action: DoubleClick) -> CUActionResult:
        """Execute a double click action."""
        self._command("Double click", "double_click", self._point_params(action))
        return CUActionResult(screenshot=None, data={"action": "double_click"})

    def _triple_click(self, action: TripleClick) -> CUActionResult:
        """Execute a triple click action (3 rapid left clicks)."""
        params = self._point_params(action)
        # TryCUA doesn't have native triple click, so do 3 clicks
        for _ in range(3):
            self._command("Triple click", "left_click", params)
        return CUActionResult(screenshot=None, data={"action": "triple_click"})

    def _move(self, action: Move) -> CUActionResult:
        """Move the mouse cursor."""
        self._command("Move", "move_cursor", {"x": action["x"], "y": action["y"]})
        return CUActionResult(screenshot=None, data={"action": "move"})

    def _scroll(self, action: Scroll) -> CUActionResult:
        """Execute a scroll action.

        ``dx`` / ``dy`` are scroll amounts in pixels (positive dy scrolls down).
        Both axes are applied when both are non-zero: vertical first, then
        horizontal.
        """
        dx = action.get("dx") or 0
        dy = action.get("dy") or 0

        # First move to position if specified, then click to focus
        x = action.get("x")
        y = action.get("y")
        if x is not None and y is not None:
            self._command("Scroll", "move_cursor", {"x": x, "y": y})
            # Click to ensure the element under cursor gets focus for scroll
            self._command("Scroll", "left_click", {"x": x, "y": y})

        if dy != 0:
            # Convert pixel delta to scroll clicks (roughly 120 pixels per click)
            clicks = max(1, abs(dy) // 120)
            # Positive dy means scroll down (content moves up)
            vertical = "scroll_down" if dy > 0 else "scroll_up"
            self._command("Scroll", vertical, {"clicks": clicks})
        if dx != 0:
            # For horizontal scroll, use the generic scroll command
            self._command("Scroll", "scroll", {"x": dx, "y": 0})

        return CUActionResult(screenshot=None, data={"action": "scroll"})

    def _type(self, action: Type) -> CUActionResult:
        """Type text."""
        self._command("Type", "type_text", {"text": action["text"]})
        return CUActionResult(screenshot=None, data={"action": "type"})

    def _keypress(self, action: Keypress) -> CUActionResult:
        """Press key(s): one key is sent as press_key, anything else as a hotkey."""
        keys = action["keys"]
        if len(keys) == 1:
            self._command("Keypress", "press_key", {"key": keys[0]})
        else:
            self._command("Keypress", "hotkey", {"keys": keys})
        return CUActionResult(screenshot=None, data={"action": "keypress"})

    def _drag(self, action: Drag) -> CUActionResult:
        """Execute a drag action: optional move to the start, then drag_to each path point."""
        start_x = action.get("start_x")
        start_y = action.get("start_y")
        path = action.get("path") or []

        # If start position specified, move there first
        if start_x is not None and start_y is not None:
            self._command("Drag", "move_cursor", {"x": start_x, "y": start_y})

        for end_x, end_y in path:
            self._command("Drag", "drag_to", {"x": end_x, "y": end_y})

        return CUActionResult(screenshot=None, data={"action": "drag"})

    def _wait(self, action: Wait) -> CUActionResult:
        """Wait for a specified duration (blocks the calling thread)."""
        time.sleep(action["ms"] / 1000.0)
        return CUActionResult(
            screenshot=None, data={"action": "wait", "ms": action["ms"]}
        )

    def _mouse_down(self, action: MouseDown) -> CUActionResult:
        """Press and hold a mouse button at the current cursor position."""
        self._mouse_button("mouse_down", "Mouse down", action)
        return CUActionResult(screenshot=None, data={"action": "mouse_down"})

    def _mouse_up(self, action: MouseUp) -> CUActionResult:
        """Release a mouse button at the current cursor position."""
        self._mouse_button("mouse_up", "Mouse up", action)
        return CUActionResult(screenshot=None, data={"action": "mouse_up"})

    def _cursor_position(self) -> CUActionResult:
        """Get current cursor position."""
        response = self._command("Get cursor position", "get_cursor_position")
        pos = response.get("position", {})
        return CUActionResult(
            screenshot=None,
            data={"action": "cursor_position", "x": pos.get("x"), "y": pos.get("y")},
        )

    def _hold_key(self, action: HoldKey) -> CUActionResult:
        """Hold a key for a duration, always releasing it afterwards."""
        key = action["key"]
        ms = action["ms"]

        self._command("Hold key", "key_down", {"key": key})
        try:
            time.sleep(ms / 1000.0)
        finally:
            # Release even if the wait is interrupted, so the key is never
            # left stuck down on the remote desktop.
            release = self.conn.send_command("key_up", {"key": key})
        if not release.get("success"):
            raise RuntimeError(f"Hold key failed: {release.get('error')}")

        return CUActionResult(
            screenshot=None, data={"action": "hold_key", "key": key, "ms": ms}
        )

    def _navigate(self, action: Navigate) -> CUActionResult:
        """Navigate to a URL (assumes browser is open)."""
        url = action["url"]
        self._open_url("Navigate", url)
        # Take screenshot after navigation
        return self._screenshot_with_data({"action": "navigate", "url": url})

    def _go_back(self, action: GoBack) -> CUActionResult:
        """Go back in browser history."""
        self._command("Go back", "hotkey", {"keys": ["alt", "Left"]})
        time.sleep(0.5)
        return self._screenshot_with_data({"action": "go_back"})

    def _go_forward(self, action: GoForward) -> CUActionResult:
        """Go forward in browser history."""
        self._command("Go forward", "hotkey", {"keys": ["alt", "Right"]})
        time.sleep(0.5)
        return self._screenshot_with_data({"action": "go_forward"})

    def _search(self, action: Search) -> CUActionResult:
        """Perform a web search by opening a Google search URL for the query."""
        from urllib.parse import quote

        query = action["query"]
        search_url = f"https://www.google.com/search?q={quote(query)}"
        self._open_url("Search", search_url)
        return self._screenshot_with_data({"action": "search", "query": query})

    def _bash(self, action: Bash) -> CUActionResult:
        """Execute a bash command.

        A restart request is not supported and is reported in the result data
        instead of raising. With neither restart nor a command, nothing is sent.
        """
        command = action.get("command")
        restart = action.get("restart", False)

        if restart:
            return CUActionResult(
                screenshot=None,
                data={"action": "bash", "error": "restart not supported"},
            )

        if not command:
            return CUActionResult(screenshot=None, data={"action": "bash"})

        response = self._command("Bash command", "run_command", {"command": command})
        return CUActionResult(
            screenshot=None,
            data={
                "action": "bash",
                "stdout": response.get("stdout", ""),
                "stderr": response.get("stderr", ""),
                "return_code": response.get("return_code", 0),
            },
        )

    def _screenshot_with_data(self, data: dict) -> CUActionResult:
        """Take a screenshot and include additional data.

        Raises:
            RuntimeError: If the command fails or the response has no image data.
        """
        response = self._command("Screenshot", "screenshot")

        image_data = response.get("image_data")
        if not image_data:
            # Decoding "" would hand an empty, undecodable image to the caller.
            raise RuntimeError("Screenshot failed: response contained no image data")

        # Decode base64 image data
        content = base64.b64decode(image_data)
        return CUActionResult(
            screenshot=ScreenshotResult(media_type="image/png", content=content),
            data=data,
        )
612
+
613
+
614
+ class AsyncTryCUAExecutor:
615
+ """
616
+ Async version of TryCUAExecutor for use with asyncio.
617
+
618
+ Example:
619
+ async with AsyncTryCUAConnection("ws://localhost:8000/ws") as conn:
620
+ executor = AsyncTryCUAExecutor(conn)
621
+ result = await executor.execute(Screenshot(kind="screenshot"))
622
+ """
623
+
624
def __init__(self, connection: AsyncTryCUAConnection):
    """
    Bind this executor to an async TryCUA connection.

    Args:
        connection: The AsyncTryCUAConnection that every command is sent over;
            it is stored as-is on ``self.conn``.
    """
    self.conn = connection
632
+
633
async def execute(self, action: CUAction) -> CUActionResult:
    """
    Dispatch a computer use action to its handler coroutine and await it.

    Args:
        action: The action to execute; its "kind" entry selects the handler.

    Returns:
        Whatever the selected handler returns.

    Raises:
        ValueError: If no handler exists for the action's kind.
    """
    kind = action["kind"]

    # These two kinds carry no payload, so their handlers take no argument.
    if kind == "screenshot":
        return await self._screenshot()
    if kind == "cursor_position":
        return await self._cursor_position()

    # Every remaining kind is served by the method named "_<kind>".
    payload_kinds = (
        "click",
        "double_click",
        "triple_click",
        "move",
        "scroll",
        "type",
        "keypress",
        "drag",
        "wait",
        "mouse_down",
        "mouse_up",
        "hold_key",
        "navigate",
        "go_back",
        "go_forward",
        "search",
        "bash",
    )
    for name in payload_kinds:
        if kind == name:
            handler = getattr(self, "_" + name)
            return await handler(action)  # type: ignore

    raise ValueError(f"Unsupported action kind: {kind}")
685
+
686
async def _screenshot(self) -> CUActionResult:
    """
    Capture a screenshot of the desktop.

    Returns:
        CUActionResult whose screenshot holds the decoded image bytes (reported
        as image/png) and whose data is empty.

    Raises:
        RuntimeError: If the server reports failure, or reports success without
            any image data.
    """
    response = await self.conn.send_command("screenshot")
    if not response.get("success"):
        raise RuntimeError(f"Screenshot failed: {response.get('error')}")

    image_data = response.get("image_data")
    if not image_data:
        # Decoding "" would hand an empty, undecodable image to the caller.
        raise RuntimeError("Screenshot failed: response contained no image data")

    # The image arrives base64-encoded inside the JSON response.
    content = base64.b64decode(image_data)

    return CUActionResult(
        screenshot=ScreenshotResult(media_type="image/png", content=content),
        data={},
    )
699
+
700
async def _click(self, action: Click) -> CUActionResult:
    """
    Click at the action's coordinates.

    Only a "right" button produces a right click; any other button value is
    sent as a left click. Coordinates that are missing or None are left out of
    the command parameters.

    Raises:
        RuntimeError: If the server reports the click as failed.
    """
    wants_right = action.get("button", "left") == "right"
    command = "right_click" if wants_right else "left_click"

    params: dict[str, Any] = {}
    for axis in ("x", "y"):
        coordinate = action.get(axis)
        if coordinate is not None:
            params[axis] = coordinate

    reply = await self.conn.send_command(command, params)
    if reply.get("success"):
        return CUActionResult(screenshot=None, data={"action": "click"})
    raise RuntimeError(f"Click failed: {reply.get('error')}")
724
+
725
async def _double_click(self, action: DoubleClick) -> CUActionResult:
    """
    Double-click at the action's coordinates.

    Coordinates that are missing or None are left out of the command parameters.

    Raises:
        RuntimeError: If the server reports the double click as failed.
    """
    params: dict[str, Any] = {}
    for axis in ("x", "y"):
        if action.get(axis) is not None:
            params[axis] = action[axis]

    reply = await self.conn.send_command("double_click", params)
    if reply.get("success"):
        return CUActionResult(screenshot=None, data={"action": "double_click"})
    raise RuntimeError(f"Double click failed: {reply.get('error')}")
738
+
739
async def _triple_click(self, action: TripleClick) -> CUActionResult:
    """
    Triple-click by sending three consecutive left clicks.

    Coordinates that are missing or None are left out of the command
    parameters. The sequence stops at the first click the server rejects.

    Raises:
        RuntimeError: If any of the three clicks is reported as failed.
    """
    params: dict[str, Any] = {}
    for axis in ("x", "y"):
        if action.get(axis) is not None:
            params[axis] = action[axis]

    clicks_left = 3
    while clicks_left > 0:
        reply = await self.conn.send_command("left_click", params)
        if not reply.get("success"):
            raise RuntimeError(f"Triple click failed: {reply.get('error')}")
        clicks_left -= 1

    return CUActionResult(screenshot=None, data={"action": "triple_click"})
753
+
754
async def _move(self, action: Move) -> CUActionResult:
    """Move the mouse cursor to the action's ``x`` / ``y`` position.

    Raises:
        KeyError: If the action lacks ``x`` or ``y``.
        RuntimeError: If the response does not report success.
    """
    target = {"x": action["x"], "y": action["y"]}
    result = await self.conn.send_command("move_cursor", target)
    if result.get("success"):
        return CUActionResult(screenshot=None, data={"action": "move"})

    raise RuntimeError(f"Move failed: {result.get('error')}")
763
+
764
async def _scroll(self, action: Scroll) -> CUActionResult:
    """Execute a scroll action.

    The action carries ``dx`` / ``dy`` scroll amounts in pixels. Positive
    ``dy`` scrolls down, negative ``dy`` scrolls up. When both ``x`` and
    ``y`` are given, the cursor is first moved there and a left click is
    issued so the element under the cursor receives the scroll.

    Vertical and horizontal components are handled independently, so an
    action with both ``dx`` and ``dy`` non-zero performs both scrolls
    (previously the horizontal part was silently dropped).

    Raises:
        RuntimeError: If a scroll command does not report success.
    """
    # Treat a missing or explicit ``None`` delta as "no scroll on this axis".
    dx = action.get("dx") or 0
    dy = action.get("dy") or 0

    x = action.get("x")
    y = action.get("y")
    if x is not None and y is not None:
        # Best-effort positioning: responses are intentionally not checked.
        await self.conn.send_command("move_cursor", {"x": x, "y": y})
        # NOTE(review): this click focuses the target but may also activate
        # whatever is under the cursor - confirm this is acceptable.
        await self.conn.send_command("left_click", {"x": x, "y": y})

    if dy != 0:
        # Convert pixel delta to wheel clicks (roughly 120 pixels per click),
        # always scrolling at least one click. ``int`` keeps the count
        # integral even when ``dy`` arrives as a float.
        clicks = max(1, int(abs(dy) // 120))
        command = "scroll_down" if dy > 0 else "scroll_up"
        response = await self.conn.send_command(command, {"clicks": clicks})
        if not response.get("success"):
            raise RuntimeError(f"Scroll failed: {response.get('error')}")

    if dx != 0:
        # Horizontal scrolling goes through the generic scroll command.
        response = await self.conn.send_command("scroll", {"x": dx, "y": 0})
        if not response.get("success"):
            raise RuntimeError(f"Scroll failed: {response.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "scroll"})
802
+
803
async def _type(self, action: Type) -> CUActionResult:
    """Type the action's ``text`` via the ``type_text`` command.

    Raises:
        KeyError: If the action has no ``text``.
        RuntimeError: If the response does not report success.
    """
    payload = {"text": action["text"]}
    result = await self.conn.send_command("type_text", payload)
    if result.get("success"):
        return CUActionResult(screenshot=None, data={"action": "type"})

    raise RuntimeError(f"Type failed: {result.get('error')}")
810
+
811
async def _keypress(self, action: Keypress) -> CUActionResult:
    """Press one key or a key combination.

    A single-element ``keys`` list is sent as ``press_key``; any other
    length is sent as a ``hotkey`` with the full list.

    Raises:
        KeyError: If the action has no ``keys``.
        RuntimeError: If the response does not report success.
    """
    keys = action["keys"]

    is_single = len(keys) == 1
    command = "press_key" if is_single else "hotkey"
    payload = {"key": keys[0]} if is_single else {"keys": keys}

    result = await self.conn.send_command(command, payload)
    if not result.get("success"):
        raise RuntimeError(f"Keypress failed: {result.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "keypress"})
824
+
825
async def _drag(self, action: Drag) -> CUActionResult:
    """Drag along the action's ``path``.

    When both ``start_x`` and ``start_y`` are present, the cursor is first
    moved there (that response is not checked). Each path entry is then
    unpacked as an ``(x, y)`` pair and sent as a ``drag_to`` command.

    Raises:
        RuntimeError: If any ``drag_to`` does not report success.
    """
    origin_x = action.get("start_x")
    origin_y = action.get("start_y")
    waypoints = action.get("path", [])

    has_origin = origin_x is not None and origin_y is not None
    if has_origin:
        await self.conn.send_command("move_cursor", {"x": origin_x, "y": origin_y})

    for target_x, target_y in waypoints:
        result = await self.conn.send_command(
            "drag_to", {"x": target_x, "y": target_y}
        )
        if not result.get("success"):
            raise RuntimeError(f"Drag failed: {result.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "drag"})
841
+
842
async def _wait(self, action: Wait) -> CUActionResult:
    """Sleep for the action's ``ms`` milliseconds and report the duration.

    Raises:
        KeyError: If the action has no ``ms``.
    """
    duration_ms = action["ms"]
    await asyncio.sleep(duration_ms / 1000.0)

    summary = {"action": "wait", "ms": duration_ms}
    return CUActionResult(screenshot=None, data=summary)
848
+
849
async def _mouse_down(self, action: MouseDown) -> CUActionResult:
    """Press and hold a mouse button at the current cursor position.

    The position is queried first; if that query fails, or a coordinate is
    missing from the reply, ``0`` is used for it. The button defaults to
    ``"left"``.

    Raises:
        RuntimeError: If the ``mouse_down`` command does not report success.
    """
    cursor = await self.conn.send_command("get_cursor_position")

    # Fall back to the origin when the position cannot be determined.
    x = y = 0
    if cursor.get("success"):
        where = cursor.get("position", {})
        x = where.get("x", 0)
        y = where.get("y", 0)

    payload = {"x": x, "y": y, "button": action.get("button", "left")}
    result = await self.conn.send_command("mouse_down", payload)
    if not result.get("success"):
        raise RuntimeError(f"Mouse down failed: {result.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "mouse_down"})
865
+
866
async def _mouse_up(self, action: MouseUp) -> CUActionResult:
    """Release a mouse button at the current cursor position.

    The position is queried first; if that query fails, or a coordinate is
    missing from the reply, ``0`` is used for it. The button defaults to
    ``"left"``.

    Raises:
        RuntimeError: If the ``mouse_up`` command does not report success.
    """
    cursor = await self.conn.send_command("get_cursor_position")

    # Fall back to the origin when the position cannot be determined.
    x = y = 0
    if cursor.get("success"):
        where = cursor.get("position", {})
        x = where.get("x", 0)
        y = where.get("y", 0)

    payload = {"x": x, "y": y, "button": action.get("button", "left")}
    result = await self.conn.send_command("mouse_up", payload)
    if not result.get("success"):
        raise RuntimeError(f"Mouse up failed: {result.get('error')}")

    return CUActionResult(screenshot=None, data={"action": "mouse_up"})
882
+
883
async def _cursor_position(self) -> CUActionResult:
    """Query the cursor position and return it in the result data.

    The ``x`` / ``y`` entries are ``None`` when the reply has no
    corresponding ``position`` value.

    Raises:
        RuntimeError: If the query does not report success.
    """
    result = await self.conn.send_command("get_cursor_position")
    if not result.get("success"):
        raise RuntimeError(f"Get cursor position failed: {result.get('error')}")

    where = result.get("position", {})
    summary = {
        "action": "cursor_position",
        "x": where.get("x"),
        "y": where.get("y"),
    }
    return CUActionResult(screenshot=None, data=summary)
894
+
895
async def _hold_key(self, action: HoldKey) -> CUActionResult:
    """Hold a key down for ``ms`` milliseconds, then release it.

    The ``key_up`` command is issued from a ``finally`` clause so the key
    is released even when the wait is interrupted (for example by task
    cancellation); otherwise the key would stay pressed.
    Failures of either command are reported, matching the other
    single-action methods.

    Raises:
        KeyError: If the action lacks ``key`` or ``ms``.
        RuntimeError: If ``key_down`` or ``key_up`` does not report success.
    """
    key = action["key"]
    ms = action["ms"]

    down = await self.conn.send_command("key_down", {"key": key})
    if not down.get("success"):
        # Nothing is held, so there is nothing to wait for or release.
        raise RuntimeError(f"Hold key failed: {down.get('error')}")

    try:
        await asyncio.sleep(ms / 1000.0)
    finally:
        # Always release the key, even if the sleep was cancelled.
        up = await self.conn.send_command("key_up", {"key": key})

    if not up.get("success"):
        raise RuntimeError(f"Hold key failed: {up.get('error')}")

    return CUActionResult(
        screenshot=None, data={"action": "hold_key", "key": key, "ms": ms}
    )
907
+
908
async def _navigate(self, action: Navigate) -> CUActionResult:
    """Navigate to the action's ``url`` using keyboard input.

    Sends the ``ctrl+l`` hotkey (presumably to focus the address bar -
    TODO confirm for the target browser), types the URL, presses
    ``Return``, and pauses after each step. The command responses are not
    checked. A screenshot is taken afterwards.

    Raises:
        KeyError: If the action has no ``url``.
        RuntimeError: If the final screenshot fails.
    """
    url = action["url"]

    # (command, params, pause in seconds after the command)
    steps = (
        ("hotkey", {"keys": ["ctrl", "l"]}, 0.2),
        ("type_text", {"text": url}, 0.1),
        ("press_key", {"key": "Return"}, 1.5),
    )
    for command, payload, pause in steps:
        await self.conn.send_command(command, payload)
        await asyncio.sleep(pause)

    return await self._screenshot_with_data({"action": "navigate", "url": url})
920
+
921
async def _go_back(self, action: GoBack) -> CUActionResult:
    """Send the ``alt+Left`` hotkey, wait briefly, and take a screenshot.

    The hotkey response is not checked.

    Raises:
        RuntimeError: If the screenshot fails.
    """
    back_hotkey = {"keys": ["alt", "Left"]}
    await self.conn.send_command("hotkey", back_hotkey)

    # Pause before capturing the screenshot.
    await asyncio.sleep(0.5)

    summary = {"action": "go_back"}
    return await self._screenshot_with_data(summary)
926
+
927
async def _go_forward(self, action: GoForward) -> CUActionResult:
    """Send the ``alt+Right`` hotkey, wait briefly, and take a screenshot.

    The hotkey response is not checked.

    Raises:
        RuntimeError: If the screenshot fails.
    """
    forward_hotkey = {"keys": ["alt", "Right"]}
    await self.conn.send_command("hotkey", forward_hotkey)

    # Pause before capturing the screenshot.
    await asyncio.sleep(0.5)

    summary = {"action": "go_forward"}
    return await self._screenshot_with_data(summary)
932
+
933
async def _search(self, action: Search) -> CUActionResult:
    """Open a Google search for the action's ``query`` using keyboard input.

    The percent-encoded query is appended to the Google search URL, which
    is then entered the same way as a navigation: ``ctrl+l`` hotkey, type
    the URL, press ``Return``, with a pause after each step. The command
    responses are not checked. A screenshot is taken afterwards.

    Raises:
        KeyError: If the action has no ``query``.
        RuntimeError: If the final screenshot fails.
    """
    from urllib.parse import quote

    query = action["query"]
    encoded_query = quote(query)
    search_url = f"https://www.google.com/search?q={encoded_query}"

    # (command, params, pause in seconds after the command)
    steps = [
        ("hotkey", {"keys": ["ctrl", "l"]}, 0.2),
        ("type_text", {"text": search_url}, 0.1),
        ("press_key", {"key": "Return"}, 1.5),
    ]
    for command, payload, pause in steps:
        await self.conn.send_command(command, payload)
        await asyncio.sleep(pause)

    return await self._screenshot_with_data({"action": "search", "query": query})
948
+
949
async def _bash(self, action: Bash) -> CUActionResult:
    """Run the action's ``command`` through the ``run_command`` command.

    A truthy ``restart`` flag is not supported and yields a result whose
    data carries an ``error`` entry instead of running anything. When no
    command is given, a bare ``bash`` result is returned.

    Returns:
        A result whose data holds ``stdout``, ``stderr`` and
        ``return_code`` from the response (defaulting to ``""``, ``""``
        and ``0`` when absent).

    Raises:
        RuntimeError: If the command response does not report success.
    """
    if action.get("restart", False):
        unsupported = {"action": "bash", "error": "restart not supported"}
        return CUActionResult(screenshot=None, data=unsupported)

    command = action.get("command")
    if not command:
        return CUActionResult(screenshot=None, data={"action": "bash"})

    result = await self.conn.send_command("run_command", {"command": command})
    if not result.get("success"):
        raise RuntimeError(f"Bash command failed: {result.get('error')}")

    output = {
        "action": "bash",
        "stdout": result.get("stdout", ""),
        "stderr": result.get("stderr", ""),
        "return_code": result.get("return_code", 0),
    }
    return CUActionResult(screenshot=None, data=output)
976
+
977
async def _screenshot_with_data(self, data: dict) -> CUActionResult:
    """Take a screenshot and attach ``data`` to the result.

    The response's ``image_data`` is base64-decoded (a missing value
    decodes to empty bytes) and wrapped as an ``image/png`` screenshot.

    Raises:
        RuntimeError: If the screenshot command does not report success.
    """
    result = await self.conn.send_command("screenshot")
    if not result.get("success"):
        raise RuntimeError(f"Screenshot failed: {result.get('error')}")

    encoded = result.get("image_data", "")
    png_bytes = base64.b64decode(encoded)
    shot = ScreenshotResult(media_type="image/png", content=png_bytes)

    return CUActionResult(screenshot=shot, data=data)