boxlite 0.3.0.post1__cp314-cp314-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of boxlite might be problematic. Click here for more details.

boxlite/computerbox.py ADDED
@@ -0,0 +1,502 @@
1
+ """
2
+ ComputerBox - Desktop environment with web access.
3
+
4
+ Provides a minimal, elegant API for running isolated desktop environments
5
+ that can be viewed from a browser, with full GUI automation support.
6
+ """
7
+
8
+ import asyncio
9
+ import base64
10
+ import logging
11
+ from typing import Optional, Tuple, TYPE_CHECKING
12
+
13
+ from .errors import ExecError, TimeoutError, ParseError
14
+ from .simplebox import SimpleBox
15
+
16
+ if TYPE_CHECKING:
17
+ from .boxlite import Boxlite
18
+
19
+ __all__ = ["ComputerBox"]
20
+
21
+ # Configure logger
22
+ logger = logging.getLogger("boxlite.computerbox")
23
+
24
+
25
class ComputerBox(SimpleBox):
    """
    Desktop environment accessible via web browser.

    Auto-starts a full desktop environment with web interface.
    Access the desktop by opening the URL in your browser.

    Note: Uses HTTPS with self-signed certificate - your browser will show
    a security warning. Click "Advanced" and "Proceed" to access the desktop.

    All GUI automation methods run ``xdotool`` (or a short Python script for
    screenshots) inside the box via ``self.exec`` and raise ``ExecError`` when
    the guest command exits with a non-zero status.

    Usage:
        >>> async with ComputerBox() as desktop:
        ...     await desktop.wait_until_ready()
        ...     screenshot = await desktop.screenshot()

    Example with custom settings:
        >>> async with ComputerBox(memory=4096, cpu=4) as desktop:
        ...     await desktop.mouse_move(100, 200)
        ...     await desktop.left_click()
    """

    # Always use xfce desktop
    _IMAGE_REFERENCE = "lscr.io/linuxserver/webtop:ubuntu-xfce"
    # Guest-side web UI ports: 3000 is mapped as HTTP, 3001 as HTTPS
    _GUEST_GUI_HTTP_PORT = 3000
    _GUEST_GUI_HTTPS_PORT = 3001
    # Webtop display number
    _DISPLAY_NUMBER = ":1"
    # Display resolution requested from the guest through the DISPLAY_SIZE*/
    # SELKIES_MANUAL_* variables set in __init__ (Anthropic requires <= 1280x800).
    _DEFAULT_DISPLAY_WIDTH_PX = 1024
    _DEFAULT_DISPLAY_HEIGHT_PX = 768

    # Guest-side script used by screenshot(). It is assembled line by line so
    # that no source indentation can leak into the code handed to `python3 -c`.
    _SCREENSHOT_SCRIPT = "\n".join((
        "from PIL import ImageGrab",
        "import io",
        "import base64",
        "",
        "img = ImageGrab.grab()",
        "buffer = io.BytesIO()",
        'img.save(buffer, format="PNG")',
        'print(base64.b64encode(buffer.getvalue()).decode("utf-8"))',
    ))

    def __init__(self, cpu: int = 2, memory: int = 2048, gui_http_port: int = 3000,
                 gui_https_port: int = 3001, runtime: Optional['Boxlite'] = None,
                 **kwargs):
        """
        Create and auto-start a desktop environment.

        Args:
            cpu: Number of CPU cores (default: 2)
            memory: Memory in MiB (default: 2048)
            gui_http_port: Port for web-based desktop GUI over HTTP (default: 3000)
            gui_https_port: Port for web-based desktop GUI over HTTPS (default: 3001)
            runtime: Optional runtime instance (uses global default if None)
            **kwargs: Additional configuration options (volumes, etc.).
                ``env`` and ``ports`` are extracted and appended after the
                built-in defaults; ``None`` is treated the same as omitting them.
                NOTE(review): entries are assumed to be (name, value) and
                (host_port, guest_port) pairs like the defaults below - confirm
                against SimpleBox.
        """
        # `or []` also covers an explicit env=None / ports=None, which would
        # otherwise make list(None) raise TypeError.
        user_env = kwargs.pop('env', None) or []
        user_ports = kwargs.pop('ports', None) or []

        default_env = [
            ("DISPLAY", self._DISPLAY_NUMBER),
            ("DISPLAY_SIZEW", str(self._DEFAULT_DISPLAY_WIDTH_PX)),
            ("DISPLAY_SIZEH", str(self._DEFAULT_DISPLAY_HEIGHT_PX)),
            ("SELKIES_MANUAL_WIDTH", str(self._DEFAULT_DISPLAY_WIDTH_PX)),
            ("SELKIES_MANUAL_HEIGHT", str(self._DEFAULT_DISPLAY_HEIGHT_PX)),
            ("SELKIES_UI_SHOW_SIDEBAR", "false"),
        ]
        default_ports = [
            (gui_http_port, self._GUEST_GUI_HTTP_PORT),
            (gui_https_port, self._GUEST_GUI_HTTPS_PORT),
        ]

        super().__init__(
            image=self._IMAGE_REFERENCE,
            memory_mib=memory,
            cpus=cpu,
            runtime=runtime,
            env=default_env + list(user_env),
            ports=default_ports + list(user_ports),
            **kwargs
        )

    async def _xdotool(self, action: str, *args: str):
        """
        Run ``xdotool`` with ``args`` inside the box and return the exec result.

        Args:
            action: Label of the public operation (e.g. "left_click()"), used
                as the command name in the raised ExecError
            *args: Arguments passed to xdotool verbatim

        Raises:
            ExecError: If xdotool exits with a non-zero status
        """
        exec_result = await self.exec("xdotool", *args)
        if exec_result.exit_code != 0:
            logger.error("%s: xdotool failed with exit code %s", action, exec_result.exit_code)
            logger.error("stderr: %s", exec_result.stderr)
            raise ExecError(action, exec_result.exit_code, exec_result.stderr)
        return exec_result

    @staticmethod
    def _png_dimensions(b64_png: str) -> Optional[Tuple[int, int]]:
        """
        Read (width, height) from the IHDR chunk of a base64-encoded PNG.

        Only the first 32 base64 characters (24 bytes) are decoded: the 8-byte
        PNG signature, the 4-byte chunk length, the 4-byte "IHDR" tag and the
        two big-endian 32-bit dimensions.

        Returns:
            Tuple of (width, height), or None if the data does not start with
            a PNG header
        """
        try:
            header = base64.b64decode(b64_png[:32])
        except ValueError:  # binascii.Error is a ValueError subclass
            return None
        if len(header) < 24 or header[:8] != b"\x89PNG\r\n\x1a\n" or header[12:16] != b"IHDR":
            return None
        return (int.from_bytes(header[16:20], "big"), int.from_bytes(header[20:24], "big"))

    async def wait_until_ready(self, timeout: int = 60):
        """
        Wait until the desktop environment is fully loaded and ready.

        Polls ``xwininfo -tree -root`` every 0.5 seconds until its output
        mentions both ``xfdesktop`` and the expected resolution, which ensures
        screenshots won't be black.

        Args:
            timeout: Maximum time to wait in seconds (default: 60). The deadline
                is checked before each poll, so a poll already in flight is not
                interrupted.

        Raises:
            TimeoutError: If desktop doesn't become ready within timeout period

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.wait_until_ready()
            ...     # Desktop is now ready for automation and screenshots
        """
        import time  # local import: `time` is not imported at module level

        logger.info("Waiting for desktop to become ready...")
        expected_size = f'{self._DEFAULT_DISPLAY_WIDTH_PX}x{self._DEFAULT_DISPLAY_HEIGHT_PX}'
        retry_delay = 0.5
        # Monotonic clock: wall-clock adjustments must not shorten or extend the wait.
        start_time = time.monotonic()

        while True:
            elapsed = time.monotonic() - start_time
            if elapsed > timeout:
                raise TimeoutError(f"Desktop did not become ready within {timeout} seconds")

            try:
                # Check if xfdesktop window exists at correct resolution
                exec_result = await self.exec("xwininfo", "-tree", "-root")
                output = exec_result.stdout
                logger.debug("stdout %s", output)

                if 'xfdesktop' in output and expected_size in output:
                    logger.info("Desktop ready after %.1f seconds", elapsed)
                    return

                logger.debug("Desktop not ready yet (waited %.1fs), retrying...", elapsed)
            except Exception as e:
                # Deliberate best-effort: any failure while polling is treated
                # as "not ready yet" and retried until the deadline.
                logger.debug("Desktop not ready: %s, retrying...", e)

            await asyncio.sleep(retry_delay)

    # GUI Automation Methods

    async def screenshot(self) -> dict:
        """
        Capture a screenshot of the desktop using PIL.ImageGrab (pre-installed).

        Note: Screenshots may be black if taken before the XFCE desktop has fully
        initialized. Use wait_until_ready() before taking screenshots to ensure
        the desktop has been rendered.

        Returns:
            Dictionary containing:
            - data: Base64-encoded PNG image data
            - width: Image width in pixels, read from the PNG header (falls
              back to the configured 1024 if the header cannot be read)
            - height: Image height in pixels, read from the PNG header (falls
              back to the configured 768 if the header cannot be read)
            - format: Image format ("png")

        Raises:
            ExecError: If the capture script exits with a non-zero status
            ParseError: If the capture script produced no output

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.wait_until_ready()  # Ensure desktop is rendered
            ...     result = await desktop.screenshot()
            ...     image_data = base64.b64decode(result['data'])
            ...     with open('screenshot.png', 'wb') as f:
            ...         f.write(image_data)
        """
        logger.info("Taking screenshot...")

        # Use PIL.ImageGrab (pre-installed in webtop) to capture screenshot
        # This avoids needing to install scrot and is faster
        logger.debug("Capturing screenshot with PIL.ImageGrab...")
        exec_result = await self.exec("python3", "-c", self._SCREENSHOT_SCRIPT)

        if exec_result.exit_code != 0:
            logger.error("Screenshot failed with exit code %s", exec_result.exit_code)
            logger.error("stderr: %s", exec_result.stderr)
            raise ExecError("screenshot()", exec_result.exit_code, exec_result.stderr)

        b64_data = exec_result.stdout.strip()
        if not b64_data:
            logger.error("Screenshot command produced no output")
            raise ParseError("Screenshot command produced no output")

        # Report the real image size instead of assuming the configured one.
        size = self._png_dimensions(b64_data)
        if size is None:
            logger.warning(
                "Could not read PNG header from screenshot output; "
                "reporting configured display size")
            size = (self._DEFAULT_DISPLAY_WIDTH_PX, self._DEFAULT_DISPLAY_HEIGHT_PX)
        width, height = size

        logger.info("Screenshot captured: %sx%s", width, height)
        return {
            "data": b64_data,
            "width": width,
            "height": height,
            "format": "png"
        }

    async def mouse_move(self, x: int, y: int):
        """
        Move mouse cursor to absolute coordinates.

        Args:
            x: X coordinate
            y: Y coordinate

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
        """
        logger.info("Moving mouse to (%s, %s)", x, y)
        await self._xdotool(f"mouse_move({x}, {y})", "mousemove", str(x), str(y))
        logger.debug("Mouse moved to (%s, %s)", x, y)

    async def left_click(self):
        """
        Click left mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.left_click()
        """
        logger.info("Clicking left mouse button")
        await self._xdotool("left_click()", "click", "1")
        logger.debug("Clicked left button")

    async def right_click(self):
        """
        Click right mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.right_click()
        """
        logger.info("Clicking right mouse button")
        await self._xdotool("right_click()", "click", "3")
        logger.debug("Clicked right button")

    async def middle_click(self):
        """
        Click middle mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.middle_click()
        """
        logger.info("Clicking middle mouse button")
        await self._xdotool("middle_click()", "click", "2")
        logger.debug("Clicked middle button")

    async def double_click(self):
        """
        Double-click left mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.double_click()
        """
        logger.info("Double-clicking left mouse button")
        await self._xdotool("double_click()", "click", "--repeat", "2", "--delay", "100", "1")
        logger.debug("Double-clicked left button")

    async def triple_click(self):
        """
        Triple-click left mouse button at current position.

        Useful for selecting entire lines or paragraphs of text.

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.triple_click()
        """
        logger.info("Triple-clicking left mouse button")
        # Anthropic requires 100-200ms delays between clicks
        await self._xdotool("triple_click()", "click", "--repeat", "3", "--delay", "100", "1")
        logger.debug("Triple-clicked left button")

    async def left_click_drag(self, start_x: int, start_y: int, end_x: int, end_y: int):
        """
        Drag mouse from start position to end position with left button held.

        Args:
            start_x: Starting X coordinate
            start_y: Starting Y coordinate
            end_x: Ending X coordinate
            end_y: Ending Y coordinate

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.left_click_drag(100, 100, 200, 200)
        """
        logger.info("Dragging from (%s, %s) to (%s, %s)", start_x, start_y, end_x, end_y)
        # Chain all operations in single xdotool command: move, press, move, release
        await self._xdotool(
            "left_click_drag()",
            "mousemove", str(start_x), str(start_y),
            "mousedown", "1",
            "sleep", "0.1",
            "mousemove", str(end_x), str(end_y),
            "sleep", "0.1",
            "mouseup", "1",
        )
        logger.debug("Drag completed")

    async def cursor_position(self) -> Tuple[int, int]:
        """
        Get the current mouse cursor position.

        Returns:
            Tuple of (x, y) coordinates

        Raises:
            ExecError: If xdotool exits with a non-zero status
            ParseError: If the xdotool output has no integer X and Y values

        Example:
            >>> async with ComputerBox() as desktop:
            ...     x, y = await desktop.cursor_position()
            ...     print(f"Cursor at ({x}, {y})")
        """
        logger.info("Getting cursor position")

        exec_result = await self._xdotool("cursor_position()", "getmouselocation", "--shell")

        # Parse output (format: "X=123\nY=456\nSCREEN=0\nWINDOW=...")
        coords = {}
        for line in exec_result.stdout.split('\n'):
            name, sep, value = line.strip().partition('=')
            if not sep or name not in ('X', 'Y'):
                continue
            try:
                coords[name] = int(value)
            except ValueError as err:
                # Surface malformed numbers as the documented ParseError
                # rather than leaking a bare ValueError to callers.
                logger.error("Failed to parse cursor position from xdotool output")
                raise ParseError("Failed to parse cursor position from xdotool output") from err

        if 'X' in coords and 'Y' in coords:
            logger.info("Cursor position: (%s, %s)", coords['X'], coords['Y'])
            return (coords['X'], coords['Y'])

        logger.error("Failed to parse cursor position from xdotool output")
        raise ParseError("Failed to parse cursor position from xdotool output")

    async def type(self, text: str):
        """
        Type text using the keyboard.

        Args:
            text: Text to type

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.type("Hello World!")
        """
        # Log only the length: typed text may contain passwords or other secrets.
        logger.info("Typing %d characters", len(text))

        # "--" ends option parsing so text starting with "-" is typed literally;
        # no escaping is needed because the text is passed as a single argument.
        await self._xdotool("type()", "type", "--", text)
        logger.debug("Typed %d characters", len(text))

    async def key(self, text: str):
        """
        Press a special key or key combination.

        Args:
            text: Key to press (e.g., 'Return', 'Escape', 'ctrl+c', 'alt+F4')

        Special keys: Return, Escape, Tab, space, BackSpace, Delete,
        Up, Down, Left, Right, Home, End, Page_Up, Page_Down,
        F1-F12, etc.

        Raises:
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.key("Return")
            ...     await desktop.key("ctrl+c")
        """
        logger.info("Pressing key: %s", text)
        await self._xdotool("key()", "key", text)
        logger.debug("Pressed key: %s", text)

    async def scroll(self, x: int, y: int, direction: str, amount: int = 3):
        """
        Scroll at a specific position.

        Args:
            x: X coordinate where to scroll
            y: Y coordinate where to scroll
            direction: Scroll direction - 'up', 'down', 'left', or 'right'
                (case-insensitive)
            amount: Number of scroll units (default: 3); must not be negative

        Raises:
            ValueError: If direction is not recognized or amount is negative
            ExecError: If xdotool exits with a non-zero status

        Example:
            >>> async with ComputerBox() as desktop:
            ...     # Scroll up in the middle of the screen
            ...     await desktop.scroll(512, 384, "up", amount=5)
        """
        logger.info("Scrolling %s at (%s, %s), amount=%s", direction, x, y, amount)

        # Map scroll directions to xdotool mouse button numbers
        # In X11, scroll is simulated using mouse button clicks:
        # Button 4 = scroll up, Button 5 = scroll down
        # Button 6 = scroll left, Button 7 = scroll right
        direction_map = {
            "up": "4",
            "down": "5",
            "left": "6",
            "right": "7"
        }

        button = direction_map.get(direction.lower())
        if not button:
            raise ValueError(
                f"Invalid scroll direction: {direction}. Must be 'up', 'down', 'left', or 'right'")
        if amount < 0:
            # Fail fast with a clear message instead of handing xdotool a
            # meaningless negative repeat count.
            raise ValueError(f"Invalid scroll amount: {amount}. Must not be negative")

        # Chain mousemove and repeated clicks in single xdotool command
        await self._xdotool(
            "scroll()",
            "mousemove", str(x), str(y),
            "click", "--repeat", str(amount), button,
        )

        logger.debug("Scrolled %s %s times at (%s, %s)", direction, amount, x, y)

    async def get_screen_size(self) -> Tuple[int, int]:
        """
        Get the screen resolution.

        Returns:
            Tuple of (width, height)

        Raises:
            ExecError: If xdotool exits with a non-zero status
            ParseError: If the xdotool output is not two integers

        Example:
            >>> async with ComputerBox() as desktop:
            ...     width, height = await desktop.get_screen_size()
            ...     print(f"Screen: {width}x{height}")
        """
        logger.info("Getting screen size")

        exec_result = await self._xdotool("get_screen_size()", "getdisplaygeometry")

        # Parse stdout (format: "width height")
        result = exec_result.stdout.strip()
        logger.debug("stdout result: %s", result)
        parts = result.split()
        if len(parts) == 2:
            try:
                size = (int(parts[0]), int(parts[1]))
            except ValueError as err:
                # Keep the documented failure mode (ParseError) for non-numeric output.
                logger.error("Failed to parse screen size from xdotool output")
                raise ParseError("Failed to parse screen size from xdotool output") from err
            logger.info("Screen size: %sx%s", size[0], size[1])
            return size

        logger.error("Failed to parse screen size from xdotool output")
        raise ParseError("Failed to parse screen size from xdotool output")
boxlite/errors.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ BoxLite error types.
3
+
4
+ Provides a hierarchy of exceptions for different failure modes.
5
+ """
6
+
7
+ __all__ = ['BoxliteError', 'ExecError', 'TimeoutError', 'ParseError']
8
+
9
+
10
class BoxliteError(Exception):
    """Base exception for all boxlite errors."""


class ExecError(BoxliteError):
    """
    Raised when a command execution fails (non-zero exit code).

    Attributes:
        command: The command that failed
        exit_code: The non-zero exit code
        stderr: Standard error output from the command
    """

    def __init__(self, command: str, exit_code: int, stderr: str):
        self.command = command
        self.exit_code = exit_code
        self.stderr = stderr
        super().__init__(f"Command '{command}' failed with exit code {exit_code}: {stderr}")

    def __reduce__(self):
        # BaseException's default __reduce__ rebuilds the instance as
        # cls(*self.args), but args holds only the formatted message while
        # __init__ needs three arguments. Without this override, unpickling
        # or copy.copy() of an ExecError raises TypeError.
        return (type(self), (self.command, self.exit_code, self.stderr))


class TimeoutError(BoxliteError):
    """
    Raised when an operation times out.

    Note: this name shadows the builtin ``TimeoutError`` in any module that
    imports it, and it is not a subclass of the builtin, so a plain
    ``except TimeoutError`` only catches it where this class is the one in scope.
    """


class ParseError(BoxliteError):
    """Raised when output parsing fails."""
boxlite/exec.py ADDED
@@ -0,0 +1,26 @@
1
+ """
2
+ Execution API - Simple interface for command execution.
3
+
4
+ Provides Docker-like API for executing commands in boxes.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+
9
+ __all__ = [
10
+ 'ExecResult',
11
+ ]
12
+
13
+
14
@dataclass
class ExecResult:
    """
    Captured outcome of one command run inside a box.

    Instances are plain value objects: the dataclass decorator generates the
    constructor, ``repr`` and field-wise equality.
    """

    # Exit code from the command (negative if terminated by signal)
    exit_code: int
    # Standard output as string
    stdout: str
    # Standard error as string
    stderr: str
+ stderr: str