boxlite 0.2.2__cp314-cp314t-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of boxlite might be problematic. Click here for more details.

boxlite/computerbox.py ADDED
@@ -0,0 +1,520 @@
1
+ """
2
+ ComputerBox - Desktop environment with web access.
3
+
4
+ Provides a minimal, elegant API for running isolated desktop environments
5
+ that can be viewed from a browser, with full GUI automation support.
6
+ """
7
+
8
+ import asyncio
9
+ import base64
10
+ import logging
11
+ from typing import Optional, Tuple, TYPE_CHECKING
12
+
13
+ from .errors import ExecError, TimeoutError, ParseError
14
+ from .simplebox import SimpleBox
15
+
16
+ if TYPE_CHECKING:
17
+ from .boxlite import Boxlite
18
+
19
+ __all__ = ["ComputerBox"]
20
+
21
+ # Configure logger
22
+ logger = logging.getLogger("boxlite.computerbox")
23
+
24
+
25
class ComputerBox(SimpleBox):
    """
    Desktop environment accessible via web browser.

    Auto-starts a full desktop environment with web interface.
    Access the desktop by opening the URL in your browser.

    Note: Uses HTTPS with self-signed certificate - your browser will show
    a security warning. Click "Advanced" and "Proceed" to access the desktop.

    GUI automation is implemented by running ``xdotool`` (input and queries),
    ``xwininfo`` (readiness probe) and ``python3`` with PIL (screenshots)
    inside the box through ``self.exec``.

    Usage:
        >>> async with ComputerBox() as desktop:
        ...     print(f"Desktop ready at: {desktop.endpoint()}")
        ...     # Open the URL in your browser to see the desktop
        ...     await asyncio.sleep(300)  # Keep running for 5 minutes

    Example with custom settings:
        >>> async with ComputerBox(memory=4096, cpu=4, monitor_https_port=3002) as desktop:
        ...     url = desktop.endpoint()
    """

    # Always use xfce desktop
    _IMAGE_REFERENCE = "lscr.io/linuxserver/webtop:ubuntu-xfce"
    # Guest-side monitor ports; Webtop uses port 3001 with HTTPS
    _GUEST_MONITOR_HTTP_PORT = 3000
    _GUEST_MONITOR_HTTPS_PORT = 3001
    # Webtop display number
    _DISPLAY_NUMBER = ":1"
    # Expected display resolution when SELKIES_IS_MANUAL_RESOLUTION_MODE=true
    # (Anthropic requires <= 1280x800).
    # Webtop/Selkies defaults to 1024x768 in manual resolution mode
    _DEFAULT_DISPLAY_WIDTH_PX = 1024
    _DEFAULT_DISPLAY_HEIGHT_PX = 768
    # Seconds between readiness probes in wait_until_ready()
    _READY_POLL_INTERVAL_S = 0.5
    # First 8 bytes of every PNG file
    _PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
    # In X11, scrolling is simulated with mouse button clicks:
    # 4 = up, 5 = down, 6 = left, 7 = right
    _SCROLL_BUTTONS = {
        "up": "4",
        "down": "5",
        "left": "6",
        "right": "7",
    }
    # Script executed inside the box by screenshot(): grabs the screen with
    # PIL.ImageGrab and prints the PNG as base64 on stdout. Built from joined
    # lines so the source passed to ``python3 -c`` carries no indentation.
    _SCREENSHOT_SCRIPT = "\n".join((
        "from PIL import ImageGrab",
        "import io",
        "import base64",
        "",
        "img = ImageGrab.grab()",
        "buffer = io.BytesIO()",
        'img.save(buffer, format="PNG")',
        'print(base64.b64encode(buffer.getvalue()).decode("utf-8"))',
    ))

    def __init__(self, cpu: int = 2, memory: int = 2048, monitor_http_port: int = 3000,
                 monitor_https_port: int = 3001, runtime: Optional['Boxlite'] = None,
                 **kwargs):
        """
        Create and auto-start a desktop environment.

        Args:
            cpu: Number of CPU cores (default: 2)
            memory: Memory in MiB (default: 2048)
            monitor_http_port: Host port mapped to the guest HTTP monitor
                port 3000 (default: 3000)
            monitor_https_port: Host port mapped to the guest HTTPS monitor
                port 3001; this is the port reported by endpoint()
                (default: 3001)
            runtime: Optional runtime instance (uses global default if None)
            **kwargs: Additional configuration options (volumes, etc.)
                forwarded to SimpleBox. ``env`` and ``ports`` entries are
                appended after the built-in defaults.
        """
        self._monitor_port = monitor_https_port

        # Merge user-provided env with default env. ``or []`` makes an
        # explicit env=None behave like "not provided" instead of crashing.
        user_env = kwargs.pop('env', None) or []
        width = str(self._DEFAULT_DISPLAY_WIDTH_PX)
        height = str(self._DEFAULT_DISPLAY_HEIGHT_PX)
        default_env = [
            ("DISPLAY", self._DISPLAY_NUMBER),
            ("DISPLAY_SIZEW", width),
            ("DISPLAY_SIZEH", height),
            ("SELKIES_MANUAL_WIDTH", width),
            ("SELKIES_MANUAL_HEIGHT", height),
            ("SELKIES_UI_SHOW_SIDEBAR", "false"),
        ]
        merged_env = default_env + list(user_env)

        # Merge user-provided ports with default ports (same None handling)
        user_ports = kwargs.pop('ports', None) or []
        default_ports = [
            (monitor_http_port, self._GUEST_MONITOR_HTTP_PORT),
            (monitor_https_port, self._GUEST_MONITOR_HTTPS_PORT),
        ]
        merged_ports = default_ports + list(user_ports)

        super().__init__(
            image=self._IMAGE_REFERENCE,
            memory_mib=memory,
            cpus=cpu,
            runtime=runtime,
            env=merged_env,
            ports=merged_ports,
            **kwargs
        )

    def endpoint(self) -> str:
        """
        Get the web interface endpoint.

        Returns:
            HTTPS endpoint URL to access the desktop in your browser.
            Note: Uses self-signed certificate - browser will show security warning.

        Example:
            >>> async with ComputerBox() as desktop:
            ...     url = desktop.endpoint()
            ...     print(f"Open this URL: {url}")
            ...     # Navigate to the URL in your browser
            ...     # Accept the self-signed certificate warning
        """
        return f"https://localhost:{self._monitor_port}"

    async def wait_until_ready(self, timeout: int = 60):
        """
        Wait until the desktop environment is fully loaded and ready.

        Polls ``xwininfo -tree -root`` until its output mentions an
        ``xfdesktop`` window and the expected resolution, which ensures
        screenshots won't be black. At least one probe is always made.

        Args:
            timeout: Maximum time to wait in seconds (default: 60)

        Raises:
            TimeoutError: If desktop doesn't become ready within timeout period

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.wait_until_ready()
            ...     # Desktop is now ready for automation and screenshots
        """
        import time

        logger.info("Waiting for desktop to become ready...")
        expected_size = f'{self._DEFAULT_DISPLAY_WIDTH_PX}x{self._DEFAULT_DISPLAY_HEIGHT_PX}'
        # Monotonic clock: immune to wall-clock adjustments during the wait
        start_time = time.monotonic()
        deadline = start_time + timeout

        while True:
            try:
                # Check if xfdesktop window exists at correct resolution
                exec_result = await self.exec("xwininfo", "-tree", "-root")
            except Exception as e:
                # Best effort: the box may not accept commands yet, keep polling
                logger.debug(f"Desktop not ready: {e}, retrying...")
            else:
                result = exec_result.stdout or ""
                logger.debug(f"stdout {result}")
                elapsed = time.monotonic() - start_time
                if 'xfdesktop' in result and expected_size in result:
                    logger.info(f"Desktop ready after {elapsed:.1f} seconds")
                    return
                logger.debug(f"Desktop not ready yet (waited {elapsed:.1f}s), retrying...")

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"Desktop did not become ready within {timeout} seconds")
            # Never sleep past the deadline
            await asyncio.sleep(min(self._READY_POLL_INTERVAL_S, remaining))

    # Internal helpers

    async def _xdotool(self, action: str, *args: str):
        """
        Run ``xdotool`` with the given arguments inside the box.

        Args:
            action: Label of the public operation, used as the ``command``
                of the ExecError raised on failure
            *args: Arguments passed to xdotool verbatim

        Returns:
            The result object returned by ``self.exec``

        Raises:
            ExecError: If xdotool exits with a non-zero code
        """
        exec_result = await self.exec("xdotool", *args)
        if exec_result.exit_code != 0:
            logger.error(f"xdotool failed with exit code {exec_result.exit_code}")
            logger.error(f"stderr: {exec_result.stderr}")
            raise ExecError(action, exec_result.exit_code, exec_result.stderr)
        return exec_result

    @classmethod
    def _png_dimensions(cls, b64_png: str) -> Optional[Tuple[int, int]]:
        """
        Read (width, height) from the header of a base64-encoded PNG.

        Only the first 32 base64 characters (24 bytes) are decoded: the
        8-byte signature, the IHDR chunk length and type, then width and
        height as big-endian 32-bit integers.

        Returns:
            Tuple of (width, height), or None if the data does not start
            with a PNG signature followed by an IHDR chunk.
        """
        try:
            header = base64.b64decode(b64_png[:32])
        except ValueError:  # binascii.Error is a ValueError subclass
            return None
        if (len(header) < 24 or not header.startswith(cls._PNG_SIGNATURE)
                or header[12:16] != b"IHDR"):
            return None
        return (int.from_bytes(header[16:20], "big"),
                int.from_bytes(header[20:24], "big"))

    # GUI Automation Methods

    async def screenshot(self) -> dict:
        """
        Capture a screenshot of the desktop using PIL.ImageGrab (pre-installed).

        Note: Screenshots may be black if taken before the XFCE desktop has fully
        initialized. Use wait_until_ready() before taking screenshots to ensure
        the desktop has been rendered.

        Returns:
            Dictionary containing:
            - data: Base64-encoded PNG image data
            - width: Image width in pixels, read from the PNG header
              (falls back to the default 1024 if the header is unreadable)
            - height: Image height in pixels, read from the PNG header
              (falls back to the default 768 if the header is unreadable)
            - format: Image format ("png")

        Raises:
            ExecError: If the capture command exits with a non-zero code
            ParseError: If the capture command produced no output

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.wait_until_ready()  # Ensure desktop is rendered
            ...     result = await desktop.screenshot()
            ...     image_data = base64.b64decode(result['data'])
            ...     with open('screenshot.png', 'wb') as f:
            ...         f.write(image_data)
        """
        logger.info("Taking screenshot...")

        # Use PIL.ImageGrab (pre-installed in webtop) to capture screenshot
        # This avoids needing to install scrot and is faster
        logger.debug("Capturing screenshot with PIL.ImageGrab...")
        exec_result = await self.exec("python3", "-c", self._SCREENSHOT_SCRIPT)

        # Check if screenshot command succeeded
        if exec_result.exit_code != 0:
            logger.error(f"Screenshot failed with exit code {exec_result.exit_code}")
            logger.error(f"stderr: {exec_result.stderr}")
            raise ExecError("screenshot()", exec_result.exit_code, exec_result.stderr)

        b64_data = exec_result.stdout.strip()
        if not b64_data:
            # Exit code 0 but nothing printed: do not hand back an empty image
            logger.error("Screenshot command produced no output")
            raise ParseError("Screenshot command produced no output")

        # Report the real image size; the display may not be at the default
        # resolution (e.g. when the caller overrides the resolution env vars).
        size = self._png_dimensions(b64_data)
        if size is None:
            logger.warning(
                "Could not read PNG dimensions from screenshot; assuming default display size")
            size = (self._DEFAULT_DISPLAY_WIDTH_PX, self._DEFAULT_DISPLAY_HEIGHT_PX)
        width, height = size

        logger.info(f"Screenshot captured: {width}x{height}")
        return {
            "data": b64_data,
            "width": width,
            "height": height,
            "format": "png",
        }

    async def mouse_move(self, x: int, y: int):
        """
        Move mouse cursor to absolute coordinates.

        Args:
            x: X coordinate
            y: Y coordinate

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
        """
        logger.info(f"Moving mouse to ({x}, {y})")
        await self._xdotool(f"mouse_move({x}, {y})", "mousemove", str(x), str(y))
        logger.debug(f"Mouse moved to ({x}, {y})")

    async def left_click(self):
        """
        Click left mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.left_click()
        """
        logger.info("Clicking left mouse button")
        await self._xdotool("left_click()", "click", "1")
        logger.debug("Clicked left button")

    async def right_click(self):
        """
        Click right mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.right_click()
        """
        logger.info("Clicking right mouse button")
        await self._xdotool("right_click()", "click", "3")
        logger.debug("Clicked right button")

    async def middle_click(self):
        """
        Click middle mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.middle_click()
        """
        logger.info("Clicking middle mouse button")
        await self._xdotool("middle_click()", "click", "2")
        logger.debug("Clicked middle button")

    async def double_click(self):
        """
        Double-click left mouse button at current position.

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.double_click()
        """
        logger.info("Double-clicking left mouse button")
        await self._xdotool("double_click()", "click", "--repeat", "2", "--delay", "100", "1")
        logger.debug("Double-clicked left button")

    async def triple_click(self):
        """
        Triple-click left mouse button at current position.

        Useful for selecting entire lines or paragraphs of text.

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.mouse_move(100, 200)
            ...     await desktop.triple_click()
        """
        logger.info("Triple-clicking left mouse button")
        # Anthropic requires 100-200ms delays between clicks
        await self._xdotool("triple_click()", "click", "--repeat", "3", "--delay", "100", "1")
        logger.debug("Triple-clicked left button")

    async def left_click_drag(self, start_x: int, start_y: int, end_x: int, end_y: int):
        """
        Drag mouse from start position to end position with left button held.

        Args:
            start_x: Starting X coordinate
            start_y: Starting Y coordinate
            end_x: Ending X coordinate
            end_y: Ending Y coordinate

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.left_click_drag(100, 100, 200, 200)
        """
        logger.info(f"Dragging from ({start_x}, {start_y}) to ({end_x}, {end_y})")
        # Chain all operations in single xdotool command: move, press, move, release
        await self._xdotool(
            "left_click_drag()",
            "mousemove", str(start_x), str(start_y),
            "mousedown", "1",
            "sleep", "0.1",
            "mousemove", str(end_x), str(end_y),
            "sleep", "0.1",
            "mouseup", "1",
        )
        logger.debug("Drag completed")

    async def cursor_position(self) -> Tuple[int, int]:
        """
        Get the current mouse cursor position.

        Returns:
            Tuple of (x, y) coordinates

        Raises:
            ExecError: If xdotool exits with a non-zero code
            ParseError: If the xdotool output lacks X/Y or they are not integers

        Example:
            >>> async with ComputerBox() as desktop:
            ...     x, y = await desktop.cursor_position()
            ...     print(f"Cursor at ({x}, {y})")
        """
        logger.info("Getting cursor position")

        # Use xdotool to get mouse location
        exec_result = await self._xdotool("cursor_position()", "getmouselocation", "--shell")

        # Parse output (format: "X=123\nY=456\nSCREEN=0\nWINDOW=...")
        coords = {}
        for line in exec_result.stdout.splitlines():
            name, sep, value = line.strip().partition('=')
            if not sep or name not in ('X', 'Y'):
                continue
            try:
                coords[name] = int(value)
            except ValueError as err:
                # Surface malformed numbers as the documented ParseError
                logger.error("Failed to parse cursor position from xdotool output")
                raise ParseError("Failed to parse cursor position from xdotool output") from err

        if 'X' in coords and 'Y' in coords:
            x, y = coords['X'], coords['Y']
            logger.info(f"Cursor position: ({x}, {y})")
            return (x, y)

        logger.error("Failed to parse cursor position from xdotool output")
        raise ParseError("Failed to parse cursor position from xdotool output")

    async def type(self, text: str):
        """
        Type text using the keyboard.

        Args:
            text: Text to type

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.type("Hello World!")
        """
        logger.info(f"Typing text: {text[:50]}{'...' if len(text) > 50 else ''}")

        # "--" ends option parsing so text starting with "-" is typed
        # literally; no escaping is needed because no shell is involved here.
        await self._xdotool("type()", "type", "--", text)
        logger.debug(f"Typed {len(text)} characters")

    async def key(self, text: str):
        """
        Press a special key or key combination.

        Args:
            text: Key to press (e.g., 'Return', 'Escape', 'ctrl+c', 'alt+F4')

        Special keys: Return, Escape, Tab, space, BackSpace, Delete,
        Up, Down, Left, Right, Home, End, Page_Up, Page_Down,
        F1-F12, etc.

        Raises:
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     await desktop.key("Return")
            ...     await desktop.key("ctrl+c")
        """
        logger.info(f"Pressing key: {text}")
        await self._xdotool("key()", "key", text)
        logger.debug(f"Pressed key: {text}")

    async def scroll(self, x: int, y: int, direction: str, amount: int = 3):
        """
        Scroll at a specific position.

        Args:
            x: X coordinate where to scroll
            y: Y coordinate where to scroll
            direction: Scroll direction - 'up', 'down', 'left', or 'right'
                (case-insensitive)
            amount: Number of scroll units, at least 1 (default: 3)

        Raises:
            ValueError: If direction is not recognised or amount is below 1
            ExecError: If xdotool exits with a non-zero code

        Example:
            >>> async with ComputerBox() as desktop:
            ...     # Scroll up in the middle of the screen
            ...     await desktop.scroll(512, 384, "up", amount=5)
        """
        logger.info(f"Scrolling {direction} at ({x}, {y}), amount={amount}")

        # Map scroll direction to the X11 mouse button that simulates it
        button = self._SCROLL_BUTTONS.get(direction.lower())
        if not button:
            raise ValueError(
                f"Invalid scroll direction: {direction}. Must be 'up', 'down', 'left', or 'right'")
        # Reject a non-positive repeat count before running anything in the box
        if amount < 1:
            raise ValueError(f"Invalid scroll amount: {amount}. Must be at least 1")

        # Chain mousemove and repeated clicks in single xdotool command
        await self._xdotool(
            "scroll()",
            "mousemove", str(x), str(y),
            "click", "--repeat", str(amount), button,
        )

        logger.debug(f"Scrolled {direction} {amount} times at ({x}, {y})")

    async def get_screen_size(self) -> Tuple[int, int]:
        """
        Get the screen resolution.

        Returns:
            Tuple of (width, height)

        Raises:
            ExecError: If xdotool exits with a non-zero code
            ParseError: If the output is not two whitespace-separated integers

        Example:
            >>> async with ComputerBox() as desktop:
            ...     width, height = await desktop.get_screen_size()
            ...     print(f"Screen: {width}x{height}")
        """
        logger.info("Getting screen size")

        # Use xdotool to get screen size; the exit code (checked by the
        # helper) is more reliable than the presence of stderr output
        exec_result = await self._xdotool("get_screen_size()", "getdisplaygeometry")

        # Parse stdout (format: "width height")
        result = exec_result.stdout.strip()
        logger.debug(f"stdout result: {result}")
        try:
            # Unpacking fails on a wrong field count, int() on non-numeric
            # fields; both raise ValueError and map to ParseError
            width_text, height_text = result.split()
            size = (int(width_text), int(height_text))
        except ValueError as err:
            logger.error("Failed to parse screen size from xdotool output")
            raise ParseError("Failed to parse screen size from xdotool output") from err

        logger.info(f"Screen size: {size[0]}x{size[1]}")
        return size
boxlite/errors.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ BoxLite error types.
3
+
4
+ Provides a hierarchy of exceptions for different failure modes.
5
+ """
6
+
7
+ __all__ = ['BoxliteError', 'ExecError', 'TimeoutError', 'ParseError']
8
+
9
+
10
class BoxliteError(Exception):
    """Root of the boxlite exception hierarchy; catch this to handle any boxlite error."""
13
+
14
+
15
class ExecError(BoxliteError):
    """
    Raised when a command execution fails (non-zero exit code).

    The exception message has the form
    ``Command '<command>' failed with exit code <exit_code>: <stderr>``.

    Attributes:
        command: The command that failed
        exit_code: The non-zero exit code
        stderr: Standard error output from the command
    """

    def __init__(self, command: str, exit_code: int, stderr: str):
        self.command = command
        self.exit_code = exit_code
        self.stderr = stderr
        super().__init__(f"Command '{command}' failed with exit code {exit_code}: {stderr}")

    def __reduce__(self):
        # The default exception reduce rebuilds the object as cls(*self.args),
        # but self.args holds only the formatted message while __init__ takes
        # three arguments, so pickle.loads / copy.copy would raise TypeError.
        # Rebuild from the original constructor arguments instead and carry
        # the instance dict along as state.
        return (type(self), (self.command, self.exit_code, self.stderr), dict(self.__dict__))
29
+
30
+
31
class TimeoutError(BoxliteError):
    """Signals that an operation did not finish in time.

    This is a boxlite-specific class deriving from BoxliteError; it is
    distinct from (and, where imported, shadows) the built-in TimeoutError.
    """
34
+
35
+
36
class ParseError(BoxliteError):
    """Signals that command output could not be parsed into the expected form."""
boxlite/exec.py ADDED
@@ -0,0 +1,26 @@
1
+ """
2
+ Execution API - Simple interface for command execution.
3
+
4
+ Provides Docker-like API for executing commands in boxes.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+
9
+ __all__ = [
10
+ 'ExecResult',
11
+ ]
12
+
13
+
14
@dataclass
class ExecResult:
    """
    Outcome of running one command inside a box.

    Attributes:
        exit_code: Exit code from the command (negative if terminated by signal)
        stdout: Standard output as string
        stderr: Standard error as string
    """

    # Process exit status; negative when the command was killed by a signal
    exit_code: int
    # Captured standard output, as text
    stdout: str
    # Captured standard error, as text
    stderr: str