grasp-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of grasp-sdk might be problematic. Click here for more details.

@@ -0,0 +1,414 @@
1
+ """Browser service for managing Chromium browser with CDP access.
2
+
3
+ This module provides the BrowserService class that manages Chromium browser
4
+ instances using the Grasp sandbox environment and exposes CDP endpoints.
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import logging
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Dict, Optional, Any, Union
13
+
14
+ import aiohttp
15
+ from ..utils.logger import get_logger
16
+ from .sandbox import SandboxService, CommandEventEmitter
17
+ from ..models import IBrowserConfig, ISandboxConfig, ICommandOptions
18
+
19
+
20
class CDPConnection:
    """Value holder describing how to reach a browser's CDP endpoint.

    Attributes are stored exactly as passed in; no validation is performed.
    """

    def __init__(
        self,
        ws_url: str,
        http_url: str,
        port: int,
        pid: Optional[int] = None
    ):
        """Record the endpoint details.

        Args:
            ws_url: WebSocket debugger URL.
            http_url: HTTP base URL of the CDP endpoint.
            port: CDP port number.
            pid: Browser process id, or None when it is not known.
        """
        self.ws_url, self.http_url = ws_url, http_url
        self.port, self.pid = port, pid

    def to_dict(self) -> Dict[str, Any]:
        """Return the connection details as a dict with camelCase keys."""
        keys = ('wsUrl', 'httpUrl', 'port', 'pid')
        values = (self.ws_url, self.http_url, self.port, self.pid)
        # zip keeps the key order identical to the attribute order above.
        return dict(zip(keys, values))
43
+
44
+
45
class BrowserService:
    """Browser service for managing Chromium browser with CDP access.

    Runs a browser launch script inside a Grasp sandbox (via SandboxService)
    and exposes the resulting CDP endpoint as a CDPConnection.
    """

    def __init__(
        self,
        sandbox_config: ISandboxConfig,
        browser_config: Optional[IBrowserConfig] = None
    ):
        """Initialize BrowserService.

        Args:
            sandbox_config: Sandbox configuration, passed to SandboxService.
            browser_config: Browser configuration (optional). Keys given here
                override the defaults below.
        """
        self.sandbox_service = SandboxService(sandbox_config)

        # Set default browser config
        default_config: IBrowserConfig = {
            'cdpPort': 9222,
            'headless': True,
            'launchTimeout': 30000,
            'args': [],
            'envs': {}
        }

        if browser_config:
            default_config.update(browser_config)

        self.config = default_config
        self.logger = self._get_default_logger()
        self.cdp_connection: Optional[CDPConnection] = None
        self.browser_process: Optional[CommandEventEmitter] = None
        self._health_check_task: Optional[asyncio.Task] = None
        # Strong references to fire-and-forget tasks; the event loop itself
        # only keeps weak references, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._background_tasks: set = set()

    def _get_default_logger(self):
        """Get the logger used by this service.

        Returns:
            The 'BrowserService' child of the project logger when
            get_logger() succeeds, otherwise a stdlib logger named
            'BrowserService' with a stream handler at INFO level.
        """
        try:
            # The previous unused `from utils.logger import Logger` here could
            # only fail and force the fallback below, so it was dropped.
            return get_logger().child('BrowserService')
        except Exception:
            # If logger is not initialized, create a default one
            logger = logging.getLogger('BrowserService')
            if not logger.handlers:
                handler = logging.StreamHandler()
                formatter = logging.Formatter(
                    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
                )
                handler.setFormatter(formatter)
                logger.addHandler(handler)
                logger.setLevel(logging.INFO)
            return logger

    async def initialize(self) -> None:
        """Create the Grasp sandbox.

        Completes once sandbox_service.create_sandbox() has returned.
        """
        self.logger.info('Initializing Browser service')
        await self.sandbox_service.create_sandbox()
        self.logger.info('Grasp sandbox initialized successfully')

    async def launch_browser(
        self,
        browser_type: str = 'chromium'
    ) -> CDPConnection:
        """Launch Chromium browser with CDP server.

        Args:
            browser_type: Browser type ('chromium' or 'chrome-stable'). It
                selects the '<browser_type>.mjs' launch script. 'chromium' is
                switched to 'chrome-stable' when the configured envs contain
                ADBLOCK == 'true'.

        Returns:
            CDP connection information

        Raises:
            RuntimeError: If browser launch fails (the original error is
                chained as the cause).
        """
        if not self.sandbox_service:
            raise RuntimeError('Grasp service not initialized. Call initialize() first.')

        try:
            self.logger.info(
                f'Launching Chromium browser with CDP (port: {self.config["cdpPort"]}, headless: {self.config["headless"]})'
            )

            # Check if adblock is enabled and adjust browser type
            if (
                self.config['envs'].get('ADBLOCK') == 'true' and
                browser_type == 'chromium'
            ):
                # stdlib loggers removed the `warn` alias in Python 3.13;
                # prefer `warning` and fall back to `warn` for loggers that
                # only provide the latter.
                log_warning = getattr(self.logger, 'warning', None) or self.logger.warn
                log_warning(
                    '⚠️ Adblock is enabled. Should use chrome-stable instead.'
                )
                browser_type = 'chrome-stable'

            # Read the Playwright script
            script_path = Path(__file__).parent.parent / 'sandbox' / f'{browser_type}.mjs'

            try:
                playwright_script = script_path.read_text(encoding='utf-8')
            except FileNotFoundError as error:
                raise RuntimeError(f'Browser script not found: {script_path}') from error

            # Prepare environment variables. User-supplied envs come last so
            # they override any of the defaults.
            # NOTE(review): the BS_SOURCE_TOKEN / SENTRY_DSN values below are
            # credentials hard-coded into the package; consider loading them
            # from configuration instead.
            envs = {
                'CDP_PORT': str(self.config['cdpPort']),
                'BROWSER_ARGS': json.dumps(self.config['args']),
                'LAUNCH_TIMEOUT': str(self.config['launchTimeout']),
                'SANDBOX_TIMEOUT': str(self.sandbox_service.timeout),
                'HEADLESS': str(self.config['headless']).lower(),
                'NODE_ENV': 'production',
                'SANDBOX_ID': self.sandbox_service.id,
                'WORKSPACE': self.sandbox_service.workspace,
                'BS_SOURCE_TOKEN': 'Qth8JGboEKVersqr1PSsUFMW',
                'BS_INGESTING_HOST': 's1363065.eu-nbg-2.betterstackdata.com',
                'SENTRY_DSN': 'https://21fa729ceb72d7f0adef06b4f786c067@o4509574910509056.ingest.us.sentry.io/4509574913720320',
                **self.config['envs']
            }

            # Prepare script options
            from ..models import IScriptOptions
            script_options: IScriptOptions = {
                'type': 'esm',
                'background': True,
                'nohup': not self.sandbox_service.is_debug,
                'timeoutMs': 0,
                'envs': envs,
                # Non-headless runs are wrapped in xvfb-run.
                'preCommand': '' if self.config['headless'] else 'xvfb-run -a -s "-screen 0 1280x1024x24" '
            }

            # Run the Playwright script in background
            self.browser_process = await self.sandbox_service.run_script(
                playwright_script,
                script_options
            )

            # Set up event listeners for browser process
            self._setup_browser_process_listeners()

            # Wait for browser to start and CDP to be available
            self.cdp_connection = await self._wait_for_cdp_ready()

            self.logger.info(
                f'Chromium browser launched successfully (cdpPort: {self.config["cdpPort"]}, wsUrl: {self.cdp_connection.ws_url})'
            )

            # Start health check if not in debug mode
            if not self.sandbox_service.is_debug:
                self._health_check_task = asyncio.create_task(
                    self._start_health_check()
                )

            return self.cdp_connection

        except Exception as error:
            self.logger.error(f'Failed to launch Chromium browser: {str(error)}')
            raise RuntimeError(f'Failed to launch browser: {str(error)}') from error

    def _setup_browser_process_listeners(self) -> None:
        """Attach stdout/stderr/exit/error listeners to the browser process.

        Does nothing when there is no browser process or when the process
        object has no 'on' method.
        """
        if not self.browser_process:
            return

        def on_stdout(data: str) -> None:
            self.logger.info(f'Browser stdout: {data}')

        def on_stderr(data: str) -> None:
            self.logger.info(f'Browser stderr: {data}')

        def on_exit(exit_code: int) -> None:
            self.logger.info(f'Browser process exited (exitCode: {exit_code})')
            self.cdp_connection = None
            self.browser_process = None
            try:
                # Look the loop up first so that no coroutine object is
                # created (and left un-awaited) when no loop is running.
                loop = asyncio.get_running_loop()
            except RuntimeError:
                self.logger.error(
                    'Browser process exited outside a running event loop; sandbox was not destroyed'
                )
                return
            task = loop.create_task(self.sandbox_service.destroy())
            # Keep the task referenced until it completes.
            self._background_tasks.add(task)
            task.add_done_callback(self._background_tasks.discard)

        def on_error(error: Exception) -> None:
            self.logger.error(f'Browser process error: {error}')

        # Only set up listeners if browser_process has the 'on' method
        if hasattr(self.browser_process, 'on'):
            self.browser_process.on('stdout', on_stdout)
            self.browser_process.on('stderr', on_stderr)
            self.browser_process.on('exit', on_exit)
            self.browser_process.on('error', on_error)

    async def _wait_for_cdp_ready(self) -> CDPConnection:
        """Wait for CDP server to be ready.

        Polls http://localhost:<cdpPort>/json/version inside the sandbox
        every 200 ms until it answers or 'launchTimeout' (milliseconds) is
        used up. At least one probe is always made.

        Returns:
            CDP connection information

        Raises:
            RuntimeError: If CDP server fails to become ready within timeout
        """
        delay_ms = 200
        # max(1, ...) so a timeout shorter than one polling interval still
        # probes once; int() so a float timeout does not break range().
        max_attempts = max(1, int(self.config['launchTimeout']) // delay_ms)

        for attempt in range(1, max_attempts + 1):
            try:
                self.logger.debug(
                    f'Checking CDP availability (attempt {attempt}/{max_attempts})'
                )

                # Check if CDP endpoint is responding
                options: ICommandOptions = {
                    'timeout': 0,
                    'inBackground': False
                }
                result = await self.sandbox_service.run_command(
                    f'curl -s http://localhost:{self.config["cdpPort"]}/json/version',
                    options,
                    True,
                )

                stdout_content = getattr(result, 'stdout', '') or ''
                if (
                    getattr(result, 'exit_code', None) == 0 and
                    'Browser' in stdout_content
                ):
                    metadata = json.loads(stdout_content)
                    # NOTE(review): the external host is requested for
                    # cdpPort + 1, not cdpPort — presumably the launch script
                    # exposes a proxy on that port; confirm.
                    host = self.sandbox_service.get_sandbox_host(
                        self.config['cdpPort'] + 1
                    )

                    # Update URLs for external access
                    ws_url = metadata['webSocketDebuggerUrl'].replace(
                        'ws://', 'wss://'
                    ).replace(
                        f'localhost:{self.config["cdpPort"]}', host
                    )

                    http_url = f'https://{host}'

                    connection = CDPConnection(
                        ws_url=ws_url,
                        http_url=http_url,
                        port=self.config['cdpPort']
                    )

                    self.logger.info(f'CDP server is ready (metadata: {metadata})')
                    return connection

            except Exception as error:
                # Any failure (command error, invalid JSON, missing key) just
                # counts as "not ready yet" and is retried.
                self.logger.debug(
                    f'CDP check failed (attempt {attempt}): {error}'
                )

            if attempt < max_attempts:
                await asyncio.sleep(delay_ms / 1000)

        raise RuntimeError('CDP server failed to become ready within timeout')

    async def _start_health_check(self) -> None:
        """Poll the CDP HTTP endpoint and destroy the sandbox when it fails.

        Every 5 seconds a HEAD request is sent to '<http_url>/json/version'.
        A non-200 status or any request error is treated as the browser
        having exited: the sandbox is destroyed once and the loop ends.
        """
        while self.cdp_connection and self.browser_process:
            await asyncio.sleep(5)

            # The connection may have been cleared while sleeping.
            if not self.cdp_connection:
                break

            try:
                async with aiohttp.ClientSession() as session:
                    async with session.head(
                        f'{self.cdp_connection.http_url}/json/version'
                    ) as response:
                        healthy = response.status == 200
            except Exception:
                healthy = False

            if not healthy:
                self.logger.info('Browser process exited')
                # Called outside the try block so that a failing destroy() is
                # not retried by the request error handler above.
                await self.sandbox_service.destroy()
                break

    def get_cdp_connection(self) -> Optional[CDPConnection]:
        """Get current CDP connection information.

        Returns:
            CDP connection info or None if not connected
        """
        return self.cdp_connection

    def is_browser_running(self) -> bool:
        """Check if browser is running.

        Returns:
            True if both a browser process and a CDP connection are set
        """
        return self.browser_process is not None and self.cdp_connection is not None

    async def stop_browser(self) -> None:
        """Stop the browser and clean up its resources.

        Cancels the health check task, kills the browser process when it has
        a 'kill' method, and clears the stored process and connection. Does
        nothing when no browser process is set.

        Raises:
            Exception: Any error raised while stopping is logged and
                re-raised.
        """
        if not self.browser_process:
            self.logger.info('No browser process to stop')
            return

        try:
            self.logger.info('Stopping Chromium browser')

            # Cancel health check task
            if self._health_check_task:
                self._health_check_task.cancel()
                try:
                    await self._health_check_task
                except asyncio.CancelledError:
                    pass
                self._health_check_task = None

            # Kill the browser process
            if hasattr(self.browser_process, 'kill'):
                await self.browser_process.kill()

            self.browser_process = None
            self.cdp_connection = None

            self.logger.info('Chromium browser stopped successfully')

        except Exception as error:
            self.logger.error(f'Error stopping browser: {error}')
            raise

    async def cleanup(self) -> None:
        """Cleanup all resources including Grasp sandbox.

        The sandbox is destroyed even when stopping the browser fails; in
        that case the stop error is re-raised after the sandbox is destroyed.
        """
        self.logger.info('Cleaning up Browser service')

        try:
            # Stop browser first
            if self.is_browser_running():
                await self.stop_browser()
        finally:
            # Cleanup Grasp sandbox
            await self.sandbox_service.destroy()

        self.logger.info('Browser service cleanup completed')

    @property
    def id(self) -> Optional[str]:
        """Get sandbox ID.

        Returns:
            Sandbox ID as reported by the sandbox service
        """
        return self.sandbox_service.id

    def get_sandbox(self) -> SandboxService:
        """Get the underlying Grasp service instance.

        Returns:
            Grasp service instance
        """
        return self.sandbox_service