grasp-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of grasp-sdk might be problematic. Click here for more details.
- grasp_sdk/__init__.py +262 -0
- grasp_sdk/models/__init__.py +78 -0
- grasp_sdk/sandbox/chrome-stable.mjs +381 -0
- grasp_sdk/sandbox/chromium.mjs +378 -0
- grasp_sdk/sandbox/jsconfig.json +22 -0
- grasp_sdk/services/__init__.py +8 -0
- grasp_sdk/services/browser.py +414 -0
- grasp_sdk/services/sandbox.py +583 -0
- grasp_sdk/utils/__init__.py +31 -0
- grasp_sdk/utils/auth.py +227 -0
- grasp_sdk/utils/config.py +150 -0
- grasp_sdk/utils/logger.py +233 -0
- grasp_sdk-0.1.0.dist-info/METADATA +201 -0
- grasp_sdk-0.1.0.dist-info/RECORD +17 -0
- grasp_sdk-0.1.0.dist-info/WHEEL +5 -0
- grasp_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- grasp_sdk-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
"""Browser service for managing Chromium browser with CDP access.
|
|
2
|
+
|
|
3
|
+
This module provides the BrowserService class that manages Chromium browser
|
|
4
|
+
instances using the Grasp sandbox environment and exposes CDP endpoints.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Dict, Optional, Any, Union
|
|
13
|
+
|
|
14
|
+
import aiohttp
|
|
15
|
+
from ..utils.logger import get_logger
|
|
16
|
+
from .sandbox import SandboxService, CommandEventEmitter
|
|
17
|
+
from ..models import IBrowserConfig, ISandboxConfig, ICommandOptions
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CDPConnection:
    """Plain holder for the details of one CDP (Chrome DevTools Protocol) endpoint.

    Attributes:
        ws_url: WebSocket URL of the CDP endpoint.
        http_url: HTTP base URL of the CDP endpoint.
        port: CDP port number.
        pid: Browser process id, or None when it is not known.
    """

    def __init__(
        self,
        ws_url: str,
        http_url: str,
        port: int,
        pid: Optional[int] = None
    ):
        # Store the endpoint URLs first, then the numeric identifiers.
        self.ws_url, self.http_url = ws_url, http_url
        self.port, self.pid = port, pid

    def to_dict(self) -> Dict[str, Any]:
        """Return the connection details as a dict with camelCase keys."""
        payload: Dict[str, Any] = {}
        payload['wsUrl'] = self.ws_url
        payload['httpUrl'] = self.http_url
        payload['port'] = self.port
        payload['pid'] = self.pid
        return payload
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class BrowserService:
    """Browser service for managing a Chromium browser with CDP access.

    Runs one of the bundled ``sandbox/<browser_type>.mjs`` launcher scripts
    inside a Grasp sandbox (through ``SandboxService``), polls the sandbox
    until the Chrome DevTools Protocol (CDP) endpoint answers, and exposes
    the resulting connection details as a ``CDPConnection``.
    """

    def __init__(
        self,
        sandbox_config: ISandboxConfig,
        browser_config: Optional[IBrowserConfig] = None
    ):
        """Initialize BrowserService.

        Args:
            sandbox_config: Sandbox configuration, passed to SandboxService.
            browser_config: Browser configuration (optional). Its keys
                override the defaults: cdpPort 9222, headless True,
                launchTimeout 30000, no extra args and no extra envs.
        """
        self.sandbox_service = SandboxService(sandbox_config)

        # Fresh default dict per instance, so nothing is shared between services.
        default_config: IBrowserConfig = {
            'cdpPort': 9222,
            'headless': True,
            'launchTimeout': 30000,
            'args': [],
            'envs': {}
        }

        if browser_config:
            default_config.update(browser_config)

        self.config = default_config
        self.logger = self._get_default_logger()
        self.cdp_connection: Optional[CDPConnection] = None
        self.browser_process: Optional[CommandEventEmitter] = None
        self._health_check_task: Optional[asyncio.Task] = None
        # Strong references to fire-and-forget tasks (see _spawn_background).
        self._background_tasks: set = set()

    def _get_default_logger(self):
        """Get default logger instance.

        Returns:
            The SDK logger's 'BrowserService' child when ``get_logger()``
            succeeds, otherwise a stdlib ``logging`` logger named
            'BrowserService' with a stream handler at INFO level.
        """
        try:
            # get_logger is imported at module level; no extra import is needed.
            return get_logger().child('BrowserService')
        except Exception:
            # If logger is not initialized, create a default one
            logger = logging.getLogger('BrowserService')
            if not logger.handlers:
                handler = logging.StreamHandler()
                formatter = logging.Formatter(
                    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
                )
                handler.setFormatter(formatter)
                logger.addHandler(handler)
                logger.setLevel(logging.INFO)
            return logger

    def _log_warning(self, message: str) -> None:
        """Log a warning through whichever warning method the logger offers."""
        # Stdlib loggers deprecate warn() (removed in Python 3.13); a custom
        # SDK logger may only provide warn(), so fall back to it.
        log = getattr(self.logger, 'warning', None) or getattr(self.logger, 'warn')
        log(message)

    def _spawn_background(self, coro) -> asyncio.Task:
        """Schedule a fire-and-forget coroutine and keep it alive until done."""
        task = asyncio.create_task(coro)

        tasks = getattr(self, '_background_tasks', None)
        if tasks is None:
            tasks = self._background_tasks = set()
        # The event loop only keeps weak references to tasks, so hold one here.
        tasks.add(task)

        def _on_done(done: asyncio.Task) -> None:
            tasks.discard(done)
            # Retrieve the exception so a failure is reported instead of lost.
            if not done.cancelled() and done.exception() is not None:
                self.logger.error(f'Background task failed: {done.exception()}')

        task.add_done_callback(_on_done)
        return task

    async def initialize(self) -> None:
        """Initialize the Grasp sandbox.

        Completes once ``SandboxService.create_sandbox()`` has returned.
        """
        self.logger.info('Initializing Browser service')
        await self.sandbox_service.create_sandbox()
        self.logger.info('Grasp sandbox initialized successfully')

    def _read_browser_script(self, browser_type: str) -> str:
        """Read the bundled launcher script for the given browser type."""
        script_path = Path(__file__).parent.parent / 'sandbox' / f'{browser_type}.mjs'
        try:
            return script_path.read_text(encoding='utf-8')
        except FileNotFoundError as error:
            raise RuntimeError(f'Browser script not found: {script_path}') from error

    def _build_launch_envs(self) -> Dict[str, Any]:
        """Build the environment variables handed to the launcher script."""
        # NOTE(review): the BetterStack token and Sentry DSN below are
        # credentials hard-coded into a published package. Consider loading
        # them from configuration. Callers can already override any key via
        # config['envs'], which is merged last.
        return {
            'CDP_PORT': str(self.config['cdpPort']),
            'BROWSER_ARGS': json.dumps(self.config['args']),
            'LAUNCH_TIMEOUT': str(self.config['launchTimeout']),
            'SANDBOX_TIMEOUT': str(self.sandbox_service.timeout),
            'HEADLESS': str(self.config['headless']).lower(),
            'NODE_ENV': 'production',
            'SANDBOX_ID': self.sandbox_service.id,
            'WORKSPACE': self.sandbox_service.workspace,
            'BS_SOURCE_TOKEN': 'Qth8JGboEKVersqr1PSsUFMW',
            'BS_INGESTING_HOST': 's1363065.eu-nbg-2.betterstackdata.com',
            'SENTRY_DSN': 'https://21fa729ceb72d7f0adef06b4f786c067@o4509574910509056.ingest.us.sentry.io/4509574913720320',
            # Tolerate an explicit ``envs: None`` in the caller's config.
            **(self.config.get('envs') or {})
        }

    async def launch_browser(
        self,
        browser_type: str = 'chromium'
    ) -> CDPConnection:
        """Launch Chromium browser with CDP server.

        Args:
            browser_type: Browser type ('chromium' or 'chrome-stable'). When
                config['envs']['ADBLOCK'] is 'true', 'chromium' is replaced
                by 'chrome-stable'.

        Returns:
            CDP connection information

        Raises:
            RuntimeError: If browser launch fails (any underlying error is
                logged and chained as the cause).
        """
        if not self.sandbox_service:
            raise RuntimeError('Grasp service not initialized. Call initialize() first.')

        try:
            self.logger.info(
                f'Launching Chromium browser with CDP (port: {self.config["cdpPort"]}, headless: {self.config["headless"]})'
            )

            # Check if adblock is enabled and adjust browser type
            extra_envs = self.config.get('envs') or {}
            if extra_envs.get('ADBLOCK') == 'true' and browser_type == 'chromium':
                self._log_warning(
                    '⚠️ Adblock is enabled. Should use chrome-stable instead.'
                )
                browser_type = 'chrome-stable'

            # Read the Playwright script
            playwright_script = self._read_browser_script(browser_type)

            # Prepare script options
            from ..models import IScriptOptions
            script_options: IScriptOptions = {
                'type': 'esm',
                'background': True,
                'nohup': not self.sandbox_service.is_debug,
                'timeoutMs': 0,
                'envs': self._build_launch_envs(),
                # A headed browser needs a virtual X display inside the sandbox.
                'preCommand': '' if self.config['headless'] else 'xvfb-run -a -s "-screen 0 1280x1024x24" '
            }

            # Run the Playwright script in background
            self.browser_process = await self.sandbox_service.run_script(
                playwright_script,
                script_options
            )

            # Set up event listeners for browser process
            self._setup_browser_process_listeners()

            # Wait for browser to start and CDP to be available
            self.cdp_connection = await self._wait_for_cdp_ready()

            self.logger.info(
                f'Chromium browser launched successfully (cdpPort: {self.config["cdpPort"]}, wsUrl: {self.cdp_connection.ws_url})'
            )

            # Start health check if not in debug mode
            if not self.sandbox_service.is_debug:
                self._health_check_task = asyncio.create_task(
                    self._start_health_check()
                )

            return self.cdp_connection

        except Exception as error:
            self.logger.error(f'Failed to launch Chromium browser: {str(error)}')
            raise RuntimeError(f'Failed to launch browser: {str(error)}') from error

    def _setup_browser_process_listeners(self) -> None:
        """Set up event listeners for browser process.

        Does nothing when there is no browser process or when the process
        object has no ``on`` method.
        """
        if not self.browser_process:
            return

        def on_stdout(data: str) -> None:
            self.logger.info(f'Browser stdout: {data}')

        def on_stderr(data: str) -> None:
            self.logger.info(f'Browser stderr: {data}')

        def on_exit(exit_code: int) -> None:
            self.logger.info(f'Browser process exited (exitCode: {exit_code})')
            self.cdp_connection = None
            self.browser_process = None
            # The sandbox is torn down together with the browser; the task is
            # tracked so it cannot be garbage-collected before it finishes.
            self._spawn_background(self.sandbox_service.destroy())

        def on_error(error: Exception) -> None:
            self.logger.error(f'Browser process error: {error}')

        # Only set up listeners if browser_process has the 'on' method
        if hasattr(self.browser_process, 'on'):
            self.browser_process.on('stdout', on_stdout)
            self.browser_process.on('stderr', on_stderr)
            self.browser_process.on('exit', on_exit)
            self.browser_process.on('error', on_error)

    async def _wait_for_cdp_ready(self) -> CDPConnection:
        """Wait for CDP server to be ready.

        Polls ``/json/version`` on the CDP port from inside the sandbox every
        200 ms. At least one probe is always made, even when launchTimeout is
        shorter than the polling interval.

        Returns:
            CDP connection information

        Raises:
            RuntimeError: If CDP server fails to become ready within timeout
        """
        delay_ms = 200
        # Never drop to zero attempts for very small launch timeouts.
        max_attempts = max(1, int(self.config['launchTimeout']) // delay_ms)
        cdp_port = self.config['cdpPort']

        for attempt in range(1, max_attempts + 1):
            try:
                self.logger.debug(
                    f'Checking CDP availability (attempt {attempt}/{max_attempts})'
                )

                # Check if CDP endpoint is responding
                options: ICommandOptions = {
                    'timeout': 0,
                    'inBackground': False
                }
                result = await self.sandbox_service.run_command(
                    f'curl -s http://localhost:{cdp_port}/json/version',
                    options,
                    True,
                )

                stdout_content = getattr(result, 'stdout', '') or ''
                if (
                    getattr(result, 'exit_code', None) == 0 and
                    'Browser' in stdout_content
                ):
                    metadata = json.loads(stdout_content)
                    # NOTE(review): the external host is resolved for
                    # cdpPort + 1; presumably the launcher script proxies CDP
                    # on that port. Confirm against sandbox/*.mjs.
                    host = self.sandbox_service.get_sandbox_host(cdp_port + 1)

                    # Update URLs for external access
                    ws_url = metadata['webSocketDebuggerUrl'].replace(
                        'ws://', 'wss://'
                    ).replace(
                        f'localhost:{cdp_port}', host
                    )

                    http_url = f'https://{host}'

                    connection = CDPConnection(
                        ws_url=ws_url,
                        http_url=http_url,
                        port=cdp_port
                    )

                    self.logger.info(f'CDP server is ready (metadata: {metadata})')
                    return connection

            except Exception as error:
                # A failed probe is expected while the browser is starting.
                self.logger.debug(
                    f'CDP check failed (attempt {attempt}): {error}'
                )

            if attempt < max_attempts:
                await asyncio.sleep(delay_ms / 1000)

        raise RuntimeError('CDP server failed to become ready within timeout')

    async def _start_health_check(self) -> None:
        """Start health check for browser process.

        Every 5 seconds sends a HEAD request to ``<http_url>/json/version``.
        On a non-200 status or any request error the browser is treated as
        gone: the connection state is cleared and the sandbox is destroyed.
        """
        # One HTTP session is reused for the whole lifetime of the check.
        async with aiohttp.ClientSession() as session:
            while self.cdp_connection and self.browser_process:
                try:
                    await asyncio.sleep(5)

                    connection = self.cdp_connection
                    if not connection:
                        break

                    async with session.head(
                        f'{connection.http_url}/json/version'
                    ) as response:
                        healthy = response.status == 200

                except Exception:
                    # Cancellation is not an Exception and passes through.
                    healthy = False

                if not healthy:
                    self.logger.info('Browser process exited')
                    # Clear state as the 'exit' listener does, so
                    # is_browser_running() stops reporting a dead browser.
                    self.cdp_connection = None
                    self.browser_process = None
                    await self.sandbox_service.destroy()
                    break

    def get_cdp_connection(self) -> Optional[CDPConnection]:
        """Get current CDP connection information.

        Returns:
            CDP connection info or None if not connected
        """
        return self.cdp_connection

    def is_browser_running(self) -> bool:
        """Check if browser is running.

        Returns:
            True if both a browser process and a CDP connection are recorded
        """
        return self.browser_process is not None and self.cdp_connection is not None

    async def stop_browser(self) -> None:
        """Stop the browser and cleanup resources.

        Cancels the health check task, kills the browser process and clears
        the connection state. Does nothing when no browser process is
        recorded.

        Raises:
            Exception: Whatever killing the browser process raises, after
                logging it.
        """
        if not self.browser_process:
            self.logger.info('No browser process to stop')
            return

        try:
            self.logger.info('Stopping Chromium browser')

            # Cancel health check task
            health_task = self._health_check_task
            if health_task:
                self._health_check_task = None
                health_task.cancel()
                try:
                    await health_task
                except asyncio.CancelledError:
                    pass
                except Exception as error:
                    # A health check that already failed must not block the kill.
                    self.logger.debug(f'Health check ended with error: {error}')

            # Kill the browser process
            if hasattr(self.browser_process, 'kill'):
                await self.browser_process.kill()

            self.browser_process = None
            self.cdp_connection = None

            self.logger.info('Chromium browser stopped successfully')

        except Exception as error:
            self.logger.error(f'Error stopping browser: {error}')
            raise

    async def cleanup(self) -> None:
        """Cleanup all resources including Grasp sandbox.

        The sandbox is destroyed even when stopping the browser fails; in
        that case the stop error propagates after the sandbox is destroyed.
        """
        self.logger.info('Cleaning up Browser service')

        try:
            # Stop browser first
            if self.is_browser_running():
                await self.stop_browser()
        finally:
            # Cleanup Grasp sandbox
            await self.sandbox_service.destroy()

        self.logger.info('Browser service cleanup completed')

    @property
    def id(self) -> Optional[str]:
        """Get sandbox ID.

        Returns:
            Sandbox ID as reported by the sandbox service
        """
        return self.sandbox_service.id

    def get_sandbox(self) -> SandboxService:
        """Get the underlying Grasp service instance.

        Returns:
            Grasp service instance
        """
        return self.sandbox_service
|