pydoll-python 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydoll/__init__.py ADDED
File without changes
File without changes
pydoll/browser/base.py ADDED
@@ -0,0 +1,524 @@
1
+ import asyncio
2
+ from abc import ABC, abstractmethod
3
+ from functools import partial
4
+ from random import randint
5
+
6
+ from pydoll import exceptions
7
+ from pydoll.browser.managers import (
8
+ BrowserOptionsManager,
9
+ BrowserProcessManager,
10
+ ProxyManager,
11
+ TempDirectoryManager,
12
+ )
13
+ from pydoll.browser.options import Options
14
+ from pydoll.browser.page import Page
15
+ from pydoll.commands.browser import BrowserCommands
16
+ from pydoll.commands.dom import DomCommands
17
+ from pydoll.commands.fetch import FetchCommands
18
+ from pydoll.commands.network import NetworkCommands
19
+ from pydoll.commands.page import PageCommands
20
+ from pydoll.commands.storage import StorageCommands
21
+ from pydoll.commands.target import TargetCommands
22
+ from pydoll.connection.connection import ConnectionHandler
23
+ from pydoll.events.fetch import FetchEvents
24
+
25
+
26
class Browser(ABC):  # noqa: PLR0904
    """
    Abstract base class that manages a browser instance for automation.

    The class launches the browser through a BrowserProcessManager, talks
    to it through a ConnectionHandler bound to ``_connection_port`` and
    exposes helpers for pages, cookies, window state and event callbacks.
    Subclasses must implement ``_get_default_binary_location``.
    """

    def __init__(
        self,
        options: Options | None = None,
        connection_port: int | None = None,
    ):
        """
        Initializes the Browser instance.

        Args:
            options (Options | None): Browser configuration. The value is
                handed to BrowserOptionsManager.initialize_options, so
                None is accepted.
            connection_port (int | None): The port used to connect to the
                browser. When omitted (or falsy) a random port between
                9223 and 9322, both inclusive, is picked.
        """
        self.options = BrowserOptionsManager.initialize_options(options)
        self._proxy_manager = ProxyManager(self.options)
        self._connection_port = (
            connection_port if connection_port else randint(9223, 9322)
        )
        self._browser_process_manager = BrowserProcessManager()
        self._temp_directory_manager = TempDirectoryManager()
        self._connection_handler = ConnectionHandler(self._connection_port)
        BrowserOptionsManager.add_default_arguments(self.options)

        # Target IDs of pages that are known but not yet handed out by
        # get_page().
        self._pages = []

    async def __aenter__(self):
        """Returns the browser itself; the caller still has to start it."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """
        Stops the browser and closes the connection handler.

        The connection is closed in a ``finally`` clause so that it is
        released even when ``stop`` raises (for instance
        BrowserNotRunning when the browser was never started).
        """
        try:
            await self.stop()
        finally:
            await self._connection_handler.close()

    async def start(self) -> None:
        """
        Starts the browser and prepares the first page.

        The binary is taken from ``options.binary_location`` or, when that
        is falsy, from ``_get_default_binary_location``. After the process
        is spawned the method waits for the browser to answer pings,
        installs the proxy handlers if needed and records an initial page.

        Raises:
            exceptions.BrowserNotRunning: If the browser does not respond
                after the process has been started.
        """
        binary_location = (
            self.options.binary_location or self._get_default_binary_location()
        )

        self._setup_user_dir()

        self._browser_process_manager.start_browser_process(
            binary_location,
            self._connection_port,
            self.options.arguments,
        )
        await self._verify_browser_running()

        proxy_config = self._proxy_manager.get_proxy_credentials()
        await self._configure_proxy(proxy_config[0], proxy_config[1])

        await self._init_first_page()

    async def set_download_path(self, path: str):
        """
        Sets the download path for the browser.

        Args:
            path (str): The path to the download directory.
        """
        await self._execute_command(BrowserCommands.set_download_path(path))

    async def get_page_by_id(self, page_id: str) -> Page:
        """
        Builds a Page instance for the given ID.

        No command is sent to the browser; the Page is simply constructed
        from the connection port and the ID.

        Args:
            page_id (str): The ID of the page to wrap.

        Returns:
            Page: A Page bound to the specified ID.
        """
        return Page(self._connection_port, page_id)

    async def get_page(self) -> Page:
        """
        Returns a Page for a stored page ID, or for a newly created page.

        The most recently stored ID is removed from the internal list and
        used. When the list is empty, ``new_page`` is called instead.

        Returns:
            Page: A Page bound to the selected ID.
        """
        if self._pages:
            page_id = self._pages.pop()
        else:
            page_id = await self.new_page()
        return Page(self._connection_port, page_id)

    async def delete_all_cookies(self):
        """
        Deletes all cookies from the browser.

        Both the Storage and the Network clearing commands are issued.
        """
        await self._execute_command(StorageCommands.clear_cookies())
        await self._execute_command(NetworkCommands.clear_browser_cookies())

    async def set_cookies(self, cookies: list[dict]):
        """
        Sets cookies in the browser.

        The same list is sent through both the Storage and the Network
        commands.

        Args:
            cookies (list[dict]): A list of dictionaries containing
                the cookie data.
        """
        await self._execute_command(StorageCommands.set_cookies(cookies))
        await self._execute_command(NetworkCommands.set_cookies(cookies))

    async def get_cookies(self):
        """
        Retrieves the cookies reported by the Storage command.

        Returns:
            The ``cookies`` entry of the command result (documented
            upstream as a list of cookie dictionaries).
        """
        response = await self._execute_command(StorageCommands.get_cookies())
        return response['result']['cookies']

    async def on(
        self, event_name: str, callback: callable, temporary: bool = False
    ) -> int:
        """
        Registers a callback on the browser-level connection handler.

        Coroutine functions are wrapped so that each event schedules the
        callback as a separate task instead of being awaited in place;
        plain callables are registered unchanged.

        Args:
            event_name (str): Name of the event to listen for.
            callback (Callable): Function or coroutine function invoked
                with the event.
            temporary (bool): Forwarded as-is to
                ``ConnectionHandler.register_callback``.

        Returns:
            int: The value returned by ``register_callback`` (the ID of
            the registered callback).
        """
        # The event loop only keeps weak references to tasks, so hold a
        # strong reference until each task finishes; otherwise a running
        # callback may be garbage-collected mid-execution.
        running_tasks = set()

        async def callback_wrapper(event):
            task = asyncio.create_task(callback(event))
            running_tasks.add(task)
            task.add_done_callback(running_tasks.discard)

        if asyncio.iscoroutinefunction(callback):
            function_to_register = callback_wrapper
        else:
            function_to_register = callback
        return await self._connection_handler.register_callback(
            event_name, function_to_register, temporary
        )

    async def new_page(self, url: str = ''):
        """
        Opens a new page (target) in the browser.

        Args:
            url (str): URL passed to ``TargetCommands.create_target``.

        Returns:
            The ``targetId`` of the created target. Note that this is the
            ID, not a Page instance; use ``get_page_by_id`` to wrap it.
        """
        response = await self._execute_command(
            TargetCommands.create_target(url)
        )
        return response['result']['targetId']

    async def get_targets(self):
        """
        Retrieves the targets currently reported by the browser.

        Returns:
            The ``targetInfos`` entry of the command result.
        """
        response = await self._execute_command(TargetCommands.get_targets())
        return response['result']['targetInfos']

    async def stop(self):
        """
        Stops the running browser process.

        Sends the close command, stops the managed process and cleans up
        the temporary directories.

        Raises:
            exceptions.BrowserNotRunning: If the browser is not currently
                running.
        """
        if not await self._is_browser_running():
            raise exceptions.BrowserNotRunning('Browser is not running')

        await self._execute_command(BrowserCommands.CLOSE)
        self._browser_process_manager.stop_process()
        self._temp_directory_manager.cleanup()

    async def get_window_id(self):
        """
        Retrieves the ID of the current browser window.

        Returns:
            The ``windowId`` entry of the command result.
        """
        response = await self._execute_command(BrowserCommands.get_window_id())
        return response['result']['windowId']

    async def set_window_bounds(self, bounds: dict):
        """
        Sets the bounds of the current browser window.

        Args:
            bounds (dict): The bounds to set for the window.
        """
        window_id = await self.get_window_id()
        await self._execute_command(
            BrowserCommands.set_window_bounds(window_id, bounds)
        )

    async def set_window_maximized(self):
        """
        Maximizes the current browser window.
        """
        window_id = await self.get_window_id()
        await self._execute_command(
            BrowserCommands.set_window_maximized(window_id)
        )

    async def set_window_minimized(self):
        """
        Minimizes the current browser window.
        """
        window_id = await self.get_window_id()
        await self._execute_command(
            BrowserCommands.set_window_minimized(window_id)
        )

    async def enable_page_events(self):
        """
        Enables page-related events on the browser-level connection.

        Sends ``PageCommands.enable_page()`` through the connection
        handler. Afterwards, callbacks registered with ``on`` can receive
        page events.

        Returns:
            None
        """
        await self._connection_handler.execute_command(
            PageCommands.enable_page()
        )

    async def enable_network_events(self):
        """
        Enables network-related events on the browser-level connection.

        Sends ``NetworkCommands.enable_network_events()`` through the
        connection handler. Afterwards, callbacks registered with ``on``
        can receive network events.

        Returns:
            None
        """
        await self._connection_handler.execute_command(
            NetworkCommands.enable_network_events()
        )

    async def enable_fetch_events(
        self, handle_auth_requests: bool = False, resource_type: str = ''
    ):
        """
        Enables the Fetch domain so requests can be intercepted.

        Intercepted requests stay paused until they are continued (see
        ``_continue_request``).

        Args:
            handle_auth_requests (bool): Whether to also emit an event
                when a request requires authentication.
            resource_type (str): The type of resource to intercept (e.g.,
                'XHR', 'Script'). The empty default is forwarded
                unchanged to ``FetchCommands.enable_fetch_events``.

        Returns:
            None
        """
        await self._connection_handler.execute_command(
            FetchCommands.enable_fetch_events(
                handle_auth_requests, resource_type
            )
        )

    async def enable_dom_events(self):
        """
        Enables DOM-related events on the browser-level connection.

        Sends ``DomCommands.enable_dom_events()`` through the connection
        handler. Afterwards, callbacks registered with ``on`` can receive
        DOM events.

        Returns:
            None
        """
        await self._connection_handler.execute_command(
            DomCommands.enable_dom_events()
        )

    async def disable_fetch_events(self):
        """
        Disables the Fetch domain, ending request interception.

        Sends ``FetchCommands.disable_fetch_events()`` through the
        connection handler.

        Returns:
            None
        """
        await self._connection_handler.execute_command(
            FetchCommands.disable_fetch_events()
        )

    async def _continue_request(self, event: dict):
        """
        Resumes a request that was paused by the Fetch domain.

        Args:
            event (dict): The event data; ``event['params']['requestId']``
                identifies the paused request.

        Raises:
            KeyError: If the event does not carry a request ID.

        Returns:
            None
        """
        request_id = event['params']['requestId']
        await self._execute_command(FetchCommands.continue_request(request_id))

    async def _continue_request_auth_required(
        self, event: dict, proxy_username: str, proxy_password: str
    ):
        """
        Answers a proxy authentication challenge for a paused request.

        The request is continued with the supplied credentials and fetch
        event interception is disabled afterwards.

        Args:
            event (dict): The event data; ``event['params']['requestId']``
                identifies the paused request.
            proxy_username (str): The username for the proxy server
                authentication.
            proxy_password (str): The password for the proxy server
                authentication.

        Raises:
            KeyError: If the event does not carry a request ID.

        Returns:
            None
        """
        request_id = event['params']['requestId']
        await self._execute_command(
            FetchCommands.continue_request_with_auth(
                request_id, proxy_username, proxy_password
            )
        )
        await self.disable_fetch_events()

    async def _init_first_page(self):
        """Stores the ID of an initial page so get_page() can reuse it."""
        pages = await self.get_targets()
        valid_page = await self._get_valid_page(pages)
        self._pages.append(valid_page)

    async def _verify_browser_running(self):
        """
        Checks that the browser is running.

        Raises:
            exceptions.BrowserNotRunning: If the browser does not respond.
        """
        if not await self._is_browser_running():
            raise exceptions.BrowserNotRunning('Failed to start browser')

    async def _configure_proxy(self, private_proxy, proxy_credentials):
        """
        Configures proxy authentication, if necessary.

        Args:
            private_proxy: Truthy when authentication handlers must be
                installed (first item returned by
                ``ProxyManager.get_proxy_credentials``).
            proxy_credentials: Indexable pair whose items 0 and 1 are used
                as the proxy username and password.
        """
        if not private_proxy:
            return

        await self.enable_fetch_events(handle_auth_requests=True)
        await self.on(
            FetchEvents.REQUEST_PAUSED,
            self._continue_request,
            temporary=True,
        )
        await self.on(
            FetchEvents.AUTH_REQUIRED,
            partial(
                self._continue_request_auth_required,
                proxy_username=proxy_credentials[0],
                proxy_password=proxy_credentials[1],
            ),
            temporary=True,
        )

    @staticmethod
    def _is_valid_page(page: dict) -> bool:
        """
        Checks whether a target describes a new-tab page.

        Args:
            page (dict): Target info dictionary.

        Returns:
            bool: True when ``type`` is 'page' and the ``url`` contains
            'chrome://newtab/'.
        """
        if page.get('type') != 'page':
            return False
        return 'chrome://newtab/' in page.get('url', '')

    async def _get_valid_page(self, pages) -> str:
        """
        Gets the ID of an existing new-tab page or creates a new page.

        Args:
            pages: Iterable of target info dictionaries.

        Returns:
            str: targetId of the existing page, or of a newly created one
            when no valid page (or no ``targetId`` key) is found.
        """
        valid_page = next(
            (page for page in pages if self._is_valid_page(page)), None
        )

        if valid_page and 'targetId' in valid_page:
            return valid_page['targetId']

        return await self.new_page()

    async def _is_browser_running(self, timeout: int = 10) -> bool:
        """
        Checks if the browser answers pings on the connection.

        Args:
            timeout (int): Number of ping attempts; one second is slept
                after every failed attempt.

        Returns:
            bool: True as soon as a ping succeeds, False when every
            attempt fails.
        """
        for _ in range(timeout):
            if await self._connection_handler.ping():
                return True
            await asyncio.sleep(1)
        return False

    async def _execute_command(self, command: str):
        """
        Executes a command through the connection handler.

        A 60 second timeout is passed to the handler.

        Args:
            command (str): The command to be executed.
                NOTE(review): annotated as str, but callers pass whatever
                the ``*Commands`` helpers return; confirm the real type.

        Returns:
            The response from executing the command.
        """
        return await self._connection_handler.execute_command(
            command, timeout=60
        )

    def _setup_user_dir(self):
        """
        Prepares the user data directory, if necessary.

        A ``--user-data-dir`` argument pointing at a fresh temporary
        directory is appended unless the options already contain one.
        """
        # NOTE(review): the temporary directory is created even when the
        # caller supplied their own --user-data-dir; kept as-is on purpose.
        temp_dir = self._temp_directory_manager.create_temp_dir()
        already_configured = any(
            arg.split('=')[0] == '--user-data-dir'
            for arg in self.options.arguments
        )
        if not already_configured:
            self.options.arguments.append(f'--user-data-dir={temp_dir.name}')

    @abstractmethod
    def _get_default_binary_location(self) -> str:
        """
        Retrieves the default location of the browser binary.

        This method must be implemented by subclasses.
        """
        pass
@@ -0,0 +1,31 @@
1
+ import os
2
+
3
+ from pydoll.browser.base import Browser
4
+ from pydoll.browser.managers import BrowserOptionsManager
5
+ from pydoll.browser.options import Options
6
+
7
+
8
class Chrome(Browser):
    """Browser subclass that supplies default Google Chrome binary paths."""

    def __init__(
        self, options: Options | None = None, connection_port: int = 9222
    ):
        """
        Initializes the Chrome browser.

        Args:
            options (Options | None): Browser configuration, forwarded to
                the Browser base class.
            connection_port (int): Port forwarded to the Browser base
                class; defaults to 9222.
        """
        super().__init__(options, connection_port)

    @staticmethod
    def _get_default_binary_location():
        """
        Returns the default Chrome binary path for the current OS.

        The path is selected by ``os.name`` and passed through
        ``BrowserOptionsManager.validate_browser_path``.

        Raises:
            ValueError: If ``os.name`` is neither 'nt' nor 'posix'.
        """
        default_paths = {
            'nt': r'C:\Program Files\Google\Chrome\Application\chrome.exe',
            'posix': '/usr/bin/google-chrome',
        }
        browser_path = default_paths.get(os.name)
        if browser_path is None:
            raise ValueError('Unsupported OS')
        return BrowserOptionsManager.validate_browser_path(browser_path)