pydoll-python 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
class TargetCommands:
    """
    Builders for Chrome DevTools Protocol ``Target`` domain commands.

    The class-level dictionaries are templates and are never handed out or
    modified: every builder returns a new command dictionary with its own
    ``params`` mapping, so a caller may freely add keys (for example an
    ``id``) without affecting later commands.
    """

    ACTIVATE_TARGET = {'method': 'Target.activateTarget', 'params': {}}
    ATTACH_TO_TARGET = {'method': 'Target.attachToTarget', 'params': {}}
    CLOSE_TARGET = {'method': 'Target.closeTarget', 'params': {}}
    CREATE_TARGET = {'method': 'Target.createTarget', 'params': {}}
    GET_TARGETS = {'method': 'Target.getTargets', 'params': {}}
    GET_TARGET_INFO = {'method': 'Target.getTargetInfo', 'params': {}}

    @staticmethod
    def _build(template: dict, **params) -> dict:
        """Return a fresh command based on ``template`` plus ``params``."""
        # dict.copy() is shallow: the nested 'params' dict would still be the
        # shared class-level object, so it is rebuilt explicitly here.
        return {**template, 'params': {**template['params'], **params}}

    @classmethod
    def activate_target(cls, target_id: str) -> dict:
        """
        Build the ``Target.activateTarget`` command.

        Args:
            target_id (str): Identifier of the target to activate.

        Returns:
            dict: A new command dictionary.
        """
        return cls._build(cls.ACTIVATE_TARGET, targetId=target_id)

    @classmethod
    def attach_to_target(cls, target_id: str) -> dict:
        """
        Build the ``Target.attachToTarget`` command.

        Args:
            target_id (str): Identifier of the target to attach to.

        Returns:
            dict: A new command dictionary.
        """
        return cls._build(cls.ATTACH_TO_TARGET, targetId=target_id)

    @classmethod
    def close_target(cls, target_id: str) -> dict:
        """
        Build the ``Target.closeTarget`` command.

        Args:
            target_id (str): Identifier of the target to close.

        Returns:
            dict: A new command dictionary.
        """
        return cls._build(cls.CLOSE_TARGET, targetId=target_id)

    @classmethod
    def create_target(cls, url: str) -> dict:
        """
        Build the ``Target.createTarget`` command.

        Args:
            url (str): URL placed in the command's ``url`` parameter.

        Returns:
            dict: A new command dictionary.
        """
        return cls._build(cls.CREATE_TARGET, url=url)

    @classmethod
    def get_targets(cls) -> dict:
        """
        Build the ``Target.getTargets`` command.

        Returns:
            dict: A new command dictionary (not the shared template).
        """
        return cls._build(cls.GET_TARGETS)
File without changes
@@ -0,0 +1,232 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ from typing import Callable
5
+
6
+ import websockets
7
+
8
+ from pydoll import exceptions
9
+ from pydoll.connection.managers import CommandManager, EventsHandler
10
+ from pydoll.utils import get_browser_ws_address
11
+
12
+ logger = logging.getLogger(__name__)
13
+ logger.setLevel(logging.INFO)
14
+
15
+
16
class ConnectionHandler:
    """
    A class to handle WebSocket connections for browser automation.

    The connection is opened lazily, the first time a command or ping needs
    it. Incoming messages are read by a background task: messages carrying
    an integer ``id`` resolve the matching pending command, every other
    message is forwarded to the events handler.
    """

    def __init__(
        self,
        connection_port: int,
        page_id: str = 'browser',
        ws_address_resolver: Callable[[int], str] = get_browser_ws_address,
        ws_connector: Callable = websockets.connect,
    ):
        """
        Initializes the ConnectionHandler instance.

        Args:
            connection_port (int): The port to connect to the browser.
            page_id (str): Page identifier. When it contains ``'browser'``
                the address is obtained from ``ws_address_resolver``;
                otherwise a ``/devtools/page/<page_id>`` address on
                localhost is used.
            ws_address_resolver (Callable): Called with the port; its result
                is awaited, so it must return an awaitable that yields the
                WebSocket address.
            ws_connector (Callable): Called with the address; its result is
                awaited to obtain the connection object.
        """
        self._connection_port = connection_port
        self._page_id = page_id
        self._ws_address_resolver = ws_address_resolver
        self._ws_connector = ws_connector
        self._ws_connection = None
        # Defined up front so cleanup code can test it before any connection
        # has been established.
        self._receive_task = None
        self._command_manager = CommandManager()
        self._events_handler = EventsHandler()
        logger.info('ConnectionHandler initialized.')

    @property
    def network_logs(self):
        """Network logs collected by the events handler."""
        return self._events_handler.network_logs

    @property
    def dialog(self):
        """Dialog data currently stored by the events handler."""
        return self._events_handler.dialog

    async def ping(self) -> bool:
        """
        Sends a ping message to the browser.

        Returns:
            bool: True if the ping was successful, False otherwise.
        """
        # Deliberately best-effort: any failure is reported as False.
        try:
            await self._ensure_active_connection()
            await self._ws_connection.ping()
            return True
        except Exception:
            return False

    async def execute_command(self, command: dict, timeout: int = 10) -> dict:
        """
        Sends a command to the browser and awaits its response.

        Args:
            command (dict): The command to send, structured as a dictionary.
                An ``id`` key is written into it before sending.
            timeout (int, optional): Time in seconds to wait for a response.
                Defaults to 10.

        Returns:
            dict: The response from the browser.

        Raises:
            InvalidCommand: If the command is not a dictionary.
            TimeoutError: If the command execution exceeds the timeout.
            websockets.ConnectionClosed: If the connection closes while the
                command is being sent or awaited.
        """
        if not isinstance(command, dict):
            logger.error('Command must be a dictionary.')
            raise exceptions.InvalidCommand('Command must be a dictionary')

        await self._ensure_active_connection()
        future = self._command_manager.create_command_future(command)

        try:
            await self._ws_connection.send(json.dumps(command))
            response: str = await asyncio.wait_for(future, timeout)
            return json.loads(response)
        except websockets.ConnectionClosed:
            await self._handle_connection_loss()
            raise
        finally:
            # No-op when the command was resolved normally; otherwise
            # (timeout, closed connection, failed send) it drops the future
            # so it does not stay registered forever.
            self._command_manager.remove_pending_command(command['id'])

    async def register_callback(
        self, event_name: str, callback: Callable, temporary: bool = False
    ):
        """Register an event callback; returns the events handler's result."""
        return self._events_handler.register_callback(
            event_name, callback, temporary
        )

    async def remove_callback(self, callback_id: int):
        """Remove a callback by ID; returns the events handler's result."""
        return self._events_handler.remove_callback(callback_id)

    async def clear_callbacks(self):
        """Remove every registered callback."""
        return self._events_handler.clear_callbacks()

    async def close(self):
        """
        Closes the WebSocket connection.

        Clears all event callbacks and closes the WebSocket connection if
        one was opened. Calling it before any connection exists is safe.
        """
        await self.clear_callbacks()
        if self._ws_connection is None:
            # The connection is lazy, so there may be nothing to close.
            return
        await self._ws_connection.close()
        logger.info('WebSocket connection closed.')

    async def _ensure_active_connection(self):
        """Guarantee an active connection exists."""
        if self._ws_connection is None or self._ws_connection.closed:
            await self._establish_new_connection()

    async def _establish_new_connection(self):
        """Create fresh connection and start listening."""
        ws_address = await self._resolve_ws_address()
        logger.info(f'Connecting to {ws_address}')
        self._ws_connection = await self._ws_connector(ws_address)
        self._receive_task = asyncio.create_task(self._receive_events())
        logger.debug('WebSocket connection established')

    async def _resolve_ws_address(self):
        """Determine correct WebSocket address."""
        # Substring test: any page id containing 'browser' is resolved
        # through the resolver rather than the page URL below.
        if 'browser' in self._page_id:
            return await self._ws_address_resolver(self._connection_port)
        return (
            f'ws://localhost:{self._connection_port}/devtools/page/'
            f'{self._page_id}'
        )

    async def _handle_connection_loss(self):
        """Clean up after connection loss."""
        if self._ws_connection and not self._ws_connection.closed:
            await self._ws_connection.close()
        self._ws_connection = None

        if self._receive_task and not self._receive_task.done():
            self._receive_task.cancel()

        logger.info('Connection resources cleaned up')

    async def _receive_events(self):
        """
        Main loop for receiving and processing incoming WebSocket messages.
        Delegates processing to specialized handlers based on message type.
        """
        try:
            async for raw_message in self._incoming_messages():
                await self._process_single_message(raw_message)
        except websockets.ConnectionClosed as e:
            logger.info(f'Connection closed gracefully: {e}')
        except Exception as e:
            logger.error(f'Unexpected error in event loop: {e}')
            raise

    async def _incoming_messages(self):
        """Generator that yields raw messages while connection is open"""
        while not self._ws_connection.closed:
            yield await self._ws_connection.recv()

    async def _process_single_message(self, raw_message: str):
        """Orchestrates processing of a single raw WebSocket message"""
        message = self._parse_message(raw_message)
        if not message:
            # Unparseable payloads (and empty JSON values) are skipped.
            return

        if self._is_command_response(message):
            await self._handle_command_message(message)
        else:
            await self._handle_event_message(message)

    @staticmethod
    def _parse_message(raw_message: str) -> dict | None:
        """
        Attempts to parse raw message string into JSON.
        Returns parsed dict or None if parsing fails.
        """
        try:
            return json.loads(raw_message)
        except json.JSONDecodeError:
            logger.warning(f'Failed to parse message: {raw_message[:200]}...')
            return None

    @staticmethod
    def _is_command_response(message: dict) -> bool:
        """Determines if message is a response to a command"""
        return 'id' in message and isinstance(message['id'], int)

    async def _handle_command_message(self, message: dict):
        """Processes messages that are command responses"""
        logger.debug(f'Processing command response: {message.get("id")}')
        # The response is handed over as a JSON string; execute_command
        # decodes it again before returning it to the caller.
        self._command_manager.resolve_command(
            message['id'], json.dumps(message)
        )

    async def _handle_event_message(self, message: dict):
        """Processes messages that are spontaneous events"""
        event_type = message.get('method', 'unknown-event')
        logger.debug(f'Processing {event_type} event')
        await self._events_handler.process_event(message)

    def __repr__(self):
        return f'ConnectionHandler(port={self._connection_port})'

    def __str__(self):
        return f'ConnectionHandler(port={self._connection_port})'

    async def __aenter__(self):
        """Return the handler itself; no connection is opened here."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the handler when leaving the ``async with`` block."""
        await self.close()
@@ -0,0 +1,136 @@
1
+ import asyncio
2
+ import logging
3
+ from typing import Callable, Dict
4
+
5
+ from pydoll import exceptions
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class CommandManager:
    """
    Assigns sequential IDs to outgoing commands and tracks the futures that
    are waiting for the matching responses.
    """

    def __init__(self):
        # Maps a command ID to the future awaiting that command's response.
        self._pending_commands: dict[int, asyncio.Future] = {}
        self._id = 1

    def create_command_future(self, command: dict) -> asyncio.Future:
        """
        Register a command as pending.

        Writes the next ID into ``command['id']`` (the dictionary is
        modified in place) and stores a new future under that ID.

        Args:
            command: The command dictionary about to be sent.

        Returns:
            asyncio.Future: Future to be resolved with the response.
        """
        command['id'] = self._id
        future = asyncio.Future()
        self._pending_commands[self._id] = future
        self._id += 1
        return future

    def resolve_command(self, response_id: int, result: str):
        """
        Deliver ``result`` to the pending command with ID ``response_id``.

        Unknown IDs are ignored. The entry is removed in every case.

        Args:
            response_id: ID carried by the response message.
            result: Value to set as the future's result.
        """
        future = self._pending_commands.pop(response_id, None)
        if future is None:
            return
        # A waiter that timed out has already cancelled its future; calling
        # set_result() on it would raise asyncio.InvalidStateError.
        if not future.done():
            future.set_result(result)

    def remove_pending_command(self, command_id: int):
        """
        Remove a pending command without resolving it (useful for timeouts).

        Args:
            command_id: ID of the command to remove.
        """
        self._pending_commands.pop(command_id, None)
36
+
37
+
38
class EventsHandler:
    """
    Manages callback registration, event processing and network logs.
    """

    def __init__(self):
        # Maps a callback ID to {'event', 'callback', 'temporary'}.
        self._event_callbacks: Dict[int, dict] = {}
        self._callback_id = 0
        self.network_logs = []
        self.dialog = {}
        logger.info('EventsHandler initialized')

    def register_callback(
        self, event_name: str, callback: Callable, temporary: bool = False
    ) -> int:
        """
        Registers a callback for a specific event type.

        Args:
            event_name: Event name the callback is matched against.
            callback: Function or coroutine function called with the event.
            temporary: When True the callback is removed after the first
                matching event.

        Returns:
            int: ID of the registered callback.

        Raises:
            InvalidCallback: If ``callback`` is not callable.
        """
        if not callable(callback):
            logger.error('Callback must be a callable function.')
            raise exceptions.InvalidCallback('Callback must be callable')

        self._callback_id += 1
        self._event_callbacks[self._callback_id] = {
            'event': event_name,
            'callback': callback,
            'temporary': temporary,
        }
        logger.info(
            f"Registered callback '{event_name}' with ID {self._callback_id}"
        )
        return self._callback_id

    def remove_callback(self, callback_id: int) -> bool:
        """Removes a callback by ID; returns False when the ID is unknown."""
        if callback_id not in self._event_callbacks:
            logger.warning(f'Callback ID {callback_id} not found')
            return False

        del self._event_callbacks[callback_id]
        logger.info(f'Removed callback ID {callback_id}')
        return True

    def clear_callbacks(self):
        """Resets all registered callbacks."""
        self._event_callbacks.clear()
        logger.info('All callbacks cleared')

    async def process_event(self, event_data: dict):
        """
        Processes a received event and triggers the matching callbacks.

        Messages without a string ``method`` are ignored, so a malformed
        message cannot raise out of the caller's receive loop.

        Args:
            event_data: Event data as a dictionary.
        """
        event_name = event_data.get('method')
        if not isinstance(event_name, str):
            logger.warning('Ignoring event without a method name')
            return
        logger.debug(f'Processing event: {event_name}')

        # Update the network logs when needed. These are substring checks,
        # so longer event names containing the text also match.
        if 'Network.requestWillBeSent' in event_name:
            self._update_network_logs(event_data)

        if 'Page.javascriptDialogOpening' in event_name:
            self.dialog = event_data

        if 'Page.javascriptDialogClosed' in event_name:
            self.dialog = {}

        # Run the callbacks.
        await self._trigger_callbacks(event_name, event_data)

    def _update_network_logs(self, event_data: dict):
        """Keeps the network logs up to date."""
        self.network_logs.append(event_data)
        # Cap the log at its maximum size.
        self.network_logs = self.network_logs[-10000:]

    async def _trigger_callbacks(self, event_name: str, event_data: dict):
        """Triggers every callback registered for the event."""
        callbacks_to_remove = []

        # Iterate over a snapshot so callbacks may register or remove
        # callbacks while the event is being dispatched.
        for cb_id, cb_data in list(self._event_callbacks.items()):
            if cb_data['event'] != event_name:
                continue

            try:
                if asyncio.iscoroutinefunction(cb_data['callback']):
                    await cb_data['callback'](event_data)
                else:
                    cb_data['callback'](event_data)
            except Exception as e:
                # One failing callback must not stop the remaining ones.
                logger.error(f'Error in callback {cb_id}: {str(e)}')

            if cb_data['temporary']:
                callbacks_to_remove.append(cb_id)

        # Remove temporary callbacks after processing.
        for cb_id in callbacks_to_remove:
            self.remove_callback(cb_id)
pydoll/constants.py ADDED
@@ -0,0 +1,125 @@
1
+ from enum import Enum
2
+
3
+
4
class By(str, Enum):
    """
    Element lookup strategies.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Lookup by CSS selector / XPath expression.
    CSS_SELECTOR = 'css'
    XPATH = 'xpath'

    # Lookup by a single attribute or by tag.
    CLASS_NAME = 'class_name'
    ID = 'id'
    TAG_NAME = 'tag_name'
10
+
11
+
12
class Scripts:
    """
    JavaScript snippets used as templates by the rest of the package.

    Two shapes appear below: ``function() {...}`` bodies that refer to the
    element as ``this``, and plain statements that run against ``document``.
    Placeholders such as ``{selector}``, ``{escaped_value}`` and
    ``{self.value}`` are filled in by the callers. Most templates also
    contain literal JavaScript braces, so they cannot be passed through
    ``str.format`` as they are.
    NOTE(review): the substitution mechanism lives outside this class;
    confirm it before changing any placeholder.
    """

    # True when the element has a non-empty box and is neither
    # visibility:hidden nor display:none.
    ELEMENT_VISIBLE = """
    function() {
        const rect = this.getBoundingClientRect();
        return (
            rect.width > 0 && rect.height > 0
            && getComputedStyle(this).visibility !== 'hidden'
            && getComputedStyle(this).display !== 'none'
        )
    }
    """

    # True when the element itself is what sits at its own center point.
    ELEMENT_ON_TOP = """
    function() {
        const rect = this.getBoundingClientRect();
        const elementFromPoint = document.elementFromPoint(
            rect.x + rect.width / 2,
            rect.y + rect.height / 2
        );
        return elementFromPoint === this;
    }
    """

    # Clicks the element and reports whether a click event reached it.
    # 'clicked' is declared with 'let' so it stays local to the call instead
    # of becoming a page-level global, and the listener is registered with
    # 'once' so repeated clicks do not pile up listeners on the element.
    CLICK = """
    function(){
        let clicked = false;
        this.addEventListener('click', function(){
            clicked = true;
        }, { once: true });
        this.click();
        return clicked;
    }
    """

    # Selects the <option> with the given value and dispatches a bubbling
    # 'change' event on its ancestor <select>.
    CLICK_OPTION_TAG = """
    document.querySelector('option[value="{self.value}"]').selected = true;
    var selectParentXpath = (
        '//option[@value="{self.value}"]//ancestor::select'
    );
    var select = document.evaluate(
        selectParentXpath,
        document,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null
    ).singleNodeValue;
    var event = new Event('change', { bubbles: true });
    select.dispatchEvent(event);
    """

    # Bounding client rect of the element, serialized as a JSON string.
    BOUNDS = """
    function() {
        return JSON.stringify(this.getBoundingClientRect());
    }
    """

    # First node matching the XPath, evaluated relative to the element.
    FIND_RELATIVE_XPATH_ELEMENT = """
    function() {
        return document.evaluate(
            "{escaped_value}", this, null,
            XPathResult.FIRST_ORDERED_NODE_TYPE, null
        ).singleNodeValue;
    }
    """

    # First node matching the XPath, evaluated against the document.
    FIND_XPATH_ELEMENT = """
    var element = document.evaluate(
        "{escaped_value}", document, null,
        XPathResult.FIRST_ORDERED_NODE_TYPE, null
    ).singleNodeValue;
    element;
    """

    # All nodes matching the XPath, evaluated relative to the element.
    FIND_RELATIVE_XPATH_ELEMENTS = """
    function() {
        var elements = document.evaluate(
            "{escaped_value}", this, null,
            XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null
        );
        var results = [];
        for (var i = 0; i < elements.snapshotLength; i++) {
            results.push(elements.snapshotItem(i));
        }
        return results;
    }
    """

    # All nodes matching the XPath, evaluated against the document.
    FIND_XPATH_ELEMENTS = """
    var elements = document.evaluate(
        "{escaped_value}", document, null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null
    );
    var results = [];
    for (var i = 0; i < elements.snapshotLength; i++) {
        results.push(elements.snapshotItem(i));
    }
    results;
    """

    # CSS selector lookups: document-wide and relative to an element.
    QUERY_SELECTOR = 'document.querySelector("{selector}");'

    RELATIVE_QUERY_SELECTOR = """
    function() {
        return this.querySelector("{selector}");
    }
    """

    QUERY_SELECTOR_ALL = 'document.querySelectorAll("{selector}");'

    RELATIVE_QUERY_SELECTOR_ALL = """
    function() {
        return this.querySelectorAll("{selector}");
    }
    """