pydoll-python 1.3.1__tar.gz → 1.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/PKG-INFO +24 -14
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/README.md +23 -13
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/browser/base.py +66 -87
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/browser/chrome.py +10 -5
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/browser/managers.py +11 -5
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/browser/page.py +9 -6
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/__init__.py +4 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/browser.py +19 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/dom.py +8 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/connection/connection.py +3 -1
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/element.py +6 -4
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/events/page.py +19 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/exceptions.py +10 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/mixins/find_elements.py +4 -2
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pyproject.toml +1 -1
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/LICENSE +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/__init__.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/browser/__init__.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/browser/options.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/fetch.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/input.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/network.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/page.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/runtime.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/storage.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/commands/target.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/connection/__init__.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/connection/managers.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/constants.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/events/__init__.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/events/browser.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/events/dom.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/events/fetch.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/events/network.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/mixins/__init__.py +0 -0
- {pydoll_python-1.3.1 → pydoll_python-1.3.3}/pydoll/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pydoll-python
|
|
3
|
-
Version: 1.3.1
|
|
3
|
+
Version: 1.3.3
|
|
4
4
|
Summary:
|
|
5
5
|
Author: Thalison Fernandes
|
|
6
6
|
Author-email: thalissfernandes99@gmail.com
|
|
@@ -740,9 +740,7 @@ from pydoll.events.page import PageEvents
|
|
|
740
740
|
async def on_page_loaded(event):
|
|
741
741
|
print(f"🌐 Navigating to: {event['params'].get('url')}")
|
|
742
742
|
|
|
743
|
-
await
|
|
744
|
-
await browser.on(PageEvents.PAGE_LOADED, on_page_loaded) # Global listener!
|
|
745
|
-
# Needs to be locally? Use the page.on method!
|
|
743
|
+
await page.enable_page_events()
|
|
746
744
|
await page.on(PageEvents.PAGE_LOADED, on_page_loaded)
|
|
747
745
|
```
|
|
748
746
|
|
|
@@ -754,20 +752,21 @@ from functools import partial
|
|
|
754
752
|
async def on_page_loaded(page, event):
|
|
755
753
|
print(f"📄 Page loaded: {await page.current_url}")
|
|
756
754
|
|
|
757
|
-
await
|
|
755
|
+
await page.enable_page_events()
|
|
756
|
+
await page.on(PageEvents.PAGE_LOADED, partial(on_page_loaded, page))
|
|
758
757
|
```
|
|
759
758
|
|
|
760
759
|
##### `async enable_page_events() -> None`
|
|
761
|
-
Track everything happening on your pages - loading states, navigation, DOM changes, and more! This
|
|
760
|
+
Track everything happening on your pages - loading states, navigation, DOM changes, and more! This works only locally: use the page instance to enable the events.
|
|
762
761
|
|
|
763
762
|
```python
|
|
764
763
|
# Enables page event monitoring
|
|
765
|
-
await
|
|
764
|
+
await page.enable_page_events()
|
|
766
765
|
```
|
|
767
766
|
|
|
768
767
|
|
|
769
768
|
##### `async enable_network_events() -> None`
|
|
770
|
-
See all network activity in real-time - perfect for debugging or monitoring specific API calls!
|
|
769
|
+
See all network activity in real-time - perfect for debugging or monitoring specific API calls! Only works in the page domain.
|
|
771
770
|
|
|
772
771
|
```python
|
|
773
772
|
from pydoll.events.network import NetworkEvents
|
|
@@ -775,14 +774,14 @@ from pydoll.events.network import NetworkEvents
|
|
|
775
774
|
async def on_request(event):
|
|
776
775
|
print(f"🔄 Request to: {event['params']['request']['url']} will be sent")
|
|
777
776
|
|
|
778
|
-
await
|
|
779
|
-
await
|
|
777
|
+
await page.enable_network_events()
|
|
778
|
+
await page.on(NetworkEvents.REQUEST_WILL_BE_SENT, on_request)
|
|
780
779
|
|
|
781
780
|
await page.go_to('https://www.google.com') # This will trigger the on_request callback
|
|
782
781
|
```
|
|
783
782
|
|
|
784
783
|
##### `async enable_dom_events() -> None`
|
|
785
|
-
Watch the page structure change in real-time and react accordingly!
|
|
784
|
+
Watch the page structure change in real-time and react accordingly! Only works on the page domain.
|
|
786
785
|
|
|
787
786
|
```python
|
|
788
787
|
from pydoll.events.dom import DomEvents
|
|
@@ -790,8 +789,8 @@ from pydoll.events.dom import DomEvents
|
|
|
790
789
|
async def on_dom_event(event):
|
|
791
790
|
print(f"🔄 The DOM has been updated!")
|
|
792
791
|
|
|
793
|
-
await
|
|
794
|
-
await
|
|
792
|
+
await page.enable_dom_events()
|
|
793
|
+
await page.on(DomEvents.DOCUMENT_UPDATED, on_dom_event)
|
|
795
794
|
```
|
|
796
795
|
|
|
797
796
|
##### `async enable_fetch_events(handle_auth_requests: bool = False, resource_type: str = '') -> None`
|
|
@@ -822,6 +821,8 @@ async def interceptor(page, event):
|
|
|
822
821
|
|
|
823
822
|
await browser.enable_fetch_events(resource_type='xhr') # only intercept XHR requests
|
|
824
823
|
await browser.on(FetchEvents.REQUEST_PAUSED, partial(interceptor, page))
|
|
824
|
+
|
|
825
|
+
await page.enable_fetch_events() # also works in the page domain!
|
|
825
826
|
```
|
|
826
827
|
|
|
827
828
|
With this power, you can transform your automation into something truly intelligent!
|
|
@@ -832,6 +833,7 @@ Turn off request interception when you're done.
|
|
|
832
833
|
```python
|
|
833
834
|
# Disables request interception
|
|
834
835
|
await browser.disable_fetch_events()
|
|
836
|
+
await page.disable_fetch_events()
|
|
835
837
|
```
|
|
836
838
|
|
|
837
839
|
### Concurrent Scraping
|
|
@@ -860,5 +862,13 @@ Get the most out of Pydoll with these tips:
|
|
|
860
862
|
|
|
861
863
|
## 🤝 Contributing
|
|
862
864
|
|
|
863
|
-
We'd love your help making Pydoll even better! Check out our contribution guidelines to get started. Whether it's fixing bugs, adding features, or improving documentation - all contributions are welcome!
|
|
865
|
+
We'd love your help making Pydoll even better! Check out our [contribution guidelines](CONTRIBUTING.md) to get started. Whether it's fixing bugs, adding features, or improving documentation - all contributions are welcome!
|
|
866
|
+
|
|
867
|
+
Please make sure to:
|
|
868
|
+
- Write tests for new features or bug fixes
|
|
869
|
+
- Follow our coding style and conventions
|
|
870
|
+
- Use conventional commits for your pull requests
|
|
871
|
+
- Run the lint and test checks before submitting
|
|
872
|
+
|
|
873
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed instructions on how to contribute to the project.
|
|
864
874
|
|
|
@@ -721,9 +721,7 @@ from pydoll.events.page import PageEvents
|
|
|
721
721
|
async def on_page_loaded(event):
|
|
722
722
|
print(f"🌐 Navigating to: {event['params'].get('url')}")
|
|
723
723
|
|
|
724
|
-
await
|
|
725
|
-
await browser.on(PageEvents.PAGE_LOADED, on_page_loaded) # Global listener!
|
|
726
|
-
# Needs to be locally? Use the page.on method!
|
|
724
|
+
await page.enable_page_events()
|
|
727
725
|
await page.on(PageEvents.PAGE_LOADED, on_page_loaded)
|
|
728
726
|
```
|
|
729
727
|
|
|
@@ -735,20 +733,21 @@ from functools import partial
|
|
|
735
733
|
async def on_page_loaded(page, event):
|
|
736
734
|
print(f"📄 Page loaded: {await page.current_url}")
|
|
737
735
|
|
|
738
|
-
await
|
|
736
|
+
await page.enable_page_events()
|
|
737
|
+
await page.on(PageEvents.PAGE_LOADED, partial(on_page_loaded, page))
|
|
739
738
|
```
|
|
740
739
|
|
|
741
740
|
##### `async enable_page_events() -> None`
|
|
742
|
-
Track everything happening on your pages - loading states, navigation, DOM changes, and more! This
|
|
741
|
+
Track everything happening on your pages - loading states, navigation, DOM changes, and more! This works only locally: use the page instance to enable the events.
|
|
743
742
|
|
|
744
743
|
```python
|
|
745
744
|
# Enables page event monitoring
|
|
746
|
-
await
|
|
745
|
+
await page.enable_page_events()
|
|
747
746
|
```
|
|
748
747
|
|
|
749
748
|
|
|
750
749
|
##### `async enable_network_events() -> None`
|
|
751
|
-
See all network activity in real-time - perfect for debugging or monitoring specific API calls!
|
|
750
|
+
See all network activity in real-time - perfect for debugging or monitoring specific API calls! Only works in the page domain.
|
|
752
751
|
|
|
753
752
|
```python
|
|
754
753
|
from pydoll.events.network import NetworkEvents
|
|
@@ -756,14 +755,14 @@ from pydoll.events.network import NetworkEvents
|
|
|
756
755
|
async def on_request(event):
|
|
757
756
|
print(f"🔄 Request to: {event['params']['request']['url']} will be sent")
|
|
758
757
|
|
|
759
|
-
await
|
|
760
|
-
await
|
|
758
|
+
await page.enable_network_events()
|
|
759
|
+
await page.on(NetworkEvents.REQUEST_WILL_BE_SENT, on_request)
|
|
761
760
|
|
|
762
761
|
await page.go_to('https://www.google.com') # This will trigger the on_request callback
|
|
763
762
|
```
|
|
764
763
|
|
|
765
764
|
##### `async enable_dom_events() -> None`
|
|
766
|
-
Watch the page structure change in real-time and react accordingly!
|
|
765
|
+
Watch the page structure change in real-time and react accordingly! Only works on the page domain.
|
|
767
766
|
|
|
768
767
|
```python
|
|
769
768
|
from pydoll.events.dom import DomEvents
|
|
@@ -771,8 +770,8 @@ from pydoll.events.dom import DomEvents
|
|
|
771
770
|
async def on_dom_event(event):
|
|
772
771
|
print(f"🔄 The DOM has been updated!")
|
|
773
772
|
|
|
774
|
-
await
|
|
775
|
-
await
|
|
773
|
+
await page.enable_dom_events()
|
|
774
|
+
await page.on(DomEvents.DOCUMENT_UPDATED, on_dom_event)
|
|
776
775
|
```
|
|
777
776
|
|
|
778
777
|
##### `async enable_fetch_events(handle_auth_requests: bool = False, resource_type: str = '') -> None`
|
|
@@ -803,6 +802,8 @@ async def interceptor(page, event):
|
|
|
803
802
|
|
|
804
803
|
await browser.enable_fetch_events(resource_type='xhr') # only intercept XHR requests
|
|
805
804
|
await browser.on(FetchEvents.REQUEST_PAUSED, partial(interceptor, page))
|
|
805
|
+
|
|
806
|
+
await page.enable_fetch_events() # also works in the page domain!
|
|
806
807
|
```
|
|
807
808
|
|
|
808
809
|
With this power, you can transform your automation into something truly intelligent!
|
|
@@ -813,6 +814,7 @@ Turn off request interception when you're done.
|
|
|
813
814
|
```python
|
|
814
815
|
# Disables request interception
|
|
815
816
|
await browser.disable_fetch_events()
|
|
817
|
+
await page.disable_fetch_events()
|
|
816
818
|
```
|
|
817
819
|
|
|
818
820
|
### Concurrent Scraping
|
|
@@ -841,4 +843,12 @@ Get the most out of Pydoll with these tips:
|
|
|
841
843
|
|
|
842
844
|
## 🤝 Contributing
|
|
843
845
|
|
|
844
|
-
We'd love your help making Pydoll even better! Check out our contribution guidelines to get started. Whether it's fixing bugs, adding features, or improving documentation - all contributions are welcome!
|
|
846
|
+
We'd love your help making Pydoll even better! Check out our [contribution guidelines](CONTRIBUTING.md) to get started. Whether it's fixing bugs, adding features, or improving documentation - all contributions are welcome!
|
|
847
|
+
|
|
848
|
+
Please make sure to:
|
|
849
|
+
- Write tests for new features or bug fixes
|
|
850
|
+
- Follow our coding style and conventions
|
|
851
|
+
- Use conventional commits for your pull requests
|
|
852
|
+
- Run the lint and test checks before submitting
|
|
853
|
+
|
|
854
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed instructions on how to contribute to the project.
|
|
@@ -12,15 +12,15 @@ from pydoll.browser.managers import (
|
|
|
12
12
|
)
|
|
13
13
|
from pydoll.browser.options import Options
|
|
14
14
|
from pydoll.browser.page import Page
|
|
15
|
-
from pydoll.commands
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
from pydoll.commands import (
|
|
16
|
+
BrowserCommands,
|
|
17
|
+
FetchCommands,
|
|
18
|
+
NetworkCommands,
|
|
19
|
+
StorageCommands,
|
|
20
|
+
TargetCommands,
|
|
21
|
+
)
|
|
22
22
|
from pydoll.connection.connection import ConnectionHandler
|
|
23
|
-
from pydoll.events
|
|
23
|
+
from pydoll.events import FetchEvents, PageEvents
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class Browser(ABC): # noqa: PLR0904
|
|
@@ -77,7 +77,9 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
77
77
|
exc_val: The exception value, if raised.
|
|
78
78
|
exc_tb: The traceback, if an exception was raised.
|
|
79
79
|
"""
|
|
80
|
-
await self.
|
|
80
|
+
if await self._is_browser_running():
|
|
81
|
+
await self.stop()
|
|
82
|
+
|
|
81
83
|
await self._connection_handler.close()
|
|
82
84
|
|
|
83
85
|
async def start(self) -> None:
|
|
@@ -137,6 +139,7 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
137
139
|
page_id = (
|
|
138
140
|
await self.new_page() if not self._pages else self._pages.pop()
|
|
139
141
|
)
|
|
142
|
+
|
|
140
143
|
return Page(self._connection_port, page_id)
|
|
141
144
|
|
|
142
145
|
async def delete_all_cookies(self):
|
|
@@ -185,6 +188,10 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
185
188
|
Returns:
|
|
186
189
|
int: The ID of the registered callback.
|
|
187
190
|
"""
|
|
191
|
+
if event_name in PageEvents.ALL_EVENTS:
|
|
192
|
+
raise exceptions.EventNotSupported(
|
|
193
|
+
'Page events are not supported in the browser domain.'
|
|
194
|
+
)
|
|
188
195
|
|
|
189
196
|
async def callback_wrapper(event):
|
|
190
197
|
asyncio.create_task(callback(event))
|
|
@@ -229,7 +236,7 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
229
236
|
Stops the running browser process.
|
|
230
237
|
|
|
231
238
|
Raises:
|
|
232
|
-
|
|
239
|
+
BrowserNotRunning: If the browser is not currently running.
|
|
233
240
|
"""
|
|
234
241
|
if await self._is_browser_running():
|
|
235
242
|
await self._execute_command(BrowserCommands.CLOSE)
|
|
@@ -243,10 +250,25 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
243
250
|
Retrieves the ID of the current browser window.
|
|
244
251
|
|
|
245
252
|
Returns:
|
|
246
|
-
|
|
253
|
+
int: The ID of the current browser window.
|
|
254
|
+
|
|
255
|
+
Raises:
|
|
256
|
+
RuntimeError: If unable to retrieve the window ID.
|
|
247
257
|
"""
|
|
248
|
-
|
|
249
|
-
|
|
258
|
+
command = BrowserCommands.get_window_id()
|
|
259
|
+
response = await self._execute_command(command)
|
|
260
|
+
|
|
261
|
+
if response.get('error'):
|
|
262
|
+
pages = await self.get_targets()
|
|
263
|
+
target_id = await self._get_valid_target_id(pages)
|
|
264
|
+
response = await self._execute_command(
|
|
265
|
+
BrowserCommands.get_window_id_by_target(target_id)
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
if window_id := response.get('result', {}).get('windowId'):
|
|
269
|
+
return window_id
|
|
270
|
+
|
|
271
|
+
raise RuntimeError(response.get('error', {}))
|
|
250
272
|
|
|
251
273
|
async def set_window_bounds(self, bounds: dict):
|
|
252
274
|
"""
|
|
@@ -278,51 +300,6 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
278
300
|
BrowserCommands.set_window_minimized(window_id)
|
|
279
301
|
)
|
|
280
302
|
|
|
281
|
-
async def enable_page_events(self):
|
|
282
|
-
"""
|
|
283
|
-
Enables listening for page-related events over the websocket
|
|
284
|
-
connection. Once this method is invoked, the connection will emit
|
|
285
|
-
events pertaining to page activities, such as loading, navigation,
|
|
286
|
-
and DOM updates, to any registered event callbacks. For a comprehensive
|
|
287
|
-
list of available page events and their purposes, refer to the
|
|
288
|
-
PageEvents class documentation.
|
|
289
|
-
This functionality is crucial for monitoring and reacting to changes
|
|
290
|
-
in the page state in real-time.
|
|
291
|
-
|
|
292
|
-
This method has a global scope and can be used to listen
|
|
293
|
-
for events across all pages in the browser. Each Page instance also
|
|
294
|
-
has an `enable_page_events` method that allows for listening to events
|
|
295
|
-
on a specific page.
|
|
296
|
-
|
|
297
|
-
Returns:
|
|
298
|
-
None
|
|
299
|
-
"""
|
|
300
|
-
await self._connection_handler.execute_command(
|
|
301
|
-
PageCommands.enable_page()
|
|
302
|
-
)
|
|
303
|
-
|
|
304
|
-
async def enable_network_events(self):
|
|
305
|
-
"""
|
|
306
|
-
Activates listening for network events through the websocket
|
|
307
|
-
connection. After calling this method, the connection will emit
|
|
308
|
-
events related to network activities, such as resource loading and
|
|
309
|
-
response status, to any registered event callbacks. This is essential
|
|
310
|
-
for debugging network interactions and analyzing resource requests.
|
|
311
|
-
For details on available network events, consult the NetworkEvents
|
|
312
|
-
class documentation.
|
|
313
|
-
|
|
314
|
-
This method has a global scope and can be used to listen
|
|
315
|
-
for events across all pages in the browser. Each Page instance also
|
|
316
|
-
has an `enable_network_events` method that allows for listening to
|
|
317
|
-
events on a specific page.
|
|
318
|
-
|
|
319
|
-
Returns:
|
|
320
|
-
None
|
|
321
|
-
"""
|
|
322
|
-
await self._connection_handler.execute_command(
|
|
323
|
-
NetworkCommands.enable_network_events()
|
|
324
|
-
)
|
|
325
|
-
|
|
326
303
|
async def enable_fetch_events(
|
|
327
304
|
self, handle_auth_requests: bool = False, resource_type: str = ''
|
|
328
305
|
):
|
|
@@ -357,27 +334,6 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
357
334
|
)
|
|
358
335
|
)
|
|
359
336
|
|
|
360
|
-
async def enable_dom_events(self):
|
|
361
|
-
"""
|
|
362
|
-
Enables DOM-related events for the websocket connection. When invoked,
|
|
363
|
-
this method allows the connection to listen for changes in the DOM,
|
|
364
|
-
including node additions, removals, and attribute changes. This feature
|
|
365
|
-
is vital for applications that need to react to dynamic changes in
|
|
366
|
-
the page structure. For a full list of available DOM events, refer to
|
|
367
|
-
the DomCommands class documentation.
|
|
368
|
-
|
|
369
|
-
This method has a global scope and can be used to listen
|
|
370
|
-
for events across all pages in the browser. Each Page instance also has
|
|
371
|
-
an `enable_dom_events` method that allows for listening to events on
|
|
372
|
-
a specific page.
|
|
373
|
-
|
|
374
|
-
Returns:
|
|
375
|
-
None
|
|
376
|
-
"""
|
|
377
|
-
await self._connection_handler.execute_command(
|
|
378
|
-
DomCommands.enable_dom_events()
|
|
379
|
-
)
|
|
380
|
-
|
|
381
337
|
async def disable_fetch_events(self):
|
|
382
338
|
"""
|
|
383
339
|
Deactivates the Fetch domain, stopping the interception of network
|
|
@@ -519,7 +475,7 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
519
475
|
'url', ''
|
|
520
476
|
)
|
|
521
477
|
|
|
522
|
-
async def _get_valid_page(self, pages) -> str:
|
|
478
|
+
async def _get_valid_page(self, pages: list) -> str:
|
|
523
479
|
"""
|
|
524
480
|
Gets the ID of a valid page or creates a new one.
|
|
525
481
|
|
|
@@ -530,17 +486,39 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
530
486
|
str: The target ID of an existing or new page.
|
|
531
487
|
"""
|
|
532
488
|
valid_page = next(
|
|
533
|
-
(page for page in pages if self._is_valid_page(page)),
|
|
489
|
+
(page for page in pages if self._is_valid_page(page)), {}
|
|
534
490
|
)
|
|
535
491
|
|
|
536
|
-
if valid_page:
|
|
537
|
-
|
|
538
|
-
return valid_page['targetId']
|
|
539
|
-
except KeyError:
|
|
540
|
-
pass
|
|
492
|
+
if valid_page.get('targetId', None):
|
|
493
|
+
return valid_page['targetId']
|
|
541
494
|
|
|
542
495
|
return await self.new_page()
|
|
543
496
|
|
|
497
|
+
@staticmethod
|
|
498
|
+
async def _get_valid_target_id(pages: list) -> str:
|
|
499
|
+
"""
|
|
500
|
+
Retrieves the target ID of a valid attached browser page.
|
|
501
|
+
|
|
502
|
+
Returns:
|
|
503
|
+
str: The target ID of a valid page.
|
|
504
|
+
|
|
505
|
+
"""
|
|
506
|
+
|
|
507
|
+
valid_page = next(
|
|
508
|
+
(page for page in pages
|
|
509
|
+
if page.get('type') == 'page' and page.get('attached')),
|
|
510
|
+
None
|
|
511
|
+
)
|
|
512
|
+
|
|
513
|
+
if not valid_page:
|
|
514
|
+
raise RuntimeError("No valid attached browser page found.")
|
|
515
|
+
|
|
516
|
+
target_id = valid_page.get('targetId')
|
|
517
|
+
if not target_id:
|
|
518
|
+
raise RuntimeError("Valid page found but missing 'targetId'.")
|
|
519
|
+
|
|
520
|
+
return target_id
|
|
521
|
+
|
|
544
522
|
async def _is_browser_running(self, timeout: int = 10) -> bool:
|
|
545
523
|
"""
|
|
546
524
|
Checks if the browser process is currently running.
|
|
@@ -553,9 +531,10 @@ class Browser(ABC): # noqa: PLR0904
|
|
|
553
531
|
if await self._connection_handler.ping():
|
|
554
532
|
return True
|
|
555
533
|
await asyncio.sleep(1)
|
|
534
|
+
|
|
556
535
|
return False
|
|
557
536
|
|
|
558
|
-
async def _execute_command(self, command:
|
|
537
|
+
async def _execute_command(self, command: dict):
|
|
559
538
|
"""
|
|
560
539
|
Executes a command through the connection handler.
|
|
561
540
|
|
|
@@ -46,13 +46,18 @@ class Chrome(Browser):
|
|
|
46
46
|
the browser executable is not found at the default location.
|
|
47
47
|
"""
|
|
48
48
|
os_name = platform.system()
|
|
49
|
+
|
|
49
50
|
browser_paths = {
|
|
50
|
-
'Windows':
|
|
51
|
+
'Windows': [
|
|
51
52
|
r'C:\Program Files\Google\Chrome\Application\chrome.exe',
|
|
52
|
-
|
|
53
|
+
r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe',
|
|
54
|
+
],
|
|
55
|
+
'Linux': [
|
|
53
56
|
'/usr/bin/google-chrome',
|
|
54
|
-
|
|
55
|
-
|
|
57
|
+
],
|
|
58
|
+
'Darwin': [
|
|
59
|
+
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
|
60
|
+
]
|
|
56
61
|
}
|
|
57
62
|
|
|
58
63
|
browser_path = browser_paths.get(os_name)
|
|
@@ -60,6 +65,6 @@ class Chrome(Browser):
|
|
|
60
65
|
if not browser_path:
|
|
61
66
|
raise ValueError('Unsupported OS')
|
|
62
67
|
|
|
63
|
-
return BrowserOptionsManager.
|
|
68
|
+
return BrowserOptionsManager.validate_browser_paths(
|
|
64
69
|
browser_path
|
|
65
70
|
)
|
|
@@ -186,6 +186,10 @@ class BrowserProcessManager:
|
|
|
186
186
|
"""
|
|
187
187
|
if self._process:
|
|
188
188
|
self._process.terminate()
|
|
189
|
+
try:
|
|
190
|
+
self._process.wait(timeout=15)
|
|
191
|
+
except subprocess.TimeoutExpired:
|
|
192
|
+
self._process.kill()
|
|
189
193
|
|
|
190
194
|
|
|
191
195
|
class TempDirectoryManager:
|
|
@@ -275,7 +279,7 @@ class BrowserOptionsManager:
|
|
|
275
279
|
options.arguments.append('--no-default-browser-check')
|
|
276
280
|
|
|
277
281
|
@staticmethod
|
|
278
|
-
def
|
|
282
|
+
def validate_browser_paths(paths: list[str]) -> str:
|
|
279
283
|
"""
|
|
280
284
|
Validates the provided browser executable path.
|
|
281
285
|
|
|
@@ -283,7 +287,8 @@ class BrowserOptionsManager:
|
|
|
283
287
|
the specified path.
|
|
284
288
|
|
|
285
289
|
Args:
|
|
286
|
-
|
|
290
|
+
paths (list[str]): List of possible browser executable paths.
|
|
291
|
+
|
|
287
292
|
|
|
288
293
|
Returns:
|
|
289
294
|
str: The validated browser path if it exists.
|
|
@@ -291,6 +296,7 @@ class BrowserOptionsManager:
|
|
|
291
296
|
Raises:
|
|
292
297
|
ValueError: If the browser executable is not found at the path.
|
|
293
298
|
"""
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
299
|
+
for path in paths:
|
|
300
|
+
if os.path.exists(path) and os.access(path, os.X_OK):
|
|
301
|
+
return path
|
|
302
|
+
raise ValueError(f"No valid browser path found in: {paths}")
|
|
@@ -3,12 +3,14 @@ import json
|
|
|
3
3
|
|
|
4
4
|
import aiofiles
|
|
5
5
|
|
|
6
|
-
from pydoll.commands
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
from pydoll.commands import (
|
|
7
|
+
DomCommands,
|
|
8
|
+
FetchCommands,
|
|
9
|
+
NetworkCommands,
|
|
10
|
+
PageCommands,
|
|
11
|
+
RuntimeCommands,
|
|
12
|
+
StorageCommands,
|
|
13
|
+
)
|
|
12
14
|
from pydoll.connection.connection import ConnectionHandler
|
|
13
15
|
from pydoll.element import WebElement
|
|
14
16
|
from pydoll.exceptions import InvalidFileExtension
|
|
@@ -157,6 +159,7 @@ class Page(FindElementsMixin): # noqa: PLR0904
|
|
|
157
159
|
"""
|
|
158
160
|
if self._connection_handler.dialog:
|
|
159
161
|
return True
|
|
162
|
+
|
|
160
163
|
return False
|
|
161
164
|
|
|
162
165
|
async def get_dialog_message(self) -> str:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# global imports
|
|
2
|
+
from pydoll.commands.browser import BrowserCommands
|
|
2
3
|
from pydoll.commands.dom import DomCommands
|
|
3
4
|
from pydoll.commands.fetch import FetchCommands
|
|
4
5
|
from pydoll.commands.input import InputCommands
|
|
@@ -6,6 +7,7 @@ from pydoll.commands.network import NetworkCommands
|
|
|
6
7
|
from pydoll.commands.page import PageCommands
|
|
7
8
|
from pydoll.commands.runtime import RuntimeCommands
|
|
8
9
|
from pydoll.commands.storage import StorageCommands
|
|
10
|
+
from pydoll.commands.target import TargetCommands
|
|
9
11
|
|
|
10
12
|
__all__ = [
|
|
11
13
|
'DomCommands',
|
|
@@ -15,4 +17,6 @@ __all__ = [
|
|
|
15
17
|
'PageCommands',
|
|
16
18
|
'RuntimeCommands',
|
|
17
19
|
'StorageCommands',
|
|
20
|
+
'BrowserCommands',
|
|
21
|
+
'TargetCommands',
|
|
18
22
|
]
|
|
@@ -17,6 +17,10 @@ class BrowserCommands:
|
|
|
17
17
|
|
|
18
18
|
CLOSE = {'method': 'Browser.close'}
|
|
19
19
|
GET_WINDOW_ID = {'method': 'Browser.WindowID'}
|
|
20
|
+
GET_WINDOW_ID_BY_TARGET = {
|
|
21
|
+
'method': 'Browser.getWindowForTarget',
|
|
22
|
+
'params': {},
|
|
23
|
+
}
|
|
20
24
|
SET_WINDOW_BOUNDS_TEMPLATE = {
|
|
21
25
|
'method': 'Browser.setWindowBounds',
|
|
22
26
|
'params': {},
|
|
@@ -62,6 +66,21 @@ class BrowserCommands:
|
|
|
62
66
|
"""
|
|
63
67
|
return cls.GET_WINDOW_ID
|
|
64
68
|
|
|
69
|
+
@classmethod
|
|
70
|
+
def get_window_id_by_target(cls, target_id: str) -> dict:
|
|
71
|
+
"""
|
|
72
|
+
Generates the command to get the ID of the window that contains the given target.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
target_id (str): The ID of the target whose window should be looked up.
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
dict: The command to be sent to the browser.
|
|
79
|
+
"""
|
|
80
|
+
command = cls.GET_WINDOW_ID_BY_TARGET.copy()
|
|
81
|
+
command['params']['targetId'] = target_id
|
|
82
|
+
return command
|
|
83
|
+
|
|
65
84
|
@classmethod
|
|
66
85
|
def set_window_bounds(cls, window_id: int, bounds: dict) -> dict:
|
|
67
86
|
"""
|
|
@@ -24,6 +24,7 @@ class DomCommands:
|
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
ENABLE = {'method': 'DOM.enable'}
|
|
27
|
+
DISABLE = {'method': 'DOM.disable'}
|
|
27
28
|
DOM_DOCUMENT = {'method': 'DOM.getDocument'}
|
|
28
29
|
DESCRIBE_NODE_TEMPLATE = {'method': 'DOM.describeNode', 'params': {}}
|
|
29
30
|
FIND_ELEMENT_TEMPLATE = {'method': 'DOM.querySelector', 'params': {}}
|
|
@@ -138,6 +139,13 @@ class DomCommands:
|
|
|
138
139
|
"""
|
|
139
140
|
return cls.ENABLE
|
|
140
141
|
|
|
142
|
+
@classmethod
|
|
143
|
+
def disable_dom_events(cls) -> dict:
|
|
144
|
+
"""
|
|
145
|
+
Generates a command to disable the DOM domain in CDP.
|
|
146
|
+
"""
|
|
147
|
+
return cls.DISABLE
|
|
148
|
+
|
|
141
149
|
@classmethod
|
|
142
150
|
def get_current_url(cls) -> dict:
|
|
143
151
|
"""
|
|
@@ -208,7 +208,9 @@ class ConnectionHandler:
|
|
|
208
208
|
"""
|
|
209
209
|
ws_address = await self._resolve_ws_address()
|
|
210
210
|
logger.info(f'Connecting to {ws_address}')
|
|
211
|
-
self._ws_connection = await self._ws_connector(
|
|
211
|
+
self._ws_connection = await self._ws_connector(
|
|
212
|
+
ws_address, max_size=1024 * 1024 * 10 # 10MB
|
|
213
|
+
)
|
|
212
214
|
self._receive_task = asyncio.create_task(self._receive_events())
|
|
213
215
|
logger.debug('WebSocket connection established')
|
|
214
216
|
|
|
@@ -5,10 +5,12 @@ import aiofiles
|
|
|
5
5
|
from bs4 import BeautifulSoup
|
|
6
6
|
|
|
7
7
|
from pydoll import exceptions
|
|
8
|
-
from pydoll.commands
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
8
|
+
from pydoll.commands import (
|
|
9
|
+
DomCommands,
|
|
10
|
+
InputCommands,
|
|
11
|
+
PageCommands,
|
|
12
|
+
RuntimeCommands,
|
|
13
|
+
)
|
|
12
14
|
from pydoll.connection.connection import ConnectionHandler
|
|
13
15
|
from pydoll.constants import Scripts
|
|
14
16
|
from pydoll.mixins.find_elements import FindElementsMixin
|
|
@@ -142,3 +142,22 @@ class PageEvents:
|
|
|
142
142
|
This event is useful for tracking changes in the document state, such as
|
|
143
143
|
anchor links or in-page navigation, without requiring a full page reload.
|
|
144
144
|
"""
|
|
145
|
+
|
|
146
|
+
ALL_EVENTS = [
|
|
147
|
+
PAGE_LOADED,
|
|
148
|
+
DOM_CONTENT_LOADED,
|
|
149
|
+
FILE_CHOOSER_OPENED,
|
|
150
|
+
FRAME_ATTACHED,
|
|
151
|
+
FRAME_DETACHED,
|
|
152
|
+
FRAME_NAVIGATED,
|
|
153
|
+
JS_DIALOG_CLOSED,
|
|
154
|
+
JS_DIALOG_OPENING,
|
|
155
|
+
LIFECYCLE_EVENT,
|
|
156
|
+
WINDOW_OPENED,
|
|
157
|
+
DOCUMENT_OPENED,
|
|
158
|
+
FRAME_STARTED_LOADING,
|
|
159
|
+
FRAME_STOPPED_LOADING,
|
|
160
|
+
DOWNLOAD_PROGRESS,
|
|
161
|
+
DOWNLOAD_WILL_BEGIN,
|
|
162
|
+
NAVIGATED_WITHIN_DOCUMENT,
|
|
163
|
+
]
|
|
@@ -87,3 +87,13 @@ class InvalidFileExtension(Exception):
|
|
|
87
87
|
|
|
88
88
|
def __str__(self):
|
|
89
89
|
return self.message
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class EventNotSupported(Exception):
|
|
93
|
+
message = 'The event is not supported'
|
|
94
|
+
|
|
95
|
+
def __init__(self, message: str = ''):
|
|
96
|
+
self.message = message or self.message
|
|
97
|
+
|
|
98
|
+
def __str__(self):
|
|
99
|
+
return self.message
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
|
|
3
3
|
from pydoll import exceptions
|
|
4
|
-
from pydoll.commands
|
|
5
|
-
|
|
4
|
+
from pydoll.commands import (
|
|
5
|
+
DomCommands,
|
|
6
|
+
RuntimeCommands,
|
|
7
|
+
)
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
def create_web_element(*args, **kwargs):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|