pydoll-python 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydoll/__init__.py +0 -0
- pydoll/browser/__init__.py +0 -0
- pydoll/browser/base.py +524 -0
- pydoll/browser/chrome.py +31 -0
- pydoll/browser/managers.py +154 -0
- pydoll/browser/options.py +62 -0
- pydoll/browser/page.py +433 -0
- pydoll/commands/__init__.py +18 -0
- pydoll/commands/browser.py +108 -0
- pydoll/commands/dom.py +212 -0
- pydoll/commands/fetch.py +308 -0
- pydoll/commands/input.py +106 -0
- pydoll/commands/network.py +334 -0
- pydoll/commands/page.py +187 -0
- pydoll/commands/runtime.py +45 -0
- pydoll/commands/storage.py +18 -0
- pydoll/commands/target.py +35 -0
- pydoll/connection/__init__.py +0 -0
- pydoll/connection/connection.py +232 -0
- pydoll/connection/managers.py +136 -0
- pydoll/constants.py +125 -0
- pydoll/element.py +313 -0
- pydoll/events/__init__.py +13 -0
- pydoll/events/browser.py +26 -0
- pydoll/events/dom.py +108 -0
- pydoll/events/fetch.py +29 -0
- pydoll/events/network.py +160 -0
- pydoll/events/page.py +144 -0
- pydoll/exceptions.py +82 -0
- pydoll/mixins/__init__.py +0 -0
- pydoll/mixins/find_elements.py +180 -0
- pydoll/utils.py +50 -0
- pydoll_python-1.2.0.dist-info/LICENSE +9 -0
- pydoll_python-1.2.0.dist-info/METADATA +200 -0
- pydoll_python-1.2.0.dist-info/RECORD +36 -0
- pydoll_python-1.2.0.dist-info/WHEEL +4 -0
pydoll/events/page.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
class PageEvents:
    """
    Event names of the Chrome DevTools Protocol ``Page`` domain.

    Every attribute is the exact event string emitted by the browser, so it
    can be handed to an event-registration call (for example
    ``page.on(PageEvents.PAGE_LOADED, callback)``) instead of a hand-typed
    literal. The events cover the life of a page during automation, testing
    or monitoring.
    """

    PAGE_LOADED = 'Page.loadEventFired'
    """
    The page finished loading.

    At this point all resources, images and stylesheets included, have been
    loaded, so it is the usual hook for work that needs the whole page to be
    ready for interaction or manipulation.
    """

    DOM_CONTENT_LOADED = 'Page.domContentEventFired'
    """
    The DOMContentLoaded event fired.

    The initial HTML document has been completely loaded and parsed; the DOM
    can be manipulated even though external resources such as images may
    still be loading.
    """

    FILE_CHOOSER_OPENED = 'Page.fileChooserOpened'
    """
    A file chooser dialog was opened.

    Useful for upload flows: it tells the automation that the user is being
    prompted to select files.
    """

    FRAME_ATTACHED = 'Page.frameAttached'
    """
    A frame was attached to the page.

    Relevant when working with iframes or nested browsing contexts, since it
    announces a newly added frame that can now be managed or interacted with.
    """

    FRAME_DETACHED = 'Page.frameDetached'
    """
    A frame was detached from the page.

    This happens when an iframe is removed or navigated away; it's the
    moment to clean up resources associated with that frame.
    """

    FRAME_NAVIGATED = 'Page.frameNavigated'
    """
    A frame was navigated to a new URL.

    Lets the automation follow navigation inside iframes and update its own
    state or UI according to the frame's new content.
    """

    JS_DIALOG_CLOSED = 'Page.javascriptDialogClosed'
    """
    A JavaScript dialog (such as an alert or a confirmation) was closed.

    Use it to run follow-up actions once a dialog has been dismissed.
    """

    JS_DIALOG_OPENING = 'Page.javascriptDialogOpening'
    """
    A JavaScript dialog is about to open.

    Gives the automation a chance to step in, for instance by answering the
    dialog automatically or by logging the interaction.
    """

    LIFECYCLE_EVENT = 'Page.lifecycleEvent'
    """
    A generic page lifecycle event.

    Acts as a catch-all for lifecycle-related notifications and can be used
    to monitor how the page state changes over its lifetime.
    """

    WINDOW_OPENED = 'Page.windowOpen'
    """
    A new window was opened.

    Helpful when several windows have to be monitored or managed, notably
    pop-ups and new tabs.
    """

    DOCUMENT_OPENED = 'Page.documentOpened'
    """
    A new document was opened in the page.

    Tracks changes of the document context, particularly where documents
    are created or loaded dynamically.
    """

    FRAME_STARTED_LOADING = 'Page.frameStartedLoading'
    """
    A frame started loading content.

    Useful for tracking frame loading state, e.g. to show a loading
    indicator or to act as soon as the frame begins fetching resources.
    """

    FRAME_STOPPED_LOADING = 'Page.frameStoppedLoading'
    """
    A frame stopped loading content.

    The frame may have loaded successfully or may have hit an error; either
    way the loading state can now be handled.
    """

    DOWNLOAD_PROGRESS = 'Page.downloadProgress'
    """
    Progress was made on a download.

    Delivers status updates so the application can report on ongoing
    downloads and on their completion.
    """

    DOWNLOAD_WILL_BEGIN = 'Page.downloadWillBegin'
    """
    A download is about to start.

    Marks the initiation of a download and allows pre-download actions such
    as logging or notifying the user.
    """

    NAVIGATED_WITHIN_DOCUMENT = 'Page.navigatedWithinDocument'
    """
    Navigation happened inside the current document.

    Covers anchor links and other in-page navigation that changes the
    document state without a full page reload.
    """
|
pydoll/exceptions.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
class PydollException(Exception):
    """
    Common base for the exceptions defined in this module.

    Each subclass declares a default ``message`` at class level. A message
    passed to the constructor takes precedence over that default, so details
    supplied at the raise site (for example the underlying error text) are
    kept when the exception is converted to a string.
    """

    message = 'An error occurred'

    def __init__(self, *args):
        """
        Args:
            *args: Optional positional arguments, forwarded to ``Exception``.
                When the first one is truthy it replaces the class default
                message for this instance.
        """
        super().__init__(*args)
        if args and args[0]:
            # The instance attribute shadows the class-level default.
            self.message = str(args[0])

    def __str__(self):
        return self.message


class ConnectionFailed(PydollException):
    """Signals that the connection to the browser could not be made."""

    message = 'Failed to connect to the browser'


class InvalidCommand(PydollException):
    """Signals that a provided command is invalid."""

    message = 'The command provided is invalid'


class InvalidCallback(PydollException):
    """Signals that a provided callback is invalid."""

    message = 'The callback provided is invalid'


class NetworkError(PydollException):
    """Signals that a network error occurred."""

    message = 'A network error occurred'


class InvalidResponse(PydollException):
    """Signals that a received response is invalid."""

    message = 'The response received is invalid'


class ReconnectionFailed(PydollException):
    """Signals that reconnecting to the browser failed."""

    message = 'Failed to reconnect to the browser'


class ResendCommandFailed(PydollException):
    """Signals that resending a command failed."""

    message = 'Failed to resend the command'


class BrowserNotRunning(PydollException):
    """Signals that the browser is not running."""

    message = 'The browser is not running'


class ElementNotFound(PydollException):
    """Signals that the requested element was not found."""

    message = 'The specified element was not found'


class ClickIntercepted(PydollException):
    """Signals that a click was intercepted."""

    message = 'The click was intercepted'


class ElementNotVisible(PydollException):
    """Signals that an element is not visible."""

    message = 'The element is not visible'


class ElementNotInteractable(PydollException):
    """Signals that an element is not interactable."""

    message = 'The element is not interactable'
|
|
File without changes
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
3
|
+
from pydoll import exceptions
|
|
4
|
+
from pydoll.commands.dom import DomCommands
|
|
5
|
+
from pydoll.commands.runtime import RuntimeCommands
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create_web_element(*args, **kwargs):
    """
    Build a WebElement, forwarding every argument to its constructor.

    The WebElement class is imported inside the function rather than at
    module level in order to avoid a circular import.
    """
    from pydoll.element import WebElement  # noqa: PLC0415

    element = WebElement(*args, **kwargs)
    return element
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FindElementsMixin:
    """
    Element lookup helpers for classes that talk to the browser.

    The host class must provide ``_connection_handler``, an object with an
    awaitable ``execute_command(command, timeout=...)`` method. When the host
    also defines ``_object_id``, that id is forwarded to the ``DomCommands``
    builders (presumably to scope the search to that element; confirm
    against ``DomCommands``).
    """

    async def wait_element(
        self,
        by: DomCommands.SelectorType,
        value: str,
        timeout: int = 10,
        raise_exc: bool = True,
    ):
        """
        Waits for an element to be present in the DOM.

        The lookup is retried every 0.5 seconds until it succeeds or the
        timeout elapses.

        Args:
            by (SelectorType): The type of selector to use.
            value (str): The value of the selector.
            timeout (int, optional): Time in seconds to wait for the element.
                Defaults to 10.
            raise_exc (bool, optional): Whether to raise when the timeout
                elapses. When False, None is returned instead.
                Defaults to True.

        Returns:
            The element found in the DOM, or None when it was not found
            within the timeout and raise_exc is False.

        Raises:
            TimeoutError: If the element is not found within the timeout
                and raise_exc is True.
        """
        # We are inside a coroutine, so a running loop is guaranteed.
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        while True:
            try:
                element = await self.find_element(by, value, raise_exc=False)
            except exceptions.ElementNotFound:
                # Not expected with raise_exc=False; kept so that a miss is
                # always treated as "retry" rather than as a failure.
                element = None

            if element:
                return element

            if loop.time() - start_time > timeout:
                if raise_exc:
                    raise TimeoutError('Element not found')
                return None

            await asyncio.sleep(0.5)

    async def find_element(
        self, by: DomCommands.SelectorType, value: str, raise_exc: bool = True
    ):
        """
        Finds an element on the current page using the specified selector.

        Args:
            by (SelectorType): The type of selector to use.
            value (str): The value of the selector to use.
            raise_exc (bool, optional): Whether to raise when nothing
                matches. When False, None is returned instead.
                Defaults to True.

        Returns:
            The object built by ``create_web_element`` for the match, or
            None when nothing matches and raise_exc is False.

        Raises:
            ElementNotFound: If the element is not found and raise_exc is True.
        """
        if hasattr(self, '_object_id'):
            command = DomCommands.find_element(by, value, self._object_id)
        else:
            command = DomCommands.find_element(by, value)

        response = await self._execute_command(command)

        object_id = (
            response.get('result', {}).get('result', {}).get('objectId')
        )
        if not object_id:
            if raise_exc:
                raise exceptions.ElementNotFound('Element not found')
            return None

        node_description = await self._describe_node(object_id=object_id)
        return self._build_element(object_id, node_description, by, value)

    async def find_elements(
        self, by: DomCommands.SelectorType, value: str, raise_exc: bool = True
    ):
        """
        Finds all elements on the current page using the specified selector.

        Args:
            by (SelectorType): The type of selector to use.
            value (str): The value of the selector to use.
            raise_exc (bool, optional): Whether to raise when the query
                yields no result object. When False, an empty list is
                returned instead. Defaults to True.

        Returns:
            list: A list of elements found on the page.

        Raises:
            ElementNotFound: If no elements are found and raise_exc is True.
        """
        if hasattr(self, '_object_id'):
            command = DomCommands.find_elements(by, value, self._object_id)
        else:
            command = DomCommands.find_elements(by, value)

        response = await self._execute_command(command)

        collection_id = (
            response.get('result', {}).get('result', {}).get('objectId')
        )
        if not collection_id:
            if raise_exc:
                raise exceptions.ElementNotFound('Element not found')
            return []

        properties_response = await self._execute_command(
            RuntimeCommands.get_properties(object_id=collection_id)
        )

        # Keep only the properties whose value is an object that carries an
        # objectId; other values (and object values without an id) cannot be
        # described as nodes.
        candidate_ids = []
        for prop in properties_response['result']['result']:
            prop_value = prop.get('value') or {}
            if prop_value.get('type') == 'object' and prop_value.get(
                'objectId'
            ):
                candidate_ids.append(prop_value['objectId'])

        elements = []
        for candidate_id in candidate_ids:
            try:
                node_description = await self._describe_node(
                    object_id=candidate_id
                )
            except KeyError:
                # The response had no node description for this object
                # (presumably it is not a DOM node); skip it.
                continue

            elements.append(
                self._build_element(candidate_id, node_description, by, value)
            )
        return elements

    def _build_element(
        self,
        object_id: str,
        node_description: dict,
        by: DomCommands.SelectorType,
        value: str,
    ):
        """
        Creates a web element from a node description.

        The node's attribute list is extended with the pair
        ``'tag_name', <lower-cased nodeName>`` before being handed to
        ``create_web_element``.

        Args:
            object_id (str): The object id of the node.
            node_description (dict): The description returned by
                ``_describe_node``.
            by (SelectorType): The type of selector used for the lookup.
            value (str): The selector value used for the lookup.

        Returns:
            The object returned by ``create_web_element``.
        """
        tag_name = node_description.get('nodeName', '').lower()
        attributes = [
            *node_description.get('attributes', []),
            'tag_name',
            tag_name,
        ]
        return create_web_element(
            object_id, self._connection_handler, by, value, attributes
        )

    async def _describe_node(self, object_id: str = '') -> dict:
        """
        Provides a detailed description of a specific node within the DOM.

        Args:
            object_id (str): The object id of the node to describe.

        Returns:
            dict: A dictionary containing the detailed description of the node.

        Raises:
            KeyError: If the response does not contain a node description.
        """
        response = await self._execute_command(
            DomCommands.describe_node(object_id=object_id)
        )
        return response['result']['node']

    async def _execute_command(self, command: dict) -> dict:
        """
        Executes a command through the connection handler.

        Args:
            command (dict): The command to execute.

        Returns:
            dict: The result of the command execution.
        """
        return await self._connection_handler.execute_command(
            command, timeout=60
        )
|
pydoll/utils.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import aiohttp
|
|
5
|
+
|
|
6
|
+
from pydoll import exceptions
|
|
7
|
+
|
|
8
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def decode_image_to_bytes(image: str) -> bytes:
    """
    Turns a base64-encoded image into its raw bytes.

    Args:
        image (str): The image, encoded as a base64 string.

    Returns:
        bytes: The binary content obtained by decoding the string.
    """
    raw_image = base64.b64decode(image)
    return raw_image
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
async def get_browser_ws_address(port: int) -> str:
    """
    Fetches the WebSocket address for the browser instance.

    The address is read from the ``webSocketDebuggerUrl`` field of the JSON
    document served at ``http://localhost:<port>/json/version``.

    Args:
        port (int): The local port on which the browser serves that
            endpoint.

    Returns:
        str: The WebSocket address for the browser.

    Raises:
        NetworkError: If the HTTP request fails (connection error or
            error status code).
        InvalidResponse: If the response does not contain the
            ``webSocketDebuggerUrl`` field.
    """
    version_url = f'http://localhost:{port}/json/version'
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(version_url) as response:
                response.raise_for_status()
                data = await response.json()
                return data['webSocketDebuggerUrl']

    except aiohttp.ClientError as e:
        # Chain the original error so the root cause stays in the traceback.
        raise exceptions.NetworkError(
            f'Failed to get browser ws address: {e}'
        ) from e

    except KeyError as e:
        raise exceptions.InvalidResponse(
            f'Failed to get browser ws address: {e}'
        ) from e
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright © 2025 <copyright holders>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pydoll-python
|
|
3
|
+
Version: 1.2.0
|
|
4
|
+
Summary:
|
|
5
|
+
Author: Thalison Fernandes
|
|
6
|
+
Author-email: thalissfernandes99@gmail.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Dist: aiofiles (>=23.2.1,<24.0.0)
|
|
13
|
+
Requires-Dist: aiohttp (>=3.9.5,<4.0.0)
|
|
14
|
+
Requires-Dist: bs4 (>=0.0.2,<0.0.3)
|
|
15
|
+
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
16
|
+
Requires-Dist: websockets (>=13.1,<14.0)
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
<p align="center">
|
|
21
|
+
<h1>🚀 Pydoll: Async Web Automation in Python!</h1>
|
|
22
|
+
</p>
|
|
23
|
+
<br>
|
|
24
|
+
<p align="center">
|
|
25
|
+
<img src="https://github.com/user-attachments/assets/c4615101-d932-4e79-8a08-f50fbc686e3b" alt="Alt text" />
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
<p align="center">
|
|
29
|
+
<img src="https://codecov.io/github/thalissonvs/pydoll/graph/badge.svg?token=40I938OGM9"/>
|
|
30
|
+
<img src="https://github.com/thalissonvs/pydoll/actions/workflows/tests.yml/badge.svg" alt="Tests">
|
|
31
|
+
<img src="https://github.com/thalissonvs/pydoll/actions/workflows/ruff-ci.yml/badge.svg" alt="Ruff CI">
|
|
32
|
+
<img src="https://github.com/thalissonvs/pydoll/actions/workflows/release.yml/badge.svg" alt="Release">
|
|
33
|
+
<img src="https://tokei.rs/b1/github/thalissonvs/pydoll" alt="Total lines">
|
|
34
|
+
<img src="https://tokei.rs/b1/github/thalissonvs/pydoll?category=files" alt="Files">
|
|
35
|
+
<img src="https://tokei.rs/b1/github/thalissonvs/pydoll?category=comments" alt="Comments">
|
|
36
|
+
<img src="https://img.shields.io/github/issues/thalissonvs/pydoll?label=Issues" alt="GitHub issues">
|
|
37
|
+
<img src="https://img.shields.io/github/issues-closed/thalissonvs/pydoll?label=Closed issues" alt="GitHub closed issues">
|
|
38
|
+
<img src="https://img.shields.io/github/issues/thalissonvs/pydoll/bug?label=Bugs&color=red" alt="GitHub bug issues">
|
|
39
|
+
<img src="https://img.shields.io/github/issues/thalissonvs/pydoll/enhancement?label=Enhancements&color=purple" alt="GitHub enhancement issues">
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
Pydoll is an innovative Python library that's redefining Chromium browser automation! Unlike other solutions, Pydoll **completely eliminates the need for webdrivers**, providing a much more fluid and reliable automation experience.
|
|
44
|
+
|
|
45
|
+
## ✨ Extraordinary Features
|
|
46
|
+
|
|
47
|
+
- **Zero Webdrivers!** Say goodbye to webdriver compatibility and configuration headaches
|
|
48
|
+
- **Native Captcha Bypass!** Naturally passes through Cloudflare Turnstile and reCAPTCHA v3
|
|
49
|
+
- **Performance** thanks to native asynchronous programming
|
|
50
|
+
- **Realistic Interactions** that simulate human behavior
|
|
51
|
+
- **Advanced Event System** for complex and reactive automations
|
|
52
|
+
|
|
53
|
+
## Table of Contents
|
|
54
|
+
|
|
55
|
+
- [Installation](#-installation)
|
|
56
|
+
- [Quick Start](#-quick-start)
|
|
57
|
+
- [Core Components](#-core-components)
|
|
58
|
+
- [Browser Interface](#browser-interface)
|
|
59
|
+
- [Page Interface](#page-interface)
|
|
60
|
+
- [WebElement Interface](#webelement-interface)
|
|
61
|
+
- [Advanced Features](#-advanced-features)
|
|
62
|
+
- [Event System](#event-system)
|
|
63
|
+
- [Concurrent Scraping](#concurrent-scraping)
|
|
64
|
+
- [Proxy Configuration](#proxy-configuration)
|
|
65
|
+
|
|
66
|
+
## 📥 Installation
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install git+https://github.com/thalissonvs/pydoll.git
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## ⚡ Quick Start
|
|
73
|
+
|
|
74
|
+
See how simple it is to get started - no webdriver configuration needed!
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import asyncio
|
|
78
|
+
from pydoll.browser.chrome import Chrome
|
|
79
|
+
from pydoll.constants import By
|
|
80
|
+
|
|
81
|
+
async def main():
|
|
82
|
+
# Start the browser with no additional webdriver configuration!
|
|
83
|
+
async with Chrome() as browser:
|
|
84
|
+
await browser.start()
|
|
85
|
+
page = await browser.get_page()
|
|
86
|
+
|
|
87
|
+
# Navigate through captcha-protected sites without worry
|
|
88
|
+
await page.go_to('https://example-with-cloudflare.com')
|
|
89
|
+
button = await page.find_element(By.CSS_SELECTOR, 'button')
|
|
90
|
+
await button.click()
|
|
91
|
+
|
|
92
|
+
asyncio.run(main())
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## 🎯 Core Components
|
|
96
|
+
|
|
97
|
+
### Browser Interface
|
|
98
|
+
|
|
99
|
+
Powerful interface for global browser control:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
async def browser_examples():
|
|
103
|
+
async with Chrome() as browser:
|
|
104
|
+
await browser.start()
|
|
105
|
+
# Control multiple pages with incredible ease
|
|
106
|
+
pages = [await browser.get_page() for _ in range(3)]
|
|
107
|
+
|
|
108
|
+
# Advanced settings with a simple command
|
|
109
|
+
await browser.set_window_maximized()
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Page Interface
|
|
113
|
+
|
|
114
|
+
Individual page control with surgical precision:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
async def page_examples():
|
|
118
|
+
page = await browser.get_page()
|
|
119
|
+
|
|
120
|
+
# Smooth navigation, even on protected sites
|
|
121
|
+
await page.go_to('https://site-with-recaptcha.com')
|
|
122
|
+
|
|
123
|
+
# Capture perfect screenshots
|
|
124
|
+
await page.get_screenshot('/screenshots/evidence.png')
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### WebElement Interface
|
|
128
|
+
|
|
129
|
+
Interact with elements like a real user:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
async def element_examples():
|
|
133
|
+
# Natural and precise interactions
|
|
134
|
+
input_field = await page.find_element(By.CSS_SELECTOR, 'input')
|
|
135
|
+
await input_field.type_keys('Hello World') # Realistic typing!
|
|
136
|
+
|
|
137
|
+
# Intuitive chained operations
|
|
138
|
+
dropdown = await page.find_element(By.CSS_SELECTOR, 'select')
|
|
139
|
+
await dropdown.select_option('value')
|
|
140
|
+
|
|
141
|
+
# Realistic clicks with offset
|
|
142
|
+
button = await page.find_element(By.CSS_SELECTOR, 'button')
|
|
143
|
+
await button.click(x_offset=5, y_offset=10)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## 🚀 Advanced Features
|
|
147
|
+
|
|
148
|
+
### Event System
|
|
149
|
+
|
|
150
|
+
Powerful event system for intelligent automation:
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from pydoll.events.page import PageEvents
|
|
154
|
+
|
|
155
|
+
async def event_example():
|
|
156
|
+
await page.enable_page_events()
|
|
157
|
+
# React to events in real-time!
|
|
158
|
+
await page.on(PageEvents.PAGE_LOADED,
|
|
159
|
+
lambda e: print('Page loaded successfully!'))
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Concurrent Scraping
|
|
163
|
+
|
|
164
|
+
Scrape multiple pages simultaneously with extraordinary performance:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
async def concurrent_example():
|
|
168
|
+
pages = [await browser.get_page() for _ in range(10)]
|
|
169
|
+
# Parallel scraping with intelligent resource management
|
|
170
|
+
results = await asyncio.gather(
|
|
171
|
+
*(scrape_page(page) for page in pages)
|
|
172
|
+
)
|
|
173
|
+
# Just declare the scrape_page method and watch the magic happen!
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Proxy Configuration
|
|
177
|
+
|
|
178
|
+
Robust proxy support, including authentication:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
async def proxy_example():
|
|
182
|
+
options = Options()
|
|
183
|
+
# Private or public proxies, you choose!
|
|
184
|
+
options.add_argument('--proxy-server=username:password@ip:port')
|
|
185
|
+
|
|
186
|
+
async with Chrome(options=options) as browser:
|
|
187
|
+
await browser.start()
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
For exploring all available methods and additional features, check out:
|
|
192
|
+
- Browser interface: [pydoll/browser/base.py](./pydoll/browser/base.py)
|
|
193
|
+
- Page interface: [pydoll/browser/page.py](./pydoll/browser/page.py)
|
|
194
|
+
- WebElement interface: [pydoll/element.py](./pydoll/element.py)
|
|
195
|
+
- Chrome options: [Chromium Command Line Switches](https://peter.sh/experiments/chromium-command-line-switches/)
|
|
196
|
+
|
|
197
|
+
## 🎉 Start Now!
|
|
198
|
+
|
|
199
|
+
Feel free to use it, open issues, and contribute!
|
|
200
|
+
|