cdipy 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdipy-0.4.3/LICENSE.md +21 -0
- cdipy-0.4.3/PKG-INFO +90 -0
- cdipy-0.4.3/README.md +73 -0
- cdipy-0.4.3/cdipy/__init__.py +7 -0
- cdipy-0.4.3/cdipy/cdipy.py +196 -0
- cdipy-0.4.3/cdipy/chrome.py +120 -0
- cdipy-0.4.3/cdipy/exceptions.py +10 -0
- cdipy-0.4.3/cdipy/protocol.py +102 -0
- cdipy-0.4.3/cdipy/utils.py +53 -0
- cdipy-0.4.3/cdipy.egg-info/PKG-INFO +90 -0
- cdipy-0.4.3/cdipy.egg-info/SOURCES.txt +15 -0
- cdipy-0.4.3/cdipy.egg-info/dependency_links.txt +1 -0
- cdipy-0.4.3/cdipy.egg-info/requires.txt +4 -0
- cdipy-0.4.3/cdipy.egg-info/top_level.txt +1 -0
- cdipy-0.4.3/pyproject.toml +3 -0
- cdipy-0.4.3/setup.cfg +24 -0
cdipy-0.4.3/LICENSE.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Pilate
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cdipy-0.4.3/PKG-INFO
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cdipy
|
|
3
|
+
Version: 0.4.3
|
|
4
|
+
Summary: An async Python interface for Chrome Devtools
|
|
5
|
+
Home-page: https://github.com/pilate/cdipy
|
|
6
|
+
Author: Pilate
|
|
7
|
+
Author-email: pilate@pilate.es
|
|
8
|
+
License: MIT License
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE.md
|
|
12
|
+
Requires-Dist: aiohttp<3.13.0
|
|
13
|
+
Requires-Dist: msgspec<0.20
|
|
14
|
+
Requires-Dist: pyee<13.0.0
|
|
15
|
+
Requires-Dist: websockets<=15
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
|
|
18
|
+
# cdipy
|
|
19
|
+
Chrome Devtools Interface that instruments Chrome via the [devtools protocol](https://github.com/ChromeDevTools/devtools-protocol).
|
|
20
|
+
|
|
21
|
+
Meant to serve as a pythonic version of [chrome-remote-interface](https://github.com/cyrus-and/chrome-remote-interface).
|
|
22
|
+
|
|
23
|
+
### Example Usage
|
|
24
|
+
```python
|
|
25
|
+
|
|
26
|
+
import asyncio
|
|
27
|
+
import base64
|
|
28
|
+
import logging
|
|
29
|
+
import sys
|
|
30
|
+
|
|
31
|
+
from cdipy import ChromeDevTools
|
|
32
|
+
from cdipy import ChromeDevToolsTarget
|
|
33
|
+
from cdipy import ChromeRunner
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
LOGGER = logging.getLogger("cdipy.scripts.screenshot")
|
|
37
|
+
FILENAME = "screenshot.png"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def async_main(url):
|
|
41
|
+
# Start Chrome
|
|
42
|
+
chrome = ChromeRunner()
|
|
43
|
+
await chrome.launch()
|
|
44
|
+
|
|
45
|
+
# Connect to devtools websocket
|
|
46
|
+
cdi = ChromeDevTools(chrome.websocket_uri)
|
|
47
|
+
await cdi.connect()
|
|
48
|
+
|
|
49
|
+
# Create a new target and attach to it
|
|
50
|
+
target = await cdi.Target.createTarget(url="about:blank")
|
|
51
|
+
session = await cdi.Target.attachToTarget(targetId=target["targetId"])
|
|
52
|
+
|
|
53
|
+
# Create a ChromeDevToolsTarget class to handle target messages
|
|
54
|
+
cdit = ChromeDevToolsTarget(cdi, session["sessionId"])
|
|
55
|
+
|
|
56
|
+
# Enable 'Page' events
|
|
57
|
+
await cdit.Page.enable()
|
|
58
|
+
|
|
59
|
+
# Navigate to URL
|
|
60
|
+
LOGGER.info("Navigating to %s", url)
|
|
61
|
+
await cdit.Page.navigate(url=url)
|
|
62
|
+
|
|
63
|
+
# Wait for the Page.loadEventFired event
|
|
64
|
+
# This may not ever fire on some pages, so it's good to set a limit
|
|
65
|
+
try:
|
|
66
|
+
await cdit.wait_for("Page.loadEventFired", 10)
|
|
67
|
+
except asyncio.TimeoutError:
|
|
68
|
+
print("Loaded event never fired!")
|
|
69
|
+
|
|
70
|
+
# Take a screenshot
|
|
71
|
+
screenshot_response = await cdit.Page.captureScreenshot(format="png")
|
|
72
|
+
screenshot_bytes = base64.b64decode(screenshot_response["data"])
|
|
73
|
+
|
|
74
|
+
with open(FILENAME, "w+b") as fileobj:
|
|
75
|
+
fileobj.write(screenshot_bytes)
|
|
76
|
+
|
|
77
|
+
LOGGER.info("wrote %s", FILENAME)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def main():
|
|
81
|
+
logging.basicConfig(level=logging.INFO)
|
|
82
|
+
|
|
83
|
+
asyncio.run(async_main(sys.argv[1]))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
|
|
87
|
+
main()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
```
|
cdipy-0.4.3/README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# cdipy
|
|
2
|
+
Chrome Devtools Interface that instruments Chrome via the [devtools protocol](https://github.com/ChromeDevTools/devtools-protocol).
|
|
3
|
+
|
|
4
|
+
Meant to serve as a pythonic version of [chrome-remote-interface](https://github.com/cyrus-and/chrome-remote-interface).
|
|
5
|
+
|
|
6
|
+
### Example Usage
|
|
7
|
+
```python
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import base64
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
from cdipy import ChromeDevTools
|
|
15
|
+
from cdipy import ChromeDevToolsTarget
|
|
16
|
+
from cdipy import ChromeRunner
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
LOGGER = logging.getLogger("cdipy.scripts.screenshot")
|
|
20
|
+
FILENAME = "screenshot.png"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def async_main(url):
|
|
24
|
+
# Start Chrome
|
|
25
|
+
chrome = ChromeRunner()
|
|
26
|
+
await chrome.launch()
|
|
27
|
+
|
|
28
|
+
# Connect to devtools websocket
|
|
29
|
+
cdi = ChromeDevTools(chrome.websocket_uri)
|
|
30
|
+
await cdi.connect()
|
|
31
|
+
|
|
32
|
+
# Create a new target and attach to it
|
|
33
|
+
target = await cdi.Target.createTarget(url="about:blank")
|
|
34
|
+
session = await cdi.Target.attachToTarget(targetId=target["targetId"])
|
|
35
|
+
|
|
36
|
+
# Create a ChromeDevToolsTarget class to handle target messages
|
|
37
|
+
cdit = ChromeDevToolsTarget(cdi, session["sessionId"])
|
|
38
|
+
|
|
39
|
+
# Enable 'Page' events
|
|
40
|
+
await cdit.Page.enable()
|
|
41
|
+
|
|
42
|
+
# Navigate to URL
|
|
43
|
+
LOGGER.info("Navigating to %s", url)
|
|
44
|
+
await cdit.Page.navigate(url=url)
|
|
45
|
+
|
|
46
|
+
# Wait for the Page.loadEventFired event
|
|
47
|
+
# This may not ever fire on some pages, so it's good to set a limit
|
|
48
|
+
try:
|
|
49
|
+
await cdit.wait_for("Page.loadEventFired", 10)
|
|
50
|
+
except asyncio.TimeoutError:
|
|
51
|
+
print("Loaded event never fired!")
|
|
52
|
+
|
|
53
|
+
# Take a screenshot
|
|
54
|
+
screenshot_response = await cdit.Page.captureScreenshot(format="png")
|
|
55
|
+
screenshot_bytes = base64.b64decode(screenshot_response["data"])
|
|
56
|
+
|
|
57
|
+
with open(FILENAME, "w+b") as fileobj:
|
|
58
|
+
fileobj.write(screenshot_bytes)
|
|
59
|
+
|
|
60
|
+
LOGGER.info("wrote %s", FILENAME)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def main():
|
|
64
|
+
logging.basicConfig(level=logging.INFO)
|
|
65
|
+
|
|
66
|
+
asyncio.run(async_main(sys.argv[1]))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
if __name__ == "__main__":
|
|
70
|
+
main()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
```
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import typing
|
|
5
|
+
from itertools import count
|
|
6
|
+
|
|
7
|
+
import msgspec
|
|
8
|
+
import websockets.asyncio.client
|
|
9
|
+
import websockets.asyncio.connection
|
|
10
|
+
import websockets.exceptions
|
|
11
|
+
from pyee.asyncio import AsyncIOEventEmitter
|
|
12
|
+
|
|
13
|
+
from .exceptions import ResponseErrorException, UnknownMessageException
|
|
14
|
+
from .protocol import DOMAINS
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
LOGGER = logging.getLogger("cdipy.cdipy")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class MessageError(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """
    Typed view of the `error` member of a devtools message
    """

    message: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Message(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """
    Typed view of a single devtools protocol message

    Every field defaults to None. The fields are annotated as optional so
    the declared types agree with those defaults and an explicit JSON
    `null` is accepted by the typed decoder.
    """

    id: typing.Optional[int] = None
    method: typing.Optional[str] = None
    params: typing.Any = None
    result: typing.Any = None
    error: typing.Optional[MessageError] = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
MSG_DECODER = msgspec.json.Decoder(type=Message)
|
|
33
|
+
MSG_ENCODER = msgspec.json.Encoder()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DevtoolsEmitter(AsyncIOEventEmitter):
    """
    Event emitter that can hand out an awaitable for the next occurrence
    of an event
    """

    def __init__(self):
        super().__init__()

        self.loop = asyncio.get_event_loop()

    def wait_for(self, event: str, timeout: int = 0) -> typing.Awaitable:
        """
        Wait for a specific event to fire before returning

        The awaitable resolves to an `(args, kwargs)` tuple holding the
        arguments the event was emitted with. When `timeout` is truthy the
        future is wrapped in asyncio.wait_for; otherwise the bare future is
        returned.
        """
        future = self.loop.create_future()

        def update_future(*args, **kwargs):
            # The future may already be finished (for example cancelled by
            # the asyncio.wait_for timeout); calling set_result on it would
            # raise InvalidStateError while the event is being emitted.
            if not future.done():
                future.set_result((args, kwargs))

        self.once(event, update_future)
        if timeout:
            return asyncio.wait_for(future, timeout)

        return future
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class Devtools(DevtoolsEmitter):
    """
    Turns domain method calls into devtools commands and routes incoming
    messages to pending futures or event listeners

    Subclasses provide the transport by implementing `send`.
    """

    def __init__(self):
        super().__init__()

        # Command id -> future awaiting the response to that command
        self.futures = {}
        self.counter = count()

    def __getattr__(self, attr: str):
        """
        Load each domain on demand
        """
        if domain := DOMAINS.get(attr):
            # Store the instance so later lookups find it as a regular attribute
            setattr(self, attr, domain(self))

        return super().__getattribute__(attr)

    def format_command(self, method: str, **kwargs) -> dict:
        """
        Convert method name + arguments to a devtools command
        """
        return {"id": next(self.counter), "method": method, "params": kwargs}

    async def handle_message(self, message: str) -> None:
        """
        Match incoming message ids to self.futures
        Emit events for incoming methods

        Raises ResponseErrorException for an error message that carries no
        id, and UnknownMessageException for a message with none of id,
        method or error.
        """
        try:
            message_obj = MSG_DECODER.decode(message)
        except msgspec.DecodeError:
            message_obj = Message(**json.loads(message))

        if message_obj.id is not None:
            future = self.futures.pop(message_obj.id)
            if not future.cancelled():
                if error := message_obj.error:
                    future.set_exception(ResponseErrorException(error.message))
                else:
                    future.set_result(message_obj.result)

        elif message_obj.method:
            # `params` is None when the message carries no params member;
            # unpacking None with ** would raise TypeError
            self.emit(message_obj.method, **(message_obj.params or {}))

        elif message_obj.error:
            raise ResponseErrorException(message_obj.error.message)

        else:
            raise UnknownMessageException(f"Unknown message format: {message_obj}")

    async def execute_method(self, method: str, **kwargs) -> dict:
        """
        Called by the add_command wrapper with the method name and validated arguments

        Registers a future under the command id, sends the command and
        returns the result delivered to that future by handle_message.
        """
        command = self.format_command(method, **kwargs)
        command_id = command["id"]

        result_future = self.loop.create_future()
        self.futures[command_id] = result_future

        try:
            await self.send(command)
        except BaseException:
            # Nothing will answer a command that was never sent; drop the
            # future so it does not accumulate in self.futures
            self.futures.pop(command_id, None)
            raise

        return await result_future

    async def send(self, command):
        """
        Transmit a formatted command; implemented by subclasses
        """
        raise NotImplementedError
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class ChromeDevTools(Devtools):
    """
    Devtools client that exchanges messages with Chrome over a websocket
    """

    def __init__(self, websocket_uri: str):
        super().__init__()

        self.task: asyncio.Future | None = None
        self.ws_uri: str | None = websocket_uri
        self.websocket: websockets.asyncio.connection.Connection = None

    def __del__(self):
        # getattr: `task` is missing if __init__ did not run to completion
        if task := getattr(self, "task", None):
            task.cancel()

    async def connect(self, compression: str | None = None) -> None:
        """
        Open the websocket and start the background receive loop
        """
        self.websocket = await websockets.asyncio.client.connect(
            self.ws_uri,
            compression=compression,
            max_size=None,
            max_queue=None,
            write_limit=0,
            ping_interval=None,
        )
        self.task = asyncio.create_task(self._recv_loop())

    def _fail_pending(self, exc: BaseException) -> None:
        """
        Fail every unanswered command so its caller stops waiting
        """
        pending, self.futures = self.futures, {}
        for future in pending.values():
            if not future.done():
                future.set_exception(exc)

    async def _recv_loop(self):
        """
        Receive messages until the connection closes, passing each one to
        handle_message
        """
        while True:
            try:
                recv_data = await self.websocket.recv(decode=None)
                LOGGER.debug("recv: %s", recv_data)

            except websockets.exceptions.ConnectionClosed as exc:
                LOGGER.error("Websocket connection closed")
                # No response can arrive any more; without this, callers
                # awaiting a command result would wait forever
                self._fail_pending(exc)
                break

            await self.handle_message(recv_data)

    async def send(self, command: dict) -> None:
        """
        Encode `command` as JSON and send it as a text frame
        """
        LOGGER.debug("send: %s", command)
        await self.websocket.send(MSG_ENCODER.encode(command), text=True)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class ChromeDevToolsTarget(Devtools):  # pylint: disable=abstract-method
    """
    Devtools interface for one attached target session

    Commands are wrapped in Target.sendMessageToTarget calls on the parent
    ChromeDevTools object, and messages are received through its
    Target.receivedMessageFromTarget events.
    """

    def __init__(self, devtools: ChromeDevTools, session: str):
        super().__init__()

        self.devtools = devtools
        self.devtools.on("Target.receivedMessageFromTarget", self._target_recv)

        self.session = session

    async def _target_recv(
        self, sessionId, message, **_
    ):  # pylint: disable=invalid-name
        """
        Handle a target message if it belongs to this session
        """
        # The parent emits this event for every session; ignore the others
        if sessionId != self.session:
            return

        await self.handle_message(message)

    async def execute_method(self, method: str, **kwargs):
        """
        Target commands are in the same format, but sent as a parameter to
        the sendMessageToTarget method
        """
        command = self.format_command(method, **kwargs)
        command_id = command["id"]

        result_future = self.loop.create_future()
        self.futures[command_id] = result_future

        message = MSG_ENCODER.encode(command).decode()
        try:
            await self.devtools.Target.sendMessageToTarget(
                message=message, sessionId=self.session
            )
        except BaseException:
            # The command did not go through, so no response will arrive;
            # drop the future instead of leaving it in self.futures
            self.futures.pop(command_id, None)
            raise

        return await result_future
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import signal
|
|
6
|
+
from asyncio import subprocess
|
|
7
|
+
from tempfile import TemporaryDirectory
|
|
8
|
+
|
|
9
|
+
from .exceptions import ChromeClosedException
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
LOGGER = logging.getLogger("cdipy.chrome")
|
|
13
|
+
|
|
14
|
+
CHROME_PATH = os.environ.get("CDIPY_CHROME_PATH", "/usr/bin/google-chrome-stable")
|
|
15
|
+
CHROME_PARAMS = [
|
|
16
|
+
"--disable-background-networking",
|
|
17
|
+
"--enable-features=NetworkService,NetworkServiceInProcess",
|
|
18
|
+
"--disable-background-timer-throttling",
|
|
19
|
+
"--disable-backgrounding-occluded-windows",
|
|
20
|
+
"--disable-breakpad",
|
|
21
|
+
"--disable-client-side-phishing-detection",
|
|
22
|
+
"--disable-component-extensions-with-background-pages",
|
|
23
|
+
"--disable-component-update",
|
|
24
|
+
"--disable-default-apps",
|
|
25
|
+
"--disable-domain-reliability",
|
|
26
|
+
"--disable-extensions",
|
|
27
|
+
"--disable-features=CalculateNativeWinOcclusion,InterestFeedContentSuggestions,Translate",
|
|
28
|
+
"--disable-hang-monitor",
|
|
29
|
+
"--disable-ipc-flooding-protection",
|
|
30
|
+
"--disable-popup-blocking",
|
|
31
|
+
"--disable-prompt-on-repost",
|
|
32
|
+
"--disable-renderer-backgrounding",
|
|
33
|
+
"--disable-sync",
|
|
34
|
+
"--enable-automation",
|
|
35
|
+
"--force-color-profile=srgb",
|
|
36
|
+
"--metrics-recording-only",
|
|
37
|
+
"--no-first-run",
|
|
38
|
+
"--ash-no-nudges",
|
|
39
|
+
"--disable-search-engine-choice-screen",
|
|
40
|
+
"--propagate-iph-for-testing",
|
|
41
|
+
"--no-default-browser-check",
|
|
42
|
+
"--password-store=basic",
|
|
43
|
+
"--remote-debugging-port=0",
|
|
44
|
+
"--use-mock-keychain",
|
|
45
|
+
"--enable-blink-features=IdleDetection",
|
|
46
|
+
"--disable-gpu",
|
|
47
|
+
"--hide-scrollbars",
|
|
48
|
+
"--mute-audio",
|
|
49
|
+
]
|
|
50
|
+
if not os.environ.get("CDIPY_USE_SHM"):
|
|
51
|
+
CHROME_PARAMS.append("--disable-dev-shm-usage")
|
|
52
|
+
|
|
53
|
+
WS_RE = re.compile(r"listening on (ws://[^ ]*)")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ChromeRunner:
    """
    Launches a Chrome process and discovers its devtools websocket URI
    """

    def __init__(self, proxy: str = None):
        super().__init__()

        self.proxy = proxy

        self.data_dir = TemporaryDirectory()  # pylint: disable=consider-using-with

        self.proc = None
        self.websocket_uri = None

    def __del__(self):
        """
        Kill the chrome we launched and all child processes
        """
        # getattr: `proc` is missing if __init__ did not run to completion
        proc = getattr(self, "proc", None)
        if proc and proc.pid:
            try:
                os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
            except ProcessLookupError:
                pass

    async def _raise_closed(self, output: str) -> None:
        """
        Raise ChromeClosedException with the exit code and everything Chrome
        printed, including the lines already consumed by launch()
        """
        remaining = await self.proc.stdout.read()
        stderr = output.encode() + remaining
        raise ChromeClosedException(
            f"Chrome closed unexpectedly; code: {self.proc.returncode} ({stderr})"
        )

    async def launch(
        self,
        chrome_path: str = CHROME_PATH,
        extra_args: list = None,
        headless: str = "new",
    ) -> None:
        """
        Start Chrome and wait until it prints its devtools websocket URI

        The URI is stored in `self.websocket_uri`. Raises
        ChromeClosedException if Chrome exits, or closes its output, before
        the URI is seen.
        """
        command = [
            chrome_path,
            *CHROME_PARAMS,
            f"--headless={headless}",
            f"--user-data-dir={self.data_dir.name}",
        ]

        if extra_args:
            command.extend(extra_args)

        if self.proxy:
            command.append(f"--proxy-server={self.proxy}")

        self.proc = await asyncio.create_subprocess_exec(
            *command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            preexec_fn=os.setsid,
        )

        output = ""
        while True:
            if self.proc.returncode is not None:
                await self._raise_closed(output)

            data = await self.proc.stdout.readline()
            if not data:
                # EOF: readline() now returns b"" immediately, so looping
                # again would spin without yielding to the event loop.
                # Wait for the exit status and report the failure instead.
                await self.proc.wait()
                await self._raise_closed(output)

            # Chrome's output is not guaranteed to be valid UTF-8
            line = data.decode(errors="replace")
            output += line

            search = WS_RE.search(line)
            if search:
                break

        self.websocket_uri = search.group(1).strip()
        LOGGER.info("Parsed websocket URI: %s", self.websocket_uri)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import inspect
|
|
3
|
+
import os
|
|
4
|
+
import types
|
|
5
|
+
|
|
6
|
+
import msgspec.json
|
|
7
|
+
|
|
8
|
+
from .utils import get_cache_path, update_protocol_data
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
SKIP_VALIDATION = os.environ.get("CDIPY_SKIP_VALIDATION", False)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DomainBase:  # pylint: disable=too-few-public-methods
    """
    Base for the generated per-domain classes (ex: obj.Page)

    Keeps a reference to the devtools object that the domain's command
    methods send through.
    """

    __slots__ = ("devtools",)

    def __init__(self, devtools):
        self.devtools = devtools
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def params_to_signature(params):
    """
    Build an inspect.Signature from a list of protocol parameter descriptions

    Each entry supplies a "name"; entries flagged "optional" get a default
    of None. Required parameters are placed ahead of optional ones, and
    each group keeps its original relative order.
    """
    required = []
    optional = []

    for spec in params:
        if spec.get("optional"):
            bucket, fallback = optional, None
        else:
            bucket, fallback = required, inspect.Parameter.empty

        bucket.append(
            inspect.Parameter(
                name=spec["name"],
                kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
                default=fallback,
            )
        )

    return inspect.Signature(parameters=required + optional)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def add_command(domain_class, command):
    """
    Attach an async method for `command` to `domain_class`

    The method is stored under the command's name and forwards its keyword
    arguments to `self.devtools.execute_method` as "<Domain>.<command>".
    """
    name = command["name"]
    qualified = f"{domain_class.__name__}.{name}"
    signature = params_to_signature(command.get("parameters", []))

    async def wrapper(self, **kwargs):
        """
        Check the keyword arguments against `signature` unless validation
        is skipped, then hand them to execute_method
        """
        if SKIP_VALIDATION:
            validated = kwargs
        else:
            validated = signature.bind(**kwargs).arguments

        return await self.devtools.execute_method(qualified, **validated)

    wrapper.__name__ = qualified
    wrapper.__qualname__ = qualified

    setattr(domain_class, name, wrapper)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def load_domains():
    """
    Build a mapping of domain name -> domain class from the cached protocol files

    Creates the cache directory if needed and downloads the protocol data
    when the directory is empty. Every file in the directory is decoded as
    JSON; each entry of its "domains" list becomes a DomainBase subclass
    with one method per command.
    """
    cache_path = get_cache_path()

    # exist_ok avoids a FileExistsError when another process creates the
    # directory between a separate existence check and this call
    os.makedirs(cache_path, mode=0o744, exist_ok=True)

    if not os.listdir(cache_path):
        asyncio.get_event_loop().run_until_complete(update_protocol_data())

    domains = {}
    for filename in os.listdir(cache_path):
        with open(cache_path / filename, "rb") as fp:
            data = msgspec.json.decode(fp.read())

        for domain in data.get("domains", []):
            domain_name = domain["domain"]

            # Create a new class for each domain
            domain_class = types.new_class(domain_name, (DomainBase,))

            # Add each command to the domain class
            for command in domain.get("commands", []):
                add_command(domain_class, command)

            domains[domain_name] = domain_class

    return domains
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
DOMAINS = load_domains()
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import aiohttp
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
LOGGER = logging.getLogger("cdipy.utils")
|
|
10
|
+
|
|
11
|
+
ROOT = "https://raw.githubusercontent.com/ChromeDevTools/devtools-protocol/master/json"
|
|
12
|
+
SOURCE_FILES = [f"{ROOT}/browser_protocol.json", f"{ROOT}/js_protocol.json"]
|
|
13
|
+
OS_VAR = "CDIPY_CACHE"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_cache_path() -> Path:
    """
    Resolve the directory used to cache protocol definitions

    Order of preference: the directory named by the OS_VAR environment
    variable, then "python-cdipy" under $XDG_CACHE_HOME or $HOME/.cache,
    then ".cache" next to this module. A computed path is written back to
    the OS_VAR environment variable before it is returned.
    """
    if configured := os.environ.get(OS_VAR):
        return Path(configured)

    base_dir = os.getenv("XDG_CACHE_HOME")
    if not base_dir:
        home_dir = os.getenv("HOME")
        if home_dir:
            base_dir = os.path.join(home_dir, ".cache")

    if not base_dir:
        full_path = os.path.join(os.path.dirname(__file__), ".cache")
    else:
        full_path = os.path.join(base_dir, "python-cdipy")

    os.environ[OS_VAR] = full_path
    return Path(full_path)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def update_protocol_data() -> None:
    """
    Download latest protocol definition

    Fetches every URL in SOURCE_FILES concurrently and writes each body
    into the cache directory under the URL's file name. A response with an
    HTTP error status raises instead of being written to the cache.
    """
    cache_path = get_cache_path()

    # raise_for_status: without it an error page would be stored as if it
    # were protocol JSON
    async with aiohttp.ClientSession(raise_for_status=True) as session:
        requests = []
        for url in SOURCE_FILES:
            LOGGER.warning("Downloading %s", url)
            requests.append(session.get(url))

        responses = await asyncio.gather(*requests)
        for response in responses:
            new_path = cache_path / response.url.name
            # Read the body before opening the file so a failed read does
            # not leave an empty or truncated cache file behind
            body = await response.read()
            with open(new_path, "w+b") as fp:
                fp.write(body)
            LOGGER.warning("Wrote %s", new_path)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cdipy
|
|
3
|
+
Version: 0.4.3
|
|
4
|
+
Summary: An async Python interface for Chrome Devtools
|
|
5
|
+
Home-page: https://github.com/pilate/cdipy
|
|
6
|
+
Author: Pilate
|
|
7
|
+
Author-email: pilate@pilate.es
|
|
8
|
+
License: MIT License
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE.md
|
|
12
|
+
Requires-Dist: aiohttp<3.13.0
|
|
13
|
+
Requires-Dist: msgspec<0.20
|
|
14
|
+
Requires-Dist: pyee<13.0.0
|
|
15
|
+
Requires-Dist: websockets<=15
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
|
|
18
|
+
# cdipy
|
|
19
|
+
Chrome Devtools Interface that instruments Chrome via the [devtools protocol](https://github.com/ChromeDevTools/devtools-protocol).
|
|
20
|
+
|
|
21
|
+
Meant to serve as a pythonic version of [chrome-remote-interface](https://github.com/cyrus-and/chrome-remote-interface).
|
|
22
|
+
|
|
23
|
+
### Example Usage
|
|
24
|
+
```python
|
|
25
|
+
|
|
26
|
+
import asyncio
|
|
27
|
+
import base64
|
|
28
|
+
import logging
|
|
29
|
+
import sys
|
|
30
|
+
|
|
31
|
+
from cdipy import ChromeDevTools
|
|
32
|
+
from cdipy import ChromeDevToolsTarget
|
|
33
|
+
from cdipy import ChromeRunner
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
LOGGER = logging.getLogger("cdipy.scripts.screenshot")
|
|
37
|
+
FILENAME = "screenshot.png"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def async_main(url):
|
|
41
|
+
# Start Chrome
|
|
42
|
+
chrome = ChromeRunner()
|
|
43
|
+
await chrome.launch()
|
|
44
|
+
|
|
45
|
+
# Connect to devtools websocket
|
|
46
|
+
cdi = ChromeDevTools(chrome.websocket_uri)
|
|
47
|
+
await cdi.connect()
|
|
48
|
+
|
|
49
|
+
# Create a new target and attach to it
|
|
50
|
+
target = await cdi.Target.createTarget(url="about:blank")
|
|
51
|
+
session = await cdi.Target.attachToTarget(targetId=target["targetId"])
|
|
52
|
+
|
|
53
|
+
# Create a ChromeDevToolsTarget class to handle target messages
|
|
54
|
+
cdit = ChromeDevToolsTarget(cdi, session["sessionId"])
|
|
55
|
+
|
|
56
|
+
# Enable 'Page' events
|
|
57
|
+
await cdit.Page.enable()
|
|
58
|
+
|
|
59
|
+
# Navigate to URL
|
|
60
|
+
LOGGER.info("Navigating to %s", url)
|
|
61
|
+
await cdit.Page.navigate(url=url)
|
|
62
|
+
|
|
63
|
+
# Wait for the Page.loadEventFired event
|
|
64
|
+
# This may not ever fire on some pages, so it's good to set a limit
|
|
65
|
+
try:
|
|
66
|
+
await cdit.wait_for("Page.loadEventFired", 10)
|
|
67
|
+
except asyncio.TimeoutError:
|
|
68
|
+
print("Loaded event never fired!")
|
|
69
|
+
|
|
70
|
+
# Take a screenshot
|
|
71
|
+
screenshot_response = await cdit.Page.captureScreenshot(format="png")
|
|
72
|
+
screenshot_bytes = base64.b64decode(screenshot_response["data"])
|
|
73
|
+
|
|
74
|
+
with open(FILENAME, "w+b") as fileobj:
|
|
75
|
+
fileobj.write(screenshot_bytes)
|
|
76
|
+
|
|
77
|
+
LOGGER.info("wrote %s", FILENAME)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def main():
|
|
81
|
+
logging.basicConfig(level=logging.INFO)
|
|
82
|
+
|
|
83
|
+
asyncio.run(async_main(sys.argv[1]))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
|
|
87
|
+
main()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
```
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE.md
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.cfg
|
|
5
|
+
cdipy/__init__.py
|
|
6
|
+
cdipy/cdipy.py
|
|
7
|
+
cdipy/chrome.py
|
|
8
|
+
cdipy/exceptions.py
|
|
9
|
+
cdipy/protocol.py
|
|
10
|
+
cdipy/utils.py
|
|
11
|
+
cdipy.egg-info/PKG-INFO
|
|
12
|
+
cdipy.egg-info/SOURCES.txt
|
|
13
|
+
cdipy.egg-info/dependency_links.txt
|
|
14
|
+
cdipy.egg-info/requires.txt
|
|
15
|
+
cdipy.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cdipy
|
cdipy-0.4.3/setup.cfg
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[metadata]
|
|
2
|
+
name = cdipy
|
|
3
|
+
version = 0.4.3
|
|
4
|
+
author = Pilate
|
|
5
|
+
author_email = pilate@pilate.es
|
|
6
|
+
url = https://github.com/pilate/cdipy
|
|
7
|
+
description = An async Python interface for Chrome Devtools
|
|
8
|
+
long_description = file: README.md
|
|
9
|
+
long_description_content_type = text/markdown
|
|
10
|
+
license = MIT License
|
|
11
|
+
|
|
12
|
+
[options]
|
|
13
|
+
packages = find:
|
|
14
|
+
python_requires = >=3.8
|
|
15
|
+
install_requires =
|
|
16
|
+
aiohttp<3.13.0
|
|
17
|
+
msgspec<0.20
|
|
18
|
+
pyee<13.0.0
|
|
19
|
+
websockets<=15
|
|
20
|
+
|
|
21
|
+
[egg_info]
|
|
22
|
+
tag_build =
|
|
23
|
+
tag_date = 0
|
|
24
|
+
|