browser-toolkit 0.0.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,408 @@
1
import functools
import json
import time
from collections.abc import Callable
from dataclasses import dataclass
from enum import StrEnum
from http.cookies import SimpleCookie
from random import uniform
from typing import Type, Union

from selenium.common.exceptions import (
    InvalidSessionIdException,
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chromium.webdriver import ChromiumDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webdriver import WebDriver, WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from browser_toolkit.selenium_toolkit.utils import create_locator
23
+
24
+
25
+ class RequestType(StrEnum):
26
+ DOCUMENT = "Document"
27
+ XHR = "XHR"
28
+ IMAGE = "Image"
29
+ SCRIPT = "Script"
30
+ STYLESHEET = "Stylesheet"
31
+ FONT = "Font"
32
+ FETCH = "Fetch"
33
+ OTHER = "Other"
34
+
35
+
36
@dataclass
class Redirect:
    """Redirect information recorded for a captured request."""

    # URL taken from the ``redirectResponse`` entry of the network event.
    url: str
39
+
40
+
41
@dataclass
class Request:
    """One network request reconstructed from the browser performance log.

    Instances are produced by ``SeleniumToolKit.get_requests``.
    """

    # URL from the ``Network.requestWillBeSent`` event; ``None`` when no such
    # event was logged for this request id.
    url: str | None
    # DevTools ``requestId`` shared by all events of the same request.
    request_id: str
    # Cookies parsed with ``http.cookies.SimpleCookie`` (name -> Morsel).
    cookies: dict
    # Headers reported by ``Network.requestWillBeSentExtraInfo``.
    headers: dict
    # Set when the request followed a redirect whose URL matched the search.
    redirect: Redirect | None = None
    # Resource type of the request, when one was reported.
    type: RequestType | None = None
49
+
50
+
51
def auto_wait(func: Callable) -> Callable:
    """Decorate a method so every call is preceded by a random pause.

    The pause length is drawn uniformly from the ``(low, high)`` pair stored
    in ``_wait_time_range`` on the object the method is called on.

    :param func: method to wrap; its first positional argument must expose
        ``_wait_time_range``
    :return: wrapper with the metadata of ``func`` that sleeps, then
        delegates to ``func`` and returns its result
    """

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        low, high = self._wait_time_range
        time.sleep(uniform(low, high))
        return func(self, *args, **kwargs)

    return wrapper
61
+
62
+
63
class SeleniumToolKit:
    """Convenience wrapper around a Selenium WebDriver instance.

    Groups element lookup, explicit waits, simple page actions and
    Chrome DevTools Protocol (CDP) helpers for inspecting network traffic.
    Selectors starting with ``/`` are treated as XPath, anything else as CSS.
    """

    # (low, high) bounds, in seconds, of the random pause that ``auto_wait``
    # inserts before decorated actions. (0, 0) means no pause.
    _wait_time_range = (0, 0)

    def __init__(self, driver):
        """
        :param driver: an already created Selenium WebDriver
        """
        self.__driver: Union[WebDriver, ChromiumDriver] = driver

    @property
    def driver(self) -> Union[WebDriver, ChromiumDriver]:
        """The wrapped WebDriver."""
        return self.__driver

    def change_wait_time(self, range_time: tuple = (0, 0)):
        """
        Set the bounds of the random pause applied before decorated actions.

        :param range_time: ``(first, last)`` in seconds, with ``0 <= first <= last``
        :raises ValueError: if the bounds are negative or out of order
        """
        first, last = range_time

        if not 0 <= first <= last:
            raise ValueError("range_time must be a tuple with positive values")

        self._wait_time_range = range_time

    @auto_wait
    def goto(self, url: str) -> None:
        """Navigate the browser to ``url``."""
        self.__driver.get(url=url)

    # ------------------------------------------------------------------
    # Element lookup
    # ------------------------------------------------------------------

    def query_selector(self, query_selector: str, web_element: WebElement = None) -> WebElement:
        """
        Find the first element matching a selector.

        :param query_selector: css or xpath
        :param web_element: If None is passed will perform the query selector in the whole page
        :raises ValueError: if ``query_selector`` is empty
        """
        if not query_selector:
            raise ValueError("You need send a query_selector")

        target = web_element or self.__driver
        return target.find_element(*create_locator(query_selector))

    def query_selector_all(self, query_selector: str, web_element: WebElement = None) -> list[WebElement]:
        """
        Find every element matching a selector.

        :param query_selector: css or xpath
        :param web_element: If None is passed will perform the query selector in the whole page
        :raises ValueError: if ``query_selector`` is empty
        """
        if not query_selector:
            raise ValueError("You need send a query_selector")

        target = web_element or self.__driver
        return target.find_elements(*create_locator(query_selector))

    @staticmethod
    def _xpath_literal(text: str) -> str:
        """Return ``text`` as an XPath string literal, whatever quotes it contains."""
        if "'" not in text:
            return f"'{text}'"
        if '"' not in text:
            return f'"{text}"'
        # XPath 1.0 has no escape character: stitch the pieces with concat().
        pieces = (f"'{piece}'" for piece in text.split("'"))
        return "concat(" + ", \"'\", ".join(pieces) + ")"

    def find_element_by_text(self, text: str) -> WebElement:
        """Find the first element, of any tag, whose text contains ``text``."""
        xpath = f"//*[contains(text(), {self._xpath_literal(text)} )]"
        return self.query_selector(query_selector=xpath)

    def find_elements_by_text(self, text: str) -> list[WebElement]:
        """Find every element, of any tag, whose text contains ``text``."""
        xpath = f"//*[contains(text(), {self._xpath_literal(text)} )]"
        return self.query_selector_all(query_selector=xpath)

    def find_element_by_tag_and_text(self, tag: str, text: str) -> WebElement:
        """Find the first ``tag`` element whose text contains ``text``."""
        xpath = f"//{tag}[contains(text(), {self._xpath_literal(text)} )]"
        return self.query_selector(query_selector=xpath)

    def find_elements_by_tag_and_text(self, tag: str, text: str) -> list[WebElement]:
        """Find every ``tag`` element whose text contains ``text``."""
        xpath = f"//{tag}[contains(text(), {self._xpath_literal(text)} )]"
        return self.query_selector_all(query_selector=xpath)

    def get_text(self, query_selector: str) -> str:
        """
        Return the text of the first element matching ``query_selector``.

        :raises NoSuchElementException: if nothing matches
        """
        return self.query_selector(query_selector=query_selector).text

    def get_attribute(self, query_selector: str, attribute: str) -> str | None:
        """
        Return ``attribute`` of the first element matching ``query_selector``.

        :raises NoSuchElementException: if nothing matches
        """
        return self.query_selector(query_selector=query_selector).get_attribute(attribute)

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------

    @auto_wait
    def click(self, query_selector: str) -> None:
        """Click the first element matching ``query_selector``."""
        self.query_selector(query_selector=query_selector).click()

    @auto_wait
    def fill(self, text: str, query_selector: str) -> None:
        """Type ``text`` into the first element matching ``query_selector``."""
        self.query_selector(query_selector=query_selector).send_keys(text)

    @auto_wait
    def clear(self, query_selector: str) -> None:
        """Clear the first element matching ``query_selector``."""
        self.query_selector(query_selector=query_selector).clear()

    def fill_in_random_time(self, text: str, query_selector: str) -> None:
        """Type ``text`` one character at a time, pausing 0.3-0.8 s before each."""
        element = self.query_selector(query_selector=query_selector)
        for letter in text:
            time.sleep(uniform(0.3, 0.8))
            element.send_keys(letter)

    def clear_and_fill(self, text: str, query_selector: str, random_time=False) -> None:
        """
        Clear the matched element, then type ``text`` into it.

        :param random_time: when true, type character by character with random pauses
        """
        self.clear(query_selector=query_selector)
        if random_time:
            self.fill_in_random_time(text=text, query_selector=query_selector)
        else:
            self.fill(text=text, query_selector=query_selector)

    # ------------------------------------------------------------------
    # Explicit waits
    # ------------------------------------------------------------------

    def _wait_until(self, wait_time: int, condition, message: str = "") -> bool:
        """Wait up to ``wait_time`` seconds for ``condition``; False on timeout."""
        try:
            WebDriverWait(self.__driver, wait_time).until(condition, message=message)
        except TimeoutException:
            return False
        return True

    def element_is_present(self, wait_time: int, query_selector: str) -> bool:
        """Wait for the element to exist in the DOM; False on timeout."""
        return self._wait_until(wait_time, EC.presence_of_element_located(create_locator(query_selector)))

    def element_is_visible(self, wait_time: int, query_selector: str) -> bool:
        """Wait for the element to be visible; False on timeout."""
        return self._wait_until(wait_time, EC.visibility_of_element_located(create_locator(query_selector)))

    def element_is_invisible(self, wait_time: int, query_selector: str) -> bool:
        """Wait for the element to be invisible or absent; False on timeout."""
        return self._wait_until(wait_time, EC.invisibility_of_element_located(create_locator(query_selector)))

    def element_is_clickable(self, wait_time: int, query_selector: str) -> bool:
        """Wait for the element to be clickable; False on timeout."""
        return self._wait_until(wait_time, EC.element_to_be_clickable(create_locator(query_selector)))

    def text_is_present(self, wait_time: int, query_selector: str, text: str) -> bool:
        """Wait for ``text`` to appear in the matched element; False on timeout."""
        condition = EC.text_to_be_present_in_element(create_locator(query_selector), text_=text)
        return self._wait_until(wait_time, condition)

    def alert_is_present(self, wait_time: int, message: str) -> bool:
        """
        Wait for an alert to be shown; False on timeout.

        :param message: message handed to ``WebDriverWait.until``
        """
        return self._wait_until(wait_time, EC.alert_is_present(), message=message)

    def page_is_loading(self) -> bool:
        """Return True while ``document.readyState`` is not ``"complete"``."""
        return self.__driver.execute_script("return document.readyState") != "complete"

    # ------------------------------------------------------------------
    # Browser / session helpers
    # ------------------------------------------------------------------

    def _warn_if_not_chromium(self) -> None:
        """Warn when the wrapped driver is not a ``ChromiumDriver`` instance.

        The original guard built a ``TypeError`` without raising it. It is
        surfaced as a warning rather than an exception because
        ``execute_cdp_cmd`` is written for remote drivers, which are not
        ``ChromiumDriver`` instances; raising here would break them.
        """
        if isinstance(self.__driver, ChromiumDriver):
            return

        import warnings

        warnings.warn(
            "Your driver must be a ChromiumDriver type to use this method",
            RuntimeWarning,
            stacklevel=3,  # point at the caller of the public method
        )

    def block_urls(self, urls: list) -> None:
        """
        Block the given URLs through CDP.

        :param urls: values sent as ``urls`` to ``Network.setBlockedURLs``
        """
        self._warn_if_not_chromium()

        self.execute_cdp_cmd("Network.setBlockedURLs", {"urls": urls})
        self.execute_cdp_cmd("Network.enable", {})

    def driver_hard_refresh(self) -> None:
        """Reload the page with ``location.reload(true)``."""
        self.__driver.execute_script("location.reload(true)")

    def webdriver_is_open(self) -> bool:
        """Return False when the session id is no longer valid, True otherwise."""
        try:
            # Any cheap script will do; only the session check matters.
            self.__driver.execute_script("console.log('ola eu estou funcionando');")
            return True
        except InvalidSessionIdException:
            return False

    # ------------------------------------------------------------------
    # Network inspection (performance log + CDP)
    # ------------------------------------------------------------------

    def get_all_requests(self) -> list[dict]:
        """
        Return every entry of the driver's "performance" log, decoded.

        !!! ALERT !!!
        For this method to work the code below is necessary when the driver is created

        # selenium < 4.0
        capabilities = DesiredCapabilities.CHROME
        capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
        driver = webdriver.Chrome(desired_capabilities=capabilities)

        # selenium > 4.0
        capabilities = {"performance": "ALL"}
        options.set_capability("goog:loggingPrefs", capabilities)

        :return: the inner ``message`` object of each log entry
        """
        self._warn_if_not_chromium()

        raw_entries = self.__driver.get_log("performance")
        return [json.loads(entry["message"])["message"] for entry in raw_entries]

    def get_requests(self, request_url: str) -> list[Request] | None:
        """
        Collect the logged requests whose URL contains ``request_url``.

        A request matches when the substring occurs in its request URL, its
        response URL or the URL of a redirect response.

        :param request_url: substring to look for
        :return: one ``Request`` per matching request id, in the order the ids
            first appear in the log, or None when nothing matches
        """
        tracked_methods = (
            "Network.responseReceived",
            "Network.requestWillBeSent",
            "Network.requestWillBeSentExtraInfo",
            # "Page.windowOpen"  # I Only see in Redirect, maybe add in the future, does not have request_id
        )
        events = [event for event in self.get_all_requests() if event["method"] in tracked_methods]

        # A dict is used as an ordered set so the result order is deterministic.
        matched_ids: dict = {}
        for event in events:
            params = event["params"]
            candidate_urls = (
                params[key]["url"] for key in ("request", "response", "redirectResponse") if params.get(key)
            )
            if any(request_url in url for url in candidate_urls):
                matched_ids[params.get("requestId")] = None

        if not matched_ids:
            return None

        # Group the events once instead of rescanning the log for every id.
        events_by_id: dict = {request_id: [] for request_id in matched_ids}
        for event in events:
            request_id = event["params"].get("requestId")
            if request_id in events_by_id:
                events_by_id[request_id].append(event)

        return [
            self._build_request(request_id, id_events, request_url)
            for request_id, id_events in events_by_id.items()
        ]

    @staticmethod
    def _build_request(request_id: str, events: list[dict], request_url: str) -> Request:
        """Fold the log events of one request id into a ``Request``; later events win."""
        cookies: dict = {}
        headers: dict = {}
        url = None
        redirect = None
        request_type = None

        for event in events:
            params = event["params"]
            method = event.get("method")

            if method == "Network.requestWillBeSentExtraInfo":
                # The event may carry no headers at all; never keep None here.
                headers = params.get("headers") or {}

                # Header names may be reported as "Cookie" or "cookie".
                cookie_header = next(
                    (value for name, value in headers.items() if name.lower() == "cookie"),
                    None,
                )
                if cookie_header:
                    jar = SimpleCookie()
                    jar.load(cookie_header)
                    cookies = dict(jar)

            elif method == "Network.requestWillBeSent":
                url = params["request"]["url"]

                raw_type = params.get("type")
                try:
                    request_type = RequestType(raw_type) if raw_type is not None else None
                except ValueError:
                    # Resource type this enum does not list (e.g. "Media").
                    request_type = RequestType.OTHER

                redirect_response = params.get("redirectResponse")
                if redirect_response and request_url in redirect_response["url"]:
                    redirect = Redirect(url=redirect_response["url"])

        return Request(
            url=url,
            request_id=request_id,
            cookies=cookies,
            headers=headers,
            redirect=redirect,
            type=request_type,
        )

    def response_data_from_request(self, request_url: str, request_id: str = None) -> dict | None:
        """
        Return the ``Network.getResponseBody`` result for a request.

        :param request_url: substring used to find the request when no id is given
        :param request_id: when given, it is queried directly and any driver
            error propagates
        :return: the CDP result, or None when no request matches or its body
            cannot be fetched
        :raises ValueError: if more than one request matches ``request_url``
        """
        if request_id:
            return self.execute_cdp_cmd("Network.getResponseBody", {"requestId": request_id})

        matches = self.get_requests(request_url=request_url)
        if not matches:
            return None

        if len(matches) > 1:
            raise ValueError("more than one request matched")

        return self.get_response_body_from_request_id(request_id=matches[0].request_id)

    def get_response_body_from_request_id(self, request_id: str = None) -> dict | None:
        """Return the ``Network.getResponseBody`` result, or None if the driver reports an error."""
        try:
            return self.execute_cdp_cmd(cmd="Network.getResponseBody", cmd_args={"requestId": request_id})
        except WebDriverException:
            return None

    # NOTE: an async variant built on ``driver.bidi_connection()`` and ``trio``
    # was previously kept here as commented-out code.
    def execute_cdp_cmd(self, cmd: str, cmd_args: dict) -> dict | None:
        """
        Useful for when executing CDP command in a remote driver

        :param cmd: CDP method name, e.g. ``Network.enable``
        :param cmd_args: parameters of the CDP method
        :return: the ``value`` field of the driver's response
        """
        # NOTE(review): relies on private members of the command executor
        # (``_url``, ``_request``) - confirm they exist in the Selenium
        # version in use.
        executor = self.__driver.command_executor
        resource = f"/session/{self.__driver.session_id}/chromium/send_command_and_get_result"
        body = json.dumps({"cmd": cmd, "params": cmd_args})
        response = executor._request("POST", executor._url + resource, body)
        return response.get("value")

    # ------------------------------------------------------------------
    # Misc
    # ------------------------------------------------------------------

    def scroll_window(self, query_selector: str = None, web_element: WebElement = None) -> None:
        """
        Scrolls window to element position

        :param query_selector: css or xpath, used when ``web_element`` is None
        :param web_element: element to scroll to
        :raises ValueError: if neither argument is provided
        """
        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        if not (query_selector or web_element):
            raise ValueError("You need to provide query_selector or web_element")

        if web_element is None:
            web_element = self.query_selector(query_selector=query_selector)

        self.__driver.execute_script("arguments[0].scrollIntoView();", web_element)

    def get_all_local_storage(
        self,
    ) -> dict:
        """Return ``window.localStorage`` as serialized by the driver."""
        return self.__driver.execute_script("return window.localStorage")

    def quit(self):
        """Quit the driver if its session is still valid."""
        if self.webdriver_is_open():
            self.__driver.quit()
@@ -0,0 +1,10 @@
1
+ from selenium.webdriver.common.by import By
2
+
3
+
4
def create_locator(query_selector: str) -> tuple:
    """Build a Selenium ``(By, selector)`` locator from a CSS or XPath string.

    A selector is treated as XPath when it starts with ``/``, ``(``, ``./``
    or ``../`` (none of which can begin a CSS selector); anything else is
    treated as CSS.

    :param query_selector: css or xpath
    :return: ``(By.XPATH, query_selector)`` or ``(By.CSS_SELECTOR, query_selector)``
    :raises ValueError: if ``query_selector`` is empty
    """
    if not query_selector:
        # Previously surfaced as an opaque IndexError from query_selector[0].
        raise ValueError("You need send a query_selector")

    if query_selector.startswith(("/", "(", "./", "../")):
        return By.XPATH, query_selector

    return By.CSS_SELECTOR, query_selector
@@ -0,0 +1,49 @@
1
+ from dataclasses import dataclass
2
+ from enum import StrEnum
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
class Cookie(BaseModel):
    """Browser cookie description; only ``name`` and ``value`` are required.

    NOTE(review): the camelCase field names presumably mirror the cookie
    shape of the underlying browser automation APIs - confirm against callers.
    """

    name: str
    value: str
    url: str | None = None
    domain: str | None = None
    path: str | None = None
    expires: float | None = None
    httpOnly: bool | None = None
    secure: bool | None = None
    sameSite: Literal["Lax", "None", "Strict"] | None = None
    partitionKey: str | None = None
19
+
20
+
21
class LocalStorage(BaseModel):
    """A single key/value pair, both stored as strings."""

    key: str
    value: str
24
+
25
+
26
+ class RequestType(StrEnum):
27
+ DOCUMENT = "Document"
28
+ XHR = "XHR"
29
+ IMAGE = "Image"
30
+ SCRIPT = "Script"
31
+ STYLESHEET = "Stylesheet"
32
+ FONT = "Font"
33
+ FETCH = "Fetch"
34
+ OTHER = "Other"
35
+
36
+
37
@dataclass
class Redirect:
    """Redirect information attached to a ``Request``."""

    # URL associated with the redirect.
    url: str
40
+
41
+
42
@dataclass
class Request:
    """Description of a single network request.

    ``redirect`` and ``type`` are optional and default to ``None``.
    """

    url: str
    # Identifier of the request. NOTE(review): presumably the DevTools
    # request id - confirm against the code that builds these objects.
    request_id: str
    cookies: dict
    headers: dict
    redirect: Redirect | None = None
    type: RequestType | None = None
@@ -0,0 +1,2 @@
1
def raise_not_implemented():
    """Raise ``NotImplementedError`` with a fixed "not implemented yet" message."""
    message = "This method is not implemented yet."
    raise NotImplementedError(message)
@@ -0,0 +1,85 @@
1
+ Metadata-Version: 2.4
2
+ Name: browser-toolkit
3
+ Version: 0.0.1a1
4
+ Summary: Toolkit that provides a single interface to interact with different browser automations.
5
+ Project-URL: Homepage, https://github.com/toriium/browser-toolkit
6
+ Project-URL: Documentation, https://github.com/toriium/browser-toolkit/blob/master/README.md
7
+ Project-URL: Repository, https://github.com/toriium/browser-toolkit
8
+ Requires-Python: >=3.12
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: camoufox>=0.4.11
12
+ Requires-Dist: playwright>=1.60.0
13
+ Requires-Dist: pydantic>=2.13.4
14
+ Requires-Dist: pydoll-python>=2.23.0
15
+ Requires-Dist: selenium>=4.44.0
16
+ Dynamic: license-file
17
+
18
+ # browser-toolkit
19
+
20
+ Browser Toolkit that provides a single interface to interact with different browser automations.
21
+
22
+
23
+ Supported automations include:
24
+ - Selenium
25
+ - Playwright
26
+ - Camoufox (Via Playwright implementation)
27
+ - Pydoll
28
+
29
+ Features that browser-toolkit currently offers:
30
+
31
+ - **Async First**
32
+ - **More legible automation code**
33
+ - **Abstractions of browsers methods**
34
+ - **Helpful tools to use when interacting with browsers**
35
+
36
+
37
+
38
+ ## Install
39
+ ```
40
+ # Pip
41
+ pip install browser-toolkit
42
+
43
+ # Uv
44
+ uv add browser-toolkit
45
+
46
+ # Poetry
47
+ poetry add browser-toolkit
48
+ ```
49
+
50
+ ## Basic
51
+ ```python
52
+ from playwright.async_api import async_playwright
53
+ from browser_toolkit.playwright import PlaywrightTollKit
54
+ import asyncio
55
+
56
+ async def main():
57
+ # Create an instance
58
+ async with async_playwright() as p:
59
+ browser = await p.chromium.launch()
60
+ page = await browser.new_page()
61
+
62
+ # Pass instance to BrowserToolKit
63
+ btk = PlaywrightTollKit(browser=browser, page=page)
64
+
65
+ # Navigate to a website
66
+ await btk.goto('https://www.example.com')
67
+
68
+ # Create a selector
69
+ se_class = '.class1'
70
+
71
+ # Use BrowserToolKit to find a web element
72
+ web_element = await btk.selector(selector=se_class)
73
+
74
+ # With returned web_element use click() method
75
+ await web_element.click()
76
+
77
+ # Or you can click directly with BrowserToolKit
78
+ await btk.click(selector=se_class)
79
+
80
+ # close instance with BrowserToolKit
81
+ await btk.close()
82
+
83
+ if __name__ == "__main__":
84
+ asyncio.run(main())
85
+ ```
@@ -0,0 +1,17 @@
1
+ browser_toolkit/__init__.py,sha256=qZqNWgYPpJ7s1XTysTwSXHWDy74VpNgUyUMyRkutR0Y,104
2
+ browser_toolkit/base_toolkit.py,sha256=BTXO43T2OypYkSthv3XsqUQPUOMVvlIHfjwpAAys9P4,16834
3
+ browser_toolkit/camoufox.py,sha256=63--CS3R6yoGPjBdGYWf6HwCaqdtPqcTG_bwXTCpE0I,329
4
+ browser_toolkit/playwright.py,sha256=wFkTz1k1Fok5kJ3tJ2qL6O0F72dgbjGps4Ul4O-Lrrk,17229
5
+ browser_toolkit/selenium.py,sha256=LiG7qjivKJeusbhPYVp3nAre0Ak4MbPLtjwiUDrWWq8,13979
6
+ browser_toolkit/types.py,sha256=6oVgNX91jB7SwX_9hp8l9W4fuc3CK4AePHIqhUglIfo,921
7
+ browser_toolkit/utils.py,sha256=w0BAZrVStdQbUF-pqCmTq7QVr6b9NEWtryE3EsWTCIk,98
8
+ browser_toolkit/create_browser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ browser_toolkit/create_browser/playwright.py,sha256=NPBxXR5N82Q19q2HfgmrKXm8ovwQVkIF15SdWsaoM28,829
10
+ browser_toolkit/selenium_toolkit/__init__.py,sha256=4YBJs0Jk3YzsCEZgSmG8zj2H331m3JwEdsr_7JR0xoc,46
11
+ browser_toolkit/selenium_toolkit/selenium_toolkit.py,sha256=b5eOG2WOlYWqnYlXAasNf_KJnF0iZ0i4ZYCKlBJw0Nc,14781
12
+ browser_toolkit/selenium_toolkit/utils.py,sha256=BIYd06r5hYcrDhfXsoR3SOKsP7pe-sRS2ePj5L721bI,256
13
+ browser_toolkit-0.0.1a1.dist-info/licenses/LICENSE,sha256=gXf5dRMhNSbfLPYYTY_5hsZ1r7UU1OaKQEAQUhuIBkM,18092
14
+ browser_toolkit-0.0.1a1.dist-info/METADATA,sha256=Tft7T03QTUX3HkT3ca7u9XDfINpTWWACflkqfdKBXFQ,2238
15
+ browser_toolkit-0.0.1a1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ browser_toolkit-0.0.1a1.dist-info/top_level.txt,sha256=zELYpWop-qpcptMW6T_bAI2CadBZP7z3N58YYW3fk7c,16
17
+ browser_toolkit-0.0.1a1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+