scrapling-0.3.7-py3-none-any.whl → scrapling-0.3.8-py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
- scrapling/__init__.py +1 -1
- scrapling/engines/_browsers/_base.py +140 -9
- scrapling/engines/_browsers/_camoufox.py +47 -164
- scrapling/engines/_browsers/_config_tools.py +8 -2
- scrapling/engines/_browsers/_controllers.py +25 -96
- scrapling/engines/_browsers/_validators.py +72 -61
- scrapling/engines/toolbelt/convertor.py +37 -2
- scrapling/engines/toolbelt/custom.py +0 -12
- scrapling/engines/toolbelt/fingerprints.py +6 -8
- scrapling/fetchers/chrome.py +6 -0
- {scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/METADATA +6 -4
- {scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/RECORD +16 -16
- {scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/WHEEL +0 -0
- {scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/entry_points.txt +0 -0
- {scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/top_level.txt +0 -0
scrapling/fetchers/chrome.py
CHANGED
@@ -50,6 +50,7 @@ class DynamicFetcher(BaseFetcher):
         network_idle: bool = False,
         load_dom: bool = True,
         wait_selector_state: SelectorWaitStates = "attached",
+        extra_flags: Optional[List[str]] = None,
         additional_args: Optional[Dict] = None,
         custom_config: Optional[Dict] = None,
     ) -> Response:
@@ -79,6 +80,7 @@ class DynamicFetcher(BaseFetcher):
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+        :param extra_flags: A list of additional browser flags to pass to the browser on launch.
         :param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
         :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         :return: A `Response` object.
@@ -108,6 +110,7 @@ class DynamicFetcher(BaseFetcher):
             extra_headers=extra_headers,
             wait_selector=wait_selector,
             disable_webgl=disable_webgl,
+            extra_flags=extra_flags,
             additional_args=additional_args,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
@@ -140,6 +143,7 @@ class DynamicFetcher(BaseFetcher):
         network_idle: bool = False,
         load_dom: bool = True,
         wait_selector_state: SelectorWaitStates = "attached",
+        extra_flags: Optional[List[str]] = None,
         additional_args: Optional[Dict] = None,
         custom_config: Optional[Dict] = None,
     ) -> Response:
@@ -169,6 +173,7 @@ class DynamicFetcher(BaseFetcher):
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+        :param extra_flags: A list of additional browser flags to pass to the browser on launch.
         :param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
         :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
         :return: A `Response` object.
@@ -199,6 +204,7 @@ class DynamicFetcher(BaseFetcher):
             extra_headers=extra_headers,
             wait_selector=wait_selector,
             disable_webgl=disable_webgl,
+            extra_flags=extra_flags,
             additional_args=additional_args,
             disable_resources=disable_resources,
             wait_selector_state=wait_selector_state,
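The net effect of this file's changes is a new optional `extra_flags` argument on both `DynamicFetcher.fetch` and `DynamicFetcher.async_fetch`, forwarded to the browser on launch. A minimal usage sketch, assuming the usual class-level call style of Scrapling's fetchers; the URL and flag values below are illustrative only, not defaults set by the library:

    from scrapling.fetchers import DynamicFetcher

    # Pass extra browser launch flags on top of Scrapling's own settings.
    # The flags shown here are ordinary Chromium switches chosen as examples.
    page = DynamicFetcher.fetch(
        "https://example.com",
        extra_flags=["--disable-gpu", "--lang=en-US"],
    )
    print(page.status)

    # The async variant accepts the same argument:
    # page = await DynamicFetcher.async_fetch("https://example.com", extra_flags=["--disable-gpu"])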
{scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scrapling
-Version: 0.3.7
+Version: 0.3.8
 Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -36,6 +36,7 @@ License: BSD 3-Clause License
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 Project-URL: Homepage, https://github.com/D4Vinci/Scrapling
+Project-URL: Changelog, https://github.com/D4Vinci/Scrapling/releases
 Project-URL: Documentation, https://scrapling.readthedocs.io/en/latest/
 Project-URL: Repository, https://github.com/D4Vinci/Scrapling
 Project-URL: Bug Tracker, https://github.com/D4Vinci/Scrapling/issues
@@ -66,7 +67,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: lxml>=6.0.2
 Requires-Dist: cssselect>=1.3.0
-Requires-Dist: orjson>=3.11.
+Requires-Dist: orjson>=3.11.4
 Requires-Dist: tldextract>=5.3.0
 Provides-Extra: fetchers
 Requires-Dist: click>=8.3.0; extra == "fetchers"
@@ -77,7 +78,7 @@ Requires-Dist: camoufox>=0.4.11; extra == "fetchers"
 Requires-Dist: geoip2>=5.1.0; extra == "fetchers"
 Requires-Dist: msgspec>=0.19.0; extra == "fetchers"
 Provides-Extra: ai
-Requires-Dist: mcp>=1.
+Requires-Dist: mcp>=1.19.0; extra == "ai"
 Requires-Dist: markdownify>=1.2.0; extra == "ai"
 Requires-Dist: scrapling[fetchers]; extra == "ai"
 Provides-Extra: shell
@@ -157,10 +158,11 @@ Built for the modern Web, Scrapling features its own rapid parsing engine and fe
 
 <!-- sponsors -->
 
-<a href="https://www.
+<a href="https://www.scrapeless.com/en?utm_source=official&utm_term=scrapling" target="_blank" title="Effortless Web Scraping Toolkit for Business and Developers"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/scrapeless.jpg"></a>
 <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png"></a>
 <a href="https://visit.decodo.com/Dy6W0b" target="_blank" title="Try the Most Efficient Residential Proxies for Free"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/decodo.png"></a>
 <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
+<a href="https://app.cyberyozh.com/?utm_source=github&utm_medium=scrapling" target="_blank" title="We have gathered the best solutions for multi‑accounting and automation in one place."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/cyberyozh.png"></a>
 <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
 <a href="https://www.rapidproxy.io/?ref=d4v" target="_blank" title="Affordable Access to the Proxy World – bypass CAPTCHAs blocks, and avoid additional costs."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/rapidproxy.jpg"></a>
 <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
{scrapling-0.3.7.dist-info → scrapling-0.3.8.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-scrapling/__init__.py,sha256=
+scrapling/__init__.py,sha256=qUGIsrGoJVzw7kTPJVCX8F_O2AJSJiwkigFbeY-cAGo,1522
 scrapling/cli.py,sha256=gbhfy2GCz_VqcWhBaNMK4wevayxNtLb72SQIUR9Ebik,26916
 scrapling/parser.py,sha256=bQ7_c3rHjnjJsWI-qqkvEVkVx4-NM-1SWYpQrcwbflQ,58837
 scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
@@ -18,16 +18,16 @@ scrapling/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 scrapling/engines/constants.py,sha256=aOIOFCjbtgxH3hehlPU_3EwlnjpdUHRFK342nDQy-Vc,3596
 scrapling/engines/static.py,sha256=3m86QAC1bnK9MD5Cjcs5u2Bu8zb51dzQBLK4Si1K5K8,50062
 scrapling/engines/_browsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-scrapling/engines/_browsers/_base.py,sha256=
-scrapling/engines/_browsers/_camoufox.py,sha256=
-scrapling/engines/_browsers/_config_tools.py,sha256=
-scrapling/engines/_browsers/_controllers.py,sha256=
+scrapling/engines/_browsers/_base.py,sha256=0J90iZItgkSd2oe3hYWhQRG-sA4LaBNnD1Noqfv_RwE,17071
+scrapling/engines/_browsers/_camoufox.py,sha256=Sd08qrx9paDqDWJxEW107lMSUeq0rD6LdFVUAXJI9aE,34895
+scrapling/engines/_browsers/_config_tools.py,sha256=KVYLmr-DcpAnWRjNk4kYBvzNTyfor_Pt5yITlSN2DR8,4764
+scrapling/engines/_browsers/_controllers.py,sha256=7lDgCtZ6YGm-mFEnxixnPgIZEfe_SWNzOrTJE-xin2o,26699
 scrapling/engines/_browsers/_page.py,sha256=1z-P6c97cTkULE-FVrsMY589e6eL_20Ae8pUe6vjggE,2206
-scrapling/engines/_browsers/_validators.py,sha256=
+scrapling/engines/_browsers/_validators.py,sha256=fcBGPmQ_Vf2TuWW-lxqvIPCWf8tzDHmK6txIngxHV1Q,8919
 scrapling/engines/toolbelt/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-scrapling/engines/toolbelt/convertor.py,sha256=
-scrapling/engines/toolbelt/custom.py,sha256=
-scrapling/engines/toolbelt/fingerprints.py,sha256=
+scrapling/engines/toolbelt/convertor.py,sha256=vfDUrHqHTSqxOa1203Vw0L2qLCurHf-_zCxQGiJddjE,14916
+scrapling/engines/toolbelt/custom.py,sha256=9I704OBD9Y5mdGMcw29ShG6hCjKEKO4ttQ79gUjQOQE,7054
+scrapling/engines/toolbelt/fingerprints.py,sha256=Mar1XiF4Zk9IjjurY55S-DPzfG26zaZSxfYiUP7RCjc,2535
 scrapling/engines/toolbelt/navigation.py,sha256=VHQ5sMVI-5UtcSpK-_Pin0e16fRLRzW8lYu-MObCxkY,3858
 scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
 scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
@@ -36,12 +36,12 @@ scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUv
 scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
 scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
 scrapling/fetchers/__init__.py,sha256=V2PSNzVPqtW7bdRrLygsaxHXqbu_7kdyI3byYr5AFbU,1687
-scrapling/fetchers/chrome.py,sha256=
+scrapling/fetchers/chrome.py,sha256=_z9Tc8CVBPZBwEUH-RSjk29ccuzPdWfe1L59xclhCSM,13094
 scrapling/fetchers/firefox.py,sha256=Ix_RVatrDOnC3qR_IzkzkD_PbKv66Jd5C5P58YaOUF4,13190
 scrapling/fetchers/requests.py,sha256=Y-ZXhm2Ui1Ugc5lvMgBDIBAmaoh3upjPlbJswdCnyok,978
-scrapling-0.3.
-scrapling-0.3.
-scrapling-0.3.
-scrapling-0.3.
-scrapling-0.3.
-scrapling-0.3.
+scrapling-0.3.8.dist-info/licenses/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.3.8.dist-info/METADATA,sha256=UMma-41uxo_BLd2I22MtcWyv4b0VoBCdWPza5gVCgw8,22745
+scrapling-0.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+scrapling-0.3.8.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
+scrapling-0.3.8.dist-info/top_level.txt,sha256=Ud-yF-PC2U5HQ3nc5QwT7HSPdIpF1RuwQ_mYgBzHHIM,10
+scrapling-0.3.8.dist-info/RECORD,,
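For reference, each `sha256=` value in RECORD is the URL-safe base64 encoding of the file's SHA-256 digest with the trailing `=` padding stripped, followed by the file size in bytes, per the wheel RECORD format. A small sketch of how such an entry can be recomputed when verifying an unpacked wheel (the path in the example is illustrative):

    import base64
    import hashlib
    from pathlib import Path

    def record_entry(path: str) -> str:
        # Hash the file, base64-encode the digest URL-safely, drop the padding,
        # and format it like the "path,sha256=<digest>,<size>" lines in RECORD.
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
        return f"{path},sha256={digest},{len(data)}"

    # Example against a file from the unpacked wheel:
    # print(record_entry("scrapling/fetchers/chrome.py"))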