scrapling 0.2.94__py3-none-any.whl → 0.2.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +1 -1
- scrapling/engines/static.py +9 -6
- {scrapling-0.2.94.dist-info → scrapling-0.2.95.dist-info}/METADATA +4 -2
- {scrapling-0.2.94.dist-info → scrapling-0.2.95.dist-info}/RECORD +8 -8
- {scrapling-0.2.94.dist-info → scrapling-0.2.95.dist-info}/WHEEL +1 -1
- {scrapling-0.2.94.dist-info → scrapling-0.2.95.dist-info}/LICENSE +0 -0
- {scrapling-0.2.94.dist-info → scrapling-0.2.95.dist-info}/entry_points.txt +0 -0
- {scrapling-0.2.94.dist-info → scrapling-0.2.95.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
|
|
5
5
|
from scrapling.parser import Adaptor, Adaptors
|
6
6
|
|
7
7
|
__author__ = "Karim Shoair (karim.shoair@pm.me)"
|
8
|
-
__version__ = "0.2.94"
|
8
|
+
__version__ = "0.2.95"
|
9
9
|
__copyright__ = "Copyright (c) 2024 Karim Shoair"
|
10
10
|
|
11
11
|
|
scrapling/engines/static.py
CHANGED
@@ -42,16 +42,19 @@ class StaticEngine:
|
|
42
42
|
:return: A dictionary of the new headers.
|
43
43
|
"""
|
44
44
|
headers = headers or {}
|
45
|
-
|
46
|
-
# Validate headers
|
47
|
-
if not headers.get('user-agent') and not headers.get('User-Agent'):
|
48
|
-
headers['User-Agent'] = generate_headers(browser_mode=False).get('User-Agent')
|
49
|
-
log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
|
45
|
+
headers_keys = set(map(str.lower, headers.keys()))
|
50
46
|
|
51
47
|
if self.stealth:
|
52
48
|
extra_headers = generate_headers(browser_mode=False)
|
49
|
+
# Don't overwrite user supplied headers
|
50
|
+
extra_headers = {key: value for key, value in extra_headers.items() if key.lower() not in headers_keys}
|
53
51
|
headers.update(extra_headers)
|
54
|
-
|
52
|
+
if 'referer' not in headers_keys:
|
53
|
+
headers.update({'referer': generate_convincing_referer(self.url)})
|
54
|
+
|
55
|
+
elif 'user-agent' not in headers_keys:
|
56
|
+
headers['User-Agent'] = generate_headers(browser_mode=False).get('User-Agent')
|
57
|
+
log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
|
55
58
|
|
56
59
|
return headers
|
57
60
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: scrapling
|
3
|
-
Version: 0.2.94
|
4
|
-
Summary: Scrapling is
|
3
|
+
Version: 0.2.95
|
4
|
+
Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again! In an internet filled with complications,
|
5
5
|
Home-page: https://github.com/D4Vinci/Scrapling
|
6
6
|
Author: Karim Shoair
|
7
7
|
Author-email: karim.shoair@pm.me
|
@@ -275,6 +275,8 @@ This class is built on top of [httpx](https://www.python-httpx.org/) with additi
|
|
275
275
|
|
276
276
|
For all methods, you have `stealthy_headers`, which makes `Fetcher` create and use a real browser's headers and then create a referer header as if the request came from a Google search for this URL's domain. It's enabled by default. You can also set the number of retries with the `retries` argument for all methods, which makes httpx retry a request if it fails for any reason. The default number of retries for all `Fetcher` methods is 3.
|
277
277
|
|
278
|
+
> Note: All headers generated by the `stealthy_headers` argument can be overridden through the `headers` argument.
|
279
|
+
|
278
280
|
You can route all traffic (HTTP and HTTPS) to a proxy for any of these methods in this format `http://username:password@localhost:8030`
|
279
281
|
```python
|
280
282
|
>> page = Fetcher().get('https://httpbin.org/get', stealthy_headers=True, follow_redirects=True)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
scrapling/__init__.py,sha256=
|
1
|
+
scrapling/__init__.py,sha256=7gthgq0LYOWqlly_w1GnesFh1WzPmkXwXJyjXB3JvVY,500
|
2
2
|
scrapling/cli.py,sha256=njPdJKmbLFHeWjtSiGEm9ALBdSyfUp0IaJvxQL5C31Q,1125
|
3
3
|
scrapling/defaults.py,sha256=sdXeZjXEX7PmCtaa0weK0nRrAUzqZukNNqipZ_sltYE,469
|
4
4
|
scrapling/fetchers.py,sha256=qmiJ6S-bnPWvP48Z6rKxBnSuR-tdwHlJwlIsYxGxFM0,35405
|
@@ -15,7 +15,7 @@ scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c
|
|
15
15
|
scrapling/engines/camo.py,sha256=SHMRnIrN6599upo5-G3fZQ10455xyB-bB_EsLMjBStA,16072
|
16
16
|
scrapling/engines/constants.py,sha256=Gb_nXFoBB4ujJkd05SKkenMe1UDiRYQA3dkmA3DunLg,3723
|
17
17
|
scrapling/engines/pw.py,sha256=LvS1jvTf3s7mfdeQo7_OyQ5zpiOzvBu5g88hOLlQBCQ,20856
|
18
|
-
scrapling/engines/static.py,sha256=
|
18
|
+
scrapling/engines/static.py,sha256=8v6RmdsSP6fAtWNXaJG24evHPsZ2oDiBl7yfkLrdARU,10635
|
19
19
|
scrapling/engines/toolbelt/__init__.py,sha256=VQDdYm1zY9Apno6d8UrULk29vUjllZrQqD8mXL1E2Fc,402
|
20
20
|
scrapling/engines/toolbelt/custom.py,sha256=qgONLwpxUoEIAIQBF1RcakYu8cqAAmX8qdyaol5hfjA,12813
|
21
21
|
scrapling/engines/toolbelt/fingerprints.py,sha256=ajEHdXHr7W4hw9KcNS7XlyxNBZu37p1bRj18TiICLzU,2929
|
@@ -41,9 +41,9 @@ tests/fetchers/sync/test_playwright.py,sha256=MEyDRaMyxDIWupG7f_xz0f0jd9Cpbd5rXC
|
|
41
41
|
tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
42
|
tests/parser/test_automatch.py,sha256=SxsNdExE8zz8AcPRQFBUjZ3Q_1-tPOd9dzVvMSZpOYQ,4908
|
43
43
|
tests/parser/test_general.py,sha256=dyfOsc8lleoY4AxcfDUBUaD1i95xecfYuTUhKBsYjwo,12100
|
44
|
-
scrapling-0.2.
|
45
|
-
scrapling-0.2.
|
46
|
-
scrapling-0.2.
|
47
|
-
scrapling-0.2.
|
48
|
-
scrapling-0.2.
|
49
|
-
scrapling-0.2.
|
44
|
+
scrapling-0.2.95.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
|
45
|
+
scrapling-0.2.95.dist-info/METADATA,sha256=PTTxxxijblkcumiCbowId3Xy5I64lF9DvH3nAMPhEHQ,69066
|
46
|
+
scrapling-0.2.95.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
|
47
|
+
scrapling-0.2.95.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
|
48
|
+
scrapling-0.2.95.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
|
49
|
+
scrapling-0.2.95.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|