scrapling 0.2.94__py3-none-any.whl → 0.2.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scrapling/__init__.py CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
5
5
  from scrapling.parser import Adaptor, Adaptors
6
6
 
7
7
  __author__ = "Karim Shoair (karim.shoair@pm.me)"
8
- __version__ = "0.2.94"
8
+ __version__ = "0.2.95"
9
9
  __copyright__ = "Copyright (c) 2024 Karim Shoair"
10
10
 
11
11
 
@@ -42,16 +42,19 @@ class StaticEngine:
42
42
  :return: A dictionary of the new headers.
43
43
  """
44
44
  headers = headers or {}
45
-
46
- # Validate headers
47
- if not headers.get('user-agent') and not headers.get('User-Agent'):
48
- headers['User-Agent'] = generate_headers(browser_mode=False).get('User-Agent')
49
- log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
45
+ headers_keys = set(map(str.lower, headers.keys()))
50
46
 
51
47
  if self.stealth:
52
48
  extra_headers = generate_headers(browser_mode=False)
49
+ # Don't overwrite user supplied headers
50
+ extra_headers = {key: value for key, value in extra_headers.items() if key.lower() not in headers_keys}
53
51
  headers.update(extra_headers)
54
- headers.update({'referer': generate_convincing_referer(self.url)})
52
+ if 'referer' not in headers_keys:
53
+ headers.update({'referer': generate_convincing_referer(self.url)})
54
+
55
+ elif 'user-agent' not in headers_keys:
56
+ headers['User-Agent'] = generate_headers(browser_mode=False).get('User-Agent')
57
+ log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
55
58
 
56
59
  return headers
57
60
 
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: scrapling
3
- Version: 0.2.94
4
- Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
3
+ Version: 0.2.95
4
+ Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again! In an internet filled with complications,
5
5
  Home-page: https://github.com/D4Vinci/Scrapling
6
6
  Author: Karim Shoair
7
7
  Author-email: karim.shoair@pm.me
@@ -275,6 +275,8 @@ This class is built on top of [httpx](https://www.python-httpx.org/) with additi
275
275
 
276
276
  For all methods, you have the `stealthy_headers` argument, which makes `Fetcher` create and use real browser headers and then add a referer header as if the request came from a Google search for this URL's domain. It's enabled by default. You can also set the number of retries with the `retries` argument for all methods; this makes httpx retry a request if it fails for any reason. The default number of retries for all `Fetcher` methods is 3.
277
277
 
278
+ > Note: All headers generated by the `stealthy_headers` argument can be overridden by passing your own values through the `headers` argument.
279
+
278
280
  You can route all traffic (HTTP and HTTPS) to a proxy for any of these methods in this format `http://username:password@localhost:8030`
279
281
  ```python
280
282
  >> page = Fetcher().get('https://httpbin.org/get', stealthy_headers=True, follow_redirects=True)
@@ -1,4 +1,4 @@
1
- scrapling/__init__.py,sha256=pOwvxTBwxLovt0OJNZz2A5FkbfjQC0wKrDmONqoNsL0,500
1
+ scrapling/__init__.py,sha256=7gthgq0LYOWqlly_w1GnesFh1WzPmkXwXJyjXB3JvVY,500
2
2
  scrapling/cli.py,sha256=njPdJKmbLFHeWjtSiGEm9ALBdSyfUp0IaJvxQL5C31Q,1125
3
3
  scrapling/defaults.py,sha256=sdXeZjXEX7PmCtaa0weK0nRrAUzqZukNNqipZ_sltYE,469
4
4
  scrapling/fetchers.py,sha256=qmiJ6S-bnPWvP48Z6rKxBnSuR-tdwHlJwlIsYxGxFM0,35405
@@ -15,7 +15,7 @@ scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c
15
15
  scrapling/engines/camo.py,sha256=SHMRnIrN6599upo5-G3fZQ10455xyB-bB_EsLMjBStA,16072
16
16
  scrapling/engines/constants.py,sha256=Gb_nXFoBB4ujJkd05SKkenMe1UDiRYQA3dkmA3DunLg,3723
17
17
  scrapling/engines/pw.py,sha256=LvS1jvTf3s7mfdeQo7_OyQ5zpiOzvBu5g88hOLlQBCQ,20856
18
- scrapling/engines/static.py,sha256=_bqVKcsTkm8ok6NIH6PDDaXtyQ6B2ZoGWccjZJKwvBo,10414
18
+ scrapling/engines/static.py,sha256=8v6RmdsSP6fAtWNXaJG24evHPsZ2oDiBl7yfkLrdARU,10635
19
19
  scrapling/engines/toolbelt/__init__.py,sha256=VQDdYm1zY9Apno6d8UrULk29vUjllZrQqD8mXL1E2Fc,402
20
20
  scrapling/engines/toolbelt/custom.py,sha256=qgONLwpxUoEIAIQBF1RcakYu8cqAAmX8qdyaol5hfjA,12813
21
21
  scrapling/engines/toolbelt/fingerprints.py,sha256=ajEHdXHr7W4hw9KcNS7XlyxNBZu37p1bRj18TiICLzU,2929
@@ -41,9 +41,9 @@ tests/fetchers/sync/test_playwright.py,sha256=MEyDRaMyxDIWupG7f_xz0f0jd9Cpbd5rXC
41
41
  tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  tests/parser/test_automatch.py,sha256=SxsNdExE8zz8AcPRQFBUjZ3Q_1-tPOd9dzVvMSZpOYQ,4908
43
43
  tests/parser/test_general.py,sha256=dyfOsc8lleoY4AxcfDUBUaD1i95xecfYuTUhKBsYjwo,12100
44
- scrapling-0.2.94.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
45
- scrapling-0.2.94.dist-info/METADATA,sha256=nF08IkBzVob418wgav0uHzbNdVXH1-FrTYZAxrTfg24,68878
46
- scrapling-0.2.94.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
47
- scrapling-0.2.94.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
48
- scrapling-0.2.94.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
49
- scrapling-0.2.94.dist-info/RECORD,,
44
+ scrapling-0.2.95.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
45
+ scrapling-0.2.95.dist-info/METADATA,sha256=PTTxxxijblkcumiCbowId3Xy5I64lF9DvH3nAMPhEHQ,69066
46
+ scrapling-0.2.95.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
47
+ scrapling-0.2.95.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
48
+ scrapling-0.2.95.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
49
+ scrapling-0.2.95.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.8.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5