scrapling 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. scrapling/__init__.py +5 -4
  2. scrapling/core/_types.py +2 -3
  3. scrapling/core/custom_types.py +93 -11
  4. scrapling/core/storage_adaptors.py +9 -10
  5. scrapling/core/translator.py +6 -7
  6. scrapling/core/utils.py +35 -30
  7. scrapling/defaults.py +2 -1
  8. scrapling/engines/__init__.py +2 -2
  9. scrapling/engines/camo.py +96 -26
  10. scrapling/engines/constants.py +4 -4
  11. scrapling/engines/pw.py +166 -96
  12. scrapling/engines/static.py +94 -50
  13. scrapling/engines/toolbelt/__init__.py +6 -20
  14. scrapling/engines/toolbelt/custom.py +22 -23
  15. scrapling/engines/toolbelt/fingerprints.py +7 -7
  16. scrapling/engines/toolbelt/navigation.py +25 -12
  17. scrapling/fetchers.py +233 -17
  18. scrapling/parser.py +63 -28
  19. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/METADATA +41 -25
  20. scrapling-0.2.9.dist-info/RECORD +47 -0
  21. tests/fetchers/async/__init__.py +0 -0
  22. tests/fetchers/async/test_camoufox.py +95 -0
  23. tests/fetchers/async/test_httpx.py +83 -0
  24. tests/fetchers/async/test_playwright.py +99 -0
  25. tests/fetchers/sync/__init__.py +0 -0
  26. tests/fetchers/sync/test_camoufox.py +68 -0
  27. tests/fetchers/sync/test_httpx.py +82 -0
  28. tests/fetchers/sync/test_playwright.py +87 -0
  29. tests/fetchers/test_utils.py +90 -122
  30. tests/parser/test_automatch.py +64 -9
  31. tests/parser/test_general.py +263 -219
  32. scrapling-0.2.7.dist-info/RECORD +0 -42
  33. tests/fetchers/test_camoufox.py +0 -64
  34. tests/fetchers/test_httpx.py +0 -67
  35. tests/fetchers/test_playwright.py +0 -76
  36. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/LICENSE +0 -0
  37. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/WHEEL +0 -0
  38. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/top_level.txt +0 -0
@@ -1,42 +0,0 @@
1
- scrapling/__init__.py,sha256=WjvhJ6xkiSHp7St2YJYYJIsiKL8WDYuAQ_qIsg03v-0,435
2
- scrapling/defaults.py,sha256=blYDLiuI5DgDSLRWnUgpp21WtFOsv1BsCRCmPeg8Xc4,287
3
- scrapling/fetchers.py,sha256=vjAsa-oleb7FfYsxqmEUVZGNxdo7LMVuiLuyjIGySQE,17417
4
- scrapling/parser.py,sha256=d2n00uF5i7W5lf0afLNRdk17ZFcNyiF9EzXLRQGA0NM,54111
5
- scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
6
- scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- scrapling/core/_types.py,sha256=nD2ZY_fitLohx3MfDmqoKJ9ZShrnRhQ8-d1SU1zEGAY,552
8
- scrapling/core/custom_types.py,sha256=ztE_tshJ8i5uKqqSbsN5S6MoIUSfX6SexlhRjAnkclk,8402
9
- scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
10
- scrapling/core/storage_adaptors.py,sha256=Kbak0BOJX5e9I1PbUS_4sUJi2Wxw8Bv5XsaLHAu1l2Q,6218
11
- scrapling/core/translator.py,sha256=R97lKGq1SDbx8S8Hg_w_5d4ePgukTHj_hRIKFzWiRuc,5229
12
- scrapling/core/utils.py,sha256=fXdANUgRBbVbOerJ94fRY9vi7n5zsbm8t3G4qQ-F3ak,3792
13
- scrapling/engines/__init__.py,sha256=zwMqcSdNGh-IX0d4zXazrgAeHrkqIN_v5Ia7RU1g8W0,267
14
- scrapling/engines/camo.py,sha256=Lw_uZ5SMBy3T6MkCNOMPk1i51Lnpfd0M7HyAUJAzKIg,8284
15
- scrapling/engines/constants.py,sha256=WTn-X4kFIDWjXTiqOT0tm4XT5pijcdohFyZ0Af2C5Xc,3723
16
- scrapling/engines/pw.py,sha256=ZRmbFNQWzvxUHVrIUcKefyg6fDpBrN6erdatDpcLBaw,13762
17
- scrapling/engines/static.py,sha256=ryVCIjTpVLNlCxSf_NYwDSdsoDbafnsGpkCoCROPhlI,8021
18
- scrapling/engines/toolbelt/__init__.py,sha256=BbxfC0depVOV3i3BnBnyfjHtLcZrDbhz6c5rTRczZUc,383
19
- scrapling/engines/toolbelt/custom.py,sha256=KopO0SVWzFoNB8LbFDQhtErm8KCid6nkQcGqRaItC6U,12752
20
- scrapling/engines/toolbelt/fingerprints.py,sha256=T9HQejHzAnHsD5EIXvrYVC5siiG5q2gOOXVIIANmzMc,2917
21
- scrapling/engines/toolbelt/navigation.py,sha256=Tde5_6Wv7lOeWXMzs9D6TRaxAbJ3b-zIX6-4HggZbCQ,4017
22
- scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
23
- scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
24
- scrapling/engines/toolbelt/bypasses/pdf_viewer.js,sha256=mKjjSuP1-BOGC_2WhRYHJo_LP7lTBi2KXmP_zsHO_tI,173
25
- scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js,sha256=3RP1AE_XZRvpupeV_i-WSNVqRxyUy0qd8rQV8j_4j3U,221
26
- scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUvVWYUkPUbblkfMfpiLvBY7w,599
27
- scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
28
- scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
29
- tests/__init__.py,sha256=YHFB5ftzgLQVh6gbPfbYcY4yOS9DOBp5dBa6I-qtm8U,32
30
- tests/fetchers/__init__.py,sha256=6H4NgARhyTcGGd3dNCKQJ8kUFdrAEMSScQL7Ga_vU3c,43
31
- tests/fetchers/test_camoufox.py,sha256=53piGA5uuPvOx5BeUEA0bbizYihwHGxehnj5uqCr6Q0,3115
32
- tests/fetchers/test_httpx.py,sha256=UivOItR3-l-bXp9E6TP5Tvn2OrCdgiVkWsti-f9xdpU,3507
33
- tests/fetchers/test_playwright.py,sha256=7qwbIU2SwjiQEbaGPA_MBo6kAXM4IBmfvy5kUvKT11M,3701
34
- tests/fetchers/test_utils.py,sha256=FPPJkBrqgYxdGeWwapH8Vj8zyfYVLiTE1qSLu8eBWik,5728
35
- tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
37
- tests/parser/test_general.py,sha256=qaiVzpvqESfdXYFat6QrpnMkevPYgCzIcTZK5FwdC0s,11783
38
- scrapling-0.2.7.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
39
- scrapling-0.2.7.dist-info/METADATA,sha256=kYARTFqiiLsL_cvnU03pf2I1E5N_NmJk25gbeLzSR4M,66607
40
- scrapling-0.2.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
41
- scrapling-0.2.7.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
42
- scrapling-0.2.7.dist-info/RECORD,,
@@ -1,64 +0,0 @@
1
- import unittest
2
- import pytest_httpbin
3
-
4
- from scrapling import StealthyFetcher
5
-
6
-
7
- @pytest_httpbin.use_class_based_httpbin
8
- # @pytest_httpbin.use_class_based_httpbin_secure
9
- class TestStealthyFetcher(unittest.TestCase):
10
- def setUp(self):
11
- self.fetcher = StealthyFetcher(auto_match=False)
12
- url = self.httpbin.url
13
- self.status_200 = f'{url}/status/200'
14
- self.status_404 = f'{url}/status/404'
15
- self.status_501 = f'{url}/status/501'
16
- self.basic_url = f'{url}/get'
17
- self.html_url = f'{url}/html'
18
- self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response
19
- self.cookies_url = f"{url}/cookies/set/test/value"
20
-
21
- def test_basic_fetch(self):
22
- """Test doing basic fetch request with multiple statuses"""
23
- self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
24
- self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
25
- self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
26
-
27
- def test_networkidle(self):
28
- """Test if waiting for `networkidle` make page does not finish loading or not"""
29
- self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
30
-
31
- def test_blocking_resources(self):
32
- """Test if blocking resources make page does not finish loading or not"""
33
- self.assertEqual(self.fetcher.fetch(self.basic_url, block_images=True).status, 200)
34
- self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
35
-
36
- def test_waiting_selector(self):
37
- """Test if waiting for a selector make page does not finish loading or not"""
38
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
39
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
40
-
41
- def test_cookies_loading(self):
42
- """Test if cookies are set after the request"""
43
- self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
44
-
45
- def test_automation(self):
46
- """Test if automation break the code or not"""
47
- def scroll_page(page):
48
- page.mouse.wheel(10, 0)
49
- page.mouse.move(100, 400)
50
- page.mouse.up()
51
- return page
52
-
53
- self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
54
-
55
- def test_properties(self):
56
- """Test if different arguments breaks the code or not"""
57
- self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status, 200)
58
- self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status, 200)
59
- self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status, 200)
60
- self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status, 200)
61
-
62
- def test_infinite_timeout(self):
63
- """Test if infinite timeout breaks the code or not"""
64
- self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)
@@ -1,67 +0,0 @@
1
- import unittest
2
- import pytest_httpbin
3
-
4
- from scrapling import Fetcher
5
-
6
-
7
- @pytest_httpbin.use_class_based_httpbin
8
- class TestFetcher(unittest.TestCase):
9
- def setUp(self):
10
- self.fetcher = Fetcher(auto_match=False)
11
- url = self.httpbin.url
12
- self.status_200 = f'{url}/status/200'
13
- self.status_404 = f'{url}/status/404'
14
- self.status_501 = f'{url}/status/501'
15
- self.basic_url = f'{url}/get'
16
- self.post_url = f'{url}/post'
17
- self.put_url = f'{url}/put'
18
- self.delete_url = f'{url}/delete'
19
- self.html_url = f'{url}/html'
20
-
21
- def test_basic_get(self):
22
- """Test doing basic get request with multiple statuses"""
23
- self.assertEqual(self.fetcher.get(self.status_200).status, 200)
24
- self.assertEqual(self.fetcher.get(self.status_404).status, 404)
25
- self.assertEqual(self.fetcher.get(self.status_501).status, 501)
26
-
27
- def test_get_properties(self):
28
- """Test if different arguments with GET request breaks the code or not"""
29
- self.assertEqual(self.fetcher.get(self.status_200, stealthy_headers=True).status, 200)
30
- self.assertEqual(self.fetcher.get(self.status_200, follow_redirects=True).status, 200)
31
- self.assertEqual(self.fetcher.get(self.status_200, timeout=None).status, 200)
32
- self.assertEqual(
33
- self.fetcher.get(self.status_200, stealthy_headers=True, follow_redirects=True, timeout=None).status,
34
- 200
35
- )
36
-
37
- def test_post_properties(self):
38
- """Test if different arguments with POST request breaks the code or not"""
39
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}).status, 200)
40
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
41
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status, 200)
42
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status, 200)
43
- self.assertEqual(
44
- self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
45
- 200
46
- )
47
-
48
- def test_put_properties(self):
49
- """Test if different arguments with PUT request breaks the code or not"""
50
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}).status, 200)
51
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
52
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status, 200)
53
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status, 200)
54
- self.assertEqual(
55
- self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
56
- 200
57
- )
58
-
59
- def test_delete_properties(self):
60
- """Test if different arguments with DELETE request breaks the code or not"""
61
- self.assertEqual(self.fetcher.delete(self.delete_url, stealthy_headers=True).status, 200)
62
- self.assertEqual(self.fetcher.delete(self.delete_url, follow_redirects=True).status, 200)
63
- self.assertEqual(self.fetcher.delete(self.delete_url, timeout=None).status, 200)
64
- self.assertEqual(
65
- self.fetcher.delete(self.delete_url, stealthy_headers=True, follow_redirects=True, timeout=None).status,
66
- 200
67
- )
@@ -1,76 +0,0 @@
1
- import unittest
2
- import pytest_httpbin
3
-
4
- from scrapling import PlayWrightFetcher
5
-
6
-
7
- @pytest_httpbin.use_class_based_httpbin
8
- # @pytest_httpbin.use_class_based_httpbin_secure
9
- class TestPlayWrightFetcher(unittest.TestCase):
10
- def setUp(self):
11
- self.fetcher = PlayWrightFetcher(auto_match=False)
12
- url = self.httpbin.url
13
- self.status_200 = f'{url}/status/200'
14
- self.status_404 = f'{url}/status/404'
15
- self.status_501 = f'{url}/status/501'
16
- self.basic_url = f'{url}/get'
17
- self.html_url = f'{url}/html'
18
- self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response
19
- self.cookies_url = f"{url}/cookies/set/test/value"
20
-
21
- def test_basic_fetch(self):
22
- """Test doing basic fetch request with multiple statuses"""
23
- self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
24
- self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
25
- self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
26
-
27
- def test_networkidle(self):
28
- """Test if waiting for `networkidle` make page does not finish loading or not"""
29
- self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
30
-
31
- def test_blocking_resources(self):
32
- """Test if blocking resources make page does not finish loading or not"""
33
- self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
34
-
35
- def test_waiting_selector(self):
36
- """Test if waiting for a selector make page does not finish loading or not"""
37
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
38
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
39
-
40
- def test_cookies_loading(self):
41
- """Test if cookies are set after the request"""
42
- self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
43
-
44
- def test_automation(self):
45
- """Test if automation break the code or not"""
46
- def scroll_page(page):
47
- page.mouse.wheel(10, 0)
48
- page.mouse.move(100, 400)
49
- page.mouse.up()
50
- return page
51
-
52
- self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
53
-
54
- def test_properties(self):
55
- """Test if different arguments breaks the code or not"""
56
- self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=True, hide_canvas=False).status, 200)
57
- self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=False, hide_canvas=True).status, 200)
58
- self.assertEqual(self.fetcher.fetch(self.html_url, stealth=True).status, 200)
59
- self.assertEqual(self.fetcher.fetch(self.html_url, useragent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0').status, 200)
60
- self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}).status, 200)
61
-
62
- def test_cdp_url(self):
63
- """Test if it's going to try to connect to cdp url or not"""
64
- with self.assertRaises(ValueError):
65
- _ = self.fetcher.fetch(self.html_url, cdp_url='blahblah')
66
-
67
- with self.assertRaises(ValueError):
68
- _ = self.fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True)
69
-
70
- with self.assertRaises(Exception):
71
- # There's no type for this error in PlayWright, it's just `Error`
72
- _ = self.fetcher.fetch(self.html_url, cdp_url='ws://blahblah')
73
-
74
- def test_infinite_timeout(self):
75
- """Test if infinite timeout breaks the code or not"""
76
- self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)