scrapling-0.2.8-py3-none-any.whl → scrapling-0.2.9-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36)
  1. scrapling/__init__.py +4 -4
  2. scrapling/core/custom_types.py +88 -6
  3. scrapling/core/storage_adaptors.py +5 -6
  4. scrapling/core/translator.py +2 -2
  5. scrapling/core/utils.py +29 -27
  6. scrapling/defaults.py +2 -1
  7. scrapling/engines/camo.py +89 -15
  8. scrapling/engines/constants.py +4 -4
  9. scrapling/engines/pw.py +158 -83
  10. scrapling/engines/static.py +91 -48
  11. scrapling/engines/toolbelt/__init__.py +3 -3
  12. scrapling/engines/toolbelt/custom.py +20 -22
  13. scrapling/engines/toolbelt/fingerprints.py +3 -3
  14. scrapling/engines/toolbelt/navigation.py +21 -8
  15. scrapling/fetchers.py +229 -14
  16. scrapling/parser.py +49 -21
  17. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/METADATA +32 -16
  18. scrapling-0.2.9.dist-info/RECORD +47 -0
  19. tests/fetchers/async/__init__.py +0 -0
  20. tests/fetchers/async/test_camoufox.py +95 -0
  21. tests/fetchers/async/test_httpx.py +83 -0
  22. tests/fetchers/async/test_playwright.py +99 -0
  23. tests/fetchers/sync/__init__.py +0 -0
  24. tests/fetchers/sync/test_camoufox.py +68 -0
  25. tests/fetchers/sync/test_httpx.py +82 -0
  26. tests/fetchers/sync/test_playwright.py +87 -0
  27. tests/fetchers/test_utils.py +90 -122
  28. tests/parser/test_automatch.py +64 -9
  29. tests/parser/test_general.py +260 -218
  30. scrapling-0.2.8.dist-info/RECORD +0 -42
  31. tests/fetchers/test_camoufox.py +0 -65
  32. tests/fetchers/test_httpx.py +0 -68
  33. tests/fetchers/test_playwright.py +0 -77
  34. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/LICENSE +0 -0
  35. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/WHEEL +0 -0
  36. {scrapling-0.2.8.dist-info → scrapling-0.2.9.dist-info}/top_level.txt +0 -0
@@ -1,65 +0,0 @@
1
- import unittest
2
-
3
- import pytest_httpbin
4
-
5
- from scrapling import StealthyFetcher
6
-
7
-
8
- @pytest_httpbin.use_class_based_httpbin
9
- # @pytest_httpbin.use_class_based_httpbin_secure
10
- class TestStealthyFetcher(unittest.TestCase):
11
- def setUp(self):
12
- self.fetcher = StealthyFetcher(auto_match=False)
13
- url = self.httpbin.url
14
- self.status_200 = f'{url}/status/200'
15
- self.status_404 = f'{url}/status/404'
16
- self.status_501 = f'{url}/status/501'
17
- self.basic_url = f'{url}/get'
18
- self.html_url = f'{url}/html'
19
- self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response
20
- self.cookies_url = f"{url}/cookies/set/test/value"
21
-
22
- def test_basic_fetch(self):
23
- """Test doing basic fetch request with multiple statuses"""
24
- self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
25
- self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
26
- self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
27
-
28
- def test_networkidle(self):
29
- """Test if waiting for `networkidle` make page does not finish loading or not"""
30
- self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
31
-
32
- def test_blocking_resources(self):
33
- """Test if blocking resources make page does not finish loading or not"""
34
- self.assertEqual(self.fetcher.fetch(self.basic_url, block_images=True).status, 200)
35
- self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
36
-
37
- def test_waiting_selector(self):
38
- """Test if waiting for a selector make page does not finish loading or not"""
39
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
40
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
41
-
42
- def test_cookies_loading(self):
43
- """Test if cookies are set after the request"""
44
- self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
45
-
46
- def test_automation(self):
47
- """Test if automation break the code or not"""
48
- def scroll_page(page):
49
- page.mouse.wheel(10, 0)
50
- page.mouse.move(100, 400)
51
- page.mouse.up()
52
- return page
53
-
54
- self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
55
-
56
- def test_properties(self):
57
- """Test if different arguments breaks the code or not"""
58
- self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status, 200)
59
- self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status, 200)
60
- self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status, 200)
61
- self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status, 200)
62
-
63
- def test_infinite_timeout(self):
64
- """Test if infinite timeout breaks the code or not"""
65
- self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)
@@ -1,68 +0,0 @@
1
- import unittest
2
-
3
- import pytest_httpbin
4
-
5
- from scrapling import Fetcher
6
-
7
-
8
- @pytest_httpbin.use_class_based_httpbin
9
- class TestFetcher(unittest.TestCase):
10
- def setUp(self):
11
- self.fetcher = Fetcher(auto_match=False)
12
- url = self.httpbin.url
13
- self.status_200 = f'{url}/status/200'
14
- self.status_404 = f'{url}/status/404'
15
- self.status_501 = f'{url}/status/501'
16
- self.basic_url = f'{url}/get'
17
- self.post_url = f'{url}/post'
18
- self.put_url = f'{url}/put'
19
- self.delete_url = f'{url}/delete'
20
- self.html_url = f'{url}/html'
21
-
22
- def test_basic_get(self):
23
- """Test doing basic get request with multiple statuses"""
24
- self.assertEqual(self.fetcher.get(self.status_200).status, 200)
25
- self.assertEqual(self.fetcher.get(self.status_404).status, 404)
26
- self.assertEqual(self.fetcher.get(self.status_501).status, 501)
27
-
28
- def test_get_properties(self):
29
- """Test if different arguments with GET request breaks the code or not"""
30
- self.assertEqual(self.fetcher.get(self.status_200, stealthy_headers=True).status, 200)
31
- self.assertEqual(self.fetcher.get(self.status_200, follow_redirects=True).status, 200)
32
- self.assertEqual(self.fetcher.get(self.status_200, timeout=None).status, 200)
33
- self.assertEqual(
34
- self.fetcher.get(self.status_200, stealthy_headers=True, follow_redirects=True, timeout=None).status,
35
- 200
36
- )
37
-
38
- def test_post_properties(self):
39
- """Test if different arguments with POST request breaks the code or not"""
40
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}).status, 200)
41
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
42
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status, 200)
43
- self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status, 200)
44
- self.assertEqual(
45
- self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
46
- 200
47
- )
48
-
49
- def test_put_properties(self):
50
- """Test if different arguments with PUT request breaks the code or not"""
51
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}).status, 200)
52
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
53
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status, 200)
54
- self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status, 200)
55
- self.assertEqual(
56
- self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
57
- 200
58
- )
59
-
60
- def test_delete_properties(self):
61
- """Test if different arguments with DELETE request breaks the code or not"""
62
- self.assertEqual(self.fetcher.delete(self.delete_url, stealthy_headers=True).status, 200)
63
- self.assertEqual(self.fetcher.delete(self.delete_url, follow_redirects=True).status, 200)
64
- self.assertEqual(self.fetcher.delete(self.delete_url, timeout=None).status, 200)
65
- self.assertEqual(
66
- self.fetcher.delete(self.delete_url, stealthy_headers=True, follow_redirects=True, timeout=None).status,
67
- 200
68
- )
@@ -1,77 +0,0 @@
1
- import unittest
2
-
3
- import pytest_httpbin
4
-
5
- from scrapling import PlayWrightFetcher
6
-
7
-
8
- @pytest_httpbin.use_class_based_httpbin
9
- # @pytest_httpbin.use_class_based_httpbin_secure
10
- class TestPlayWrightFetcher(unittest.TestCase):
11
- def setUp(self):
12
- self.fetcher = PlayWrightFetcher(auto_match=False)
13
- url = self.httpbin.url
14
- self.status_200 = f'{url}/status/200'
15
- self.status_404 = f'{url}/status/404'
16
- self.status_501 = f'{url}/status/501'
17
- self.basic_url = f'{url}/get'
18
- self.html_url = f'{url}/html'
19
- self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response
20
- self.cookies_url = f"{url}/cookies/set/test/value"
21
-
22
- def test_basic_fetch(self):
23
- """Test doing basic fetch request with multiple statuses"""
24
- self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
25
- self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
26
- self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
27
-
28
- def test_networkidle(self):
29
- """Test if waiting for `networkidle` make page does not finish loading or not"""
30
- self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
31
-
32
- def test_blocking_resources(self):
33
- """Test if blocking resources make page does not finish loading or not"""
34
- self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
35
-
36
- def test_waiting_selector(self):
37
- """Test if waiting for a selector make page does not finish loading or not"""
38
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
39
- self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
40
-
41
- def test_cookies_loading(self):
42
- """Test if cookies are set after the request"""
43
- self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
44
-
45
- def test_automation(self):
46
- """Test if automation break the code or not"""
47
- def scroll_page(page):
48
- page.mouse.wheel(10, 0)
49
- page.mouse.move(100, 400)
50
- page.mouse.up()
51
- return page
52
-
53
- self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
54
-
55
- def test_properties(self):
56
- """Test if different arguments breaks the code or not"""
57
- self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=True, hide_canvas=False).status, 200)
58
- self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=False, hide_canvas=True).status, 200)
59
- self.assertEqual(self.fetcher.fetch(self.html_url, stealth=True).status, 200)
60
- self.assertEqual(self.fetcher.fetch(self.html_url, useragent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0').status, 200)
61
- self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}).status, 200)
62
-
63
- def test_cdp_url(self):
64
- """Test if it's going to try to connect to cdp url or not"""
65
- with self.assertRaises(ValueError):
66
- _ = self.fetcher.fetch(self.html_url, cdp_url='blahblah')
67
-
68
- with self.assertRaises(ValueError):
69
- _ = self.fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True)
70
-
71
- with self.assertRaises(Exception):
72
- # There's no type for this error in PlayWright, it's just `Error`
73
- _ = self.fetcher.fetch(self.html_url, cdp_url='ws://blahblah')
74
-
75
- def test_infinite_timeout(self):
76
- """Test if infinite timeout breaks the code or not"""
77
- self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)