scrapling-0.2.8-py3-none-any.whl → scrapling-0.2.91-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- scrapling/__init__.py +4 -4
- scrapling/core/_types.py +2 -0
- scrapling/core/custom_types.py +88 -6
- scrapling/core/storage_adaptors.py +5 -6
- scrapling/core/translator.py +2 -2
- scrapling/core/utils.py +29 -27
- scrapling/defaults.py +2 -1
- scrapling/engines/camo.py +124 -24
- scrapling/engines/constants.py +4 -4
- scrapling/engines/pw.py +195 -91
- scrapling/engines/static.py +91 -48
- scrapling/engines/toolbelt/__init__.py +3 -3
- scrapling/engines/toolbelt/custom.py +16 -22
- scrapling/engines/toolbelt/fingerprints.py +3 -3
- scrapling/engines/toolbelt/navigation.py +21 -8
- scrapling/fetchers.py +231 -16
- scrapling/parser.py +50 -22
- {scrapling-0.2.8.dist-info → scrapling-0.2.91.dist-info}/METADATA +33 -18
- scrapling-0.2.91.dist-info/RECORD +47 -0
- tests/fetchers/async/__init__.py +0 -0
- tests/fetchers/async/test_camoufox.py +95 -0
- tests/fetchers/async/test_httpx.py +83 -0
- tests/fetchers/async/test_playwright.py +99 -0
- tests/fetchers/sync/__init__.py +0 -0
- tests/fetchers/sync/test_camoufox.py +68 -0
- tests/fetchers/sync/test_httpx.py +82 -0
- tests/fetchers/sync/test_playwright.py +87 -0
- tests/fetchers/test_utils.py +90 -122
- tests/parser/test_automatch.py +64 -9
- tests/parser/test_general.py +260 -218
- scrapling-0.2.8.dist-info/RECORD +0 -42
- tests/fetchers/test_camoufox.py +0 -65
- tests/fetchers/test_httpx.py +0 -68
- tests/fetchers/test_playwright.py +0 -77
- {scrapling-0.2.8.dist-info → scrapling-0.2.91.dist-info}/LICENSE +0 -0
- {scrapling-0.2.8.dist-info → scrapling-0.2.91.dist-info}/WHEEL +0 -0
- {scrapling-0.2.8.dist-info → scrapling-0.2.91.dist-info}/top_level.txt +0 -0
tests/fetchers/test_camoufox.py
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
import unittest
|
2
|
-
|
3
|
-
import pytest_httpbin
|
4
|
-
|
5
|
-
from scrapling import StealthyFetcher
|
6
|
-
|
7
|
-
|
8
|
-
@pytest_httpbin.use_class_based_httpbin
|
9
|
-
# @pytest_httpbin.use_class_based_httpbin_secure
|
10
|
-
class TestStealthyFetcher(unittest.TestCase):
|
11
|
-
def setUp(self):
|
12
|
-
self.fetcher = StealthyFetcher(auto_match=False)
|
13
|
-
url = self.httpbin.url
|
14
|
-
self.status_200 = f'{url}/status/200'
|
15
|
-
self.status_404 = f'{url}/status/404'
|
16
|
-
self.status_501 = f'{url}/status/501'
|
17
|
-
self.basic_url = f'{url}/get'
|
18
|
-
self.html_url = f'{url}/html'
|
19
|
-
self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response
|
20
|
-
self.cookies_url = f"{url}/cookies/set/test/value"
|
21
|
-
|
22
|
-
def test_basic_fetch(self):
|
23
|
-
"""Test doing basic fetch request with multiple statuses"""
|
24
|
-
self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
|
25
|
-
self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
|
26
|
-
self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
|
27
|
-
|
28
|
-
def test_networkidle(self):
|
29
|
-
"""Test if waiting for `networkidle` make page does not finish loading or not"""
|
30
|
-
self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
|
31
|
-
|
32
|
-
def test_blocking_resources(self):
|
33
|
-
"""Test if blocking resources make page does not finish loading or not"""
|
34
|
-
self.assertEqual(self.fetcher.fetch(self.basic_url, block_images=True).status, 200)
|
35
|
-
self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
|
36
|
-
|
37
|
-
def test_waiting_selector(self):
|
38
|
-
"""Test if waiting for a selector make page does not finish loading or not"""
|
39
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
|
40
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
|
41
|
-
|
42
|
-
def test_cookies_loading(self):
|
43
|
-
"""Test if cookies are set after the request"""
|
44
|
-
self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
|
45
|
-
|
46
|
-
def test_automation(self):
|
47
|
-
"""Test if automation break the code or not"""
|
48
|
-
def scroll_page(page):
|
49
|
-
page.mouse.wheel(10, 0)
|
50
|
-
page.mouse.move(100, 400)
|
51
|
-
page.mouse.up()
|
52
|
-
return page
|
53
|
-
|
54
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
|
55
|
-
|
56
|
-
def test_properties(self):
|
57
|
-
"""Test if different arguments breaks the code or not"""
|
58
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status, 200)
|
59
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status, 200)
|
60
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status, 200)
|
61
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status, 200)
|
62
|
-
|
63
|
-
def test_infinite_timeout(self):
|
64
|
-
"""Test if infinite timeout breaks the code or not"""
|
65
|
-
self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)
|
tests/fetchers/test_httpx.py
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
import unittest
|
2
|
-
|
3
|
-
import pytest_httpbin
|
4
|
-
|
5
|
-
from scrapling import Fetcher
|
6
|
-
|
7
|
-
|
8
|
-
@pytest_httpbin.use_class_based_httpbin
|
9
|
-
class TestFetcher(unittest.TestCase):
|
10
|
-
def setUp(self):
|
11
|
-
self.fetcher = Fetcher(auto_match=False)
|
12
|
-
url = self.httpbin.url
|
13
|
-
self.status_200 = f'{url}/status/200'
|
14
|
-
self.status_404 = f'{url}/status/404'
|
15
|
-
self.status_501 = f'{url}/status/501'
|
16
|
-
self.basic_url = f'{url}/get'
|
17
|
-
self.post_url = f'{url}/post'
|
18
|
-
self.put_url = f'{url}/put'
|
19
|
-
self.delete_url = f'{url}/delete'
|
20
|
-
self.html_url = f'{url}/html'
|
21
|
-
|
22
|
-
def test_basic_get(self):
|
23
|
-
"""Test doing basic get request with multiple statuses"""
|
24
|
-
self.assertEqual(self.fetcher.get(self.status_200).status, 200)
|
25
|
-
self.assertEqual(self.fetcher.get(self.status_404).status, 404)
|
26
|
-
self.assertEqual(self.fetcher.get(self.status_501).status, 501)
|
27
|
-
|
28
|
-
def test_get_properties(self):
|
29
|
-
"""Test if different arguments with GET request breaks the code or not"""
|
30
|
-
self.assertEqual(self.fetcher.get(self.status_200, stealthy_headers=True).status, 200)
|
31
|
-
self.assertEqual(self.fetcher.get(self.status_200, follow_redirects=True).status, 200)
|
32
|
-
self.assertEqual(self.fetcher.get(self.status_200, timeout=None).status, 200)
|
33
|
-
self.assertEqual(
|
34
|
-
self.fetcher.get(self.status_200, stealthy_headers=True, follow_redirects=True, timeout=None).status,
|
35
|
-
200
|
36
|
-
)
|
37
|
-
|
38
|
-
def test_post_properties(self):
|
39
|
-
"""Test if different arguments with POST request breaks the code or not"""
|
40
|
-
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}).status, 200)
|
41
|
-
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
|
42
|
-
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status, 200)
|
43
|
-
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status, 200)
|
44
|
-
self.assertEqual(
|
45
|
-
self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
|
46
|
-
200
|
47
|
-
)
|
48
|
-
|
49
|
-
def test_put_properties(self):
|
50
|
-
"""Test if different arguments with PUT request breaks the code or not"""
|
51
|
-
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}).status, 200)
|
52
|
-
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
|
53
|
-
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status, 200)
|
54
|
-
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status, 200)
|
55
|
-
self.assertEqual(
|
56
|
-
self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
|
57
|
-
200
|
58
|
-
)
|
59
|
-
|
60
|
-
def test_delete_properties(self):
|
61
|
-
"""Test if different arguments with DELETE request breaks the code or not"""
|
62
|
-
self.assertEqual(self.fetcher.delete(self.delete_url, stealthy_headers=True).status, 200)
|
63
|
-
self.assertEqual(self.fetcher.delete(self.delete_url, follow_redirects=True).status, 200)
|
64
|
-
self.assertEqual(self.fetcher.delete(self.delete_url, timeout=None).status, 200)
|
65
|
-
self.assertEqual(
|
66
|
-
self.fetcher.delete(self.delete_url, stealthy_headers=True, follow_redirects=True, timeout=None).status,
|
67
|
-
200
|
68
|
-
)
|
tests/fetchers/test_playwright.py
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
import unittest
|
2
|
-
|
3
|
-
import pytest_httpbin
|
4
|
-
|
5
|
-
from scrapling import PlayWrightFetcher
|
6
|
-
|
7
|
-
|
8
|
-
@pytest_httpbin.use_class_based_httpbin
|
9
|
-
# @pytest_httpbin.use_class_based_httpbin_secure
|
10
|
-
class TestPlayWrightFetcher(unittest.TestCase):
|
11
|
-
def setUp(self):
|
12
|
-
self.fetcher = PlayWrightFetcher(auto_match=False)
|
13
|
-
url = self.httpbin.url
|
14
|
-
self.status_200 = f'{url}/status/200'
|
15
|
-
self.status_404 = f'{url}/status/404'
|
16
|
-
self.status_501 = f'{url}/status/501'
|
17
|
-
self.basic_url = f'{url}/get'
|
18
|
-
self.html_url = f'{url}/html'
|
19
|
-
self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response
|
20
|
-
self.cookies_url = f"{url}/cookies/set/test/value"
|
21
|
-
|
22
|
-
def test_basic_fetch(self):
|
23
|
-
"""Test doing basic fetch request with multiple statuses"""
|
24
|
-
self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
|
25
|
-
self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
|
26
|
-
self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
|
27
|
-
|
28
|
-
def test_networkidle(self):
|
29
|
-
"""Test if waiting for `networkidle` make page does not finish loading or not"""
|
30
|
-
self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
|
31
|
-
|
32
|
-
def test_blocking_resources(self):
|
33
|
-
"""Test if blocking resources make page does not finish loading or not"""
|
34
|
-
self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
|
35
|
-
|
36
|
-
def test_waiting_selector(self):
|
37
|
-
"""Test if waiting for a selector make page does not finish loading or not"""
|
38
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
|
39
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
|
40
|
-
|
41
|
-
def test_cookies_loading(self):
|
42
|
-
"""Test if cookies are set after the request"""
|
43
|
-
self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
|
44
|
-
|
45
|
-
def test_automation(self):
|
46
|
-
"""Test if automation break the code or not"""
|
47
|
-
def scroll_page(page):
|
48
|
-
page.mouse.wheel(10, 0)
|
49
|
-
page.mouse.move(100, 400)
|
50
|
-
page.mouse.up()
|
51
|
-
return page
|
52
|
-
|
53
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
|
54
|
-
|
55
|
-
def test_properties(self):
|
56
|
-
"""Test if different arguments breaks the code or not"""
|
57
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=True, hide_canvas=False).status, 200)
|
58
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=False, hide_canvas=True).status, 200)
|
59
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, stealth=True).status, 200)
|
60
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, useragent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0').status, 200)
|
61
|
-
self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}).status, 200)
|
62
|
-
|
63
|
-
def test_cdp_url(self):
|
64
|
-
"""Test if it's going to try to connect to cdp url or not"""
|
65
|
-
with self.assertRaises(ValueError):
|
66
|
-
_ = self.fetcher.fetch(self.html_url, cdp_url='blahblah')
|
67
|
-
|
68
|
-
with self.assertRaises(ValueError):
|
69
|
-
_ = self.fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True)
|
70
|
-
|
71
|
-
with self.assertRaises(Exception):
|
72
|
-
# There's no type for this error in PlayWright, it's just `Error`
|
73
|
-
_ = self.fetcher.fetch(self.html_url, cdp_url='ws://blahblah')
|
74
|
-
|
75
|
-
def test_infinite_timeout(self):
|
76
|
-
"""Test if infinite timeout breaks the code or not"""
|
77
|
-
self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)
|
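{scrapling-0.2.8.dist-info → scrapling-0.2.91.dist-info}/LICENSE
File without changes
|
{scrapling-0.2.8.dist-info → scrapling-0.2.91.dist-info}/WHEEL
File without changes
|
{scrapling-0.2.8.dist-info → scrapling-0.2.91.dist-info}/top_level.txt
File without changes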
|