scrapling-0.2.98-py3-none-any.whl → scrapling-0.3-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (54)
  1. scrapling/__init__.py +18 -31
  2. scrapling/cli.py +818 -20
  3. scrapling/core/_html_utils.py +348 -0
  4. scrapling/core/_types.py +34 -17
  5. scrapling/core/ai.py +611 -0
  6. scrapling/core/custom_types.py +183 -100
  7. scrapling/core/mixins.py +27 -19
  8. scrapling/core/shell.py +647 -0
  9. scrapling/core/{storage_adaptors.py → storage.py} +41 -33
  10. scrapling/core/translator.py +20 -26
  11. scrapling/core/utils.py +49 -54
  12. scrapling/engines/__init__.py +15 -6
  13. scrapling/engines/_browsers/__init__.py +2 -0
  14. scrapling/engines/_browsers/_camoufox.py +745 -0
  15. scrapling/engines/_browsers/_config_tools.py +130 -0
  16. scrapling/engines/_browsers/_controllers.py +630 -0
  17. scrapling/engines/_browsers/_page.py +93 -0
  18. scrapling/engines/_browsers/_validators.py +150 -0
  19. scrapling/engines/constants.py +101 -88
  20. scrapling/engines/static.py +667 -110
  21. scrapling/engines/toolbelt/__init__.py +20 -6
  22. scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +2 -1
  23. scrapling/engines/toolbelt/convertor.py +254 -0
  24. scrapling/engines/toolbelt/custom.py +205 -186
  25. scrapling/engines/toolbelt/fingerprints.py +32 -46
  26. scrapling/engines/toolbelt/navigation.py +68 -39
  27. scrapling/fetchers.py +255 -260
  28. scrapling/parser.py +781 -449
  29. scrapling-0.3.dist-info/METADATA +409 -0
  30. scrapling-0.3.dist-info/RECORD +41 -0
  31. {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info}/WHEEL +1 -1
  32. {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info}/top_level.txt +0 -1
  33. scrapling/defaults.py +0 -19
  34. scrapling/engines/camo.py +0 -299
  35. scrapling/engines/pw.py +0 -428
  36. scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -5
  37. scrapling-0.2.98.dist-info/METADATA +0 -867
  38. scrapling-0.2.98.dist-info/RECORD +0 -49
  39. tests/__init__.py +0 -1
  40. tests/fetchers/__init__.py +0 -1
  41. tests/fetchers/async/__init__.py +0 -0
  42. tests/fetchers/async/test_camoufox.py +0 -95
  43. tests/fetchers/async/test_httpx.py +0 -83
  44. tests/fetchers/async/test_playwright.py +0 -99
  45. tests/fetchers/sync/__init__.py +0 -0
  46. tests/fetchers/sync/test_camoufox.py +0 -68
  47. tests/fetchers/sync/test_httpx.py +0 -82
  48. tests/fetchers/sync/test_playwright.py +0 -87
  49. tests/fetchers/test_utils.py +0 -97
  50. tests/parser/__init__.py +0 -0
  51. tests/parser/test_automatch.py +0 -111
  52. tests/parser/test_general.py +0 -330
  53. {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info}/entry_points.txt +0 -0
  54. {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info/licenses}/LICENSE +0 -0
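Several renames above change import paths: `scrapling/core/storage_adaptors.py` becomes `scrapling/core/storage.py`, and the monolithic `engines/camo.py` / `engines/pw.py` are dropped in favor of the new `scrapling/engines/_browsers/` package. A minimal, hedged sketch of coping with the storage rename follows; the helper name is illustrative, and this diff does not show what the 0.3 module actually exports:

```python
from importlib.util import find_spec

def storage_module_path() -> str:
    # 0.3 renamed scrapling/core/storage_adaptors.py to scrapling/core/storage.py,
    # so probe for the new path first and fall back to the old one.
    # Raises ModuleNotFoundError if scrapling itself is not installed.
    if find_spec("scrapling.core.storage") is not None:
        return "scrapling.core.storage"
    return "scrapling.core.storage_adaptors"

print(storage_module_path())
```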
scrapling-0.2.98.dist-info/RECORD DELETED
@@ -1,49 +0,0 @@
- scrapling/__init__.py,sha256=S-SWj9O2r0Tu8Z-mPxDJ-z3h5k-bBfhFOETaCY4A9dc,1510
- scrapling/cli.py,sha256=7yTsMhVAqqS8Z27T5dFKrR9_X8vuFjBlwYgAF22W7T8,1292
- scrapling/defaults.py,sha256=MAn2MMLBFvoe4i3u_qlp6YEvGUiCjNPPDux1cFCdpsU,866
- scrapling/fetchers.py,sha256=xwVCjAg0VCXwhB2igSLQvb0D0bOPGfg5WNtxgE7m-W0,34987
- scrapling/parser.py,sha256=1xS1UjCm1GVnKcVAtup9rSE1xuYPxXOgJe-8LJE5gUk,53956
- scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
- scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- scrapling/core/_types.py,sha256=dKVi_dUxdxNtTr7sj7ySkHXDfrsmjFTfpCQeO5tGuBY,670
- scrapling/core/custom_types.py,sha256=EWGx5t5scHEB1SMsitzc8duskq-5f-Qaj40IWkNTRzM,12947
- scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
- scrapling/core/storage_adaptors.py,sha256=gZbUpHtLOL7o_oZbES_o40r39zShxTeTM8YK6dXA5Zo,6214
- scrapling/core/translator.py,sha256=3a2VX9KR-q-GzwT1OgGDv1UlzIkvBggkQXUdiMyL-4c,5277
- scrapling/core/utils.py,sha256=KX88B3tV1-SgCAr69TUN3LfmsTDcLnEhYJiPuWd31yA,3704
- scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c,267
- scrapling/engines/camo.py,sha256=oYKA0l3EpOcQW2APRj5FEmslqtp9A8i_ZljqlKvIDeI,16129
- scrapling/engines/constants.py,sha256=Gb_nXFoBB4ujJkd05SKkenMe1UDiRYQA3dkmA3DunLg,3723
- scrapling/engines/pw.py,sha256=cZraIBWd9ulEGEdhETIGmpevi62CN9JGcUU1OIDdxkA,21369
- scrapling/engines/static.py,sha256=EjdaR0beqWfEKKavT7vlBnozoayQaVpqeVtaOuzd384,9306
- scrapling/engines/toolbelt/__init__.py,sha256=VQDdYm1zY9Apno6d8UrULk29vUjllZrQqD8mXL1E2Fc,402
- scrapling/engines/toolbelt/custom.py,sha256=_-baGB8oOOHogbaddtGsq_K_01ccOjOkGA6tOKk28hM,12811
- scrapling/engines/toolbelt/fingerprints.py,sha256=Zzoqq3p6X_8D7eTxACz3z96cBZWWK61iKOGo2sZUtlg,2924
- scrapling/engines/toolbelt/navigation.py,sha256=fMjDgicqy2MoZZll2h5EvrrxkL6yNrC09v8isTpwAt0,4565
- scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
- scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
- scrapling/engines/toolbelt/bypasses/pdf_viewer.js,sha256=mKjjSuP1-BOGC_2WhRYHJo_LP7lTBi2KXmP_zsHO_tI,173
- scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js,sha256=3RP1AE_XZRvpupeV_i-WSNVqRxyUy0qd8rQV8j_4j3U,221
- scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUvVWYUkPUbblkfMfpiLvBY7w,599
- scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
- scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
- tests/__init__.py,sha256=YHFB5ftzgLQVh6gbPfbYcY4yOS9DOBp5dBa6I-qtm8U,32
- tests/fetchers/__init__.py,sha256=6H4NgARhyTcGGd3dNCKQJ8kUFdrAEMSScQL7Ga_vU3c,43
- tests/fetchers/test_utils.py,sha256=ANFu-4FFhtyGFGIwJksUO2M2tTTcKU2M_t6F2aav8lM,4967
- tests/fetchers/async/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tests/fetchers/async/test_camoufox.py,sha256=BANJ0TVqEdsjkYlsyU-q_spfaMsqTLOBQU8LUDurL9I,3685
- tests/fetchers/async/test_httpx.py,sha256=6WgsvqV1-rYTjZ9na5x-wt49C3Ur9D99HXBFbewO0gc,3888
- tests/fetchers/async/test_playwright.py,sha256=rr_3vB9LWclbl7PBNMH2MNU6CsirvJAIx_LsI9mLil0,4106
- tests/fetchers/sync/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tests/fetchers/sync/test_camoufox.py,sha256=IcDXPAWSSJnYT6psDFKSbCeym5n7hCrMPYQEghaOX3A,3165
- tests/fetchers/sync/test_httpx.py,sha256=xItYWjnDOIswKJzua2tDq8Oy43nTeFl0O1bci7lzGmg,3615
- tests/fetchers/sync/test_playwright.py,sha256=MEyDRaMyxDIWupG7f_xz0f0jd9Cpbd5rXCPz6qUy8cs,3818
- tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tests/parser/test_automatch.py,sha256=SxsNdExE8zz8AcPRQFBUjZ3Q_1-tPOd9dzVvMSZpOYQ,4908
- tests/parser/test_general.py,sha256=dyfOsc8lleoY4AxcfDUBUaD1i95xecfYuTUhKBsYjwo,12100
- scrapling-0.2.98.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
- scrapling-0.2.98.dist-info/METADATA,sha256=Un_ROxrGIvk_8w-ECQbwKAcJzYyx3MTWS1DHt9FRqdI,69718
- scrapling-0.2.98.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
- scrapling-0.2.98.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
- scrapling-0.2.98.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
- scrapling-0.2.98.dist-info/RECORD,,
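For reference, each RECORD line above follows the standard wheel format `path,sha256=<digest>,size` (PEP 376/427), where the digest is the urlsafe-base64 SHA-256 of the file with its `=` padding stripped. A minimal sketch of rebuilding such an entry; `record_entry` is an illustrative helper, not part of scrapling:

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Wheel RECORD entries are "path,sha256=<digest>,size", where <digest> is
    # the urlsafe-base64 SHA-256 of the file contents, '=' padding stripped.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode('ascii')},{len(data)}"

# Run inside the unpacked 0.2.98 wheel, this should reproduce the entry above:
# record_entry("scrapling/py.typed")
```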
tests/__init__.py DELETED
@@ -1 +0,0 @@
- """Package for test project."""
tests/fetchers/__init__.py DELETED
@@ -1 +0,0 @@
- # Because I'm too lazy to mock requests :)
tests/fetchers/async/__init__.py DELETED
File without changes
tests/fetchers/async/test_camoufox.py DELETED
@@ -1,95 +0,0 @@
- import pytest
- import pytest_httpbin
-
- from scrapling import StealthyFetcher
-
-
- @pytest_httpbin.use_class_based_httpbin
- @pytest.mark.asyncio
- class TestStealthyFetcher:
-     @pytest.fixture(scope="class")
-     def fetcher(self):
-         return StealthyFetcher(auto_match=False)
-
-     @pytest.fixture(scope="class")
-     def urls(self, httpbin):
-         url = httpbin.url
-         return {
-             'status_200': f'{url}/status/200',
-             'status_404': f'{url}/status/404',
-             'status_501': f'{url}/status/501',
-             'basic_url': f'{url}/get',
-             'html_url': f'{url}/html',
-             'delayed_url': f'{url}/delay/10',  # 10 Seconds delay response
-             'cookies_url': f"{url}/cookies/set/test/value"
-         }
-
-     async def test_basic_fetch(self, fetcher, urls):
-         """Test doing basic fetch request with multiple statuses"""
-         assert (await fetcher.async_fetch(urls['status_200'])).status == 200
-         assert (await fetcher.async_fetch(urls['status_404'])).status == 404
-         assert (await fetcher.async_fetch(urls['status_501'])).status == 501
-
-     async def test_networkidle(self, fetcher, urls):
-         """Test if waiting for `networkidle` make page does not finish loading or not"""
-         assert (await fetcher.async_fetch(urls['basic_url'], network_idle=True)).status == 200
-
-     async def test_blocking_resources(self, fetcher, urls):
-         """Test if blocking resources make page does not finish loading or not"""
-         assert (await fetcher.async_fetch(urls['basic_url'], block_images=True)).status == 200
-         assert (await fetcher.async_fetch(urls['basic_url'], disable_resources=True)).status == 200
-
-     async def test_waiting_selector(self, fetcher, urls):
-         """Test if waiting for a selector make page does not finish loading or not"""
-         assert (await fetcher.async_fetch(urls['html_url'], wait_selector='h1')).status == 200
-         assert (await fetcher.async_fetch(
-             urls['html_url'],
-             wait_selector='h1',
-             wait_selector_state='visible'
-         )).status == 200
-
-     async def test_cookies_loading(self, fetcher, urls):
-         """Test if cookies are set after the request"""
-         response = await fetcher.async_fetch(urls['cookies_url'])
-         assert response.cookies == {'test': 'value'}
-
-     async def test_automation(self, fetcher, urls):
-         """Test if automation break the code or not"""
-
-         async def scroll_page(page):
-             await page.mouse.wheel(10, 0)
-             await page.mouse.move(100, 400)
-             await page.mouse.up()
-             return page
-
-         assert (await fetcher.async_fetch(urls['html_url'], page_action=scroll_page)).status == 200
-
-     async def test_properties(self, fetcher, urls):
-         """Test if different arguments breaks the code or not"""
-         assert (await fetcher.async_fetch(
-             urls['html_url'],
-             block_webrtc=True,
-             allow_webgl=True
-         )).status == 200
-
-         assert (await fetcher.async_fetch(
-             urls['html_url'],
-             block_webrtc=False,
-             allow_webgl=True
-         )).status == 200
-
-         assert (await fetcher.async_fetch(
-             urls['html_url'],
-             block_webrtc=True,
-             allow_webgl=False
-         )).status == 200
-
-         assert (await fetcher.async_fetch(
-             urls['html_url'],
-             extra_headers={'ayo': ''},
-             os_randomize=True
-         )).status == 200
-
-     async def test_infinite_timeout(self, fetcher, urls):
-         """Test if infinite timeout breaks the code or not"""
-         assert (await fetcher.async_fetch(urls['delayed_url'], timeout=None)).status == 200
tests/fetchers/async/test_httpx.py DELETED
@@ -1,83 +0,0 @@
- import pytest
- import pytest_httpbin
-
- from scrapling.fetchers import AsyncFetcher
-
-
- @pytest_httpbin.use_class_based_httpbin
- @pytest.mark.asyncio
- class TestAsyncFetcher:
-     @pytest.fixture(scope="class")
-     def fetcher(self):
-         return AsyncFetcher(auto_match=True)
-
-     @pytest.fixture(scope="class")
-     def urls(self, httpbin):
-         return {
-             'status_200': f'{httpbin.url}/status/200',
-             'status_404': f'{httpbin.url}/status/404',
-             'status_501': f'{httpbin.url}/status/501',
-             'basic_url': f'{httpbin.url}/get',
-             'post_url': f'{httpbin.url}/post',
-             'put_url': f'{httpbin.url}/put',
-             'delete_url': f'{httpbin.url}/delete',
-             'html_url': f'{httpbin.url}/html'
-         }
-
-     async def test_basic_get(self, fetcher, urls):
-         """Test doing basic get request with multiple statuses"""
-         assert (await fetcher.get(urls['status_200'])).status == 200
-         assert (await fetcher.get(urls['status_404'])).status == 404
-         assert (await fetcher.get(urls['status_501'])).status == 501
-
-     async def test_get_properties(self, fetcher, urls):
-         """Test if different arguments with GET request breaks the code or not"""
-         assert (await fetcher.get(urls['status_200'], stealthy_headers=True)).status == 200
-         assert (await fetcher.get(urls['status_200'], follow_redirects=True)).status == 200
-         assert (await fetcher.get(urls['status_200'], timeout=None)).status == 200
-         assert (await fetcher.get(
-             urls['status_200'],
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         )).status == 200
-
-     async def test_post_properties(self, fetcher, urls):
-         """Test if different arguments with POST request breaks the code or not"""
-         assert (await fetcher.post(urls['post_url'], data={'key': 'value'})).status == 200
-         assert (await fetcher.post(urls['post_url'], data={'key': 'value'}, stealthy_headers=True)).status == 200
-         assert (await fetcher.post(urls['post_url'], data={'key': 'value'}, follow_redirects=True)).status == 200
-         assert (await fetcher.post(urls['post_url'], data={'key': 'value'}, timeout=None)).status == 200
-         assert (await fetcher.post(
-             urls['post_url'],
-             data={'key': 'value'},
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         )).status == 200
-
-     async def test_put_properties(self, fetcher, urls):
-         """Test if different arguments with PUT request breaks the code or not"""
-         assert (await fetcher.put(urls['put_url'], data={'key': 'value'})).status in [200, 405]
-         assert (await fetcher.put(urls['put_url'], data={'key': 'value'}, stealthy_headers=True)).status in [200, 405]
-         assert (await fetcher.put(urls['put_url'], data={'key': 'value'}, follow_redirects=True)).status in [200, 405]
-         assert (await fetcher.put(urls['put_url'], data={'key': 'value'}, timeout=None)).status in [200, 405]
-         assert (await fetcher.put(
-             urls['put_url'],
-             data={'key': 'value'},
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         )).status in [200, 405]
-
-     async def test_delete_properties(self, fetcher, urls):
-         """Test if different arguments with DELETE request breaks the code or not"""
-         assert (await fetcher.delete(urls['delete_url'], stealthy_headers=True)).status == 200
-         assert (await fetcher.delete(urls['delete_url'], follow_redirects=True)).status == 200
-         assert (await fetcher.delete(urls['delete_url'], timeout=None)).status == 200
-         assert (await fetcher.delete(
-             urls['delete_url'],
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         )).status == 200
tests/fetchers/async/test_playwright.py DELETED
@@ -1,99 +0,0 @@
- import pytest
- import pytest_httpbin
-
- from scrapling import PlayWrightFetcher
-
-
- @pytest_httpbin.use_class_based_httpbin
- class TestPlayWrightFetcherAsync:
-     @pytest.fixture
-     def fetcher(self):
-         return PlayWrightFetcher(auto_match=False)
-
-     @pytest.fixture
-     def urls(self, httpbin):
-         return {
-             'status_200': f'{httpbin.url}/status/200',
-             'status_404': f'{httpbin.url}/status/404',
-             'status_501': f'{httpbin.url}/status/501',
-             'basic_url': f'{httpbin.url}/get',
-             'html_url': f'{httpbin.url}/html',
-             'delayed_url': f'{httpbin.url}/delay/10',
-             'cookies_url': f"{httpbin.url}/cookies/set/test/value"
-         }
-
-     @pytest.mark.asyncio
-     async def test_basic_fetch(self, fetcher, urls):
-         """Test doing basic fetch request with multiple statuses"""
-         response = await fetcher.async_fetch(urls['status_200'])
-         assert response.status == 200
-
-     @pytest.mark.asyncio
-     async def test_networkidle(self, fetcher, urls):
-         """Test if waiting for `networkidle` make page does not finish loading or not"""
-         response = await fetcher.async_fetch(urls['basic_url'], network_idle=True)
-         assert response.status == 200
-
-     @pytest.mark.asyncio
-     async def test_blocking_resources(self, fetcher, urls):
-         """Test if blocking resources make page does not finish loading or not"""
-         response = await fetcher.async_fetch(urls['basic_url'], disable_resources=True)
-         assert response.status == 200
-
-     @pytest.mark.asyncio
-     async def test_waiting_selector(self, fetcher, urls):
-         """Test if waiting for a selector make page does not finish loading or not"""
-         response1 = await fetcher.async_fetch(urls['html_url'], wait_selector='h1')
-         assert response1.status == 200
-
-         response2 = await fetcher.async_fetch(urls['html_url'], wait_selector='h1', wait_selector_state='visible')
-         assert response2.status == 200
-
-     @pytest.mark.asyncio
-     async def test_cookies_loading(self, fetcher, urls):
-         """Test if cookies are set after the request"""
-         response = await fetcher.async_fetch(urls['cookies_url'])
-         assert response.cookies == {'test': 'value'}
-
-     @pytest.mark.asyncio
-     async def test_automation(self, fetcher, urls):
-         """Test if automation break the code or not"""
-         async def scroll_page(page):
-             await page.mouse.wheel(10, 0)
-             await page.mouse.move(100, 400)
-             await page.mouse.up()
-             return page
-
-         response = await fetcher.async_fetch(urls['html_url'], page_action=scroll_page)
-         assert response.status == 200
-
-     @pytest.mark.parametrize("kwargs", [
-         {"disable_webgl": True, "hide_canvas": False},
-         {"disable_webgl": False, "hide_canvas": True},
-         # {"stealth": True},  # causes issues with Github Actions
-         {"useragent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0'},
-         {"extra_headers": {'ayo': ''}}
-     ])
-     @pytest.mark.asyncio
-     async def test_properties(self, fetcher, urls, kwargs):
-         """Test if different arguments breaks the code or not"""
-         response = await fetcher.async_fetch(urls['html_url'], **kwargs)
-         assert response.status == 200
-
-     @pytest.mark.asyncio
-     async def test_cdp_url_invalid(self, fetcher, urls):
-         """Test if invalid CDP URLs raise appropriate exceptions"""
-         with pytest.raises(ValueError):
-             await fetcher.async_fetch(urls['html_url'], cdp_url='blahblah')
-
-         with pytest.raises(ValueError):
-             await fetcher.async_fetch(urls['html_url'], cdp_url='blahblah', nstbrowser_mode=True)
-
-         with pytest.raises(Exception):
-             await fetcher.async_fetch(urls['html_url'], cdp_url='ws://blahblah')
-
-     @pytest.mark.asyncio
-     async def test_infinite_timeout(self, fetcher, urls):
-         """Test if infinite timeout breaks the code or not"""
-         response = await fetcher.async_fetch(urls['delayed_url'], timeout=None)
-         assert response.status == 200
tests/fetchers/sync/__init__.py DELETED
File without changes
tests/fetchers/sync/test_camoufox.py DELETED
@@ -1,68 +0,0 @@
- import pytest
- import pytest_httpbin
-
- from scrapling import StealthyFetcher
-
-
- @pytest_httpbin.use_class_based_httpbin
- class TestStealthyFetcher:
-     @pytest.fixture(scope="class")
-     def fetcher(self):
-         """Fixture to create a StealthyFetcher instance for the entire test class"""
-         return StealthyFetcher(auto_match=False)
-
-     @pytest.fixture(autouse=True)
-     def setup_urls(self, httpbin):
-         """Fixture to set up URLs for testing"""
-         self.status_200 = f'{httpbin.url}/status/200'
-         self.status_404 = f'{httpbin.url}/status/404'
-         self.status_501 = f'{httpbin.url}/status/501'
-         self.basic_url = f'{httpbin.url}/get'
-         self.html_url = f'{httpbin.url}/html'
-         self.delayed_url = f'{httpbin.url}/delay/10'  # 10 Seconds delay response
-         self.cookies_url = f"{httpbin.url}/cookies/set/test/value"
-
-     def test_basic_fetch(self, fetcher):
-         """Test doing basic fetch request with multiple statuses"""
-         assert fetcher.fetch(self.status_200).status == 200
-         assert fetcher.fetch(self.status_404).status == 404
-         assert fetcher.fetch(self.status_501).status == 501
-
-     def test_networkidle(self, fetcher):
-         """Test if waiting for `networkidle` make page does not finish loading or not"""
-         assert fetcher.fetch(self.basic_url, network_idle=True).status == 200
-
-     def test_blocking_resources(self, fetcher):
-         """Test if blocking resources make page does not finish loading or not"""
-         assert fetcher.fetch(self.basic_url, block_images=True).status == 200
-         assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200
-
-     def test_waiting_selector(self, fetcher):
-         """Test if waiting for a selector make page does not finish loading or not"""
-         assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200
-         assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200
-
-     def test_cookies_loading(self, fetcher):
-         """Test if cookies are set after the request"""
-         assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'}
-
-     def test_automation(self, fetcher):
-         """Test if automation break the code or not"""
-         def scroll_page(page):
-             page.mouse.wheel(10, 0)
-             page.mouse.move(100, 400)
-             page.mouse.up()
-             return page
-
-         assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200
-
-     def test_properties(self, fetcher):
-         """Test if different arguments breaks the code or not"""
-         assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status == 200
-         assert fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status == 200
-         assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status == 200
-         assert fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status == 200
-
-     def test_infinite_timeout(self, fetcher):
-         """Test if infinite timeout breaks the code or not"""
-         assert fetcher.fetch(self.delayed_url, timeout=None).status == 200
tests/fetchers/sync/test_httpx.py DELETED
@@ -1,82 +0,0 @@
- import pytest
- import pytest_httpbin
-
- from scrapling import Fetcher
-
-
- @pytest_httpbin.use_class_based_httpbin
- class TestFetcher:
-     @pytest.fixture(scope="class")
-     def fetcher(self):
-         """Fixture to create a Fetcher instance for the entire test class"""
-         return Fetcher(auto_match=False)
-
-     @pytest.fixture(autouse=True)
-     def setup_urls(self, httpbin):
-         """Fixture to set up URLs for testing"""
-         self.status_200 = f'{httpbin.url}/status/200'
-         self.status_404 = f'{httpbin.url}/status/404'
-         self.status_501 = f'{httpbin.url}/status/501'
-         self.basic_url = f'{httpbin.url}/get'
-         self.post_url = f'{httpbin.url}/post'
-         self.put_url = f'{httpbin.url}/put'
-         self.delete_url = f'{httpbin.url}/delete'
-         self.html_url = f'{httpbin.url}/html'
-
-     def test_basic_get(self, fetcher):
-         """Test doing basic get request with multiple statuses"""
-         assert fetcher.get(self.status_200).status == 200
-         assert fetcher.get(self.status_404).status == 404
-         assert fetcher.get(self.status_501).status == 501
-
-     def test_get_properties(self, fetcher):
-         """Test if different arguments with GET request breaks the code or not"""
-         assert fetcher.get(self.status_200, stealthy_headers=True).status == 200
-         assert fetcher.get(self.status_200, follow_redirects=True).status == 200
-         assert fetcher.get(self.status_200, timeout=None).status == 200
-         assert fetcher.get(
-             self.status_200,
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         ).status == 200
-
-     def test_post_properties(self, fetcher):
-         """Test if different arguments with POST request breaks the code or not"""
-         assert fetcher.post(self.post_url, data={'key': 'value'}).status == 200
-         assert fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status == 200
-         assert fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status == 200
-         assert fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status == 200
-         assert fetcher.post(
-             self.post_url,
-             data={'key': 'value'},
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         ).status == 200
-
-     def test_put_properties(self, fetcher):
-         """Test if different arguments with PUT request breaks the code or not"""
-         assert fetcher.put(self.put_url, data={'key': 'value'}).status == 200
-         assert fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status == 200
-         assert fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status == 200
-         assert fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status == 200
-         assert fetcher.put(
-             self.put_url,
-             data={'key': 'value'},
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         ).status == 200
-
-     def test_delete_properties(self, fetcher):
-         """Test if different arguments with DELETE request breaks the code or not"""
-         assert fetcher.delete(self.delete_url, stealthy_headers=True).status == 200
-         assert fetcher.delete(self.delete_url, follow_redirects=True).status == 200
-         assert fetcher.delete(self.delete_url, timeout=None).status == 200
-         assert fetcher.delete(
-             self.delete_url,
-             stealthy_headers=True,
-             follow_redirects=True,
-             timeout=None
-         ).status == 200
tests/fetchers/sync/test_playwright.py DELETED
@@ -1,87 +0,0 @@
- import pytest
- import pytest_httpbin
-
- from scrapling import PlayWrightFetcher
-
-
- @pytest_httpbin.use_class_based_httpbin
- class TestPlayWrightFetcher:
-
-     @pytest.fixture(scope="class")
-     def fetcher(self):
-         """Fixture to create a StealthyFetcher instance for the entire test class"""
-         return PlayWrightFetcher(auto_match=False)
-
-     @pytest.fixture(autouse=True)
-     def setup_urls(self, httpbin):
-         """Fixture to set up URLs for testing"""
-         self.status_200 = f'{httpbin.url}/status/200'
-         self.status_404 = f'{httpbin.url}/status/404'
-         self.status_501 = f'{httpbin.url}/status/501'
-         self.basic_url = f'{httpbin.url}/get'
-         self.html_url = f'{httpbin.url}/html'
-         self.delayed_url = f'{httpbin.url}/delay/10'  # 10 Seconds delay response
-         self.cookies_url = f"{httpbin.url}/cookies/set/test/value"
-
-     def test_basic_fetch(self, fetcher):
-         """Test doing basic fetch request with multiple statuses"""
-         assert fetcher.fetch(self.status_200).status == 200
-         # There's a bug with playwright makes it crashes if a URL returns status code 4xx/5xx without body, let's disable this till they reply to my issue report
-         # assert fetcher.fetch(self.status_404).status == 404
-         # assert fetcher.fetch(self.status_501).status == 501
-
-     def test_networkidle(self, fetcher):
-         """Test if waiting for `networkidle` make page does not finish loading or not"""
-         assert fetcher.fetch(self.basic_url, network_idle=True).status == 200
-
-     def test_blocking_resources(self, fetcher):
-         """Test if blocking resources make page does not finish loading or not"""
-         assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200
-
-     def test_waiting_selector(self, fetcher):
-         """Test if waiting for a selector make page does not finish loading or not"""
-         assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200
-         assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200
-
-     def test_cookies_loading(self, fetcher):
-         """Test if cookies are set after the request"""
-         assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'}
-
-     def test_automation(self, fetcher):
-         """Test if automation break the code or not"""
-
-         def scroll_page(page):
-             page.mouse.wheel(10, 0)
-             page.mouse.move(100, 400)
-             page.mouse.up()
-             return page
-
-         assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200
-
-     @pytest.mark.parametrize("kwargs", [
-         {"disable_webgl": True, "hide_canvas": False},
-         {"disable_webgl": False, "hide_canvas": True},
-         # {"stealth": True},  # causes issues with Github Actions
-         {"useragent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0'},
-         {"extra_headers": {'ayo': ''}}
-     ])
-     def test_properties(self, fetcher, kwargs):
-         """Test if different arguments breaks the code or not"""
-         response = fetcher.fetch(self.html_url, **kwargs)
-         assert response.status == 200
-
-     def test_cdp_url_invalid(self, fetcher):
-         """Test if invalid CDP URLs raise appropriate exceptions"""
-         with pytest.raises(ValueError):
-             fetcher.fetch(self.html_url, cdp_url='blahblah')
-
-         with pytest.raises(ValueError):
-             fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True)
-
-         with pytest.raises(Exception):
-             fetcher.fetch(self.html_url, cdp_url='ws://blahblah')
-
-     def test_infinite_timeout(self, fetcher, ):
-         """Test if infinite timeout breaks the code or not"""
-         response = fetcher.fetch(self.delayed_url, timeout=None)
-         assert response.status == 200