scrapling 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +29 -19
- scrapling/cli.py +25 -8
- scrapling/core/_types.py +0 -2
- scrapling/core/ai.py +22 -14
- scrapling/core/custom_types.py +2 -2
- scrapling/core/shell.py +6 -5
- scrapling/core/storage.py +2 -1
- scrapling/core/utils/__init__.py +0 -1
- scrapling/engines/_browsers/__init__.py +0 -2
- scrapling/engines/_browsers/_base.py +11 -36
- scrapling/engines/_browsers/_camoufox.py +75 -60
- scrapling/engines/_browsers/_controllers.py +43 -52
- scrapling/engines/_browsers/_page.py +1 -42
- scrapling/engines/_browsers/_validators.py +130 -65
- scrapling/engines/constants.py +0 -15
- scrapling/engines/static.py +417 -16
- scrapling/engines/toolbelt/navigation.py +1 -1
- scrapling/fetchers/__init__.py +36 -0
- scrapling/fetchers/chrome.py +205 -0
- scrapling/fetchers/firefox.py +216 -0
- scrapling/fetchers/requests.py +28 -0
- scrapling/parser.py +7 -7
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/METADATA +25 -23
- scrapling-0.3.6.dist-info/RECORD +47 -0
- scrapling/fetchers.py +0 -444
- scrapling-0.3.4.dist-info/RECORD +0 -44
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/WHEEL +0 -0
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/entry_points.txt +0 -0
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/licenses/LICENSE +0 -0
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/top_level.txt +0 -0
scrapling/engines/static.py CHANGED

```diff
@@ -1,7 +1,7 @@
 from time import sleep as time_sleep
 from asyncio import sleep as asyncio_sleep
 
-from curl_cffi.
+from curl_cffi.curl import CurlError
 from curl_cffi import CurlHttpVersion
 from curl_cffi.requests.impersonate import DEFAULT_CHROME
 from curl_cffi.requests import (
```
```diff
@@ -22,13 +22,14 @@ from scrapling.core._types import (
     Awaitable,
     List,
     Any,
+    cast,
 )
 
 from .toolbelt.custom import Response
 from .toolbelt.convertor import ResponseFactory
 from .toolbelt.fingerprints import generate_convincing_referer, generate_headers, __default_useragent__
 
-_UNSET = object()
+_UNSET: Any = object()
 
 
 class FetcherSession:
```
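The change to the sentinel is small but load-bearing: `_UNSET` is a module-level marker object used as a default value so the session can tell "argument not passed" apart from an explicit `None`, and annotating it as `Any` keeps type checkers quiet when it stands in as the default for parameters of other types. A minimal sketch of the pattern, with a hypothetical `request` function standing in for scrapling's methods:

```python
from typing import Any, Optional

_UNSET: Any = object()  # typed as Any so it can be the default for parameters of any type


def request(timeout: Optional[float] = _UNSET) -> str:
    # An identity check against the sentinel separates "not passed" from an explicit None.
    if timeout is _UNSET:
        return "fall back to the session-level timeout"
    if timeout is None:
        return "caller explicitly disabled the timeout"
    return f"use timeout={timeout}"
```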
```diff
@@ -233,13 +234,12 @@ class FetcherSession:
         request_args: Dict[str, Any],
         max_retries: int,
         retry_delay: int,
-        selector_config:
+        selector_config: Dict,
     ) -> Response:
         """
         Perform an HTTP request using the configured session.
 
         :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
-        :param url: Target URL for the request.
         :param request_args: Arguments to be passed to the session's `request()` method.
         :param max_retries: Maximum number of retries for the request.
         :param retry_delay: Number of seconds to wait between retries.
```
```diff
@@ -274,13 +274,12 @@ class FetcherSession:
         request_args: Dict[str, Any],
         max_retries: int,
         retry_delay: int,
-        selector_config:
+        selector_config: Dict,
     ) -> Response:
         """
         Perform an HTTP request using the configured session.
 
         :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
-        :param url: Target URL for the request.
         :param request_args: Arguments to be passed to the session's `request()` method.
         :param max_retries: Maximum number of retries for the request.
         :param retry_delay: Number of seconds to wait between retries.
```
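Both docstring fixes above describe the same retry contract: up to `max_retries` attempts with `retry_delay` seconds of sleep between them, which is where the newly imported `CurlError` fits in. A hypothetical sketch of that contract (not scrapling's actual implementation; `session` is assumed to be a `curl_cffi` session object):

```python
from time import sleep as time_sleep

from curl_cffi.curl import CurlError  # the exception type now imported at the top of static.py


def fetch_with_retries(session, method: str, url: str, max_retries: int = 3, retry_delay: int = 1):
    """Retry a curl_cffi request on CurlError, sleeping between attempts."""
    for attempt in range(1, max_retries + 1):
        try:
            return session.request(method, url)
        except CurlError:
            if attempt == max_retries:
                raise  # out of retries, surface the last error
            time_sleep(retry_delay)
```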
```diff
@@ -646,18 +645,420 @@ class FetcherSession:
 class FetcherClient(FetcherSession):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.__enter__ = None
-        self.__exit__ = None
-        self.__aenter__ = None
-        self.__aexit__ = None
-        self._curl_session = True
+        self.__enter__: Any = None
+        self.__exit__: Any = None
+        self.__aenter__: Any = None
+        self.__aexit__: Any = None
+        self._curl_session: Any = True
+
+    # Setting the correct return types for the type checking/autocompletion
+    def get(
+        self,
+        url: str,
+        params: Optional[Dict | List | Tuple] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Response:
+        return cast(
+            Response,
+            super().get(
+                url,
+                params,
+                headers,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
+
+    def post(
+        self,
+        url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Response:
+        return cast(
+            Response,
+            super().post(
+                url,
+                data,
+                json,
+                headers,
+                params,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
+
+    def put(
+        self,
+        url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Response:
+        return cast(
+            Response,
+            super().put(
+                url,
+                data,
+                json,
+                headers,
+                params,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
+
+    def delete(
+        self,
+        url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Response:
+        return cast(
+            Response,
+            super().delete(
+                url,
+                data,
+                json,
+                headers,
+                params,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
 
 
 class AsyncFetcherClient(FetcherSession):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.__enter__ = None
-        self.__exit__ = None
-        self.__aenter__ = None
-        self.__aexit__ = None
-        self._async_curl_session = True
+        self.__enter__: Any = None
+        self.__exit__: Any = None
+        self.__aenter__: Any = None
+        self.__aexit__: Any = None
+        self._async_curl_session: Any = True
+
+    # Setting the correct return types for the type checking/autocompletion
+    def get(
+        self,
+        url: str,
+        params: Optional[Dict | List | Tuple] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Awaitable[Response]:
+        return cast(
+            Awaitable[Response],
+            super().get(
+                url,
+                params,
+                headers,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
+
+    def post(
+        self,
+        url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Awaitable[Response]:
+        return cast(
+            Awaitable[Response],
+            super().post(
+                url,
+                data,
+                json,
+                headers,
+                params,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
+
+    def put(
+        self,
+        url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Awaitable[Response]:
+        return cast(
+            Awaitable[Response],
+            super().put(
+                url,
+                data,
+                json,
+                headers,
+                params,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
+
+    def delete(
+        self,
+        url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
+        headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
+        cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
+        follow_redirects: Optional[bool] = _UNSET,
+        max_redirects: Optional[int] = _UNSET,
+        retries: Optional[int] = _UNSET,
+        retry_delay: Optional[int] = _UNSET,
+        proxies: Optional[ProxySpec] = _UNSET,
+        proxy: Optional[str] = _UNSET,
+        proxy_auth: Optional[Tuple[str, str]] = _UNSET,
+        auth: Optional[Tuple[str, str]] = None,
+        verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
+        impersonate: Optional[BrowserTypeLiteral] = _UNSET,
+        http3: Optional[bool] = _UNSET,
+        stealthy_headers: Optional[bool] = _UNSET,
+        **kwargs,
+    ) -> Awaitable[Response]:
+        return cast(
+            Awaitable[Response],
+            super().delete(
+                url,
+                data,
+                json,
+                headers,
+                params,
+                cookies,
+                timeout,
+                follow_redirects,
+                max_redirects,
+                retries,
+                retry_delay,
+                proxies,
+                proxy,
+                proxy_auth,
+                auth,
+                verify,
+                cert,
+                impersonate,
+                http3,
+                stealthy_headers,
+                **kwargs,
+            ),
+        )
```
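Most of this hunk is boilerplate with a single purpose, spelled out in the added comment: the sync `FetcherClient` and async `AsyncFetcherClient` re-declare `get`/`post`/`put`/`delete` only so type checkers and IDE autocompletion see `Response` (or `Awaitable[Response]`) instead of the wider return type of the shared `FetcherSession` implementation; `typing.cast` changes nothing at runtime. A stripped-down sketch of the same trick, with placeholder class names:

```python
from typing import Awaitable, Union, cast


class Response:
    """Placeholder for scrapling's Response type."""


class SessionBase:
    # One shared implementation serves both sync and async subclasses,
    # so its declared return type is too wide for direct callers.
    def get(self, url: str) -> Union[Response, Awaitable[Response]]:
        return Response()


class SyncClient(SessionBase):
    def get(self, url: str) -> Response:
        # cast() only narrows the type for static analysis; it is a no-op at runtime.
        return cast(Response, super().get(url))
```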
scrapling/engines/toolbelt/navigation.py CHANGED

```diff
@@ -4,7 +4,7 @@ Functions related to files and URLs
 
 from pathlib import Path
 from functools import lru_cache
-from urllib.parse import
+from urllib.parse import urlparse
 
 from playwright.async_api import Route as async_Route
 from msgspec import Struct, structs, convert, ValidationError
```
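The single changed line here brings in `urlparse` from the standard library, which splits a URL into its components:

```python
from urllib.parse import urlparse

parts = urlparse("https://example.com/path/page?query=1#section")
print(parts.scheme)  # https
print(parts.netloc)  # example.com
print(parts.path)    # /path/page
```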
scrapling/fetchers/__init__.py ADDED

```diff
@@ -0,0 +1,36 @@
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from scrapling.fetchers.requests import Fetcher, AsyncFetcher, FetcherSession
+    from scrapling.fetchers.chrome import DynamicFetcher, DynamicSession, AsyncDynamicSession
+    from scrapling.fetchers.firefox import StealthyFetcher, StealthySession, AsyncStealthySession
+
+
+# Lazy import mapping
+_LAZY_IMPORTS = {
+    "Fetcher": ("scrapling.fetchers.requests", "Fetcher"),
+    "AsyncFetcher": ("scrapling.fetchers.requests", "AsyncFetcher"),
+    "FetcherSession": ("scrapling.fetchers.requests", "FetcherSession"),
+    "DynamicFetcher": ("scrapling.fetchers.chrome", "DynamicFetcher"),
+    "DynamicSession": ("scrapling.fetchers.chrome", "DynamicSession"),
+    "AsyncDynamicSession": ("scrapling.fetchers.chrome", "AsyncDynamicSession"),
+    "StealthyFetcher": ("scrapling.fetchers.firefox", "StealthyFetcher"),
+    "StealthySession": ("scrapling.fetchers.firefox", "StealthySession"),
+    "AsyncStealthySession": ("scrapling.fetchers.firefox", "AsyncStealthySession"),
+}
+
+__all__ = ["Fetcher", "AsyncFetcher", "StealthyFetcher", "DynamicFetcher"]
+
+
+def __getattr__(name: str) -> Any:
+    if name in _LAZY_IMPORTS:
+        module_path, class_name = _LAZY_IMPORTS[name]
+        module = __import__(module_path, fromlist=[class_name])
+        return getattr(module, class_name)
+    else:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def __dir__() -> list[str]:
+    """Support for dir() and autocomplete."""
+    return sorted(list(_LAZY_IMPORTS.keys()))
```
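This new package `__init__.py`, together with the removal of the old single-module `scrapling/fetchers.py`, switches the fetchers to lazy loading via the module-level `__getattr__` hook (PEP 562): the `TYPE_CHECKING` block gives type checkers the real symbols, while at runtime a submodule is imported only the first time one of its names is accessed. From the caller's side nothing changes; assuming scrapling 0.3.6 is installed, the behavior looks like this:

```python
import scrapling.fetchers

# Attribute access triggers the package-level __getattr__, which imports
# scrapling.fetchers.requests now rather than when the package was first imported.
Fetcher = scrapling.fetchers.Fetcher

# __dir__() advertises the lazily importable names for dir() and autocompletion.
print(dir(scrapling.fetchers))
```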