scrapling 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  from time import sleep as time_sleep
2
2
  from asyncio import sleep as asyncio_sleep
3
3
 
4
- from curl_cffi.requests.session import CurlError
4
+ from curl_cffi.curl import CurlError
5
5
  from curl_cffi import CurlHttpVersion
6
6
  from curl_cffi.requests.impersonate import DEFAULT_CHROME
7
7
  from curl_cffi.requests import (
@@ -22,13 +22,14 @@ from scrapling.core._types import (
22
22
  Awaitable,
23
23
  List,
24
24
  Any,
25
+ cast,
25
26
  )
26
27
 
27
28
  from .toolbelt.custom import Response
28
29
  from .toolbelt.convertor import ResponseFactory
29
30
  from .toolbelt.fingerprints import generate_convincing_referer, generate_headers, __default_useragent__
30
31
 
31
- _UNSET = object()
32
+ _UNSET: Any = object()
32
33
 
33
34
 
34
35
  class FetcherSession:
@@ -233,13 +234,12 @@ class FetcherSession:
233
234
  request_args: Dict[str, Any],
234
235
  max_retries: int,
235
236
  retry_delay: int,
236
- selector_config: Optional[Dict] = None,
237
+ selector_config: Dict,
237
238
  ) -> Response:
238
239
  """
239
240
  Perform an HTTP request using the configured session.
240
241
 
241
242
  :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
242
- :param url: Target URL for the request.
243
243
  :param request_args: Arguments to be passed to the session's `request()` method.
244
244
  :param max_retries: Maximum number of retries for the request.
245
245
  :param retry_delay: Number of seconds to wait between retries.
@@ -274,13 +274,12 @@ class FetcherSession:
274
274
  request_args: Dict[str, Any],
275
275
  max_retries: int,
276
276
  retry_delay: int,
277
- selector_config: Optional[Dict] = None,
277
+ selector_config: Dict,
278
278
  ) -> Response:
279
279
  """
280
280
  Perform an HTTP request using the configured session.
281
281
 
282
282
  :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
283
- :param url: Target URL for the request.
284
283
  :param request_args: Arguments to be passed to the session's `request()` method.
285
284
  :param max_retries: Maximum number of retries for the request.
286
285
  :param retry_delay: Number of seconds to wait between retries.
@@ -646,18 +645,420 @@ class FetcherSession:
646
645
  class FetcherClient(FetcherSession):
647
646
  def __init__(self, *args, **kwargs):
648
647
  super().__init__(*args, **kwargs)
649
- self.__enter__ = None
650
- self.__exit__ = None
651
- self.__aenter__ = None
652
- self.__aexit__ = None
653
- self._curl_session = True
648
+ self.__enter__: Any = None
649
+ self.__exit__: Any = None
650
+ self.__aenter__: Any = None
651
+ self.__aexit__: Any = None
652
+ self._curl_session: Any = True
653
+
654
+ # Setting the correct return types for the type checking/autocompletion
655
+ def get(
656
+ self,
657
+ url: str,
658
+ params: Optional[Dict | List | Tuple] = None,
659
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
660
+ cookies: Optional[CookieTypes] = None,
661
+ timeout: Optional[int | float] = _UNSET,
662
+ follow_redirects: Optional[bool] = _UNSET,
663
+ max_redirects: Optional[int] = _UNSET,
664
+ retries: Optional[int] = _UNSET,
665
+ retry_delay: Optional[int] = _UNSET,
666
+ proxies: Optional[ProxySpec] = _UNSET,
667
+ proxy: Optional[str] = _UNSET,
668
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
669
+ auth: Optional[Tuple[str, str]] = None,
670
+ verify: Optional[bool] = _UNSET,
671
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
672
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
673
+ http3: Optional[bool] = _UNSET,
674
+ stealthy_headers: Optional[bool] = _UNSET,
675
+ **kwargs,
676
+ ) -> Response:
677
+ return cast(
678
+ Response,
679
+ super().get(
680
+ url,
681
+ params,
682
+ headers,
683
+ cookies,
684
+ timeout,
685
+ follow_redirects,
686
+ max_redirects,
687
+ retries,
688
+ retry_delay,
689
+ proxies,
690
+ proxy,
691
+ proxy_auth,
692
+ auth,
693
+ verify,
694
+ cert,
695
+ impersonate,
696
+ http3,
697
+ stealthy_headers,
698
+ **kwargs,
699
+ ),
700
+ )
701
+
702
+ def post(
703
+ self,
704
+ url: str,
705
+ data: Optional[Dict | str] = None,
706
+ json: Optional[Dict | List] = None,
707
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
708
+ params: Optional[Dict | List | Tuple] = None,
709
+ cookies: Optional[CookieTypes] = None,
710
+ timeout: Optional[int | float] = _UNSET,
711
+ follow_redirects: Optional[bool] = _UNSET,
712
+ max_redirects: Optional[int] = _UNSET,
713
+ retries: Optional[int] = _UNSET,
714
+ retry_delay: Optional[int] = _UNSET,
715
+ proxies: Optional[ProxySpec] = _UNSET,
716
+ proxy: Optional[str] = _UNSET,
717
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
718
+ auth: Optional[Tuple[str, str]] = None,
719
+ verify: Optional[bool] = _UNSET,
720
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
721
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
722
+ http3: Optional[bool] = _UNSET,
723
+ stealthy_headers: Optional[bool] = _UNSET,
724
+ **kwargs,
725
+ ) -> Response:
726
+ return cast(
727
+ Response,
728
+ super().post(
729
+ url,
730
+ data,
731
+ json,
732
+ headers,
733
+ params,
734
+ cookies,
735
+ timeout,
736
+ follow_redirects,
737
+ max_redirects,
738
+ retries,
739
+ retry_delay,
740
+ proxies,
741
+ proxy,
742
+ proxy_auth,
743
+ auth,
744
+ verify,
745
+ cert,
746
+ impersonate,
747
+ http3,
748
+ stealthy_headers,
749
+ **kwargs,
750
+ ),
751
+ )
752
+
753
+ def put(
754
+ self,
755
+ url: str,
756
+ data: Optional[Dict | str] = None,
757
+ json: Optional[Dict | List] = None,
758
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
759
+ params: Optional[Dict | List | Tuple] = None,
760
+ cookies: Optional[CookieTypes] = None,
761
+ timeout: Optional[int | float] = _UNSET,
762
+ follow_redirects: Optional[bool] = _UNSET,
763
+ max_redirects: Optional[int] = _UNSET,
764
+ retries: Optional[int] = _UNSET,
765
+ retry_delay: Optional[int] = _UNSET,
766
+ proxies: Optional[ProxySpec] = _UNSET,
767
+ proxy: Optional[str] = _UNSET,
768
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
769
+ auth: Optional[Tuple[str, str]] = None,
770
+ verify: Optional[bool] = _UNSET,
771
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
772
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
773
+ http3: Optional[bool] = _UNSET,
774
+ stealthy_headers: Optional[bool] = _UNSET,
775
+ **kwargs,
776
+ ) -> Response:
777
+ return cast(
778
+ Response,
779
+ super().put(
780
+ url,
781
+ data,
782
+ json,
783
+ headers,
784
+ params,
785
+ cookies,
786
+ timeout,
787
+ follow_redirects,
788
+ max_redirects,
789
+ retries,
790
+ retry_delay,
791
+ proxies,
792
+ proxy,
793
+ proxy_auth,
794
+ auth,
795
+ verify,
796
+ cert,
797
+ impersonate,
798
+ http3,
799
+ stealthy_headers,
800
+ **kwargs,
801
+ ),
802
+ )
803
+
804
+ def delete(
805
+ self,
806
+ url: str,
807
+ data: Optional[Dict | str] = None,
808
+ json: Optional[Dict | List] = None,
809
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
810
+ params: Optional[Dict | List | Tuple] = None,
811
+ cookies: Optional[CookieTypes] = None,
812
+ timeout: Optional[int | float] = _UNSET,
813
+ follow_redirects: Optional[bool] = _UNSET,
814
+ max_redirects: Optional[int] = _UNSET,
815
+ retries: Optional[int] = _UNSET,
816
+ retry_delay: Optional[int] = _UNSET,
817
+ proxies: Optional[ProxySpec] = _UNSET,
818
+ proxy: Optional[str] = _UNSET,
819
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
820
+ auth: Optional[Tuple[str, str]] = None,
821
+ verify: Optional[bool] = _UNSET,
822
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
823
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
824
+ http3: Optional[bool] = _UNSET,
825
+ stealthy_headers: Optional[bool] = _UNSET,
826
+ **kwargs,
827
+ ) -> Response:
828
+ return cast(
829
+ Response,
830
+ super().delete(
831
+ url,
832
+ data,
833
+ json,
834
+ headers,
835
+ params,
836
+ cookies,
837
+ timeout,
838
+ follow_redirects,
839
+ max_redirects,
840
+ retries,
841
+ retry_delay,
842
+ proxies,
843
+ proxy,
844
+ proxy_auth,
845
+ auth,
846
+ verify,
847
+ cert,
848
+ impersonate,
849
+ http3,
850
+ stealthy_headers,
851
+ **kwargs,
852
+ ),
853
+ )
654
854
 
655
855
 
656
856
  class AsyncFetcherClient(FetcherSession):
657
857
  def __init__(self, *args, **kwargs):
658
858
  super().__init__(*args, **kwargs)
659
- self.__enter__ = None
660
- self.__exit__ = None
661
- self.__aenter__ = None
662
- self.__aexit__ = None
663
- self._async_curl_session = True
859
+ self.__enter__: Any = None
860
+ self.__exit__: Any = None
861
+ self.__aenter__: Any = None
862
+ self.__aexit__: Any = None
863
+ self._async_curl_session: Any = True
864
+
865
+ # Setting the correct return types for the type checking/autocompletion
866
+ def get(
867
+ self,
868
+ url: str,
869
+ params: Optional[Dict | List | Tuple] = None,
870
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
871
+ cookies: Optional[CookieTypes] = None,
872
+ timeout: Optional[int | float] = _UNSET,
873
+ follow_redirects: Optional[bool] = _UNSET,
874
+ max_redirects: Optional[int] = _UNSET,
875
+ retries: Optional[int] = _UNSET,
876
+ retry_delay: Optional[int] = _UNSET,
877
+ proxies: Optional[ProxySpec] = _UNSET,
878
+ proxy: Optional[str] = _UNSET,
879
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
880
+ auth: Optional[Tuple[str, str]] = None,
881
+ verify: Optional[bool] = _UNSET,
882
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
883
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
884
+ http3: Optional[bool] = _UNSET,
885
+ stealthy_headers: Optional[bool] = _UNSET,
886
+ **kwargs,
887
+ ) -> Awaitable[Response]:
888
+ return cast(
889
+ Awaitable[Response],
890
+ super().get(
891
+ url,
892
+ params,
893
+ headers,
894
+ cookies,
895
+ timeout,
896
+ follow_redirects,
897
+ max_redirects,
898
+ retries,
899
+ retry_delay,
900
+ proxies,
901
+ proxy,
902
+ proxy_auth,
903
+ auth,
904
+ verify,
905
+ cert,
906
+ impersonate,
907
+ http3,
908
+ stealthy_headers,
909
+ **kwargs,
910
+ ),
911
+ )
912
+
913
+ def post(
914
+ self,
915
+ url: str,
916
+ data: Optional[Dict | str] = None,
917
+ json: Optional[Dict | List] = None,
918
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
919
+ params: Optional[Dict | List | Tuple] = None,
920
+ cookies: Optional[CookieTypes] = None,
921
+ timeout: Optional[int | float] = _UNSET,
922
+ follow_redirects: Optional[bool] = _UNSET,
923
+ max_redirects: Optional[int] = _UNSET,
924
+ retries: Optional[int] = _UNSET,
925
+ retry_delay: Optional[int] = _UNSET,
926
+ proxies: Optional[ProxySpec] = _UNSET,
927
+ proxy: Optional[str] = _UNSET,
928
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
929
+ auth: Optional[Tuple[str, str]] = None,
930
+ verify: Optional[bool] = _UNSET,
931
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
932
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
933
+ http3: Optional[bool] = _UNSET,
934
+ stealthy_headers: Optional[bool] = _UNSET,
935
+ **kwargs,
936
+ ) -> Awaitable[Response]:
937
+ return cast(
938
+ Awaitable[Response],
939
+ super().post(
940
+ url,
941
+ data,
942
+ json,
943
+ headers,
944
+ params,
945
+ cookies,
946
+ timeout,
947
+ follow_redirects,
948
+ max_redirects,
949
+ retries,
950
+ retry_delay,
951
+ proxies,
952
+ proxy,
953
+ proxy_auth,
954
+ auth,
955
+ verify,
956
+ cert,
957
+ impersonate,
958
+ http3,
959
+ stealthy_headers,
960
+ **kwargs,
961
+ ),
962
+ )
963
+
964
+ def put(
965
+ self,
966
+ url: str,
967
+ data: Optional[Dict | str] = None,
968
+ json: Optional[Dict | List] = None,
969
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
970
+ params: Optional[Dict | List | Tuple] = None,
971
+ cookies: Optional[CookieTypes] = None,
972
+ timeout: Optional[int | float] = _UNSET,
973
+ follow_redirects: Optional[bool] = _UNSET,
974
+ max_redirects: Optional[int] = _UNSET,
975
+ retries: Optional[int] = _UNSET,
976
+ retry_delay: Optional[int] = _UNSET,
977
+ proxies: Optional[ProxySpec] = _UNSET,
978
+ proxy: Optional[str] = _UNSET,
979
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
980
+ auth: Optional[Tuple[str, str]] = None,
981
+ verify: Optional[bool] = _UNSET,
982
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
983
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
984
+ http3: Optional[bool] = _UNSET,
985
+ stealthy_headers: Optional[bool] = _UNSET,
986
+ **kwargs,
987
+ ) -> Awaitable[Response]:
988
+ return cast(
989
+ Awaitable[Response],
990
+ super().put(
991
+ url,
992
+ data,
993
+ json,
994
+ headers,
995
+ params,
996
+ cookies,
997
+ timeout,
998
+ follow_redirects,
999
+ max_redirects,
1000
+ retries,
1001
+ retry_delay,
1002
+ proxies,
1003
+ proxy,
1004
+ proxy_auth,
1005
+ auth,
1006
+ verify,
1007
+ cert,
1008
+ impersonate,
1009
+ http3,
1010
+ stealthy_headers,
1011
+ **kwargs,
1012
+ ),
1013
+ )
1014
+
1015
+ def delete(
1016
+ self,
1017
+ url: str,
1018
+ data: Optional[Dict | str] = None,
1019
+ json: Optional[Dict | List] = None,
1020
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
1021
+ params: Optional[Dict | List | Tuple] = None,
1022
+ cookies: Optional[CookieTypes] = None,
1023
+ timeout: Optional[int | float] = _UNSET,
1024
+ follow_redirects: Optional[bool] = _UNSET,
1025
+ max_redirects: Optional[int] = _UNSET,
1026
+ retries: Optional[int] = _UNSET,
1027
+ retry_delay: Optional[int] = _UNSET,
1028
+ proxies: Optional[ProxySpec] = _UNSET,
1029
+ proxy: Optional[str] = _UNSET,
1030
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
1031
+ auth: Optional[Tuple[str, str]] = None,
1032
+ verify: Optional[bool] = _UNSET,
1033
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
1034
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
1035
+ http3: Optional[bool] = _UNSET,
1036
+ stealthy_headers: Optional[bool] = _UNSET,
1037
+ **kwargs,
1038
+ ) -> Awaitable[Response]:
1039
+ return cast(
1040
+ Awaitable[Response],
1041
+ super().delete(
1042
+ url,
1043
+ data,
1044
+ json,
1045
+ headers,
1046
+ params,
1047
+ cookies,
1048
+ timeout,
1049
+ follow_redirects,
1050
+ max_redirects,
1051
+ retries,
1052
+ retry_delay,
1053
+ proxies,
1054
+ proxy,
1055
+ proxy_auth,
1056
+ auth,
1057
+ verify,
1058
+ cert,
1059
+ impersonate,
1060
+ http3,
1061
+ stealthy_headers,
1062
+ **kwargs,
1063
+ ),
1064
+ )
@@ -4,7 +4,7 @@ Functions related to files and URLs
4
4
 
5
5
  from pathlib import Path
6
6
  from functools import lru_cache
7
- from urllib.parse import urlencode, urlparse
7
+ from urllib.parse import urlparse
8
8
 
9
9
  from playwright.async_api import Route as async_Route
10
10
  from msgspec import Struct, structs, convert, ValidationError
@@ -0,0 +1,36 @@
1
+ from typing import TYPE_CHECKING, Any
2
+
3
+ if TYPE_CHECKING:
4
+ from scrapling.fetchers.requests import Fetcher, AsyncFetcher, FetcherSession
5
+ from scrapling.fetchers.chrome import DynamicFetcher, DynamicSession, AsyncDynamicSession
6
+ from scrapling.fetchers.firefox import StealthyFetcher, StealthySession, AsyncStealthySession
7
+
8
+
9
+ # Lazy import mapping
10
+ _LAZY_IMPORTS = {
11
+ "Fetcher": ("scrapling.fetchers.requests", "Fetcher"),
12
+ "AsyncFetcher": ("scrapling.fetchers.requests", "AsyncFetcher"),
13
+ "FetcherSession": ("scrapling.fetchers.requests", "FetcherSession"),
14
+ "DynamicFetcher": ("scrapling.fetchers.chrome", "DynamicFetcher"),
15
+ "DynamicSession": ("scrapling.fetchers.chrome", "DynamicSession"),
16
+ "AsyncDynamicSession": ("scrapling.fetchers.chrome", "AsyncDynamicSession"),
17
+ "StealthyFetcher": ("scrapling.fetchers.firefox", "StealthyFetcher"),
18
+ "StealthySession": ("scrapling.fetchers.firefox", "StealthySession"),
19
+ "AsyncStealthySession": ("scrapling.fetchers.firefox", "AsyncStealthySession"),
20
+ }
21
+
22
+ __all__ = ["Fetcher", "AsyncFetcher", "StealthyFetcher", "DynamicFetcher"]
23
+
24
+
25
+ def __getattr__(name: str) -> Any:
26
+ if name in _LAZY_IMPORTS:
27
+ module_path, class_name = _LAZY_IMPORTS[name]
28
+ module = __import__(module_path, fromlist=[class_name])
29
+ return getattr(module, class_name)
30
+ else:
31
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
32
+
33
+
34
+ def __dir__() -> list[str]:
35
+ """Support for dir() and autocomplete."""
36
+ return sorted(list(_LAZY_IMPORTS.keys()))