scrapling 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  from time import sleep as time_sleep
2
2
  from asyncio import sleep as asyncio_sleep
3
3
 
4
- from curl_cffi.requests.session import CurlError
4
+ from curl_cffi.curl import CurlError
5
5
  from curl_cffi import CurlHttpVersion
6
6
  from curl_cffi.requests.impersonate import DEFAULT_CHROME
7
7
  from curl_cffi.requests import (
@@ -22,13 +22,14 @@ from scrapling.core._types import (
22
22
  Awaitable,
23
23
  List,
24
24
  Any,
25
+ cast,
25
26
  )
26
27
 
27
28
  from .toolbelt.custom import Response
28
29
  from .toolbelt.convertor import ResponseFactory
29
30
  from .toolbelt.fingerprints import generate_convincing_referer, generate_headers, __default_useragent__
30
31
 
31
- _UNSET = object()
32
+ _UNSET: Any = object()
32
33
 
33
34
 
34
35
  class FetcherSession:
@@ -94,8 +95,8 @@ class FetcherSession:
94
95
  self.default_http3 = http3
95
96
  self.selector_config = selector_config or {}
96
97
 
97
- self._curl_session: Optional[CurlSession] | bool = None
98
- self._async_curl_session: Optional[AsyncCurlSession] | bool = None
98
+ self._curl_session: Optional[CurlSession] = None
99
+ self._async_curl_session: Optional[AsyncCurlSession] = None
99
100
 
100
101
  def _merge_request_args(self, **kwargs) -> Dict[str, Any]:
101
102
  """Merge request-specific arguments with default session arguments."""
@@ -233,7 +234,7 @@ class FetcherSession:
233
234
  request_args: Dict[str, Any],
234
235
  max_retries: int,
235
236
  retry_delay: int,
236
- selector_config: Optional[Dict] = None,
237
+ selector_config: Dict,
237
238
  ) -> Response:
238
239
  """
239
240
  Perform an HTTP request using the configured session.
@@ -273,7 +274,7 @@ class FetcherSession:
273
274
  request_args: Dict[str, Any],
274
275
  max_retries: int,
275
276
  retry_delay: int,
276
- selector_config: Optional[Dict] = None,
277
+ selector_config: Dict,
277
278
  ) -> Response:
278
279
  """
279
280
  Perform an HTTP request using the configured session.
@@ -644,18 +645,420 @@ class FetcherSession:
644
645
  class FetcherClient(FetcherSession):
645
646
  def __init__(self, *args, **kwargs):
646
647
  super().__init__(*args, **kwargs)
647
- self.__enter__ = None
648
- self.__exit__ = None
649
- self.__aenter__ = None
650
- self.__aexit__ = None
651
- self._curl_session = True
648
+ self.__enter__: Any = None
649
+ self.__exit__: Any = None
650
+ self.__aenter__: Any = None
651
+ self.__aexit__: Any = None
652
+ self._curl_session: Any = True
653
+
654
+ # Setting the correct return types for the type checking/autocompletion
655
+ def get(
656
+ self,
657
+ url: str,
658
+ params: Optional[Dict | List | Tuple] = None,
659
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
660
+ cookies: Optional[CookieTypes] = None,
661
+ timeout: Optional[int | float] = _UNSET,
662
+ follow_redirects: Optional[bool] = _UNSET,
663
+ max_redirects: Optional[int] = _UNSET,
664
+ retries: Optional[int] = _UNSET,
665
+ retry_delay: Optional[int] = _UNSET,
666
+ proxies: Optional[ProxySpec] = _UNSET,
667
+ proxy: Optional[str] = _UNSET,
668
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
669
+ auth: Optional[Tuple[str, str]] = None,
670
+ verify: Optional[bool] = _UNSET,
671
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
672
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
673
+ http3: Optional[bool] = _UNSET,
674
+ stealthy_headers: Optional[bool] = _UNSET,
675
+ **kwargs,
676
+ ) -> Response:
677
+ return cast(
678
+ Response,
679
+ super().get(
680
+ url,
681
+ params,
682
+ headers,
683
+ cookies,
684
+ timeout,
685
+ follow_redirects,
686
+ max_redirects,
687
+ retries,
688
+ retry_delay,
689
+ proxies,
690
+ proxy,
691
+ proxy_auth,
692
+ auth,
693
+ verify,
694
+ cert,
695
+ impersonate,
696
+ http3,
697
+ stealthy_headers,
698
+ **kwargs,
699
+ ),
700
+ )
701
+
702
+ def post(
703
+ self,
704
+ url: str,
705
+ data: Optional[Dict | str] = None,
706
+ json: Optional[Dict | List] = None,
707
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
708
+ params: Optional[Dict | List | Tuple] = None,
709
+ cookies: Optional[CookieTypes] = None,
710
+ timeout: Optional[int | float] = _UNSET,
711
+ follow_redirects: Optional[bool] = _UNSET,
712
+ max_redirects: Optional[int] = _UNSET,
713
+ retries: Optional[int] = _UNSET,
714
+ retry_delay: Optional[int] = _UNSET,
715
+ proxies: Optional[ProxySpec] = _UNSET,
716
+ proxy: Optional[str] = _UNSET,
717
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
718
+ auth: Optional[Tuple[str, str]] = None,
719
+ verify: Optional[bool] = _UNSET,
720
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
721
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
722
+ http3: Optional[bool] = _UNSET,
723
+ stealthy_headers: Optional[bool] = _UNSET,
724
+ **kwargs,
725
+ ) -> Response:
726
+ return cast(
727
+ Response,
728
+ super().post(
729
+ url,
730
+ data,
731
+ json,
732
+ headers,
733
+ params,
734
+ cookies,
735
+ timeout,
736
+ follow_redirects,
737
+ max_redirects,
738
+ retries,
739
+ retry_delay,
740
+ proxies,
741
+ proxy,
742
+ proxy_auth,
743
+ auth,
744
+ verify,
745
+ cert,
746
+ impersonate,
747
+ http3,
748
+ stealthy_headers,
749
+ **kwargs,
750
+ ),
751
+ )
752
+
753
+ def put(
754
+ self,
755
+ url: str,
756
+ data: Optional[Dict | str] = None,
757
+ json: Optional[Dict | List] = None,
758
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
759
+ params: Optional[Dict | List | Tuple] = None,
760
+ cookies: Optional[CookieTypes] = None,
761
+ timeout: Optional[int | float] = _UNSET,
762
+ follow_redirects: Optional[bool] = _UNSET,
763
+ max_redirects: Optional[int] = _UNSET,
764
+ retries: Optional[int] = _UNSET,
765
+ retry_delay: Optional[int] = _UNSET,
766
+ proxies: Optional[ProxySpec] = _UNSET,
767
+ proxy: Optional[str] = _UNSET,
768
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
769
+ auth: Optional[Tuple[str, str]] = None,
770
+ verify: Optional[bool] = _UNSET,
771
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
772
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
773
+ http3: Optional[bool] = _UNSET,
774
+ stealthy_headers: Optional[bool] = _UNSET,
775
+ **kwargs,
776
+ ) -> Response:
777
+ return cast(
778
+ Response,
779
+ super().put(
780
+ url,
781
+ data,
782
+ json,
783
+ headers,
784
+ params,
785
+ cookies,
786
+ timeout,
787
+ follow_redirects,
788
+ max_redirects,
789
+ retries,
790
+ retry_delay,
791
+ proxies,
792
+ proxy,
793
+ proxy_auth,
794
+ auth,
795
+ verify,
796
+ cert,
797
+ impersonate,
798
+ http3,
799
+ stealthy_headers,
800
+ **kwargs,
801
+ ),
802
+ )
803
+
804
+ def delete(
805
+ self,
806
+ url: str,
807
+ data: Optional[Dict | str] = None,
808
+ json: Optional[Dict | List] = None,
809
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
810
+ params: Optional[Dict | List | Tuple] = None,
811
+ cookies: Optional[CookieTypes] = None,
812
+ timeout: Optional[int | float] = _UNSET,
813
+ follow_redirects: Optional[bool] = _UNSET,
814
+ max_redirects: Optional[int] = _UNSET,
815
+ retries: Optional[int] = _UNSET,
816
+ retry_delay: Optional[int] = _UNSET,
817
+ proxies: Optional[ProxySpec] = _UNSET,
818
+ proxy: Optional[str] = _UNSET,
819
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
820
+ auth: Optional[Tuple[str, str]] = None,
821
+ verify: Optional[bool] = _UNSET,
822
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
823
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
824
+ http3: Optional[bool] = _UNSET,
825
+ stealthy_headers: Optional[bool] = _UNSET,
826
+ **kwargs,
827
+ ) -> Response:
828
+ return cast(
829
+ Response,
830
+ super().delete(
831
+ url,
832
+ data,
833
+ json,
834
+ headers,
835
+ params,
836
+ cookies,
837
+ timeout,
838
+ follow_redirects,
839
+ max_redirects,
840
+ retries,
841
+ retry_delay,
842
+ proxies,
843
+ proxy,
844
+ proxy_auth,
845
+ auth,
846
+ verify,
847
+ cert,
848
+ impersonate,
849
+ http3,
850
+ stealthy_headers,
851
+ **kwargs,
852
+ ),
853
+ )
652
854
 
653
855
 
654
856
  class AsyncFetcherClient(FetcherSession):
655
857
  def __init__(self, *args, **kwargs):
656
858
  super().__init__(*args, **kwargs)
657
- self.__enter__ = None
658
- self.__exit__ = None
659
- self.__aenter__ = None
660
- self.__aexit__ = None
661
- self._async_curl_session = True
859
+ self.__enter__: Any = None
860
+ self.__exit__: Any = None
861
+ self.__aenter__: Any = None
862
+ self.__aexit__: Any = None
863
+ self._async_curl_session: Any = True
864
+
865
+ # Setting the correct return types for the type checking/autocompletion
866
+ def get(
867
+ self,
868
+ url: str,
869
+ params: Optional[Dict | List | Tuple] = None,
870
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
871
+ cookies: Optional[CookieTypes] = None,
872
+ timeout: Optional[int | float] = _UNSET,
873
+ follow_redirects: Optional[bool] = _UNSET,
874
+ max_redirects: Optional[int] = _UNSET,
875
+ retries: Optional[int] = _UNSET,
876
+ retry_delay: Optional[int] = _UNSET,
877
+ proxies: Optional[ProxySpec] = _UNSET,
878
+ proxy: Optional[str] = _UNSET,
879
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
880
+ auth: Optional[Tuple[str, str]] = None,
881
+ verify: Optional[bool] = _UNSET,
882
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
883
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
884
+ http3: Optional[bool] = _UNSET,
885
+ stealthy_headers: Optional[bool] = _UNSET,
886
+ **kwargs,
887
+ ) -> Awaitable[Response]:
888
+ return cast(
889
+ Awaitable[Response],
890
+ super().get(
891
+ url,
892
+ params,
893
+ headers,
894
+ cookies,
895
+ timeout,
896
+ follow_redirects,
897
+ max_redirects,
898
+ retries,
899
+ retry_delay,
900
+ proxies,
901
+ proxy,
902
+ proxy_auth,
903
+ auth,
904
+ verify,
905
+ cert,
906
+ impersonate,
907
+ http3,
908
+ stealthy_headers,
909
+ **kwargs,
910
+ ),
911
+ )
912
+
913
+ def post(
914
+ self,
915
+ url: str,
916
+ data: Optional[Dict | str] = None,
917
+ json: Optional[Dict | List] = None,
918
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
919
+ params: Optional[Dict | List | Tuple] = None,
920
+ cookies: Optional[CookieTypes] = None,
921
+ timeout: Optional[int | float] = _UNSET,
922
+ follow_redirects: Optional[bool] = _UNSET,
923
+ max_redirects: Optional[int] = _UNSET,
924
+ retries: Optional[int] = _UNSET,
925
+ retry_delay: Optional[int] = _UNSET,
926
+ proxies: Optional[ProxySpec] = _UNSET,
927
+ proxy: Optional[str] = _UNSET,
928
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
929
+ auth: Optional[Tuple[str, str]] = None,
930
+ verify: Optional[bool] = _UNSET,
931
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
932
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
933
+ http3: Optional[bool] = _UNSET,
934
+ stealthy_headers: Optional[bool] = _UNSET,
935
+ **kwargs,
936
+ ) -> Awaitable[Response]:
937
+ return cast(
938
+ Awaitable[Response],
939
+ super().post(
940
+ url,
941
+ data,
942
+ json,
943
+ headers,
944
+ params,
945
+ cookies,
946
+ timeout,
947
+ follow_redirects,
948
+ max_redirects,
949
+ retries,
950
+ retry_delay,
951
+ proxies,
952
+ proxy,
953
+ proxy_auth,
954
+ auth,
955
+ verify,
956
+ cert,
957
+ impersonate,
958
+ http3,
959
+ stealthy_headers,
960
+ **kwargs,
961
+ ),
962
+ )
963
+
964
+ def put(
965
+ self,
966
+ url: str,
967
+ data: Optional[Dict | str] = None,
968
+ json: Optional[Dict | List] = None,
969
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
970
+ params: Optional[Dict | List | Tuple] = None,
971
+ cookies: Optional[CookieTypes] = None,
972
+ timeout: Optional[int | float] = _UNSET,
973
+ follow_redirects: Optional[bool] = _UNSET,
974
+ max_redirects: Optional[int] = _UNSET,
975
+ retries: Optional[int] = _UNSET,
976
+ retry_delay: Optional[int] = _UNSET,
977
+ proxies: Optional[ProxySpec] = _UNSET,
978
+ proxy: Optional[str] = _UNSET,
979
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
980
+ auth: Optional[Tuple[str, str]] = None,
981
+ verify: Optional[bool] = _UNSET,
982
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
983
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
984
+ http3: Optional[bool] = _UNSET,
985
+ stealthy_headers: Optional[bool] = _UNSET,
986
+ **kwargs,
987
+ ) -> Awaitable[Response]:
988
+ return cast(
989
+ Awaitable[Response],
990
+ super().put(
991
+ url,
992
+ data,
993
+ json,
994
+ headers,
995
+ params,
996
+ cookies,
997
+ timeout,
998
+ follow_redirects,
999
+ max_redirects,
1000
+ retries,
1001
+ retry_delay,
1002
+ proxies,
1003
+ proxy,
1004
+ proxy_auth,
1005
+ auth,
1006
+ verify,
1007
+ cert,
1008
+ impersonate,
1009
+ http3,
1010
+ stealthy_headers,
1011
+ **kwargs,
1012
+ ),
1013
+ )
1014
+
1015
+ def delete(
1016
+ self,
1017
+ url: str,
1018
+ data: Optional[Dict | str] = None,
1019
+ json: Optional[Dict | List] = None,
1020
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
1021
+ params: Optional[Dict | List | Tuple] = None,
1022
+ cookies: Optional[CookieTypes] = None,
1023
+ timeout: Optional[int | float] = _UNSET,
1024
+ follow_redirects: Optional[bool] = _UNSET,
1025
+ max_redirects: Optional[int] = _UNSET,
1026
+ retries: Optional[int] = _UNSET,
1027
+ retry_delay: Optional[int] = _UNSET,
1028
+ proxies: Optional[ProxySpec] = _UNSET,
1029
+ proxy: Optional[str] = _UNSET,
1030
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
1031
+ auth: Optional[Tuple[str, str]] = None,
1032
+ verify: Optional[bool] = _UNSET,
1033
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
1034
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
1035
+ http3: Optional[bool] = _UNSET,
1036
+ stealthy_headers: Optional[bool] = _UNSET,
1037
+ **kwargs,
1038
+ ) -> Awaitable[Response]:
1039
+ return cast(
1040
+ Awaitable[Response],
1041
+ super().delete(
1042
+ url,
1043
+ data,
1044
+ json,
1045
+ headers,
1046
+ params,
1047
+ cookies,
1048
+ timeout,
1049
+ follow_redirects,
1050
+ max_redirects,
1051
+ retries,
1052
+ retry_delay,
1053
+ proxies,
1054
+ proxy,
1055
+ proxy_auth,
1056
+ auth,
1057
+ verify,
1058
+ cert,
1059
+ impersonate,
1060
+ http3,
1061
+ stealthy_headers,
1062
+ **kwargs,
1063
+ ),
1064
+ )
@@ -0,0 +1,36 @@
1
+ from typing import TYPE_CHECKING, Any
2
+
3
+ if TYPE_CHECKING:
4
+ from scrapling.fetchers.requests import Fetcher, AsyncFetcher, FetcherSession
5
+ from scrapling.fetchers.chrome import DynamicFetcher, DynamicSession, AsyncDynamicSession
6
+ from scrapling.fetchers.firefox import StealthyFetcher, StealthySession, AsyncStealthySession
7
+
8
+
9
# Lazy import mapping: public attribute name -> (module path, attribute name in that module).
# Entries are resolved on attribute access by the module-level ``__getattr__``.
_LAZY_IMPORTS = {
    "Fetcher": ("scrapling.fetchers.requests", "Fetcher"),
    "AsyncFetcher": ("scrapling.fetchers.requests", "AsyncFetcher"),
    "FetcherSession": ("scrapling.fetchers.requests", "FetcherSession"),
    "DynamicFetcher": ("scrapling.fetchers.chrome", "DynamicFetcher"),
    "DynamicSession": ("scrapling.fetchers.chrome", "DynamicSession"),
    "AsyncDynamicSession": ("scrapling.fetchers.chrome", "AsyncDynamicSession"),
    "StealthyFetcher": ("scrapling.fetchers.firefox", "StealthyFetcher"),
    "StealthySession": ("scrapling.fetchers.firefox", "StealthySession"),
    "AsyncStealthySession": ("scrapling.fetchers.firefox", "AsyncStealthySession"),
}

# Declared public API.  Every key of ``_LAZY_IMPORTS`` is listed so that star-imports
# and static analysers see the same names that ``__dir__`` advertises.  The four
# original entries stay first, in their original order, so a star-import still
# touches the backing modules in the same sequence; the session classes live in
# those same modules and therefore trigger no additional imports.
__all__ = [
    "Fetcher",
    "AsyncFetcher",
    "StealthyFetcher",
    "DynamicFetcher",
    "FetcherSession",
    "StealthySession",
    "AsyncStealthySession",
    "DynamicSession",
    "AsyncDynamicSession",
]
23
+
24
+
25
def __getattr__(name: str) -> Any:
    """Resolve a lazily exported attribute (module-level ``__getattr__`` hook, PEP 562).

    Python calls this only when ``name`` is not found in the module namespace.
    The target module listed in ``_LAZY_IMPORTS`` is imported, the attribute is
    fetched from it, and the result is stored in this module's globals so later
    lookups are served directly without re-entering this function.

    :param name: Attribute requested on this module.
    :return: The object named by the ``_LAZY_IMPORTS`` entry for ``name``.
    :raises AttributeError: If ``name`` is not a key of ``_LAZY_IMPORTS``.
    """
    try:
        module_path, attr_name = _LAZY_IMPORTS[name]
    except KeyError:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None

    # Function-scope import keeps this block self-contained; ``import_module`` is the
    # documented replacement for calling ``__import__`` directly.
    from importlib import import_module

    resolved = getattr(import_module(module_path), attr_name)
    # Cache on the module so the import machinery is consulted once per name.
    globals()[name] = resolved
    return resolved
32
+
33
+
34
def __dir__() -> list[str]:
    """Return the lazily importable names in sorted order, for ``dir()`` and autocomplete."""
    # Iterating a dict yields its keys, so sorting the mapping directly is enough.
    return sorted(_LAZY_IMPORTS)