scrapling 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +1 -1
- scrapling/core/_types.py +3 -0
- scrapling/core/ai.py +2 -1
- scrapling/core/custom_types.py +20 -27
- scrapling/core/mixins.py +15 -9
- scrapling/core/shell.py +4 -3
- scrapling/core/storage.py +5 -5
- scrapling/core/translator.py +13 -8
- scrapling/engines/_browsers/_base.py +37 -14
- scrapling/engines/_browsers/_camoufox.py +76 -35
- scrapling/engines/_browsers/_config_tools.py +1 -1
- scrapling/engines/_browsers/_controllers.py +32 -11
- scrapling/engines/_browsers/_validators.py +31 -10
- scrapling/engines/static.py +678 -668
- scrapling/engines/toolbelt/convertor.py +13 -15
- scrapling/engines/toolbelt/custom.py +6 -9
- scrapling/engines/toolbelt/fingerprints.py +17 -10
- scrapling/engines/toolbelt/navigation.py +11 -3
- scrapling/fetchers/__init__.py +11 -1
- scrapling/fetchers/chrome.py +9 -4
- scrapling/fetchers/firefox.py +0 -4
- scrapling/parser.py +105 -80
- {scrapling-0.3.6.dist-info → scrapling-0.3.7.dist-info}/METADATA +3 -4
- scrapling-0.3.7.dist-info/RECORD +47 -0
- scrapling-0.3.6.dist-info/RECORD +0 -47
- {scrapling-0.3.6.dist-info → scrapling-0.3.7.dist-info}/WHEEL +0 -0
- {scrapling-0.3.6.dist-info → scrapling-0.3.7.dist-info}/entry_points.txt +0 -0
- {scrapling-0.3.6.dist-info → scrapling-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {scrapling-0.3.6.dist-info → scrapling-0.3.7.dist-info}/top_level.txt +0 -0
scrapling/engines/static.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
from abc import ABC
|
1
2
|
from time import sleep as time_sleep
|
2
3
|
from asyncio import sleep as asyncio_sleep
|
3
4
|
|
4
5
|
from curl_cffi.curl import CurlError
|
5
6
|
from curl_cffi import CurlHttpVersion
|
6
|
-
from curl_cffi.requests.impersonate import DEFAULT_CHROME
|
7
7
|
from curl_cffi.requests import (
|
8
8
|
ProxySpec,
|
9
9
|
CookieTypes,
|
@@ -22,7 +22,6 @@ from scrapling.core._types import (
|
|
22
22
|
Awaitable,
|
23
23
|
List,
|
24
24
|
Any,
|
25
|
-
cast,
|
26
25
|
)
|
27
26
|
|
28
27
|
from .toolbelt.custom import Response
|
@@ -30,22 +29,14 @@ from .toolbelt.convertor import ResponseFactory
|
|
30
29
|
from .toolbelt.fingerprints import generate_convincing_referer, generate_headers, __default_useragent__
|
31
30
|
|
32
31
|
_UNSET: Any = object()
|
32
|
+
_NO_SESSION: Any = object()
|
33
33
|
|
34
34
|
|
35
|
-
class
|
36
|
-
|
37
|
-
A context manager that provides configured Fetcher sessions.
|
38
|
-
|
39
|
-
When this manager is used in a 'with' or 'async with' block,
|
40
|
-
it yields a new session configured with the manager's defaults.
|
41
|
-
A single instance of this manager should ideally be used for one active
|
42
|
-
session at a time (or sequentially). Re-entering a context with the
|
43
|
-
same manager instance while a session is already active is disallowed.
|
44
|
-
"""
|
45
|
-
|
35
|
+
class _ConfigurationLogic(ABC):
|
36
|
+
# Core Logic Handler (Internal Engine)
|
46
37
|
def __init__(
|
47
38
|
self,
|
48
|
-
impersonate: Optional[BrowserTypeLiteral] =
|
39
|
+
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
49
40
|
http3: Optional[bool] = False,
|
50
41
|
stealthy_headers: Optional[bool] = True,
|
51
42
|
proxies: Optional[Dict[str, str]] = None,
|
@@ -61,203 +52,185 @@ class FetcherSession:
|
|
61
52
|
cert: Optional[str | Tuple[str, str]] = None,
|
62
53
|
selector_config: Optional[Dict] = None,
|
63
54
|
):
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
:param verify: Whether to verify HTTPS certificates. Defaults to True.
|
79
|
-
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
80
|
-
:param selector_config: Arguments passed when creating the final Selector class.
|
81
|
-
"""
|
82
|
-
self.default_impersonate = impersonate
|
83
|
-
self.stealth = stealthy_headers
|
84
|
-
self.default_proxies = proxies or {}
|
85
|
-
self.default_proxy = proxy or None
|
86
|
-
self.default_proxy_auth = proxy_auth or None
|
87
|
-
self.default_timeout = timeout
|
88
|
-
self.default_headers = headers or {}
|
89
|
-
self.default_retries = retries
|
90
|
-
self.default_retry_delay = retry_delay
|
91
|
-
self.default_follow_redirects = follow_redirects
|
92
|
-
self.default_max_redirects = max_redirects
|
93
|
-
self.default_verify = verify
|
94
|
-
self.default_cert = cert
|
95
|
-
self.default_http3 = http3
|
55
|
+
self._default_impersonate = impersonate
|
56
|
+
self._stealth = stealthy_headers
|
57
|
+
self._default_proxies = proxies or {}
|
58
|
+
self._default_proxy = proxy or None
|
59
|
+
self._default_proxy_auth = proxy_auth or None
|
60
|
+
self._default_timeout = timeout
|
61
|
+
self._default_headers = headers or {}
|
62
|
+
self._default_retries = retries
|
63
|
+
self._default_retry_delay = retry_delay
|
64
|
+
self._default_follow_redirects = follow_redirects
|
65
|
+
self._default_max_redirects = max_redirects
|
66
|
+
self._default_verify = verify
|
67
|
+
self._default_cert = cert
|
68
|
+
self._default_http3 = http3
|
96
69
|
self.selector_config = selector_config or {}
|
97
70
|
|
98
|
-
|
99
|
-
|
71
|
+
@staticmethod
|
72
|
+
def _get_with_precedence(request_val: Any, default_val: Any) -> Any:
|
73
|
+
"""Get value with request-level priority over session-level"""
|
74
|
+
return request_val if request_val is not _UNSET else default_val
|
100
75
|
|
101
|
-
def _merge_request_args(self, **
|
76
|
+
def _merge_request_args(self, **method_kwargs) -> Dict[str, Any]:
|
102
77
|
"""Merge request-specific arguments with default session arguments."""
|
103
|
-
url =
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
78
|
+
url = method_kwargs.pop("url")
|
79
|
+
impersonate = self._get_with_precedence(method_kwargs.pop("impersonate"), self._default_impersonate)
|
80
|
+
http3_enabled = self._get_with_precedence(method_kwargs.pop("http3"), self._default_http3)
|
81
|
+
final_args = {
|
82
|
+
"url": url,
|
83
|
+
# Curl automatically generates the suitable browser headers when you use `impersonate`
|
84
|
+
"headers": self._headers_job(
|
85
|
+
url,
|
86
|
+
self._get_with_precedence(method_kwargs.pop("headers"), self._default_headers),
|
87
|
+
self._get_with_precedence(method_kwargs.pop("stealth"), self._stealth),
|
88
|
+
bool(impersonate),
|
89
|
+
),
|
90
|
+
"proxies": self._get_with_precedence(method_kwargs.pop("proxies"), self._default_proxies),
|
91
|
+
"proxy": self._get_with_precedence(method_kwargs.pop("proxy"), self._default_proxy),
|
92
|
+
"proxy_auth": self._get_with_precedence(method_kwargs.pop("proxy_auth"), self._default_proxy_auth),
|
93
|
+
"timeout": self._get_with_precedence(method_kwargs.pop("timeout"), self._default_timeout),
|
94
|
+
"allow_redirects": self._get_with_precedence(
|
95
|
+
method_kwargs.pop("follow_redirects"), self._default_follow_redirects
|
96
|
+
),
|
97
|
+
"max_redirects": self._get_with_precedence(method_kwargs.pop("max_redirects"), self._default_max_redirects),
|
98
|
+
"verify": self._get_with_precedence(method_kwargs.pop("verify"), self._default_verify),
|
99
|
+
"cert": self._get_with_precedence(method_kwargs.pop("cert"), self._default_cert),
|
100
|
+
"impersonate": impersonate,
|
101
|
+
**{
|
102
|
+
k: v
|
103
|
+
for k, v in method_kwargs.items()
|
104
|
+
if v
|
105
|
+
not in (
|
106
|
+
_UNSET,
|
107
|
+
None,
|
108
|
+
)
|
109
|
+
}, # Add any remaining parameters (after all known ones are popped)
|
110
|
+
}
|
111
|
+
if http3_enabled: # pragma: no cover
|
112
|
+
final_args["http_version"] = CurlHttpVersion.V3ONLY
|
112
113
|
if impersonate:
|
113
114
|
log.warning(
|
114
115
|
"The argument `http3` might cause errors if used with `impersonate` argument, try switching it off if you encounter any curl errors."
|
115
116
|
)
|
116
117
|
|
117
|
-
|
118
|
-
{
|
119
|
-
"url": url,
|
120
|
-
# Curl automatically generates the suitable browser headers when you use `impersonate`
|
121
|
-
"headers": self._headers_job(url, headers, stealth, bool(impersonate)),
|
122
|
-
"proxies": self.get_with_precedence(kwargs, "proxies", self.default_proxies),
|
123
|
-
"proxy": self.get_with_precedence(kwargs, "proxy", self.default_proxy),
|
124
|
-
"proxy_auth": self.get_with_precedence(kwargs, "proxy_auth", self.default_proxy_auth),
|
125
|
-
"timeout": self.get_with_precedence(kwargs, "timeout", self.default_timeout),
|
126
|
-
"allow_redirects": self.get_with_precedence(kwargs, "allow_redirects", self.default_follow_redirects),
|
127
|
-
"max_redirects": self.get_with_precedence(kwargs, "max_redirects", self.default_max_redirects),
|
128
|
-
"verify": self.get_with_precedence(kwargs, "verify", self.default_verify),
|
129
|
-
"cert": self.get_with_precedence(kwargs, "cert", self.default_cert),
|
130
|
-
"impersonate": impersonate,
|
131
|
-
**{
|
132
|
-
k: v
|
133
|
-
for k, v in kwargs.items()
|
134
|
-
if v
|
135
|
-
not in (
|
136
|
-
_UNSET,
|
137
|
-
None,
|
138
|
-
)
|
139
|
-
}, # Add any remaining parameters (after all known ones are popped)
|
140
|
-
}
|
141
|
-
)
|
142
|
-
return request_args
|
143
|
-
|
144
|
-
def _headers_job(
|
145
|
-
self,
|
146
|
-
url,
|
147
|
-
headers: Optional[Dict],
|
148
|
-
stealth: Optional[bool],
|
149
|
-
impersonate_enabled: bool,
|
150
|
-
) -> Dict:
|
151
|
-
"""Adds useragent to headers if it doesn't exist, generates real headers and append it to current headers, and
|
152
|
-
finally generates a referer header that looks like if this request came from Google's search of the current URL's domain.
|
118
|
+
return final_args
|
153
119
|
|
154
|
-
|
155
|
-
:param stealth: Whether to enable the `stealthy_headers` argument to this request or not. If `None`, it defaults to the session default value.
|
156
|
-
:param impersonate_enabled: Whether the browser impersonation is enabled or not.
|
157
|
-
:return: A dictionary of the new headers.
|
120
|
+
def _headers_job(self, url, headers: Dict, stealth: bool, impersonate_enabled: bool) -> Dict:
|
158
121
|
"""
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
headers_keys = set(map(str.lower, headers.keys()))
|
122
|
+
1. Adds a useragent to the headers if it doesn't have one
|
123
|
+
2. Generates real headers and append them to current headers
|
124
|
+
3. Generates a referer header that looks like as if this request came from a Google's search of the current URL's domain.
|
125
|
+
"""
|
126
|
+
# Merge session headers with request headers, request takes precedence (if it was set)
|
127
|
+
final_headers = {**self._default_headers, **(headers if headers and headers is not _UNSET else {})}
|
128
|
+
headers_keys = {k.lower() for k in final_headers}
|
167
129
|
if stealth:
|
168
130
|
if "referer" not in headers_keys:
|
169
|
-
|
131
|
+
final_headers["referer"] = generate_convincing_referer(url)
|
170
132
|
|
171
|
-
if impersonate_enabled: # Curl will generate the suitable headers
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
extra_headers = {key: value for key, value in extra_headers.items() if key.lower() not in headers_keys}
|
177
|
-
headers.update(extra_headers)
|
133
|
+
if not impersonate_enabled: # Curl will generate the suitable headers
|
134
|
+
extra_headers = generate_headers(browser_mode=False)
|
135
|
+
final_headers.update(
|
136
|
+
{k: v for k, v in extra_headers.items() if k.lower() not in headers_keys}
|
137
|
+
) # Don't overwrite user-supplied headers
|
178
138
|
|
179
139
|
elif "user-agent" not in headers_keys and not impersonate_enabled:
|
180
|
-
|
181
|
-
log.debug(f"Can't find useragent in headers so '{
|
140
|
+
final_headers["User-Agent"] = __default_useragent__
|
141
|
+
log.debug(f"Can't find useragent in headers so '{final_headers['User-Agent']}' was used.")
|
142
|
+
|
143
|
+
return final_headers
|
182
144
|
|
183
|
-
|
145
|
+
|
146
|
+
class _SyncSessionLogic(_ConfigurationLogic):
|
147
|
+
def __init__(
|
148
|
+
self,
|
149
|
+
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
150
|
+
http3: Optional[bool] = False,
|
151
|
+
stealthy_headers: Optional[bool] = True,
|
152
|
+
proxies: Optional[Dict[str, str]] = None,
|
153
|
+
proxy: Optional[str] = None,
|
154
|
+
proxy_auth: Optional[Tuple[str, str]] = None,
|
155
|
+
timeout: Optional[int | float] = 30,
|
156
|
+
headers: Optional[Dict[str, str]] = None,
|
157
|
+
retries: Optional[int] = 3,
|
158
|
+
retry_delay: Optional[int] = 1,
|
159
|
+
follow_redirects: bool = True,
|
160
|
+
max_redirects: int = 30,
|
161
|
+
verify: bool = True,
|
162
|
+
cert: Optional[str | Tuple[str, str]] = None,
|
163
|
+
selector_config: Optional[Dict] = None,
|
164
|
+
):
|
165
|
+
super().__init__(
|
166
|
+
impersonate,
|
167
|
+
http3,
|
168
|
+
stealthy_headers,
|
169
|
+
proxies,
|
170
|
+
proxy,
|
171
|
+
proxy_auth,
|
172
|
+
timeout,
|
173
|
+
headers,
|
174
|
+
retries,
|
175
|
+
retry_delay,
|
176
|
+
follow_redirects,
|
177
|
+
max_redirects,
|
178
|
+
verify,
|
179
|
+
cert,
|
180
|
+
selector_config,
|
181
|
+
)
|
182
|
+
self._curl_session: Optional[CurlSession] = None
|
184
183
|
|
185
184
|
def __enter__(self):
|
186
185
|
"""Creates and returns a new synchronous Fetcher Session"""
|
187
186
|
if self._curl_session:
|
188
|
-
raise RuntimeError(
|
189
|
-
"This FetcherSession instance already has an active synchronous session. "
|
190
|
-
"Create a new FetcherSession instance for a new independent session, "
|
191
|
-
"or use the current instance sequentially after the previous context has exited."
|
192
|
-
)
|
193
|
-
if self._async_curl_session: # Prevent mixing if async is active from this instance
|
194
|
-
raise RuntimeError(
|
195
|
-
"This FetcherSession instance has an active asynchronous session. "
|
196
|
-
"Cannot enter a synchronous context simultaneously with the same manager instance."
|
197
|
-
)
|
187
|
+
raise RuntimeError("This FetcherSession instance already has an active synchronous session.")
|
198
188
|
|
199
189
|
self._curl_session = CurlSession()
|
200
190
|
return self
|
201
191
|
|
202
192
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
203
193
|
"""Closes the active synchronous session managed by this instance, if any."""
|
194
|
+
# For type checking (not accessed error)
|
195
|
+
_ = (
|
196
|
+
exc_type,
|
197
|
+
exc_val,
|
198
|
+
exc_tb,
|
199
|
+
)
|
204
200
|
if self._curl_session:
|
205
201
|
self._curl_session.close()
|
206
202
|
self._curl_session = None
|
207
203
|
|
208
|
-
async def __aenter__(self):
|
209
|
-
"""Creates and returns a new asynchronous Session."""
|
210
|
-
if self._async_curl_session:
|
211
|
-
raise RuntimeError(
|
212
|
-
"This FetcherSession instance already has an active asynchronous session. "
|
213
|
-
"Create a new FetcherSession instance for a new independent session, "
|
214
|
-
"or use the current instance sequentially after the previous context has exited."
|
215
|
-
)
|
216
|
-
if self._curl_session: # Prevent mixing if sync is active from this instance
|
217
|
-
raise RuntimeError(
|
218
|
-
"This FetcherSession instance has an active synchronous session. "
|
219
|
-
"Cannot enter an asynchronous context simultaneously with the same manager instance."
|
220
|
-
)
|
221
|
-
|
222
|
-
self._async_curl_session = AsyncCurlSession()
|
223
|
-
return self
|
224
|
-
|
225
|
-
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
226
|
-
"""Closes the active asynchronous session managed by this instance, if any."""
|
227
|
-
if self._async_curl_session:
|
228
|
-
await self._async_curl_session.close()
|
229
|
-
self._async_curl_session = None
|
230
|
-
|
231
204
|
def __make_request(
|
232
205
|
self,
|
233
206
|
method: SUPPORTED_HTTP_METHODS,
|
234
|
-
|
235
|
-
|
236
|
-
retry_delay: int,
|
237
|
-
selector_config: Dict,
|
207
|
+
stealth: Optional[bool] = None,
|
208
|
+
**kwargs,
|
238
209
|
) -> Response:
|
239
210
|
"""
|
240
211
|
Perform an HTTP request using the configured session.
|
241
|
-
|
242
|
-
:param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
|
243
|
-
:param request_args: Arguments to be passed to the session's `request()` method.
|
244
|
-
:param max_retries: Maximum number of retries for the request.
|
245
|
-
:param retry_delay: Number of seconds to wait between retries.
|
246
|
-
:param selector_config: Arguments passed when creating the final Selector class.
|
247
|
-
:return: A `Response` object for synchronous requests or an awaitable for asynchronous.
|
248
212
|
"""
|
213
|
+
stealth = self._stealth if stealth is None else stealth
|
214
|
+
|
215
|
+
selector_config = kwargs.pop("selector_config", {}) or self.selector_config
|
216
|
+
max_retries = self._get_with_precedence(kwargs.pop("retries"), self._default_retries)
|
217
|
+
retry_delay = self._get_with_precedence(kwargs.pop("retry_delay"), self._default_retry_delay)
|
218
|
+
request_args = self._merge_request_args(stealth=stealth, **kwargs)
|
219
|
+
|
249
220
|
session = self._curl_session
|
250
|
-
|
221
|
+
one_off_request = False
|
222
|
+
if session is _NO_SESSION and self.__enter__ is None:
|
251
223
|
# For usage inside FetcherClient
|
252
224
|
# It turns out `curl_cffi` caches impersonation state, so if you turned it off, then on then off, it won't be off on the last time.
|
253
225
|
session = CurlSession()
|
226
|
+
one_off_request = True
|
254
227
|
|
255
228
|
if session:
|
256
229
|
for attempt in range(max_retries):
|
257
230
|
try:
|
258
231
|
response = session.request(method, **request_args)
|
259
|
-
|
260
|
-
return
|
232
|
+
result = ResponseFactory.from_http_request(response, selector_config)
|
233
|
+
return result
|
261
234
|
except CurlError as e: # pragma: no cover
|
262
235
|
if attempt < max_retries - 1:
|
263
236
|
log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
|
@@ -265,86 +238,12 @@ class FetcherSession:
|
|
265
238
|
else:
|
266
239
|
log.error(f"Failed after {max_retries} attempts: {e}")
|
267
240
|
raise # Raise the exception if all retries fail
|
241
|
+
finally:
|
242
|
+
if session and one_off_request:
|
243
|
+
session.close()
|
268
244
|
|
269
245
|
raise RuntimeError("No active session available.") # pragma: no cover
|
270
246
|
|
271
|
-
async def __make_async_request(
|
272
|
-
self,
|
273
|
-
method: SUPPORTED_HTTP_METHODS,
|
274
|
-
request_args: Dict[str, Any],
|
275
|
-
max_retries: int,
|
276
|
-
retry_delay: int,
|
277
|
-
selector_config: Dict,
|
278
|
-
) -> Response:
|
279
|
-
"""
|
280
|
-
Perform an HTTP request using the configured session.
|
281
|
-
|
282
|
-
:param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
|
283
|
-
:param request_args: Arguments to be passed to the session's `request()` method.
|
284
|
-
:param max_retries: Maximum number of retries for the request.
|
285
|
-
:param retry_delay: Number of seconds to wait between retries.
|
286
|
-
:param selector_config: Arguments passed when creating the final Selector class.
|
287
|
-
:return: A `Response` object for synchronous requests or an awaitable for asynchronous.
|
288
|
-
"""
|
289
|
-
session = self._async_curl_session
|
290
|
-
if session is True and not any((self.__enter__, self.__exit__, self.__aenter__, self.__aexit__)):
|
291
|
-
# For usage inside the ` AsyncFetcherClient ` class, and that's for several reasons
|
292
|
-
# 1. It turns out `curl_cffi` caches impersonation state, so if you turned it off, then on then off, it won't be off on the last time.
|
293
|
-
# 2. `curl_cffi` doesn't support making async requests without sessions
|
294
|
-
# 3. Using a single session for many requests at the same time in async doesn't sit well with curl_cffi.
|
295
|
-
session = AsyncCurlSession()
|
296
|
-
|
297
|
-
if session:
|
298
|
-
for attempt in range(max_retries):
|
299
|
-
try:
|
300
|
-
response = await session.request(method, **request_args)
|
301
|
-
# response.raise_for_status() # Retry responses with a status code between 200-400
|
302
|
-
return ResponseFactory.from_http_request(response, selector_config)
|
303
|
-
except CurlError as e: # pragma: no cover
|
304
|
-
if attempt < max_retries - 1:
|
305
|
-
log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
|
306
|
-
await asyncio_sleep(retry_delay)
|
307
|
-
else:
|
308
|
-
log.error(f"Failed after {max_retries} attempts: {e}")
|
309
|
-
raise # Raise the exception if all retries fail
|
310
|
-
|
311
|
-
raise RuntimeError("No active session available.") # pragma: no cover
|
312
|
-
|
313
|
-
@staticmethod
|
314
|
-
def get_with_precedence(kwargs, key, default_value):
|
315
|
-
"""Get value with request-level priority over session-level"""
|
316
|
-
request_value = kwargs.pop(key, _UNSET)
|
317
|
-
return request_value if request_value is not _UNSET else default_value
|
318
|
-
|
319
|
-
def __prepare_and_dispatch(
|
320
|
-
self,
|
321
|
-
method: SUPPORTED_HTTP_METHODS,
|
322
|
-
stealth: Optional[bool] = None,
|
323
|
-
**kwargs,
|
324
|
-
) -> Response | Awaitable[Response]:
|
325
|
-
"""
|
326
|
-
Internal dispatcher. Prepares arguments and calls sync or async request helper.
|
327
|
-
|
328
|
-
:param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
|
329
|
-
:param stealth: Whether to enable the `stealthy_headers` argument to this request or not. If `None`, it defaults to the session default value.
|
330
|
-
:param url: Target URL for the request.
|
331
|
-
:param kwargs: Additional request-specific arguments.
|
332
|
-
:return: A `Response` object for synchronous requests or an awaitable for asynchronous.
|
333
|
-
"""
|
334
|
-
stealth = self.stealth if stealth is None else stealth
|
335
|
-
|
336
|
-
selector_config = kwargs.pop("selector_config", {}) or self.selector_config
|
337
|
-
max_retries = self.get_with_precedence(kwargs, "retries", self.default_retries)
|
338
|
-
retry_delay = self.get_with_precedence(kwargs, "retry_delay", self.default_retry_delay)
|
339
|
-
request_args = self._merge_request_args(stealth=stealth, **kwargs)
|
340
|
-
if self._curl_session:
|
341
|
-
return self.__make_request(method, request_args, max_retries, retry_delay, selector_config)
|
342
|
-
elif self._async_curl_session:
|
343
|
-
# The returned value is a Coroutine
|
344
|
-
return self.__make_async_request(method, request_args, max_retries, retry_delay, selector_config)
|
345
|
-
|
346
|
-
raise RuntimeError("No active session available.")
|
347
|
-
|
348
247
|
def get(
|
349
248
|
self,
|
350
249
|
url: str,
|
@@ -366,7 +265,7 @@ class FetcherSession:
|
|
366
265
|
http3: Optional[bool] = _UNSET,
|
367
266
|
stealthy_headers: Optional[bool] = _UNSET,
|
368
267
|
**kwargs,
|
369
|
-
) -> Response
|
268
|
+
) -> Response:
|
370
269
|
"""
|
371
270
|
Perform a GET request.
|
372
271
|
|
@@ -390,29 +289,31 @@ class FetcherSession:
|
|
390
289
|
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
391
290
|
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
392
291
|
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
393
|
-
:return: A `Response` object
|
292
|
+
:return: A `Response` object.
|
394
293
|
"""
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
294
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
295
|
+
method_args.update(kwargs)
|
296
|
+
# For type checking (not accessed error)
|
297
|
+
_ = (
|
298
|
+
url,
|
299
|
+
params,
|
300
|
+
headers,
|
301
|
+
cookies,
|
302
|
+
timeout,
|
303
|
+
follow_redirects,
|
304
|
+
max_redirects,
|
305
|
+
retries,
|
306
|
+
retry_delay,
|
307
|
+
proxies,
|
308
|
+
proxy,
|
309
|
+
proxy_auth,
|
310
|
+
auth,
|
311
|
+
verify,
|
312
|
+
cert,
|
313
|
+
impersonate,
|
314
|
+
http3,
|
315
|
+
)
|
316
|
+
return self.__make_request("GET", stealth=stealthy_headers, **method_args)
|
416
317
|
|
417
318
|
def post(
|
418
319
|
self,
|
@@ -437,57 +338,59 @@ class FetcherSession:
|
|
437
338
|
http3: Optional[bool] = _UNSET,
|
438
339
|
stealthy_headers: Optional[bool] = _UNSET,
|
439
340
|
**kwargs,
|
440
|
-
) -> Response
|
341
|
+
) -> Response:
|
441
342
|
"""
|
442
343
|
Perform a POST request.
|
443
344
|
|
444
345
|
:param url: Target URL for the request.
|
445
346
|
:param data: Form data to include in the request body.
|
446
347
|
:param json: A JSON serializable object to include in the body of the request.
|
447
|
-
:param headers: Headers to include in the request.
|
448
348
|
:param params: Query string parameters for the request.
|
349
|
+
:param headers: Headers to include in the request.
|
449
350
|
:param cookies: Cookies to use in the request.
|
450
351
|
:param timeout: Number of seconds to wait before timing out.
|
451
352
|
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
452
353
|
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
453
354
|
:param retries: Number of retry attempts. Defaults to 3.
|
454
355
|
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
455
|
-
:param proxies: Dict of proxies to use.
|
356
|
+
:param proxies: Dict of proxies to use.
|
456
357
|
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
457
358
|
Cannot be used together with the `proxies` parameter.
|
458
359
|
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
459
360
|
:param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
|
460
|
-
:param verify: Whether to verify HTTPS certificates.
|
361
|
+
:param verify: Whether to verify HTTPS certificates.
|
461
362
|
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
462
363
|
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
463
364
|
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
464
365
|
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
465
366
|
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
466
|
-
:return: A `Response` object
|
367
|
+
:return: A `Response` object.
|
467
368
|
"""
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
369
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
370
|
+
method_args.update(kwargs)
|
371
|
+
# For type checking (not accessed error)
|
372
|
+
_ = (
|
373
|
+
url,
|
374
|
+
params,
|
375
|
+
headers,
|
376
|
+
data,
|
377
|
+
json,
|
378
|
+
cookies,
|
379
|
+
timeout,
|
380
|
+
follow_redirects,
|
381
|
+
max_redirects,
|
382
|
+
retries,
|
383
|
+
retry_delay,
|
384
|
+
proxies,
|
385
|
+
proxy,
|
386
|
+
proxy_auth,
|
387
|
+
auth,
|
388
|
+
verify,
|
389
|
+
cert,
|
390
|
+
impersonate,
|
391
|
+
http3,
|
392
|
+
)
|
393
|
+
return self.__make_request("POST", stealth=stealthy_headers, **method_args)
|
491
394
|
|
492
395
|
def put(
|
493
396
|
self,
|
@@ -512,57 +415,59 @@ class FetcherSession:
|
|
512
415
|
http3: Optional[bool] = _UNSET,
|
513
416
|
stealthy_headers: Optional[bool] = _UNSET,
|
514
417
|
**kwargs,
|
515
|
-
) -> Response
|
418
|
+
) -> Response:
|
516
419
|
"""
|
517
420
|
Perform a PUT request.
|
518
421
|
|
519
422
|
:param url: Target URL for the request.
|
520
423
|
:param data: Form data to include in the request body.
|
521
424
|
:param json: A JSON serializable object to include in the body of the request.
|
522
|
-
:param headers: Headers to include in the request.
|
523
425
|
:param params: Query string parameters for the request.
|
426
|
+
:param headers: Headers to include in the request.
|
524
427
|
:param cookies: Cookies to use in the request.
|
525
428
|
:param timeout: Number of seconds to wait before timing out.
|
526
429
|
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
527
430
|
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
528
431
|
:param retries: Number of retry attempts. Defaults to 3.
|
529
432
|
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
530
|
-
:param proxies: Dict of proxies to use.
|
433
|
+
:param proxies: Dict of proxies to use.
|
531
434
|
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
532
435
|
Cannot be used together with the `proxies` parameter.
|
533
436
|
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
534
437
|
:param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
|
535
|
-
:param verify: Whether to verify HTTPS certificates.
|
438
|
+
:param verify: Whether to verify HTTPS certificates.
|
536
439
|
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
537
440
|
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
538
441
|
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
539
442
|
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
540
443
|
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
541
|
-
:return: A `Response` object
|
444
|
+
:return: A `Response` object.
|
542
445
|
"""
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
446
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
447
|
+
method_args.update(kwargs)
|
448
|
+
# For type checking (not accessed error)
|
449
|
+
_ = (
|
450
|
+
url,
|
451
|
+
params,
|
452
|
+
headers,
|
453
|
+
data,
|
454
|
+
json,
|
455
|
+
cookies,
|
456
|
+
timeout,
|
457
|
+
follow_redirects,
|
458
|
+
max_redirects,
|
459
|
+
retries,
|
460
|
+
retry_delay,
|
461
|
+
proxies,
|
462
|
+
proxy,
|
463
|
+
proxy_auth,
|
464
|
+
auth,
|
465
|
+
verify,
|
466
|
+
cert,
|
467
|
+
impersonate,
|
468
|
+
http3,
|
469
|
+
)
|
470
|
+
return self.__make_request("PUT", stealth=stealthy_headers, **method_args)
|
566
471
|
|
567
472
|
def delete(
|
568
473
|
self,
|
@@ -587,71 +492,166 @@ class FetcherSession:
|
|
587
492
|
http3: Optional[bool] = _UNSET,
|
588
493
|
stealthy_headers: Optional[bool] = _UNSET,
|
589
494
|
**kwargs,
|
590
|
-
) -> Response
|
495
|
+
) -> Response:
|
591
496
|
"""
|
592
497
|
Perform a DELETE request.
|
593
498
|
|
594
499
|
:param url: Target URL for the request.
|
595
500
|
:param data: Form data to include in the request body.
|
596
501
|
:param json: A JSON serializable object to include in the body of the request.
|
597
|
-
:param headers: Headers to include in the request.
|
598
502
|
:param params: Query string parameters for the request.
|
503
|
+
:param headers: Headers to include in the request.
|
599
504
|
:param cookies: Cookies to use in the request.
|
600
505
|
:param timeout: Number of seconds to wait before timing out.
|
601
506
|
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
602
507
|
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
603
508
|
:param retries: Number of retry attempts. Defaults to 3.
|
604
509
|
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
605
|
-
:param proxies: Dict of proxies to use.
|
510
|
+
:param proxies: Dict of proxies to use.
|
606
511
|
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
607
512
|
Cannot be used together with the `proxies` parameter.
|
608
513
|
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
609
514
|
:param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
|
610
|
-
:param verify: Whether to verify HTTPS certificates.
|
515
|
+
:param verify: Whether to verify HTTPS certificates.
|
611
516
|
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
612
517
|
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
613
518
|
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
614
519
|
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
615
520
|
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
616
|
-
:return: A `Response` object
|
521
|
+
:return: A `Response` object.
|
617
522
|
"""
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
523
|
+
# Careful of sending a body in a DELETE request, it might cause some websites to reject the request as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
|
524
|
+
# But some websites accept it, it depends on the implementation used.
|
525
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
526
|
+
method_args.update(kwargs)
|
527
|
+
# For type checking (not accessed error)
|
528
|
+
_ = (
|
529
|
+
url,
|
530
|
+
params,
|
531
|
+
headers,
|
532
|
+
data,
|
533
|
+
json,
|
534
|
+
cookies,
|
535
|
+
timeout,
|
536
|
+
follow_redirects,
|
537
|
+
max_redirects,
|
538
|
+
retries,
|
539
|
+
retry_delay,
|
540
|
+
proxies,
|
541
|
+
proxy,
|
542
|
+
proxy_auth,
|
543
|
+
auth,
|
544
|
+
verify,
|
545
|
+
cert,
|
546
|
+
impersonate,
|
547
|
+
http3,
|
548
|
+
)
|
549
|
+
return self.__make_request("DELETE", stealth=stealthy_headers, **method_args)
|
643
550
|
|
644
551
|
|
645
|
-
class
|
646
|
-
def __init__(
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
552
|
+
class _ASyncSessionLogic(_ConfigurationLogic):
|
553
|
+
def __init__(
|
554
|
+
self,
|
555
|
+
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
556
|
+
http3: Optional[bool] = False,
|
557
|
+
stealthy_headers: Optional[bool] = True,
|
558
|
+
proxies: Optional[Dict[str, str]] = None,
|
559
|
+
proxy: Optional[str] = None,
|
560
|
+
proxy_auth: Optional[Tuple[str, str]] = None,
|
561
|
+
timeout: Optional[int | float] = 30,
|
562
|
+
headers: Optional[Dict[str, str]] = None,
|
563
|
+
retries: Optional[int] = 3,
|
564
|
+
retry_delay: Optional[int] = 1,
|
565
|
+
follow_redirects: bool = True,
|
566
|
+
max_redirects: int = 30,
|
567
|
+
verify: bool = True,
|
568
|
+
cert: Optional[str | Tuple[str, str]] = None,
|
569
|
+
selector_config: Optional[Dict] = None,
|
570
|
+
):
|
571
|
+
super().__init__(
|
572
|
+
impersonate,
|
573
|
+
http3,
|
574
|
+
stealthy_headers,
|
575
|
+
proxies,
|
576
|
+
proxy,
|
577
|
+
proxy_auth,
|
578
|
+
timeout,
|
579
|
+
headers,
|
580
|
+
retries,
|
581
|
+
retry_delay,
|
582
|
+
follow_redirects,
|
583
|
+
max_redirects,
|
584
|
+
verify,
|
585
|
+
cert,
|
586
|
+
selector_config,
|
587
|
+
)
|
588
|
+
self._async_curl_session: Optional[AsyncCurlSession] = None
|
589
|
+
|
590
|
+
async def __aenter__(self):
|
591
|
+
"""Creates and returns a new asynchronous Session."""
|
592
|
+
if self._async_curl_session:
|
593
|
+
raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")
|
594
|
+
|
595
|
+
self._async_curl_session = AsyncCurlSession()
|
596
|
+
return self
|
597
|
+
|
598
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
599
|
+
"""Closes the active asynchronous session managed by this instance, if any."""
|
600
|
+
# For type checking (not accessed error)
|
601
|
+
_ = (
|
602
|
+
exc_type,
|
603
|
+
exc_val,
|
604
|
+
exc_tb,
|
605
|
+
)
|
606
|
+
if self._async_curl_session:
|
607
|
+
await self._async_curl_session.close()
|
608
|
+
self._async_curl_session = None
|
609
|
+
|
610
|
+
async def __make_request(
|
611
|
+
self,
|
612
|
+
method: SUPPORTED_HTTP_METHODS,
|
613
|
+
stealth: Optional[bool] = None,
|
614
|
+
**kwargs,
|
615
|
+
) -> Response:
|
616
|
+
"""
|
617
|
+
Perform an HTTP request using the configured session.
|
618
|
+
"""
|
619
|
+
stealth = self._stealth if stealth is None else stealth
|
620
|
+
|
621
|
+
selector_config = kwargs.pop("selector_config", {}) or self.selector_config
|
622
|
+
max_retries = self._get_with_precedence(kwargs.pop("retries"), self._default_retries)
|
623
|
+
retry_delay = self._get_with_precedence(kwargs.pop("retry_delay"), self._default_retry_delay)
|
624
|
+
request_args = self._merge_request_args(stealth=stealth, **kwargs)
|
625
|
+
|
626
|
+
session = self._async_curl_session
|
627
|
+
one_off_request = False
|
628
|
+
if session is _NO_SESSION and self.__aenter__ is None:
|
629
|
+
# For usage inside the ` AsyncFetcherClient ` class, and that's for several reasons
|
630
|
+
# 1. It turns out `curl_cffi` caches impersonation state, so if you turned it off, then on then off, it won't be off on the last time.
|
631
|
+
# 2. `curl_cffi` doesn't support making async requests without sessions
|
632
|
+
# 3. Using a single session for many requests at the same time in async doesn't sit well with curl_cffi.
|
633
|
+
session = AsyncCurlSession()
|
634
|
+
one_off_request = True
|
635
|
+
|
636
|
+
if session:
|
637
|
+
for attempt in range(max_retries):
|
638
|
+
try:
|
639
|
+
response = await session.request(method, **request_args)
|
640
|
+
result = ResponseFactory.from_http_request(response, selector_config)
|
641
|
+
return result
|
642
|
+
except CurlError as e: # pragma: no cover
|
643
|
+
if attempt < max_retries - 1:
|
644
|
+
log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
|
645
|
+
await asyncio_sleep(retry_delay)
|
646
|
+
else:
|
647
|
+
log.error(f"Failed after {max_retries} attempts: {e}")
|
648
|
+
raise # Raise the exception if all retries fail
|
649
|
+
finally:
|
650
|
+
if session and one_off_request:
|
651
|
+
await session.close()
|
652
|
+
|
653
|
+
raise RuntimeError("No active session available.") # pragma: no cover
|
653
654
|
|
654
|
-
# Setting the correct return types for the type checking/autocompletion
|
655
655
|
def get(
|
656
656
|
self,
|
657
657
|
url: str,
|
@@ -673,242 +673,55 @@ class FetcherClient(FetcherSession):
|
|
673
673
|
http3: Optional[bool] = _UNSET,
|
674
674
|
stealthy_headers: Optional[bool] = _UNSET,
|
675
675
|
**kwargs,
|
676
|
-
) -> Response:
|
677
|
-
|
678
|
-
|
679
|
-
super().get(
|
680
|
-
url,
|
681
|
-
params,
|
682
|
-
headers,
|
683
|
-
cookies,
|
684
|
-
timeout,
|
685
|
-
follow_redirects,
|
686
|
-
max_redirects,
|
687
|
-
retries,
|
688
|
-
retry_delay,
|
689
|
-
proxies,
|
690
|
-
proxy,
|
691
|
-
proxy_auth,
|
692
|
-
auth,
|
693
|
-
verify,
|
694
|
-
cert,
|
695
|
-
impersonate,
|
696
|
-
http3,
|
697
|
-
stealthy_headers,
|
698
|
-
**kwargs,
|
699
|
-
),
|
700
|
-
)
|
676
|
+
) -> Awaitable[Response]:
|
677
|
+
"""
|
678
|
+
Perform a GET request.
|
701
679
|
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
cert,
|
746
|
-
impersonate,
|
747
|
-
http3,
|
748
|
-
stealthy_headers,
|
749
|
-
**kwargs,
|
750
|
-
),
|
751
|
-
)
|
752
|
-
|
753
|
-
def put(
|
754
|
-
self,
|
755
|
-
url: str,
|
756
|
-
data: Optional[Dict | str] = None,
|
757
|
-
json: Optional[Dict | List] = None,
|
758
|
-
headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
|
759
|
-
params: Optional[Dict | List | Tuple] = None,
|
760
|
-
cookies: Optional[CookieTypes] = None,
|
761
|
-
timeout: Optional[int | float] = _UNSET,
|
762
|
-
follow_redirects: Optional[bool] = _UNSET,
|
763
|
-
max_redirects: Optional[int] = _UNSET,
|
764
|
-
retries: Optional[int] = _UNSET,
|
765
|
-
retry_delay: Optional[int] = _UNSET,
|
766
|
-
proxies: Optional[ProxySpec] = _UNSET,
|
767
|
-
proxy: Optional[str] = _UNSET,
|
768
|
-
proxy_auth: Optional[Tuple[str, str]] = _UNSET,
|
769
|
-
auth: Optional[Tuple[str, str]] = None,
|
770
|
-
verify: Optional[bool] = _UNSET,
|
771
|
-
cert: Optional[str | Tuple[str, str]] = _UNSET,
|
772
|
-
impersonate: Optional[BrowserTypeLiteral] = _UNSET,
|
773
|
-
http3: Optional[bool] = _UNSET,
|
774
|
-
stealthy_headers: Optional[bool] = _UNSET,
|
775
|
-
**kwargs,
|
776
|
-
) -> Response:
|
777
|
-
return cast(
|
778
|
-
Response,
|
779
|
-
super().put(
|
780
|
-
url,
|
781
|
-
data,
|
782
|
-
json,
|
783
|
-
headers,
|
784
|
-
params,
|
785
|
-
cookies,
|
786
|
-
timeout,
|
787
|
-
follow_redirects,
|
788
|
-
max_redirects,
|
789
|
-
retries,
|
790
|
-
retry_delay,
|
791
|
-
proxies,
|
792
|
-
proxy,
|
793
|
-
proxy_auth,
|
794
|
-
auth,
|
795
|
-
verify,
|
796
|
-
cert,
|
797
|
-
impersonate,
|
798
|
-
http3,
|
799
|
-
stealthy_headers,
|
800
|
-
**kwargs,
|
801
|
-
),
|
802
|
-
)
|
803
|
-
|
804
|
-
def delete(
|
805
|
-
self,
|
806
|
-
url: str,
|
807
|
-
data: Optional[Dict | str] = None,
|
808
|
-
json: Optional[Dict | List] = None,
|
809
|
-
headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
|
810
|
-
params: Optional[Dict | List | Tuple] = None,
|
811
|
-
cookies: Optional[CookieTypes] = None,
|
812
|
-
timeout: Optional[int | float] = _UNSET,
|
813
|
-
follow_redirects: Optional[bool] = _UNSET,
|
814
|
-
max_redirects: Optional[int] = _UNSET,
|
815
|
-
retries: Optional[int] = _UNSET,
|
816
|
-
retry_delay: Optional[int] = _UNSET,
|
817
|
-
proxies: Optional[ProxySpec] = _UNSET,
|
818
|
-
proxy: Optional[str] = _UNSET,
|
819
|
-
proxy_auth: Optional[Tuple[str, str]] = _UNSET,
|
820
|
-
auth: Optional[Tuple[str, str]] = None,
|
821
|
-
verify: Optional[bool] = _UNSET,
|
822
|
-
cert: Optional[str | Tuple[str, str]] = _UNSET,
|
823
|
-
impersonate: Optional[BrowserTypeLiteral] = _UNSET,
|
824
|
-
http3: Optional[bool] = _UNSET,
|
825
|
-
stealthy_headers: Optional[bool] = _UNSET,
|
826
|
-
**kwargs,
|
827
|
-
) -> Response:
|
828
|
-
return cast(
|
829
|
-
Response,
|
830
|
-
super().delete(
|
831
|
-
url,
|
832
|
-
data,
|
833
|
-
json,
|
834
|
-
headers,
|
835
|
-
params,
|
836
|
-
cookies,
|
837
|
-
timeout,
|
838
|
-
follow_redirects,
|
839
|
-
max_redirects,
|
840
|
-
retries,
|
841
|
-
retry_delay,
|
842
|
-
proxies,
|
843
|
-
proxy,
|
844
|
-
proxy_auth,
|
845
|
-
auth,
|
846
|
-
verify,
|
847
|
-
cert,
|
848
|
-
impersonate,
|
849
|
-
http3,
|
850
|
-
stealthy_headers,
|
851
|
-
**kwargs,
|
852
|
-
),
|
853
|
-
)
|
854
|
-
|
855
|
-
|
856
|
-
class AsyncFetcherClient(FetcherSession):
|
857
|
-
def __init__(self, *args, **kwargs):
|
858
|
-
super().__init__(*args, **kwargs)
|
859
|
-
self.__enter__: Any = None
|
860
|
-
self.__exit__: Any = None
|
861
|
-
self.__aenter__: Any = None
|
862
|
-
self.__aexit__: Any = None
|
863
|
-
self._async_curl_session: Any = True
|
864
|
-
|
865
|
-
# Setting the correct return types for the type checking/autocompletion
|
866
|
-
def get(
|
867
|
-
self,
|
868
|
-
url: str,
|
869
|
-
params: Optional[Dict | List | Tuple] = None,
|
870
|
-
headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
|
871
|
-
cookies: Optional[CookieTypes] = None,
|
872
|
-
timeout: Optional[int | float] = _UNSET,
|
873
|
-
follow_redirects: Optional[bool] = _UNSET,
|
874
|
-
max_redirects: Optional[int] = _UNSET,
|
875
|
-
retries: Optional[int] = _UNSET,
|
876
|
-
retry_delay: Optional[int] = _UNSET,
|
877
|
-
proxies: Optional[ProxySpec] = _UNSET,
|
878
|
-
proxy: Optional[str] = _UNSET,
|
879
|
-
proxy_auth: Optional[Tuple[str, str]] = _UNSET,
|
880
|
-
auth: Optional[Tuple[str, str]] = None,
|
881
|
-
verify: Optional[bool] = _UNSET,
|
882
|
-
cert: Optional[str | Tuple[str, str]] = _UNSET,
|
883
|
-
impersonate: Optional[BrowserTypeLiteral] = _UNSET,
|
884
|
-
http3: Optional[bool] = _UNSET,
|
885
|
-
stealthy_headers: Optional[bool] = _UNSET,
|
886
|
-
**kwargs,
|
887
|
-
) -> Awaitable[Response]:
|
888
|
-
return cast(
|
889
|
-
Awaitable[Response],
|
890
|
-
super().get(
|
891
|
-
url,
|
892
|
-
params,
|
893
|
-
headers,
|
894
|
-
cookies,
|
895
|
-
timeout,
|
896
|
-
follow_redirects,
|
897
|
-
max_redirects,
|
898
|
-
retries,
|
899
|
-
retry_delay,
|
900
|
-
proxies,
|
901
|
-
proxy,
|
902
|
-
proxy_auth,
|
903
|
-
auth,
|
904
|
-
verify,
|
905
|
-
cert,
|
906
|
-
impersonate,
|
907
|
-
http3,
|
908
|
-
stealthy_headers,
|
909
|
-
**kwargs,
|
910
|
-
),
|
680
|
+
:param url: Target URL for the request.
|
681
|
+
:param params: Query string parameters for the request.
|
682
|
+
:param headers: Headers to include in the request.
|
683
|
+
:param cookies: Cookies to use in the request.
|
684
|
+
:param timeout: Number of seconds to wait before timing out.
|
685
|
+
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
686
|
+
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
687
|
+
:param retries: Number of retry attempts. Defaults to 3.
|
688
|
+
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
689
|
+
:param proxies: Dict of proxies to use.
|
690
|
+
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
691
|
+
Cannot be used together with the `proxies` parameter.
|
692
|
+
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
693
|
+
:param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
|
694
|
+
:param verify: Whether to verify HTTPS certificates.
|
695
|
+
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
696
|
+
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
697
|
+
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
698
|
+
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
699
|
+
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
700
|
+
:return: A `Response` object.
|
701
|
+
"""
|
702
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
703
|
+
method_args.update(kwargs)
|
704
|
+
# For type checking (not accessed error)
|
705
|
+
_ = (
|
706
|
+
url,
|
707
|
+
params,
|
708
|
+
headers,
|
709
|
+
cookies,
|
710
|
+
timeout,
|
711
|
+
follow_redirects,
|
712
|
+
max_redirects,
|
713
|
+
retries,
|
714
|
+
retry_delay,
|
715
|
+
proxies,
|
716
|
+
proxy,
|
717
|
+
proxy_auth,
|
718
|
+
auth,
|
719
|
+
verify,
|
720
|
+
cert,
|
721
|
+
impersonate,
|
722
|
+
http3,
|
911
723
|
)
|
724
|
+
return self.__make_request("GET", stealth=stealthy_headers, **method_args)
|
912
725
|
|
913
726
|
def post(
|
914
727
|
self,
|
@@ -934,32 +747,58 @@ class AsyncFetcherClient(FetcherSession):
|
|
934
747
|
stealthy_headers: Optional[bool] = _UNSET,
|
935
748
|
**kwargs,
|
936
749
|
) -> Awaitable[Response]:
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
750
|
+
"""
|
751
|
+
Perform a POST request.
|
752
|
+
|
753
|
+
:param url: Target URL for the request.
|
754
|
+
:param data: Form data to include in the request body.
|
755
|
+
:param json: A JSON serializable object to include in the body of the request.
|
756
|
+
:param params: Query string parameters for the request.
|
757
|
+
:param headers: Headers to include in the request.
|
758
|
+
:param cookies: Cookies to use in the request.
|
759
|
+
:param timeout: Number of seconds to wait before timing out.
|
760
|
+
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
761
|
+
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
762
|
+
:param retries: Number of retry attempts. Defaults to 3.
|
763
|
+
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
764
|
+
:param proxies: Dict of proxies to use.
|
765
|
+
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
766
|
+
Cannot be used together with the `proxies` parameter.
|
767
|
+
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
768
|
+
:param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
|
769
|
+
:param verify: Whether to verify HTTPS certificates.
|
770
|
+
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
771
|
+
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
772
|
+
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
773
|
+
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
774
|
+
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
775
|
+
:return: A `Response` object.
|
776
|
+
"""
|
777
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
778
|
+
method_args.update(kwargs)
|
779
|
+
# For type checking (not accessed error)
|
780
|
+
_ = (
|
781
|
+
url,
|
782
|
+
params,
|
783
|
+
headers,
|
784
|
+
data,
|
785
|
+
json,
|
786
|
+
cookies,
|
787
|
+
timeout,
|
788
|
+
follow_redirects,
|
789
|
+
max_redirects,
|
790
|
+
retries,
|
791
|
+
retry_delay,
|
792
|
+
proxies,
|
793
|
+
proxy,
|
794
|
+
proxy_auth,
|
795
|
+
auth,
|
796
|
+
verify,
|
797
|
+
cert,
|
798
|
+
impersonate,
|
799
|
+
http3,
|
962
800
|
)
|
801
|
+
return self.__make_request("POST", stealth=stealthy_headers, **method_args)
|
963
802
|
|
964
803
|
def put(
|
965
804
|
self,
|
@@ -985,32 +824,58 @@ class AsyncFetcherClient(FetcherSession):
|
|
985
824
|
stealthy_headers: Optional[bool] = _UNSET,
|
986
825
|
**kwargs,
|
987
826
|
) -> Awaitable[Response]:
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
827
|
+
"""
|
828
|
+
Perform a PUT request.
|
829
|
+
|
830
|
+
:param url: Target URL for the request.
|
831
|
+
:param data: Form data to include in the request body.
|
832
|
+
:param json: A JSON serializable object to include in the body of the request.
|
833
|
+
:param params: Query string parameters for the request.
|
834
|
+
:param headers: Headers to include in the request.
|
835
|
+
:param cookies: Cookies to use in the request.
|
836
|
+
:param timeout: Number of seconds to wait before timing out.
|
837
|
+
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
838
|
+
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
839
|
+
:param retries: Number of retry attempts. Defaults to 3.
|
840
|
+
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
841
|
+
:param proxies: Dict of proxies to use.
|
842
|
+
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
843
|
+
Cannot be used together with the `proxies` parameter.
|
844
|
+
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
845
|
+
:param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
|
846
|
+
:param verify: Whether to verify HTTPS certificates.
|
847
|
+
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
848
|
+
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
849
|
+
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
850
|
+
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
851
|
+
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
852
|
+
:return: A `Response` object.
|
853
|
+
"""
|
854
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
855
|
+
method_args.update(kwargs)
|
856
|
+
# For type checking (not accessed error)
|
857
|
+
_ = (
|
858
|
+
url,
|
859
|
+
params,
|
860
|
+
headers,
|
861
|
+
data,
|
862
|
+
json,
|
863
|
+
cookies,
|
864
|
+
timeout,
|
865
|
+
follow_redirects,
|
866
|
+
max_redirects,
|
867
|
+
retries,
|
868
|
+
retry_delay,
|
869
|
+
proxies,
|
870
|
+
proxy,
|
871
|
+
proxy_auth,
|
872
|
+
auth,
|
873
|
+
verify,
|
874
|
+
cert,
|
875
|
+
impersonate,
|
876
|
+
http3,
|
1013
877
|
)
|
878
|
+
return self.__make_request("PUT", stealth=stealthy_headers, **method_args)
|
1014
879
|
|
1015
880
|
def delete(
|
1016
881
|
self,
|
@@ -1036,29 +901,174 @@ class AsyncFetcherClient(FetcherSession):
|
|
1036
901
|
stealthy_headers: Optional[bool] = _UNSET,
|
1037
902
|
**kwargs,
|
1038
903
|
) -> Awaitable[Response]:
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
904
|
+
"""
|
905
|
+
Perform a DELETE request.
|
906
|
+
|
907
|
+
:param url: Target URL for the request.
|
908
|
+
:param data: Form data to include in the request body.
|
909
|
+
:param json: A JSON serializable object to include in the body of the request.
|
910
|
+
:param params: Query string parameters for the request.
|
911
|
+
:param headers: Headers to include in the request.
|
912
|
+
:param cookies: Cookies to use in the request.
|
913
|
+
:param timeout: Number of seconds to wait before timing out.
|
914
|
+
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
915
|
+
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
916
|
+
:param retries: Number of retry attempts. Defaults to 3.
|
917
|
+
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
918
|
+
:param proxies: Dict of proxies to use.
|
919
|
+
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
920
|
+
Cannot be used together with the `proxies` parameter.
|
921
|
+
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
922
|
+
:param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
|
923
|
+
:param verify: Whether to verify HTTPS certificates.
|
924
|
+
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
925
|
+
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
926
|
+
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
927
|
+
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
928
|
+
:param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
|
929
|
+
:return: A `Response` object.
|
930
|
+
"""
|
931
|
+
# Careful of sending a body in a DELETE request, it might cause some websites to reject the request as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
|
932
|
+
# But some websites accept it, it depends on the implementation used.
|
933
|
+
method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
|
934
|
+
method_args.update(kwargs)
|
935
|
+
# For type checking (not accessed error)
|
936
|
+
_ = (
|
937
|
+
url,
|
938
|
+
params,
|
939
|
+
headers,
|
940
|
+
data,
|
941
|
+
json,
|
942
|
+
cookies,
|
943
|
+
timeout,
|
944
|
+
follow_redirects,
|
945
|
+
max_redirects,
|
946
|
+
retries,
|
947
|
+
retry_delay,
|
948
|
+
proxies,
|
949
|
+
proxy,
|
950
|
+
proxy_auth,
|
951
|
+
auth,
|
952
|
+
verify,
|
953
|
+
cert,
|
954
|
+
impersonate,
|
955
|
+
http3,
|
1064
956
|
)
|
957
|
+
return self.__make_request("DELETE", stealth=stealthy_headers, **method_args)
|
958
|
+
|
959
|
+
|
960
|
+
class FetcherSession:
|
961
|
+
"""
|
962
|
+
A factory context manager that provides configured Fetcher sessions.
|
963
|
+
|
964
|
+
When this manager is used in a 'with' or 'async with' block,
|
965
|
+
it yields a new session configured with the manager's defaults.
|
966
|
+
A single instance of this manager should ideally be used for one active
|
967
|
+
session at a time (or sequentially). Re-entering a context with the
|
968
|
+
same manager instance while a session is already active is disallowed.
|
969
|
+
"""
|
970
|
+
|
971
|
+
def __init__(
|
972
|
+
self,
|
973
|
+
impersonate: Optional[BrowserTypeLiteral] = "chrome",
|
974
|
+
http3: Optional[bool] = False,
|
975
|
+
stealthy_headers: Optional[bool] = True,
|
976
|
+
proxies: Optional[Dict[str, str]] = None,
|
977
|
+
proxy: Optional[str] = None,
|
978
|
+
proxy_auth: Optional[Tuple[str, str]] = None,
|
979
|
+
timeout: Optional[int | float] = 30,
|
980
|
+
headers: Optional[Dict[str, str]] = None,
|
981
|
+
retries: Optional[int] = 3,
|
982
|
+
retry_delay: Optional[int] = 1,
|
983
|
+
follow_redirects: bool = True,
|
984
|
+
max_redirects: int = 30,
|
985
|
+
verify: bool = True,
|
986
|
+
cert: Optional[str | Tuple[str, str]] = None,
|
987
|
+
selector_config: Optional[Dict] = None,
|
988
|
+
):
|
989
|
+
"""
|
990
|
+
:param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
|
991
|
+
:param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
|
992
|
+
:param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
|
993
|
+
:param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
|
994
|
+
:param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
|
995
|
+
Cannot be used together with the `proxies` parameter.
|
996
|
+
:param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
|
997
|
+
:param timeout: Number of seconds to wait before timing out.
|
998
|
+
:param headers: Headers to include in the session with every request.
|
999
|
+
:param retries: Number of retry attempts. Defaults to 3.
|
1000
|
+
:param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
|
1001
|
+
:param follow_redirects: Whether to follow redirects. Defaults to True.
|
1002
|
+
:param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
|
1003
|
+
:param verify: Whether to verify HTTPS certificates. Defaults to True.
|
1004
|
+
:param cert: Tuple of (cert, key) filenames for the client certificate.
|
1005
|
+
:param selector_config: Arguments passed when creating the final Selector class.
|
1006
|
+
"""
|
1007
|
+
self._default_impersonate: Optional[BrowserTypeLiteral] = impersonate
|
1008
|
+
self._stealth = stealthy_headers
|
1009
|
+
self._default_proxies = proxies or {}
|
1010
|
+
self._default_proxy = proxy or None
|
1011
|
+
self._default_proxy_auth = proxy_auth or None
|
1012
|
+
self._default_timeout = timeout
|
1013
|
+
self._default_headers = headers or {}
|
1014
|
+
self._default_retries = retries
|
1015
|
+
self._default_retry_delay = retry_delay
|
1016
|
+
self._default_follow_redirects = follow_redirects
|
1017
|
+
self._default_max_redirects = max_redirects
|
1018
|
+
self._default_verify = verify
|
1019
|
+
self._default_cert = cert
|
1020
|
+
self._default_http3 = http3
|
1021
|
+
self.selector_config = selector_config or {}
|
1022
|
+
self._client: _SyncSessionLogic | _ASyncSessionLogic | None = None
|
1023
|
+
|
1024
|
+
def __enter__(self) -> _SyncSessionLogic:
|
1025
|
+
"""Creates and returns a new synchronous Fetcher Session"""
|
1026
|
+
if self._client is None:
|
1027
|
+
# Use **vars(self) to avoid repeating all parameters
|
1028
|
+
config = {k.replace("_default_", ""): v for k, v in vars(self).items() if k.startswith("_default")}
|
1029
|
+
config["stealthy_headers"] = self._stealth
|
1030
|
+
config["selector_config"] = self.selector_config
|
1031
|
+
self._client = _SyncSessionLogic(**config)
|
1032
|
+
return self._client.__enter__()
|
1033
|
+
raise RuntimeError("This FetcherSession instance already has an active synchronous session.")
|
1034
|
+
|
1035
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
1036
|
+
if self._client is not None and isinstance(self._client, _SyncSessionLogic):
|
1037
|
+
self._client.__exit__(exc_type, exc_val, exc_tb)
|
1038
|
+
self._client = None
|
1039
|
+
return
|
1040
|
+
raise RuntimeError("Cannot exit invalid session")
|
1041
|
+
|
1042
|
+
async def __aenter__(self) -> _ASyncSessionLogic:
|
1043
|
+
"""Creates and returns a new asynchronous Session."""
|
1044
|
+
if self._client is None:
|
1045
|
+
# Use **vars(self) to avoid repeating all parameters
|
1046
|
+
config = {k.replace("_default_", ""): v for k, v in vars(self).items() if k.startswith("_default")}
|
1047
|
+
config["stealthy_headers"] = self._stealth
|
1048
|
+
config["selector_config"] = self.selector_config
|
1049
|
+
self._client = _ASyncSessionLogic(**config)
|
1050
|
+
return await self._client.__aenter__()
|
1051
|
+
raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")
|
1052
|
+
|
1053
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
1054
|
+
if self._client is not None and isinstance(self._client, _ASyncSessionLogic):
|
1055
|
+
await self._client.__aexit__(exc_type, exc_val, exc_tb)
|
1056
|
+
self._client = None
|
1057
|
+
return
|
1058
|
+
raise RuntimeError("Cannot exit invalid session")
|
1059
|
+
|
1060
|
+
|
1061
|
+
class FetcherClient(_SyncSessionLogic):
|
1062
|
+
def __init__(self, *args, **kwargs):
|
1063
|
+
super().__init__(*args, **kwargs)
|
1064
|
+
self.__enter__: Any = None
|
1065
|
+
self.__exit__: Any = None
|
1066
|
+
self._curl_session: Any = _NO_SESSION
|
1067
|
+
|
1068
|
+
|
1069
|
+
class AsyncFetcherClient(_ASyncSessionLogic):
|
1070
|
+
def __init__(self, *args, **kwargs):
|
1071
|
+
super().__init__(*args, **kwargs)
|
1072
|
+
self.__aenter__: Any = None
|
1073
|
+
self.__aexit__: Any = None
|
1074
|
+
self._async_curl_session: Any = _NO_SESSION
|