scrapling 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
+ from abc import ABC
1
2
  from time import sleep as time_sleep
2
3
  from asyncio import sleep as asyncio_sleep
3
4
 
4
5
  from curl_cffi.curl import CurlError
5
6
  from curl_cffi import CurlHttpVersion
6
- from curl_cffi.requests.impersonate import DEFAULT_CHROME
7
7
  from curl_cffi.requests import (
8
8
  ProxySpec,
9
9
  CookieTypes,
@@ -22,7 +22,6 @@ from scrapling.core._types import (
22
22
  Awaitable,
23
23
  List,
24
24
  Any,
25
- cast,
26
25
  )
27
26
 
28
27
  from .toolbelt.custom import Response
@@ -30,22 +29,14 @@ from .toolbelt.convertor import ResponseFactory
30
29
  from .toolbelt.fingerprints import generate_convincing_referer, generate_headers, __default_useragent__
31
30
 
32
31
  _UNSET: Any = object()
32
+ _NO_SESSION: Any = object()
33
33
 
34
34
 
35
- class FetcherSession:
36
- """
37
- A context manager that provides configured Fetcher sessions.
38
-
39
- When this manager is used in a 'with' or 'async with' block,
40
- it yields a new session configured with the manager's defaults.
41
- A single instance of this manager should ideally be used for one active
42
- session at a time (or sequentially). Re-entering a context with the
43
- same manager instance while a session is already active is disallowed.
44
- """
45
-
35
+ class _ConfigurationLogic(ABC):
36
+ # Core Logic Handler (Internal Engine)
46
37
  def __init__(
47
38
  self,
48
- impersonate: Optional[BrowserTypeLiteral] = DEFAULT_CHROME,
39
+ impersonate: Optional[BrowserTypeLiteral] = "chrome",
49
40
  http3: Optional[bool] = False,
50
41
  stealthy_headers: Optional[bool] = True,
51
42
  proxies: Optional[Dict[str, str]] = None,
@@ -61,203 +52,185 @@ class FetcherSession:
61
52
  cert: Optional[str | Tuple[str, str]] = None,
62
53
  selector_config: Optional[Dict] = None,
63
54
  ):
64
- """
65
- :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
66
- :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
67
- :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
68
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
69
- :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
70
- Cannot be used together with the `proxies` parameter.
71
- :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
72
- :param timeout: Number of seconds to wait before timing out.
73
- :param headers: Headers to include in the session with every request.
74
- :param retries: Number of retry attempts. Defaults to 3.
75
- :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
76
- :param follow_redirects: Whether to follow redirects. Defaults to True.
77
- :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
78
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
79
- :param cert: Tuple of (cert, key) filenames for the client certificate.
80
- :param selector_config: Arguments passed when creating the final Selector class.
81
- """
82
- self.default_impersonate = impersonate
83
- self.stealth = stealthy_headers
84
- self.default_proxies = proxies or {}
85
- self.default_proxy = proxy or None
86
- self.default_proxy_auth = proxy_auth or None
87
- self.default_timeout = timeout
88
- self.default_headers = headers or {}
89
- self.default_retries = retries
90
- self.default_retry_delay = retry_delay
91
- self.default_follow_redirects = follow_redirects
92
- self.default_max_redirects = max_redirects
93
- self.default_verify = verify
94
- self.default_cert = cert
95
- self.default_http3 = http3
55
+ self._default_impersonate = impersonate
56
+ self._stealth = stealthy_headers
57
+ self._default_proxies = proxies or {}
58
+ self._default_proxy = proxy or None
59
+ self._default_proxy_auth = proxy_auth or None
60
+ self._default_timeout = timeout
61
+ self._default_headers = headers or {}
62
+ self._default_retries = retries
63
+ self._default_retry_delay = retry_delay
64
+ self._default_follow_redirects = follow_redirects
65
+ self._default_max_redirects = max_redirects
66
+ self._default_verify = verify
67
+ self._default_cert = cert
68
+ self._default_http3 = http3
96
69
  self.selector_config = selector_config or {}
97
70
 
98
- self._curl_session: Optional[CurlSession] = None
99
- self._async_curl_session: Optional[AsyncCurlSession] = None
71
+ @staticmethod
72
+ def _get_with_precedence(request_val: Any, default_val: Any) -> Any:
73
+ """Get value with request-level priority over session-level"""
74
+ return request_val if request_val is not _UNSET else default_val
100
75
 
101
- def _merge_request_args(self, **kwargs) -> Dict[str, Any]:
76
+ def _merge_request_args(self, **method_kwargs) -> Dict[str, Any]:
102
77
  """Merge request-specific arguments with default session arguments."""
103
- url = kwargs.pop("url")
104
- request_args = {}
105
-
106
- headers = self.get_with_precedence(kwargs, "headers", self.default_headers)
107
- stealth = self.get_with_precedence(kwargs, "stealth", self.stealth)
108
- impersonate = self.get_with_precedence(kwargs, "impersonate", self.default_impersonate)
109
-
110
- if self.get_with_precedence(kwargs, "http3", self.default_http3): # pragma: no cover
111
- request_args["http_version"] = CurlHttpVersion.V3ONLY
78
+ url = method_kwargs.pop("url")
79
+ impersonate = self._get_with_precedence(method_kwargs.pop("impersonate"), self._default_impersonate)
80
+ http3_enabled = self._get_with_precedence(method_kwargs.pop("http3"), self._default_http3)
81
+ final_args = {
82
+ "url": url,
83
+ # Curl automatically generates the suitable browser headers when you use `impersonate`
84
+ "headers": self._headers_job(
85
+ url,
86
+ self._get_with_precedence(method_kwargs.pop("headers"), self._default_headers),
87
+ self._get_with_precedence(method_kwargs.pop("stealth"), self._stealth),
88
+ bool(impersonate),
89
+ ),
90
+ "proxies": self._get_with_precedence(method_kwargs.pop("proxies"), self._default_proxies),
91
+ "proxy": self._get_with_precedence(method_kwargs.pop("proxy"), self._default_proxy),
92
+ "proxy_auth": self._get_with_precedence(method_kwargs.pop("proxy_auth"), self._default_proxy_auth),
93
+ "timeout": self._get_with_precedence(method_kwargs.pop("timeout"), self._default_timeout),
94
+ "allow_redirects": self._get_with_precedence(
95
+ method_kwargs.pop("follow_redirects"), self._default_follow_redirects
96
+ ),
97
+ "max_redirects": self._get_with_precedence(method_kwargs.pop("max_redirects"), self._default_max_redirects),
98
+ "verify": self._get_with_precedence(method_kwargs.pop("verify"), self._default_verify),
99
+ "cert": self._get_with_precedence(method_kwargs.pop("cert"), self._default_cert),
100
+ "impersonate": impersonate,
101
+ **{
102
+ k: v
103
+ for k, v in method_kwargs.items()
104
+ if v
105
+ not in (
106
+ _UNSET,
107
+ None,
108
+ )
109
+ }, # Add any remaining parameters (after all known ones are popped)
110
+ }
111
+ if http3_enabled: # pragma: no cover
112
+ final_args["http_version"] = CurlHttpVersion.V3ONLY
112
113
  if impersonate:
113
114
  log.warning(
114
115
  "The argument `http3` might cause errors if used with `impersonate` argument, try switching it off if you encounter any curl errors."
115
116
  )
116
117
 
117
- request_args.update(
118
- {
119
- "url": url,
120
- # Curl automatically generates the suitable browser headers when you use `impersonate`
121
- "headers": self._headers_job(url, headers, stealth, bool(impersonate)),
122
- "proxies": self.get_with_precedence(kwargs, "proxies", self.default_proxies),
123
- "proxy": self.get_with_precedence(kwargs, "proxy", self.default_proxy),
124
- "proxy_auth": self.get_with_precedence(kwargs, "proxy_auth", self.default_proxy_auth),
125
- "timeout": self.get_with_precedence(kwargs, "timeout", self.default_timeout),
126
- "allow_redirects": self.get_with_precedence(kwargs, "allow_redirects", self.default_follow_redirects),
127
- "max_redirects": self.get_with_precedence(kwargs, "max_redirects", self.default_max_redirects),
128
- "verify": self.get_with_precedence(kwargs, "verify", self.default_verify),
129
- "cert": self.get_with_precedence(kwargs, "cert", self.default_cert),
130
- "impersonate": impersonate,
131
- **{
132
- k: v
133
- for k, v in kwargs.items()
134
- if v
135
- not in (
136
- _UNSET,
137
- None,
138
- )
139
- }, # Add any remaining parameters (after all known ones are popped)
140
- }
141
- )
142
- return request_args
143
-
144
- def _headers_job(
145
- self,
146
- url,
147
- headers: Optional[Dict],
148
- stealth: Optional[bool],
149
- impersonate_enabled: bool,
150
- ) -> Dict:
151
- """Adds useragent to headers if it doesn't exist, generates real headers and append it to current headers, and
152
- finally generates a referer header that looks like if this request came from Google's search of the current URL's domain.
118
+ return final_args
153
119
 
154
- :param headers: Current headers in the request if the user passed any
155
- :param stealth: Whether to enable the `stealthy_headers` argument to this request or not. If `None`, it defaults to the session default value.
156
- :param impersonate_enabled: Whether the browser impersonation is enabled or not.
157
- :return: A dictionary of the new headers.
120
+ def _headers_job(self, url, headers: Dict, stealth: bool, impersonate_enabled: bool) -> Dict:
158
121
  """
159
- # Handle headers - if it was _UNSET, use default_headers
160
- if headers is _UNSET:
161
- headers = self.default_headers.copy()
162
- else:
163
- # Merge session headers with request headers, request takes precedence
164
- headers = {**self.default_headers, **(headers or {})}
165
-
166
- headers_keys = set(map(str.lower, headers.keys()))
122
+ 1. Adds a useragent to the headers if it doesn't have one
123
+ 2. Generates real headers and append them to current headers
124
+ 3. Generates a referer header that looks like as if this request came from a Google's search of the current URL's domain.
125
+ """
126
+ # Merge session headers with request headers, request takes precedence (if it was set)
127
+ final_headers = {**self._default_headers, **(headers if headers and headers is not _UNSET else {})}
128
+ headers_keys = {k.lower() for k in final_headers}
167
129
  if stealth:
168
130
  if "referer" not in headers_keys:
169
- headers.update({"referer": generate_convincing_referer(url)})
131
+ final_headers["referer"] = generate_convincing_referer(url)
170
132
 
171
- if impersonate_enabled: # Curl will generate the suitable headers
172
- return headers
173
-
174
- extra_headers = generate_headers(browser_mode=False)
175
- # Don't overwrite user-supplied headers
176
- extra_headers = {key: value for key, value in extra_headers.items() if key.lower() not in headers_keys}
177
- headers.update(extra_headers)
133
+ if not impersonate_enabled: # Curl will generate the suitable headers
134
+ extra_headers = generate_headers(browser_mode=False)
135
+ final_headers.update(
136
+ {k: v for k, v in extra_headers.items() if k.lower() not in headers_keys}
137
+ ) # Don't overwrite user-supplied headers
178
138
 
179
139
  elif "user-agent" not in headers_keys and not impersonate_enabled:
180
- headers["User-Agent"] = __default_useragent__
181
- log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
140
+ final_headers["User-Agent"] = __default_useragent__
141
+ log.debug(f"Can't find useragent in headers so '{final_headers['User-Agent']}' was used.")
142
+
143
+ return final_headers
182
144
 
183
- return headers
145
+
146
+ class _SyncSessionLogic(_ConfigurationLogic):
147
+ def __init__(
148
+ self,
149
+ impersonate: Optional[BrowserTypeLiteral] = "chrome",
150
+ http3: Optional[bool] = False,
151
+ stealthy_headers: Optional[bool] = True,
152
+ proxies: Optional[Dict[str, str]] = None,
153
+ proxy: Optional[str] = None,
154
+ proxy_auth: Optional[Tuple[str, str]] = None,
155
+ timeout: Optional[int | float] = 30,
156
+ headers: Optional[Dict[str, str]] = None,
157
+ retries: Optional[int] = 3,
158
+ retry_delay: Optional[int] = 1,
159
+ follow_redirects: bool = True,
160
+ max_redirects: int = 30,
161
+ verify: bool = True,
162
+ cert: Optional[str | Tuple[str, str]] = None,
163
+ selector_config: Optional[Dict] = None,
164
+ ):
165
+ super().__init__(
166
+ impersonate,
167
+ http3,
168
+ stealthy_headers,
169
+ proxies,
170
+ proxy,
171
+ proxy_auth,
172
+ timeout,
173
+ headers,
174
+ retries,
175
+ retry_delay,
176
+ follow_redirects,
177
+ max_redirects,
178
+ verify,
179
+ cert,
180
+ selector_config,
181
+ )
182
+ self._curl_session: Optional[CurlSession] = None
184
183
 
185
184
  def __enter__(self):
186
185
  """Creates and returns a new synchronous Fetcher Session"""
187
186
  if self._curl_session:
188
- raise RuntimeError(
189
- "This FetcherSession instance already has an active synchronous session. "
190
- "Create a new FetcherSession instance for a new independent session, "
191
- "or use the current instance sequentially after the previous context has exited."
192
- )
193
- if self._async_curl_session: # Prevent mixing if async is active from this instance
194
- raise RuntimeError(
195
- "This FetcherSession instance has an active asynchronous session. "
196
- "Cannot enter a synchronous context simultaneously with the same manager instance."
197
- )
187
+ raise RuntimeError("This FetcherSession instance already has an active synchronous session.")
198
188
 
199
189
  self._curl_session = CurlSession()
200
190
  return self
201
191
 
202
192
  def __exit__(self, exc_type, exc_val, exc_tb):
203
193
  """Closes the active synchronous session managed by this instance, if any."""
194
+ # For type checking (not accessed error)
195
+ _ = (
196
+ exc_type,
197
+ exc_val,
198
+ exc_tb,
199
+ )
204
200
  if self._curl_session:
205
201
  self._curl_session.close()
206
202
  self._curl_session = None
207
203
 
208
- async def __aenter__(self):
209
- """Creates and returns a new asynchronous Session."""
210
- if self._async_curl_session:
211
- raise RuntimeError(
212
- "This FetcherSession instance already has an active asynchronous session. "
213
- "Create a new FetcherSession instance for a new independent session, "
214
- "or use the current instance sequentially after the previous context has exited."
215
- )
216
- if self._curl_session: # Prevent mixing if sync is active from this instance
217
- raise RuntimeError(
218
- "This FetcherSession instance has an active synchronous session. "
219
- "Cannot enter an asynchronous context simultaneously with the same manager instance."
220
- )
221
-
222
- self._async_curl_session = AsyncCurlSession()
223
- return self
224
-
225
- async def __aexit__(self, exc_type, exc_val, exc_tb):
226
- """Closes the active asynchronous session managed by this instance, if any."""
227
- if self._async_curl_session:
228
- await self._async_curl_session.close()
229
- self._async_curl_session = None
230
-
231
204
  def __make_request(
232
205
  self,
233
206
  method: SUPPORTED_HTTP_METHODS,
234
- request_args: Dict[str, Any],
235
- max_retries: int,
236
- retry_delay: int,
237
- selector_config: Dict,
207
+ stealth: Optional[bool] = None,
208
+ **kwargs,
238
209
  ) -> Response:
239
210
  """
240
211
  Perform an HTTP request using the configured session.
241
-
242
- :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
243
- :param request_args: Arguments to be passed to the session's `request()` method.
244
- :param max_retries: Maximum number of retries for the request.
245
- :param retry_delay: Number of seconds to wait between retries.
246
- :param selector_config: Arguments passed when creating the final Selector class.
247
- :return: A `Response` object for synchronous requests or an awaitable for asynchronous.
248
212
  """
213
+ stealth = self._stealth if stealth is None else stealth
214
+
215
+ selector_config = kwargs.pop("selector_config", {}) or self.selector_config
216
+ max_retries = self._get_with_precedence(kwargs.pop("retries"), self._default_retries)
217
+ retry_delay = self._get_with_precedence(kwargs.pop("retry_delay"), self._default_retry_delay)
218
+ request_args = self._merge_request_args(stealth=stealth, **kwargs)
219
+
249
220
  session = self._curl_session
250
- if session is True and not any((self.__enter__, self.__exit__, self.__aenter__, self.__aexit__)):
221
+ one_off_request = False
222
+ if session is _NO_SESSION and self.__enter__ is None:
251
223
  # For usage inside FetcherClient
252
224
  # It turns out `curl_cffi` caches impersonation state, so if you turned it off, then on then off, it won't be off on the last time.
253
225
  session = CurlSession()
226
+ one_off_request = True
254
227
 
255
228
  if session:
256
229
  for attempt in range(max_retries):
257
230
  try:
258
231
  response = session.request(method, **request_args)
259
- # response.raise_for_status() # Retry responses with a status code between 200-400
260
- return ResponseFactory.from_http_request(response, selector_config)
232
+ result = ResponseFactory.from_http_request(response, selector_config)
233
+ return result
261
234
  except CurlError as e: # pragma: no cover
262
235
  if attempt < max_retries - 1:
263
236
  log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
@@ -265,86 +238,12 @@ class FetcherSession:
265
238
  else:
266
239
  log.error(f"Failed after {max_retries} attempts: {e}")
267
240
  raise # Raise the exception if all retries fail
241
+ finally:
242
+ if session and one_off_request:
243
+ session.close()
268
244
 
269
245
  raise RuntimeError("No active session available.") # pragma: no cover
270
246
 
271
- async def __make_async_request(
272
- self,
273
- method: SUPPORTED_HTTP_METHODS,
274
- request_args: Dict[str, Any],
275
- max_retries: int,
276
- retry_delay: int,
277
- selector_config: Dict,
278
- ) -> Response:
279
- """
280
- Perform an HTTP request using the configured session.
281
-
282
- :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
283
- :param request_args: Arguments to be passed to the session's `request()` method.
284
- :param max_retries: Maximum number of retries for the request.
285
- :param retry_delay: Number of seconds to wait between retries.
286
- :param selector_config: Arguments passed when creating the final Selector class.
287
- :return: A `Response` object for synchronous requests or an awaitable for asynchronous.
288
- """
289
- session = self._async_curl_session
290
- if session is True and not any((self.__enter__, self.__exit__, self.__aenter__, self.__aexit__)):
291
- # For usage inside the ` AsyncFetcherClient ` class, and that's for several reasons
292
- # 1. It turns out `curl_cffi` caches impersonation state, so if you turned it off, then on then off, it won't be off on the last time.
293
- # 2. `curl_cffi` doesn't support making async requests without sessions
294
- # 3. Using a single session for many requests at the same time in async doesn't sit well with curl_cffi.
295
- session = AsyncCurlSession()
296
-
297
- if session:
298
- for attempt in range(max_retries):
299
- try:
300
- response = await session.request(method, **request_args)
301
- # response.raise_for_status() # Retry responses with a status code between 200-400
302
- return ResponseFactory.from_http_request(response, selector_config)
303
- except CurlError as e: # pragma: no cover
304
- if attempt < max_retries - 1:
305
- log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
306
- await asyncio_sleep(retry_delay)
307
- else:
308
- log.error(f"Failed after {max_retries} attempts: {e}")
309
- raise # Raise the exception if all retries fail
310
-
311
- raise RuntimeError("No active session available.") # pragma: no cover
312
-
313
- @staticmethod
314
- def get_with_precedence(kwargs, key, default_value):
315
- """Get value with request-level priority over session-level"""
316
- request_value = kwargs.pop(key, _UNSET)
317
- return request_value if request_value is not _UNSET else default_value
318
-
319
- def __prepare_and_dispatch(
320
- self,
321
- method: SUPPORTED_HTTP_METHODS,
322
- stealth: Optional[bool] = None,
323
- **kwargs,
324
- ) -> Response | Awaitable[Response]:
325
- """
326
- Internal dispatcher. Prepares arguments and calls sync or async request helper.
327
-
328
- :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
329
- :param stealth: Whether to enable the `stealthy_headers` argument to this request or not. If `None`, it defaults to the session default value.
330
- :param url: Target URL for the request.
331
- :param kwargs: Additional request-specific arguments.
332
- :return: A `Response` object for synchronous requests or an awaitable for asynchronous.
333
- """
334
- stealth = self.stealth if stealth is None else stealth
335
-
336
- selector_config = kwargs.pop("selector_config", {}) or self.selector_config
337
- max_retries = self.get_with_precedence(kwargs, "retries", self.default_retries)
338
- retry_delay = self.get_with_precedence(kwargs, "retry_delay", self.default_retry_delay)
339
- request_args = self._merge_request_args(stealth=stealth, **kwargs)
340
- if self._curl_session:
341
- return self.__make_request(method, request_args, max_retries, retry_delay, selector_config)
342
- elif self._async_curl_session:
343
- # The returned value is a Coroutine
344
- return self.__make_async_request(method, request_args, max_retries, retry_delay, selector_config)
345
-
346
- raise RuntimeError("No active session available.")
347
-
348
247
  def get(
349
248
  self,
350
249
  url: str,
@@ -366,7 +265,7 @@ class FetcherSession:
366
265
  http3: Optional[bool] = _UNSET,
367
266
  stealthy_headers: Optional[bool] = _UNSET,
368
267
  **kwargs,
369
- ) -> Response | Awaitable[Response]:
268
+ ) -> Response:
370
269
  """
371
270
  Perform a GET request.
372
271
 
@@ -390,29 +289,31 @@ class FetcherSession:
390
289
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
391
290
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
392
291
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
393
- :return: A `Response` object or an awaitable for async.
292
+ :return: A `Response` object.
394
293
  """
395
- request_args = {
396
- "url": url,
397
- "params": params,
398
- "headers": headers,
399
- "cookies": cookies,
400
- "timeout": timeout,
401
- "retry_delay": retry_delay,
402
- "allow_redirects": follow_redirects,
403
- "max_redirects": max_redirects,
404
- "retries": retries,
405
- "proxies": proxies,
406
- "proxy": proxy,
407
- "proxy_auth": proxy_auth,
408
- "auth": auth,
409
- "verify": verify,
410
- "cert": cert,
411
- "impersonate": impersonate,
412
- "http3": http3,
413
- **kwargs,
414
- }
415
- return self.__prepare_and_dispatch("GET", stealth=stealthy_headers, **request_args)
294
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
295
+ method_args.update(kwargs)
296
+ # For type checking (not accessed error)
297
+ _ = (
298
+ url,
299
+ params,
300
+ headers,
301
+ cookies,
302
+ timeout,
303
+ follow_redirects,
304
+ max_redirects,
305
+ retries,
306
+ retry_delay,
307
+ proxies,
308
+ proxy,
309
+ proxy_auth,
310
+ auth,
311
+ verify,
312
+ cert,
313
+ impersonate,
314
+ http3,
315
+ )
316
+ return self.__make_request("GET", stealth=stealthy_headers, **method_args)
416
317
 
417
318
  def post(
418
319
  self,
@@ -437,57 +338,59 @@ class FetcherSession:
437
338
  http3: Optional[bool] = _UNSET,
438
339
  stealthy_headers: Optional[bool] = _UNSET,
439
340
  **kwargs,
440
- ) -> Response | Awaitable[Response]:
341
+ ) -> Response:
441
342
  """
442
343
  Perform a POST request.
443
344
 
444
345
  :param url: Target URL for the request.
445
346
  :param data: Form data to include in the request body.
446
347
  :param json: A JSON serializable object to include in the body of the request.
447
- :param headers: Headers to include in the request.
448
348
  :param params: Query string parameters for the request.
349
+ :param headers: Headers to include in the request.
449
350
  :param cookies: Cookies to use in the request.
450
351
  :param timeout: Number of seconds to wait before timing out.
451
352
  :param follow_redirects: Whether to follow redirects. Defaults to True.
452
353
  :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
453
354
  :param retries: Number of retry attempts. Defaults to 3.
454
355
  :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
455
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
356
+ :param proxies: Dict of proxies to use.
456
357
  :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
457
358
  Cannot be used together with the `proxies` parameter.
458
359
  :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
459
360
  :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
460
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
361
+ :param verify: Whether to verify HTTPS certificates.
461
362
  :param cert: Tuple of (cert, key) filenames for the client certificate.
462
363
  :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
463
364
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
464
365
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
465
366
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
466
- :return: A `Response` object or an awaitable for async.
367
+ :return: A `Response` object.
467
368
  """
468
- request_args = {
469
- "url": url,
470
- "data": data,
471
- "json": json,
472
- "headers": headers,
473
- "params": params,
474
- "cookies": cookies,
475
- "timeout": timeout,
476
- "retry_delay": retry_delay,
477
- "proxy": proxy,
478
- "impersonate": impersonate,
479
- "allow_redirects": follow_redirects,
480
- "max_redirects": max_redirects,
481
- "retries": retries,
482
- "proxies": proxies,
483
- "proxy_auth": proxy_auth,
484
- "auth": auth,
485
- "verify": verify,
486
- "cert": cert,
487
- "http3": http3,
488
- **kwargs,
489
- }
490
- return self.__prepare_and_dispatch("POST", stealth=stealthy_headers, **request_args)
369
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
370
+ method_args.update(kwargs)
371
+ # For type checking (not accessed error)
372
+ _ = (
373
+ url,
374
+ params,
375
+ headers,
376
+ data,
377
+ json,
378
+ cookies,
379
+ timeout,
380
+ follow_redirects,
381
+ max_redirects,
382
+ retries,
383
+ retry_delay,
384
+ proxies,
385
+ proxy,
386
+ proxy_auth,
387
+ auth,
388
+ verify,
389
+ cert,
390
+ impersonate,
391
+ http3,
392
+ )
393
+ return self.__make_request("POST", stealth=stealthy_headers, **method_args)
491
394
 
492
395
  def put(
493
396
  self,
@@ -512,57 +415,59 @@ class FetcherSession:
512
415
  http3: Optional[bool] = _UNSET,
513
416
  stealthy_headers: Optional[bool] = _UNSET,
514
417
  **kwargs,
515
- ) -> Response | Awaitable[Response]:
418
+ ) -> Response:
516
419
  """
517
420
  Perform a PUT request.
518
421
 
519
422
  :param url: Target URL for the request.
520
423
  :param data: Form data to include in the request body.
521
424
  :param json: A JSON serializable object to include in the body of the request.
522
- :param headers: Headers to include in the request.
523
425
  :param params: Query string parameters for the request.
426
+ :param headers: Headers to include in the request.
524
427
  :param cookies: Cookies to use in the request.
525
428
  :param timeout: Number of seconds to wait before timing out.
526
429
  :param follow_redirects: Whether to follow redirects. Defaults to True.
527
430
  :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
528
431
  :param retries: Number of retry attempts. Defaults to 3.
529
432
  :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
530
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
433
+ :param proxies: Dict of proxies to use.
531
434
  :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
532
435
  Cannot be used together with the `proxies` parameter.
533
436
  :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
534
437
  :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
535
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
438
+ :param verify: Whether to verify HTTPS certificates.
536
439
  :param cert: Tuple of (cert, key) filenames for the client certificate.
537
440
  :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
538
441
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
539
442
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
540
443
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
541
- :return: A `Response` object or an awaitable for async.
444
+ :return: A `Response` object.
542
445
  """
543
- request_args = {
544
- "url": url,
545
- "data": data,
546
- "json": json,
547
- "headers": headers,
548
- "params": params,
549
- "cookies": cookies,
550
- "timeout": timeout,
551
- "retry_delay": retry_delay,
552
- "proxy": proxy,
553
- "impersonate": impersonate,
554
- "allow_redirects": follow_redirects,
555
- "max_redirects": max_redirects,
556
- "retries": retries,
557
- "proxies": proxies,
558
- "proxy_auth": proxy_auth,
559
- "auth": auth,
560
- "verify": verify,
561
- "cert": cert,
562
- "http3": http3,
563
- **kwargs,
564
- }
565
- return self.__prepare_and_dispatch("PUT", stealth=stealthy_headers, **request_args)
446
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
447
+ method_args.update(kwargs)
448
+ # For type checking (not accessed error)
449
+ _ = (
450
+ url,
451
+ params,
452
+ headers,
453
+ data,
454
+ json,
455
+ cookies,
456
+ timeout,
457
+ follow_redirects,
458
+ max_redirects,
459
+ retries,
460
+ retry_delay,
461
+ proxies,
462
+ proxy,
463
+ proxy_auth,
464
+ auth,
465
+ verify,
466
+ cert,
467
+ impersonate,
468
+ http3,
469
+ )
470
+ return self.__make_request("PUT", stealth=stealthy_headers, **method_args)
566
471
 
567
472
  def delete(
568
473
  self,
@@ -587,71 +492,166 @@ class FetcherSession:
587
492
  http3: Optional[bool] = _UNSET,
588
493
  stealthy_headers: Optional[bool] = _UNSET,
589
494
  **kwargs,
590
- ) -> Response | Awaitable[Response]:
495
+ ) -> Response:
591
496
  """
592
497
  Perform a DELETE request.
593
498
 
594
499
  :param url: Target URL for the request.
595
500
  :param data: Form data to include in the request body.
596
501
  :param json: A JSON serializable object to include in the body of the request.
597
- :param headers: Headers to include in the request.
598
502
  :param params: Query string parameters for the request.
503
+ :param headers: Headers to include in the request.
599
504
  :param cookies: Cookies to use in the request.
600
505
  :param timeout: Number of seconds to wait before timing out.
601
506
  :param follow_redirects: Whether to follow redirects. Defaults to True.
602
507
  :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
603
508
  :param retries: Number of retry attempts. Defaults to 3.
604
509
  :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
605
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
510
+ :param proxies: Dict of proxies to use.
606
511
  :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
607
512
  Cannot be used together with the `proxies` parameter.
608
513
  :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
609
514
  :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
610
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
515
+ :param verify: Whether to verify HTTPS certificates.
611
516
  :param cert: Tuple of (cert, key) filenames for the client certificate.
612
517
  :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
613
518
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
614
519
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
615
520
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
616
- :return: A `Response` object or an awaitable for async.
521
+ :return: A `Response` object.
617
522
  """
618
- request_args = {
619
- "url": url,
620
- # Careful of sending a body in a DELETE request, it might cause some websites to reject the request as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
621
- # But some websites accept it, it depends on the implementation used.
622
- "data": data,
623
- "json": json,
624
- "headers": headers,
625
- "params": params,
626
- "cookies": cookies,
627
- "timeout": timeout,
628
- "retry_delay": retry_delay,
629
- "proxy": proxy,
630
- "impersonate": impersonate,
631
- "allow_redirects": follow_redirects,
632
- "max_redirects": max_redirects,
633
- "retries": retries,
634
- "proxies": proxies,
635
- "proxy_auth": proxy_auth,
636
- "auth": auth,
637
- "verify": verify,
638
- "cert": cert,
639
- "http3": http3,
640
- **kwargs,
641
- }
642
- return self.__prepare_and_dispatch("DELETE", stealth=stealthy_headers, **request_args)
523
+ # Careful of sending a body in a DELETE request, it might cause some websites to reject the request as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
524
+ # But some websites accept it, it depends on the implementation used.
525
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
526
+ method_args.update(kwargs)
527
+ # For type checking (not accessed error)
528
+ _ = (
529
+ url,
530
+ params,
531
+ headers,
532
+ data,
533
+ json,
534
+ cookies,
535
+ timeout,
536
+ follow_redirects,
537
+ max_redirects,
538
+ retries,
539
+ retry_delay,
540
+ proxies,
541
+ proxy,
542
+ proxy_auth,
543
+ auth,
544
+ verify,
545
+ cert,
546
+ impersonate,
547
+ http3,
548
+ )
549
+ return self.__make_request("DELETE", stealth=stealthy_headers, **method_args)
643
550
 
644
551
 
645
- class FetcherClient(FetcherSession):
646
- def __init__(self, *args, **kwargs):
647
- super().__init__(*args, **kwargs)
648
- self.__enter__: Any = None
649
- self.__exit__: Any = None
650
- self.__aenter__: Any = None
651
- self.__aexit__: Any = None
652
- self._curl_session: Any = True
552
+ class _ASyncSessionLogic(_ConfigurationLogic):
553
+ def __init__(
554
+ self,
555
+ impersonate: Optional[BrowserTypeLiteral] = "chrome",
556
+ http3: Optional[bool] = False,
557
+ stealthy_headers: Optional[bool] = True,
558
+ proxies: Optional[Dict[str, str]] = None,
559
+ proxy: Optional[str] = None,
560
+ proxy_auth: Optional[Tuple[str, str]] = None,
561
+ timeout: Optional[int | float] = 30,
562
+ headers: Optional[Dict[str, str]] = None,
563
+ retries: Optional[int] = 3,
564
+ retry_delay: Optional[int] = 1,
565
+ follow_redirects: bool = True,
566
+ max_redirects: int = 30,
567
+ verify: bool = True,
568
+ cert: Optional[str | Tuple[str, str]] = None,
569
+ selector_config: Optional[Dict] = None,
570
+ ):
571
+ super().__init__(
572
+ impersonate,
573
+ http3,
574
+ stealthy_headers,
575
+ proxies,
576
+ proxy,
577
+ proxy_auth,
578
+ timeout,
579
+ headers,
580
+ retries,
581
+ retry_delay,
582
+ follow_redirects,
583
+ max_redirects,
584
+ verify,
585
+ cert,
586
+ selector_config,
587
+ )
588
+ self._async_curl_session: Optional[AsyncCurlSession] = None
589
+
590
+ async def __aenter__(self):
591
+ """Creates and returns a new asynchronous Session."""
592
+ if self._async_curl_session:
593
+ raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")
594
+
595
+ self._async_curl_session = AsyncCurlSession()
596
+ return self
597
+
598
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
599
+ """Closes the active asynchronous session managed by this instance, if any."""
600
+ # For type checking (not accessed error)
601
+ _ = (
602
+ exc_type,
603
+ exc_val,
604
+ exc_tb,
605
+ )
606
+ if self._async_curl_session:
607
+ await self._async_curl_session.close()
608
+ self._async_curl_session = None
609
+
610
+ async def __make_request(
611
+ self,
612
+ method: SUPPORTED_HTTP_METHODS,
613
+ stealth: Optional[bool] = None,
614
+ **kwargs,
615
+ ) -> Response:
616
+ """
617
+ Perform an HTTP request using the configured session.
618
+ """
619
+ stealth = self._stealth if stealth is None else stealth
620
+
621
+ selector_config = kwargs.pop("selector_config", {}) or self.selector_config
622
+ max_retries = self._get_with_precedence(kwargs.pop("retries"), self._default_retries)
623
+ retry_delay = self._get_with_precedence(kwargs.pop("retry_delay"), self._default_retry_delay)
624
+ request_args = self._merge_request_args(stealth=stealth, **kwargs)
625
+
626
+ session = self._async_curl_session
627
+ one_off_request = False
628
+ if session is _NO_SESSION and self.__aenter__ is None:
629
+ # For usage inside the ` AsyncFetcherClient ` class, and that's for several reasons
630
+ # 1. It turns out `curl_cffi` caches impersonation state, so if you turned it off, then on then off, it won't be off on the last time.
631
+ # 2. `curl_cffi` doesn't support making async requests without sessions
632
+ # 3. Using a single session for many requests at the same time in async doesn't sit well with curl_cffi.
633
+ session = AsyncCurlSession()
634
+ one_off_request = True
635
+
636
+ if session:
637
+ for attempt in range(max_retries):
638
+ try:
639
+ response = await session.request(method, **request_args)
640
+ result = ResponseFactory.from_http_request(response, selector_config)
641
+ return result
642
+ except CurlError as e: # pragma: no cover
643
+ if attempt < max_retries - 1:
644
+ log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
645
+ await asyncio_sleep(retry_delay)
646
+ else:
647
+ log.error(f"Failed after {max_retries} attempts: {e}")
648
+ raise # Raise the exception if all retries fail
649
+ finally:
650
+ if session and one_off_request:
651
+ await session.close()
652
+
653
+ raise RuntimeError("No active session available.") # pragma: no cover
653
654
 
654
- # Setting the correct return types for the type checking/autocompletion
655
655
  def get(
656
656
  self,
657
657
  url: str,
@@ -673,242 +673,55 @@ class FetcherClient(FetcherSession):
673
673
  http3: Optional[bool] = _UNSET,
674
674
  stealthy_headers: Optional[bool] = _UNSET,
675
675
  **kwargs,
676
- ) -> Response:
677
- return cast(
678
- Response,
679
- super().get(
680
- url,
681
- params,
682
- headers,
683
- cookies,
684
- timeout,
685
- follow_redirects,
686
- max_redirects,
687
- retries,
688
- retry_delay,
689
- proxies,
690
- proxy,
691
- proxy_auth,
692
- auth,
693
- verify,
694
- cert,
695
- impersonate,
696
- http3,
697
- stealthy_headers,
698
- **kwargs,
699
- ),
700
- )
676
+ ) -> Awaitable[Response]:
677
+ """
678
+ Perform a GET request.
701
679
 
702
- def post(
703
- self,
704
- url: str,
705
- data: Optional[Dict | str] = None,
706
- json: Optional[Dict | List] = None,
707
- headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
708
- params: Optional[Dict | List | Tuple] = None,
709
- cookies: Optional[CookieTypes] = None,
710
- timeout: Optional[int | float] = _UNSET,
711
- follow_redirects: Optional[bool] = _UNSET,
712
- max_redirects: Optional[int] = _UNSET,
713
- retries: Optional[int] = _UNSET,
714
- retry_delay: Optional[int] = _UNSET,
715
- proxies: Optional[ProxySpec] = _UNSET,
716
- proxy: Optional[str] = _UNSET,
717
- proxy_auth: Optional[Tuple[str, str]] = _UNSET,
718
- auth: Optional[Tuple[str, str]] = None,
719
- verify: Optional[bool] = _UNSET,
720
- cert: Optional[str | Tuple[str, str]] = _UNSET,
721
- impersonate: Optional[BrowserTypeLiteral] = _UNSET,
722
- http3: Optional[bool] = _UNSET,
723
- stealthy_headers: Optional[bool] = _UNSET,
724
- **kwargs,
725
- ) -> Response:
726
- return cast(
727
- Response,
728
- super().post(
729
- url,
730
- data,
731
- json,
732
- headers,
733
- params,
734
- cookies,
735
- timeout,
736
- follow_redirects,
737
- max_redirects,
738
- retries,
739
- retry_delay,
740
- proxies,
741
- proxy,
742
- proxy_auth,
743
- auth,
744
- verify,
745
- cert,
746
- impersonate,
747
- http3,
748
- stealthy_headers,
749
- **kwargs,
750
- ),
751
- )
752
-
753
- def put(
754
- self,
755
- url: str,
756
- data: Optional[Dict | str] = None,
757
- json: Optional[Dict | List] = None,
758
- headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
759
- params: Optional[Dict | List | Tuple] = None,
760
- cookies: Optional[CookieTypes] = None,
761
- timeout: Optional[int | float] = _UNSET,
762
- follow_redirects: Optional[bool] = _UNSET,
763
- max_redirects: Optional[int] = _UNSET,
764
- retries: Optional[int] = _UNSET,
765
- retry_delay: Optional[int] = _UNSET,
766
- proxies: Optional[ProxySpec] = _UNSET,
767
- proxy: Optional[str] = _UNSET,
768
- proxy_auth: Optional[Tuple[str, str]] = _UNSET,
769
- auth: Optional[Tuple[str, str]] = None,
770
- verify: Optional[bool] = _UNSET,
771
- cert: Optional[str | Tuple[str, str]] = _UNSET,
772
- impersonate: Optional[BrowserTypeLiteral] = _UNSET,
773
- http3: Optional[bool] = _UNSET,
774
- stealthy_headers: Optional[bool] = _UNSET,
775
- **kwargs,
776
- ) -> Response:
777
- return cast(
778
- Response,
779
- super().put(
780
- url,
781
- data,
782
- json,
783
- headers,
784
- params,
785
- cookies,
786
- timeout,
787
- follow_redirects,
788
- max_redirects,
789
- retries,
790
- retry_delay,
791
- proxies,
792
- proxy,
793
- proxy_auth,
794
- auth,
795
- verify,
796
- cert,
797
- impersonate,
798
- http3,
799
- stealthy_headers,
800
- **kwargs,
801
- ),
802
- )
803
-
804
- def delete(
805
- self,
806
- url: str,
807
- data: Optional[Dict | str] = None,
808
- json: Optional[Dict | List] = None,
809
- headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
810
- params: Optional[Dict | List | Tuple] = None,
811
- cookies: Optional[CookieTypes] = None,
812
- timeout: Optional[int | float] = _UNSET,
813
- follow_redirects: Optional[bool] = _UNSET,
814
- max_redirects: Optional[int] = _UNSET,
815
- retries: Optional[int] = _UNSET,
816
- retry_delay: Optional[int] = _UNSET,
817
- proxies: Optional[ProxySpec] = _UNSET,
818
- proxy: Optional[str] = _UNSET,
819
- proxy_auth: Optional[Tuple[str, str]] = _UNSET,
820
- auth: Optional[Tuple[str, str]] = None,
821
- verify: Optional[bool] = _UNSET,
822
- cert: Optional[str | Tuple[str, str]] = _UNSET,
823
- impersonate: Optional[BrowserTypeLiteral] = _UNSET,
824
- http3: Optional[bool] = _UNSET,
825
- stealthy_headers: Optional[bool] = _UNSET,
826
- **kwargs,
827
- ) -> Response:
828
- return cast(
829
- Response,
830
- super().delete(
831
- url,
832
- data,
833
- json,
834
- headers,
835
- params,
836
- cookies,
837
- timeout,
838
- follow_redirects,
839
- max_redirects,
840
- retries,
841
- retry_delay,
842
- proxies,
843
- proxy,
844
- proxy_auth,
845
- auth,
846
- verify,
847
- cert,
848
- impersonate,
849
- http3,
850
- stealthy_headers,
851
- **kwargs,
852
- ),
853
- )
854
-
855
-
856
- class AsyncFetcherClient(FetcherSession):
857
- def __init__(self, *args, **kwargs):
858
- super().__init__(*args, **kwargs)
859
- self.__enter__: Any = None
860
- self.__exit__: Any = None
861
- self.__aenter__: Any = None
862
- self.__aexit__: Any = None
863
- self._async_curl_session: Any = True
864
-
865
- # Setting the correct return types for the type checking/autocompletion
866
- def get(
867
- self,
868
- url: str,
869
- params: Optional[Dict | List | Tuple] = None,
870
- headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
871
- cookies: Optional[CookieTypes] = None,
872
- timeout: Optional[int | float] = _UNSET,
873
- follow_redirects: Optional[bool] = _UNSET,
874
- max_redirects: Optional[int] = _UNSET,
875
- retries: Optional[int] = _UNSET,
876
- retry_delay: Optional[int] = _UNSET,
877
- proxies: Optional[ProxySpec] = _UNSET,
878
- proxy: Optional[str] = _UNSET,
879
- proxy_auth: Optional[Tuple[str, str]] = _UNSET,
880
- auth: Optional[Tuple[str, str]] = None,
881
- verify: Optional[bool] = _UNSET,
882
- cert: Optional[str | Tuple[str, str]] = _UNSET,
883
- impersonate: Optional[BrowserTypeLiteral] = _UNSET,
884
- http3: Optional[bool] = _UNSET,
885
- stealthy_headers: Optional[bool] = _UNSET,
886
- **kwargs,
887
- ) -> Awaitable[Response]:
888
- return cast(
889
- Awaitable[Response],
890
- super().get(
891
- url,
892
- params,
893
- headers,
894
- cookies,
895
- timeout,
896
- follow_redirects,
897
- max_redirects,
898
- retries,
899
- retry_delay,
900
- proxies,
901
- proxy,
902
- proxy_auth,
903
- auth,
904
- verify,
905
- cert,
906
- impersonate,
907
- http3,
908
- stealthy_headers,
909
- **kwargs,
910
- ),
680
+ :param url: Target URL for the request.
681
+ :param params: Query string parameters for the request.
682
+ :param headers: Headers to include in the request.
683
+ :param cookies: Cookies to use in the request.
684
+ :param timeout: Number of seconds to wait before timing out.
685
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
686
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
687
+ :param retries: Number of retry attempts. Defaults to 3.
688
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
689
+ :param proxies: Dict of proxies to use.
690
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
691
+ Cannot be used together with the `proxies` parameter.
692
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
693
+ :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
694
+ :param verify: Whether to verify HTTPS certificates.
695
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
696
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
697
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
698
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
699
+ :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
700
+ :return: A `Response` object.
701
+ """
702
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
703
+ method_args.update(kwargs)
704
+ # For type checking (not accessed error)
705
+ _ = (
706
+ url,
707
+ params,
708
+ headers,
709
+ cookies,
710
+ timeout,
711
+ follow_redirects,
712
+ max_redirects,
713
+ retries,
714
+ retry_delay,
715
+ proxies,
716
+ proxy,
717
+ proxy_auth,
718
+ auth,
719
+ verify,
720
+ cert,
721
+ impersonate,
722
+ http3,
911
723
  )
724
+ return self.__make_request("GET", stealth=stealthy_headers, **method_args)
912
725
 
913
726
  def post(
914
727
  self,
@@ -934,32 +747,58 @@ class AsyncFetcherClient(FetcherSession):
934
747
  stealthy_headers: Optional[bool] = _UNSET,
935
748
  **kwargs,
936
749
  ) -> Awaitable[Response]:
937
- return cast(
938
- Awaitable[Response],
939
- super().post(
940
- url,
941
- data,
942
- json,
943
- headers,
944
- params,
945
- cookies,
946
- timeout,
947
- follow_redirects,
948
- max_redirects,
949
- retries,
950
- retry_delay,
951
- proxies,
952
- proxy,
953
- proxy_auth,
954
- auth,
955
- verify,
956
- cert,
957
- impersonate,
958
- http3,
959
- stealthy_headers,
960
- **kwargs,
961
- ),
750
+ """
751
+ Perform a POST request.
752
+
753
+ :param url: Target URL for the request.
754
+ :param data: Form data to include in the request body.
755
+ :param json: A JSON serializable object to include in the body of the request.
756
+ :param params: Query string parameters for the request.
757
+ :param headers: Headers to include in the request.
758
+ :param cookies: Cookies to use in the request.
759
+ :param timeout: Number of seconds to wait before timing out.
760
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
761
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
762
+ :param retries: Number of retry attempts. Defaults to 3.
763
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
764
+ :param proxies: Dict of proxies to use.
765
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
766
+ Cannot be used together with the `proxies` parameter.
767
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
768
+ :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
769
+ :param verify: Whether to verify HTTPS certificates.
770
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
771
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
772
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
773
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
774
+ :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
775
+ :return: A `Response` object.
776
+ """
777
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
778
+ method_args.update(kwargs)
779
+ # For type checking (not accessed error)
780
+ _ = (
781
+ url,
782
+ params,
783
+ headers,
784
+ data,
785
+ json,
786
+ cookies,
787
+ timeout,
788
+ follow_redirects,
789
+ max_redirects,
790
+ retries,
791
+ retry_delay,
792
+ proxies,
793
+ proxy,
794
+ proxy_auth,
795
+ auth,
796
+ verify,
797
+ cert,
798
+ impersonate,
799
+ http3,
962
800
  )
801
+ return self.__make_request("POST", stealth=stealthy_headers, **method_args)
963
802
 
964
803
  def put(
965
804
  self,
@@ -985,32 +824,58 @@ class AsyncFetcherClient(FetcherSession):
985
824
  stealthy_headers: Optional[bool] = _UNSET,
986
825
  **kwargs,
987
826
  ) -> Awaitable[Response]:
988
- return cast(
989
- Awaitable[Response],
990
- super().put(
991
- url,
992
- data,
993
- json,
994
- headers,
995
- params,
996
- cookies,
997
- timeout,
998
- follow_redirects,
999
- max_redirects,
1000
- retries,
1001
- retry_delay,
1002
- proxies,
1003
- proxy,
1004
- proxy_auth,
1005
- auth,
1006
- verify,
1007
- cert,
1008
- impersonate,
1009
- http3,
1010
- stealthy_headers,
1011
- **kwargs,
1012
- ),
827
+ """
828
+ Perform a PUT request.
829
+
830
+ :param url: Target URL for the request.
831
+ :param data: Form data to include in the request body.
832
+ :param json: A JSON serializable object to include in the body of the request.
833
+ :param params: Query string parameters for the request.
834
+ :param headers: Headers to include in the request.
835
+ :param cookies: Cookies to use in the request.
836
+ :param timeout: Number of seconds to wait before timing out.
837
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
838
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
839
+ :param retries: Number of retry attempts. Defaults to 3.
840
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
841
+ :param proxies: Dict of proxies to use.
842
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
843
+ Cannot be used together with the `proxies` parameter.
844
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
845
+ :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
846
+ :param verify: Whether to verify HTTPS certificates.
847
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
848
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
849
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
850
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
851
+ :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
852
+ :return: A `Response` object.
853
+ """
854
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
855
+ method_args.update(kwargs)
856
+ # For type checking (not accessed error)
857
+ _ = (
858
+ url,
859
+ params,
860
+ headers,
861
+ data,
862
+ json,
863
+ cookies,
864
+ timeout,
865
+ follow_redirects,
866
+ max_redirects,
867
+ retries,
868
+ retry_delay,
869
+ proxies,
870
+ proxy,
871
+ proxy_auth,
872
+ auth,
873
+ verify,
874
+ cert,
875
+ impersonate,
876
+ http3,
1013
877
  )
878
+ return self.__make_request("PUT", stealth=stealthy_headers, **method_args)
1014
879
 
1015
880
  def delete(
1016
881
  self,
@@ -1036,29 +901,174 @@ class AsyncFetcherClient(FetcherSession):
1036
901
  stealthy_headers: Optional[bool] = _UNSET,
1037
902
  **kwargs,
1038
903
  ) -> Awaitable[Response]:
1039
- return cast(
1040
- Awaitable[Response],
1041
- super().delete(
1042
- url,
1043
- data,
1044
- json,
1045
- headers,
1046
- params,
1047
- cookies,
1048
- timeout,
1049
- follow_redirects,
1050
- max_redirects,
1051
- retries,
1052
- retry_delay,
1053
- proxies,
1054
- proxy,
1055
- proxy_auth,
1056
- auth,
1057
- verify,
1058
- cert,
1059
- impersonate,
1060
- http3,
1061
- stealthy_headers,
1062
- **kwargs,
1063
- ),
904
+ """
905
+ Perform a DELETE request.
906
+
907
+ :param url: Target URL for the request.
908
+ :param data: Form data to include in the request body.
909
+ :param json: A JSON serializable object to include in the body of the request.
910
+ :param params: Query string parameters for the request.
911
+ :param headers: Headers to include in the request.
912
+ :param cookies: Cookies to use in the request.
913
+ :param timeout: Number of seconds to wait before timing out.
914
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
915
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
916
+ :param retries: Number of retry attempts. Defaults to 3.
917
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
918
+ :param proxies: Dict of proxies to use.
919
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
920
+ Cannot be used together with the `proxies` parameter.
921
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
922
+ :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
923
+ :param verify: Whether to verify HTTPS certificates.
924
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
925
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
926
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
927
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
928
+ :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
929
+ :return: A `Response` object.
930
+ """
931
+ # Careful of sending a body in a DELETE request, it might cause some websites to reject the request as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
932
+ # But some websites accept it, it depends on the implementation used.
933
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
934
+ method_args.update(kwargs)
935
+ # For type checking (not accessed error)
936
+ _ = (
937
+ url,
938
+ params,
939
+ headers,
940
+ data,
941
+ json,
942
+ cookies,
943
+ timeout,
944
+ follow_redirects,
945
+ max_redirects,
946
+ retries,
947
+ retry_delay,
948
+ proxies,
949
+ proxy,
950
+ proxy_auth,
951
+ auth,
952
+ verify,
953
+ cert,
954
+ impersonate,
955
+ http3,
1064
956
  )
957
+ return self.__make_request("DELETE", stealth=stealthy_headers, **method_args)
958
+
959
+
960
+ class FetcherSession:
961
+ """
962
+ A factory context manager that provides configured Fetcher sessions.
963
+
964
+ When this manager is used in a 'with' or 'async with' block,
965
+ it yields a new session configured with the manager's defaults.
966
+ A single instance of this manager should ideally be used for one active
967
+ session at a time (or sequentially). Re-entering a context with the
968
+ same manager instance while a session is already active is disallowed.
969
+ """
970
+
971
+ def __init__(
972
+ self,
973
+ impersonate: Optional[BrowserTypeLiteral] = "chrome",
974
+ http3: Optional[bool] = False,
975
+ stealthy_headers: Optional[bool] = True,
976
+ proxies: Optional[Dict[str, str]] = None,
977
+ proxy: Optional[str] = None,
978
+ proxy_auth: Optional[Tuple[str, str]] = None,
979
+ timeout: Optional[int | float] = 30,
980
+ headers: Optional[Dict[str, str]] = None,
981
+ retries: Optional[int] = 3,
982
+ retry_delay: Optional[int] = 1,
983
+ follow_redirects: bool = True,
984
+ max_redirects: int = 30,
985
+ verify: bool = True,
986
+ cert: Optional[str | Tuple[str, str]] = None,
987
+ selector_config: Optional[Dict] = None,
988
+ ):
989
+ """
990
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
991
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
992
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
993
+ :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
994
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
995
+ Cannot be used together with the `proxies` parameter.
996
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
997
+ :param timeout: Number of seconds to wait before timing out.
998
+ :param headers: Headers to include in the session with every request.
999
+ :param retries: Number of retry attempts. Defaults to 3.
1000
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
1001
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
1002
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
1003
+ :param verify: Whether to verify HTTPS certificates. Defaults to True.
1004
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
1005
+ :param selector_config: Arguments passed when creating the final Selector class.
1006
+ """
1007
+ self._default_impersonate: Optional[BrowserTypeLiteral] = impersonate
1008
+ self._stealth = stealthy_headers
1009
+ self._default_proxies = proxies or {}
1010
+ self._default_proxy = proxy or None
1011
+ self._default_proxy_auth = proxy_auth or None
1012
+ self._default_timeout = timeout
1013
+ self._default_headers = headers or {}
1014
+ self._default_retries = retries
1015
+ self._default_retry_delay = retry_delay
1016
+ self._default_follow_redirects = follow_redirects
1017
+ self._default_max_redirects = max_redirects
1018
+ self._default_verify = verify
1019
+ self._default_cert = cert
1020
+ self._default_http3 = http3
1021
+ self.selector_config = selector_config or {}
1022
+ self._client: _SyncSessionLogic | _ASyncSessionLogic | None = None
1023
+
1024
+ def __enter__(self) -> _SyncSessionLogic:
1025
+ """Creates and returns a new synchronous Fetcher Session"""
1026
+ if self._client is None:
1027
+ # Use **vars(self) to avoid repeating all parameters
1028
+ config = {k.replace("_default_", ""): v for k, v in vars(self).items() if k.startswith("_default")}
1029
+ config["stealthy_headers"] = self._stealth
1030
+ config["selector_config"] = self.selector_config
1031
+ self._client = _SyncSessionLogic(**config)
1032
+ return self._client.__enter__()
1033
+ raise RuntimeError("This FetcherSession instance already has an active synchronous session.")
1034
+
1035
+ def __exit__(self, exc_type, exc_val, exc_tb):
1036
+ if self._client is not None and isinstance(self._client, _SyncSessionLogic):
1037
+ self._client.__exit__(exc_type, exc_val, exc_tb)
1038
+ self._client = None
1039
+ return
1040
+ raise RuntimeError("Cannot exit invalid session")
1041
+
1042
+ async def __aenter__(self) -> _ASyncSessionLogic:
1043
+ """Creates and returns a new asynchronous Session."""
1044
+ if self._client is None:
1045
+ # Use **vars(self) to avoid repeating all parameters
1046
+ config = {k.replace("_default_", ""): v for k, v in vars(self).items() if k.startswith("_default")}
1047
+ config["stealthy_headers"] = self._stealth
1048
+ config["selector_config"] = self.selector_config
1049
+ self._client = _ASyncSessionLogic(**config)
1050
+ return await self._client.__aenter__()
1051
+ raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")
1052
+
1053
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
1054
+ if self._client is not None and isinstance(self._client, _ASyncSessionLogic):
1055
+ await self._client.__aexit__(exc_type, exc_val, exc_tb)
1056
+ self._client = None
1057
+ return
1058
+ raise RuntimeError("Cannot exit invalid session")
1059
+
1060
+
1061
+ class FetcherClient(_SyncSessionLogic):
1062
+ def __init__(self, *args, **kwargs):
1063
+ super().__init__(*args, **kwargs)
1064
+ self.__enter__: Any = None
1065
+ self.__exit__: Any = None
1066
+ self._curl_session: Any = _NO_SESSION
1067
+
1068
+
1069
+ class AsyncFetcherClient(_ASyncSessionLogic):
1070
+ def __init__(self, *args, **kwargs):
1071
+ super().__init__(*args, **kwargs)
1072
+ self.__aenter__: Any = None
1073
+ self.__aexit__: Any = None
1074
+ self._async_curl_session: Any = _NO_SESSION