scrapling 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. scrapling/__init__.py +29 -19
  2. scrapling/cli.py +21 -4
  3. scrapling/core/_types.py +3 -2
  4. scrapling/core/ai.py +24 -15
  5. scrapling/core/custom_types.py +20 -27
  6. scrapling/core/mixins.py +15 -9
  7. scrapling/core/shell.py +6 -4
  8. scrapling/core/storage.py +7 -6
  9. scrapling/core/translator.py +13 -8
  10. scrapling/core/utils/__init__.py +0 -1
  11. scrapling/engines/_browsers/__init__.py +0 -2
  12. scrapling/engines/_browsers/_base.py +45 -21
  13. scrapling/engines/_browsers/_camoufox.py +98 -43
  14. scrapling/engines/_browsers/_config_tools.py +1 -1
  15. scrapling/engines/_browsers/_controllers.py +34 -13
  16. scrapling/engines/_browsers/_validators.py +31 -10
  17. scrapling/engines/constants.py +0 -15
  18. scrapling/engines/static.py +749 -336
  19. scrapling/engines/toolbelt/convertor.py +13 -15
  20. scrapling/engines/toolbelt/custom.py +6 -9
  21. scrapling/engines/toolbelt/fingerprints.py +17 -10
  22. scrapling/engines/toolbelt/navigation.py +11 -3
  23. scrapling/fetchers/__init__.py +46 -0
  24. scrapling/fetchers/chrome.py +210 -0
  25. scrapling/fetchers/firefox.py +212 -0
  26. scrapling/fetchers/requests.py +28 -0
  27. scrapling/parser.py +109 -84
  28. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/METADATA +17 -16
  29. scrapling-0.3.7.dist-info/RECORD +47 -0
  30. scrapling/fetchers.py +0 -444
  31. scrapling-0.3.5.dist-info/RECORD +0 -44
  32. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/WHEEL +0 -0
  33. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/entry_points.txt +0 -0
  34. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/licenses/LICENSE +0 -0
  35. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
+ from abc import ABC
1
2
  from time import sleep as time_sleep
2
3
  from asyncio import sleep as asyncio_sleep
3
4
 
4
- from curl_cffi.requests.session import CurlError
5
+ from curl_cffi.curl import CurlError
5
6
  from curl_cffi import CurlHttpVersion
6
- from curl_cffi.requests.impersonate import DEFAULT_CHROME
7
7
  from curl_cffi.requests import (
8
8
  ProxySpec,
9
9
  CookieTypes,
@@ -28,23 +28,15 @@ from .toolbelt.custom import Response
28
28
  from .toolbelt.convertor import ResponseFactory
29
29
  from .toolbelt.fingerprints import generate_convincing_referer, generate_headers, __default_useragent__
30
30
 
31
- _UNSET = object()
31
+ _UNSET: Any = object()
32
+ _NO_SESSION: Any = object()
32
33
 
33
34
 
34
- class FetcherSession:
35
- """
36
- A context manager that provides configured Fetcher sessions.
37
-
38
- When this manager is used in a 'with' or 'async with' block,
39
- it yields a new session configured with the manager's defaults.
40
- A single instance of this manager should ideally be used for one active
41
- session at a time (or sequentially). Re-entering a context with the
42
- same manager instance while a session is already active is disallowed.
43
- """
44
-
35
+ class _ConfigurationLogic(ABC):
36
+ # Core Logic Handler (Internal Engine)
45
37
  def __init__(
46
38
  self,
47
- impersonate: Optional[BrowserTypeLiteral] = DEFAULT_CHROME,
39
+ impersonate: Optional[BrowserTypeLiteral] = "chrome",
48
40
  http3: Optional[bool] = False,
49
41
  stealthy_headers: Optional[bool] = True,
50
42
  proxies: Optional[Dict[str, str]] = None,
@@ -60,203 +52,185 @@ class FetcherSession:
60
52
  cert: Optional[str | Tuple[str, str]] = None,
61
53
  selector_config: Optional[Dict] = None,
62
54
  ):
63
- """
64
- :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
65
- :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
66
- :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
67
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
68
- :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
69
- Cannot be used together with the `proxies` parameter.
70
- :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
71
- :param timeout: Number of seconds to wait before timing out.
72
- :param headers: Headers to include in the session with every request.
73
- :param retries: Number of retry attempts. Defaults to 3.
74
- :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
75
- :param follow_redirects: Whether to follow redirects. Defaults to True.
76
- :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
77
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
78
- :param cert: Tuple of (cert, key) filenames for the client certificate.
79
- :param selector_config: Arguments passed when creating the final Selector class.
80
- """
81
- self.default_impersonate = impersonate
82
- self.stealth = stealthy_headers
83
- self.default_proxies = proxies or {}
84
- self.default_proxy = proxy or None
85
- self.default_proxy_auth = proxy_auth or None
86
- self.default_timeout = timeout
87
- self.default_headers = headers or {}
88
- self.default_retries = retries
89
- self.default_retry_delay = retry_delay
90
- self.default_follow_redirects = follow_redirects
91
- self.default_max_redirects = max_redirects
92
- self.default_verify = verify
93
- self.default_cert = cert
94
- self.default_http3 = http3
55
+ self._default_impersonate = impersonate
56
+ self._stealth = stealthy_headers
57
+ self._default_proxies = proxies or {}
58
+ self._default_proxy = proxy or None
59
+ self._default_proxy_auth = proxy_auth or None
60
+ self._default_timeout = timeout
61
+ self._default_headers = headers or {}
62
+ self._default_retries = retries
63
+ self._default_retry_delay = retry_delay
64
+ self._default_follow_redirects = follow_redirects
65
+ self._default_max_redirects = max_redirects
66
+ self._default_verify = verify
67
+ self._default_cert = cert
68
+ self._default_http3 = http3
95
69
  self.selector_config = selector_config or {}
96
70
 
97
- self._curl_session: Optional[CurlSession] | bool = None
98
- self._async_curl_session: Optional[AsyncCurlSession] | bool = None
71
+ @staticmethod
72
+ def _get_with_precedence(request_val: Any, default_val: Any) -> Any:
73
+ """Get value with request-level priority over session-level"""
74
+ return request_val if request_val is not _UNSET else default_val
99
75
 
100
- def _merge_request_args(self, **kwargs) -> Dict[str, Any]:
76
+ def _merge_request_args(self, **method_kwargs) -> Dict[str, Any]:
101
77
  """Merge request-specific arguments with default session arguments."""
102
- url = kwargs.pop("url")
103
- request_args = {}
104
-
105
- headers = self.get_with_precedence(kwargs, "headers", self.default_headers)
106
- stealth = self.get_with_precedence(kwargs, "stealth", self.stealth)
107
- impersonate = self.get_with_precedence(kwargs, "impersonate", self.default_impersonate)
108
-
109
- if self.get_with_precedence(kwargs, "http3", self.default_http3): # pragma: no cover
110
- request_args["http_version"] = CurlHttpVersion.V3ONLY
78
+ url = method_kwargs.pop("url")
79
+ impersonate = self._get_with_precedence(method_kwargs.pop("impersonate"), self._default_impersonate)
80
+ http3_enabled = self._get_with_precedence(method_kwargs.pop("http3"), self._default_http3)
81
+ final_args = {
82
+ "url": url,
83
+ # Curl automatically generates the suitable browser headers when you use `impersonate`
84
+ "headers": self._headers_job(
85
+ url,
86
+ self._get_with_precedence(method_kwargs.pop("headers"), self._default_headers),
87
+ self._get_with_precedence(method_kwargs.pop("stealth"), self._stealth),
88
+ bool(impersonate),
89
+ ),
90
+ "proxies": self._get_with_precedence(method_kwargs.pop("proxies"), self._default_proxies),
91
+ "proxy": self._get_with_precedence(method_kwargs.pop("proxy"), self._default_proxy),
92
+ "proxy_auth": self._get_with_precedence(method_kwargs.pop("proxy_auth"), self._default_proxy_auth),
93
+ "timeout": self._get_with_precedence(method_kwargs.pop("timeout"), self._default_timeout),
94
+ "allow_redirects": self._get_with_precedence(
95
+ method_kwargs.pop("follow_redirects"), self._default_follow_redirects
96
+ ),
97
+ "max_redirects": self._get_with_precedence(method_kwargs.pop("max_redirects"), self._default_max_redirects),
98
+ "verify": self._get_with_precedence(method_kwargs.pop("verify"), self._default_verify),
99
+ "cert": self._get_with_precedence(method_kwargs.pop("cert"), self._default_cert),
100
+ "impersonate": impersonate,
101
+ **{
102
+ k: v
103
+ for k, v in method_kwargs.items()
104
+ if v
105
+ not in (
106
+ _UNSET,
107
+ None,
108
+ )
109
+ }, # Add any remaining parameters (after all known ones are popped)
110
+ }
111
+ if http3_enabled: # pragma: no cover
112
+ final_args["http_version"] = CurlHttpVersion.V3ONLY
111
113
  if impersonate:
112
114
  log.warning(
113
115
  "The argument `http3` might cause errors if used with `impersonate` argument, try switching it off if you encounter any curl errors."
114
116
  )
115
117
 
116
- request_args.update(
117
- {
118
- "url": url,
119
- # Curl automatically generates the suitable browser headers when you use `impersonate`
120
- "headers": self._headers_job(url, headers, stealth, bool(impersonate)),
121
- "proxies": self.get_with_precedence(kwargs, "proxies", self.default_proxies),
122
- "proxy": self.get_with_precedence(kwargs, "proxy", self.default_proxy),
123
- "proxy_auth": self.get_with_precedence(kwargs, "proxy_auth", self.default_proxy_auth),
124
- "timeout": self.get_with_precedence(kwargs, "timeout", self.default_timeout),
125
- "allow_redirects": self.get_with_precedence(kwargs, "allow_redirects", self.default_follow_redirects),
126
- "max_redirects": self.get_with_precedence(kwargs, "max_redirects", self.default_max_redirects),
127
- "verify": self.get_with_precedence(kwargs, "verify", self.default_verify),
128
- "cert": self.get_with_precedence(kwargs, "cert", self.default_cert),
129
- "impersonate": impersonate,
130
- **{
131
- k: v
132
- for k, v in kwargs.items()
133
- if v
134
- not in (
135
- _UNSET,
136
- None,
137
- )
138
- }, # Add any remaining parameters (after all known ones are popped)
139
- }
140
- )
141
- return request_args
118
+ return final_args
142
119
 
143
- def _headers_job(
144
- self,
145
- url,
146
- headers: Optional[Dict],
147
- stealth: Optional[bool],
148
- impersonate_enabled: bool,
149
- ) -> Dict:
150
- """Adds useragent to headers if it doesn't exist, generates real headers and append it to current headers, and
151
- finally generates a referer header that looks like if this request came from Google's search of the current URL's domain.
152
-
153
- :param headers: Current headers in the request if the user passed any
154
- :param stealth: Whether to enable the `stealthy_headers` argument to this request or not. If `None`, it defaults to the session default value.
155
- :param impersonate_enabled: Whether the browser impersonation is enabled or not.
156
- :return: A dictionary of the new headers.
120
+ def _headers_job(self, url, headers: Dict, stealth: bool, impersonate_enabled: bool) -> Dict:
121
+ """
122
+ 1. Adds a useragent to the headers if it doesn't have one
123
+ 2. Generates real headers and appends them to the current headers
124
+ 3. Generates a referer header that looks as if this request came from a Google search of the current URL's domain.
157
125
  """
158
- # Handle headers - if it was _UNSET, use default_headers
159
- if headers is _UNSET:
160
- headers = self.default_headers.copy()
161
- else:
162
- # Merge session headers with request headers, request takes precedence
163
- headers = {**self.default_headers, **(headers or {})}
164
-
165
- headers_keys = set(map(str.lower, headers.keys()))
126
+ # Merge session headers with request headers, request takes precedence (if it was set)
127
+ final_headers = {**self._default_headers, **(headers if headers and headers is not _UNSET else {})}
128
+ headers_keys = {k.lower() for k in final_headers}
166
129
  if stealth:
167
130
  if "referer" not in headers_keys:
168
- headers.update({"referer": generate_convincing_referer(url)})
131
+ final_headers["referer"] = generate_convincing_referer(url)
169
132
 
170
- if impersonate_enabled: # Curl will generate the suitable headers
171
- return headers
172
-
173
- extra_headers = generate_headers(browser_mode=False)
174
- # Don't overwrite user-supplied headers
175
- extra_headers = {key: value for key, value in extra_headers.items() if key.lower() not in headers_keys}
176
- headers.update(extra_headers)
133
+ if not impersonate_enabled: # Curl will generate the suitable headers
134
+ extra_headers = generate_headers(browser_mode=False)
135
+ final_headers.update(
136
+ {k: v for k, v in extra_headers.items() if k.lower() not in headers_keys}
137
+ ) # Don't overwrite user-supplied headers
177
138
 
178
139
  elif "user-agent" not in headers_keys and not impersonate_enabled:
179
- headers["User-Agent"] = __default_useragent__
180
- log.debug(f"Can't find useragent in headers so '{headers['User-Agent']}' was used.")
140
+ final_headers["User-Agent"] = __default_useragent__
141
+ log.debug(f"Can't find useragent in headers so '{final_headers['User-Agent']}' was used.")
142
+
143
+ return final_headers
144
+
181
145
 
182
- return headers
146
+ class _SyncSessionLogic(_ConfigurationLogic):
147
+ def __init__(
148
+ self,
149
+ impersonate: Optional[BrowserTypeLiteral] = "chrome",
150
+ http3: Optional[bool] = False,
151
+ stealthy_headers: Optional[bool] = True,
152
+ proxies: Optional[Dict[str, str]] = None,
153
+ proxy: Optional[str] = None,
154
+ proxy_auth: Optional[Tuple[str, str]] = None,
155
+ timeout: Optional[int | float] = 30,
156
+ headers: Optional[Dict[str, str]] = None,
157
+ retries: Optional[int] = 3,
158
+ retry_delay: Optional[int] = 1,
159
+ follow_redirects: bool = True,
160
+ max_redirects: int = 30,
161
+ verify: bool = True,
162
+ cert: Optional[str | Tuple[str, str]] = None,
163
+ selector_config: Optional[Dict] = None,
164
+ ):
165
+ super().__init__(
166
+ impersonate,
167
+ http3,
168
+ stealthy_headers,
169
+ proxies,
170
+ proxy,
171
+ proxy_auth,
172
+ timeout,
173
+ headers,
174
+ retries,
175
+ retry_delay,
176
+ follow_redirects,
177
+ max_redirects,
178
+ verify,
179
+ cert,
180
+ selector_config,
181
+ )
182
+ self._curl_session: Optional[CurlSession] = None
183
183
 
184
184
  def __enter__(self):
185
185
  """Creates and returns a new synchronous Fetcher Session"""
186
186
  if self._curl_session:
187
- raise RuntimeError(
188
- "This FetcherSession instance already has an active synchronous session. "
189
- "Create a new FetcherSession instance for a new independent session, "
190
- "or use the current instance sequentially after the previous context has exited."
191
- )
192
- if self._async_curl_session: # Prevent mixing if async is active from this instance
193
- raise RuntimeError(
194
- "This FetcherSession instance has an active asynchronous session. "
195
- "Cannot enter a synchronous context simultaneously with the same manager instance."
196
- )
187
+ raise RuntimeError("This FetcherSession instance already has an active synchronous session.")
197
188
 
198
189
  self._curl_session = CurlSession()
199
190
  return self
200
191
 
201
192
  def __exit__(self, exc_type, exc_val, exc_tb):
202
193
  """Closes the active synchronous session managed by this instance, if any."""
194
+ # Reference the parameters explicitly so type checkers do not flag them as "not accessed"
195
+ _ = (
196
+ exc_type,
197
+ exc_val,
198
+ exc_tb,
199
+ )
203
200
  if self._curl_session:
204
201
  self._curl_session.close()
205
202
  self._curl_session = None
206
203
 
207
- async def __aenter__(self):
208
- """Creates and returns a new asynchronous Session."""
209
- if self._async_curl_session:
210
- raise RuntimeError(
211
- "This FetcherSession instance already has an active asynchronous session. "
212
- "Create a new FetcherSession instance for a new independent session, "
213
- "or use the current instance sequentially after the previous context has exited."
214
- )
215
- if self._curl_session: # Prevent mixing if sync is active from this instance
216
- raise RuntimeError(
217
- "This FetcherSession instance has an active synchronous session. "
218
- "Cannot enter an asynchronous context simultaneously with the same manager instance."
219
- )
220
-
221
- self._async_curl_session = AsyncCurlSession()
222
- return self
223
-
224
- async def __aexit__(self, exc_type, exc_val, exc_tb):
225
- """Closes the active asynchronous session managed by this instance, if any."""
226
- if self._async_curl_session:
227
- await self._async_curl_session.close()
228
- self._async_curl_session = None
229
-
230
204
  def __make_request(
231
205
  self,
232
206
  method: SUPPORTED_HTTP_METHODS,
233
- request_args: Dict[str, Any],
234
- max_retries: int,
235
- retry_delay: int,
236
- selector_config: Optional[Dict] = None,
207
+ stealth: Optional[bool] = None,
208
+ **kwargs,
237
209
  ) -> Response:
238
210
  """
239
211
  Perform an HTTP request using the configured session.
240
-
241
- :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
242
- :param request_args: Arguments to be passed to the session's `request()` method.
243
- :param max_retries: Maximum number of retries for the request.
244
- :param retry_delay: Number of seconds to wait between retries.
245
- :param selector_config: Arguments passed when creating the final Selector class.
246
- :return: A `Response` object for synchronous requests or an awaitable for asynchronous.
247
212
  """
213
+ stealth = self._stealth if stealth is None else stealth
214
+
215
+ selector_config = kwargs.pop("selector_config", {}) or self.selector_config
216
+ max_retries = self._get_with_precedence(kwargs.pop("retries"), self._default_retries)
217
+ retry_delay = self._get_with_precedence(kwargs.pop("retry_delay"), self._default_retry_delay)
218
+ request_args = self._merge_request_args(stealth=stealth, **kwargs)
219
+
248
220
  session = self._curl_session
249
- if session is True and not any((self.__enter__, self.__exit__, self.__aenter__, self.__aexit__)):
221
+ one_off_request = False
222
+ if session is _NO_SESSION and self.__enter__ is None:
250
223
  # For usage inside FetcherClient
251
224
  # It turns out `curl_cffi` caches impersonation state, so if you turn it off, then on, then off again, it won't actually be off the last time.
252
225
  session = CurlSession()
226
+ one_off_request = True
253
227
 
254
228
  if session:
255
229
  for attempt in range(max_retries):
256
230
  try:
257
231
  response = session.request(method, **request_args)
258
- # response.raise_for_status() # Retry responses with a status code between 200-400
259
- return ResponseFactory.from_http_request(response, selector_config)
232
+ result = ResponseFactory.from_http_request(response, selector_config)
233
+ return result
260
234
  except CurlError as e: # pragma: no cover
261
235
  if attempt < max_retries - 1:
262
236
  log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
@@ -264,41 +238,407 @@ class FetcherSession:
264
238
  else:
265
239
  log.error(f"Failed after {max_retries} attempts: {e}")
266
240
  raise # Raise the exception if all retries fail
241
+ finally:
242
+ if session and one_off_request:
243
+ session.close()
267
244
 
268
245
  raise RuntimeError("No active session available.") # pragma: no cover
269
246
 
270
- async def __make_async_request(
247
+ def get(
271
248
  self,
272
- method: SUPPORTED_HTTP_METHODS,
273
- request_args: Dict[str, Any],
274
- max_retries: int,
275
- retry_delay: int,
249
+ url: str,
250
+ params: Optional[Dict | List | Tuple] = None,
251
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
252
+ cookies: Optional[CookieTypes] = None,
253
+ timeout: Optional[int | float] = _UNSET,
254
+ follow_redirects: Optional[bool] = _UNSET,
255
+ max_redirects: Optional[int] = _UNSET,
256
+ retries: Optional[int] = _UNSET,
257
+ retry_delay: Optional[int] = _UNSET,
258
+ proxies: Optional[ProxySpec] = _UNSET,
259
+ proxy: Optional[str] = _UNSET,
260
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
261
+ auth: Optional[Tuple[str, str]] = None,
262
+ verify: Optional[bool] = _UNSET,
263
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
264
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
265
+ http3: Optional[bool] = _UNSET,
266
+ stealthy_headers: Optional[bool] = _UNSET,
267
+ **kwargs,
268
+ ) -> Response:
269
+ """
270
+ Perform a GET request.
271
+
272
+ :param url: Target URL for the request.
273
+ :param params: Query string parameters for the request.
274
+ :param headers: Headers to include in the request.
275
+ :param cookies: Cookies to use in the request.
276
+ :param timeout: Number of seconds to wait before timing out.
277
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
278
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
279
+ :param retries: Number of retry attempts. Defaults to 3.
280
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
281
+ :param proxies: Dict of proxies to use.
282
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
283
+ Cannot be used together with the `proxies` parameter.
284
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
285
+ :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
286
+ :param verify: Whether to verify HTTPS certificates.
287
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
288
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
289
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
290
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
291
+ :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
292
+ :return: A `Response` object.
293
+ """
294
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
295
+ method_args.update(kwargs)
296
+ # Reference the parameters explicitly so type checkers do not flag them as "not accessed"
297
+ _ = (
298
+ url,
299
+ params,
300
+ headers,
301
+ cookies,
302
+ timeout,
303
+ follow_redirects,
304
+ max_redirects,
305
+ retries,
306
+ retry_delay,
307
+ proxies,
308
+ proxy,
309
+ proxy_auth,
310
+ auth,
311
+ verify,
312
+ cert,
313
+ impersonate,
314
+ http3,
315
+ )
316
+ return self.__make_request("GET", stealth=stealthy_headers, **method_args)
317
+
318
+ def post(
319
+ self,
320
+ url: str,
321
+ data: Optional[Dict | str] = None,
322
+ json: Optional[Dict | List] = None,
323
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
324
+ params: Optional[Dict | List | Tuple] = None,
325
+ cookies: Optional[CookieTypes] = None,
326
+ timeout: Optional[int | float] = _UNSET,
327
+ follow_redirects: Optional[bool] = _UNSET,
328
+ max_redirects: Optional[int] = _UNSET,
329
+ retries: Optional[int] = _UNSET,
330
+ retry_delay: Optional[int] = _UNSET,
331
+ proxies: Optional[ProxySpec] = _UNSET,
332
+ proxy: Optional[str] = _UNSET,
333
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
334
+ auth: Optional[Tuple[str, str]] = None,
335
+ verify: Optional[bool] = _UNSET,
336
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
337
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
338
+ http3: Optional[bool] = _UNSET,
339
+ stealthy_headers: Optional[bool] = _UNSET,
340
+ **kwargs,
341
+ ) -> Response:
342
+ """
343
+ Perform a POST request.
344
+
345
+ :param url: Target URL for the request.
346
+ :param data: Form data to include in the request body.
347
+ :param json: A JSON serializable object to include in the body of the request.
348
+ :param params: Query string parameters for the request.
349
+ :param headers: Headers to include in the request.
350
+ :param cookies: Cookies to use in the request.
351
+ :param timeout: Number of seconds to wait before timing out.
352
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
353
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
354
+ :param retries: Number of retry attempts. Defaults to 3.
355
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
356
+ :param proxies: Dict of proxies to use.
357
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
358
+ Cannot be used together with the `proxies` parameter.
359
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
360
+ :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
361
+ :param verify: Whether to verify HTTPS certificates.
362
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
363
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
364
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
365
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
366
+ :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
367
+ :return: A `Response` object.
368
+ """
369
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
370
+ method_args.update(kwargs)
371
+ # Reference the parameters explicitly so type checkers do not flag them as "not accessed"
372
+ _ = (
373
+ url,
374
+ params,
375
+ headers,
376
+ data,
377
+ json,
378
+ cookies,
379
+ timeout,
380
+ follow_redirects,
381
+ max_redirects,
382
+ retries,
383
+ retry_delay,
384
+ proxies,
385
+ proxy,
386
+ proxy_auth,
387
+ auth,
388
+ verify,
389
+ cert,
390
+ impersonate,
391
+ http3,
392
+ )
393
+ return self.__make_request("POST", stealth=stealthy_headers, **method_args)
394
+
395
+ def put(
396
+ self,
397
+ url: str,
398
+ data: Optional[Dict | str] = None,
399
+ json: Optional[Dict | List] = None,
400
+ headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
401
+ params: Optional[Dict | List | Tuple] = None,
402
+ cookies: Optional[CookieTypes] = None,
403
+ timeout: Optional[int | float] = _UNSET,
404
+ follow_redirects: Optional[bool] = _UNSET,
405
+ max_redirects: Optional[int] = _UNSET,
406
+ retries: Optional[int] = _UNSET,
407
+ retry_delay: Optional[int] = _UNSET,
408
+ proxies: Optional[ProxySpec] = _UNSET,
409
+ proxy: Optional[str] = _UNSET,
410
+ proxy_auth: Optional[Tuple[str, str]] = _UNSET,
411
+ auth: Optional[Tuple[str, str]] = None,
412
+ verify: Optional[bool] = _UNSET,
413
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
414
+ impersonate: Optional[BrowserTypeLiteral] = _UNSET,
415
+ http3: Optional[bool] = _UNSET,
416
+ stealthy_headers: Optional[bool] = _UNSET,
417
+ **kwargs,
418
+ ) -> Response:
419
+ """
420
+ Perform a PUT request.
421
+
422
+ :param url: Target URL for the request.
423
+ :param data: Form data to include in the request body.
424
+ :param json: A JSON serializable object to include in the body of the request.
425
+ :param params: Query string parameters for the request.
426
+ :param headers: Headers to include in the request.
427
+ :param cookies: Cookies to use in the request.
428
+ :param timeout: Number of seconds to wait before timing out.
429
+ :param follow_redirects: Whether to follow redirects. Defaults to True.
430
+ :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
431
+ :param retries: Number of retry attempts. Defaults to 3.
432
+ :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
433
+ :param proxies: Dict of proxies to use.
434
+ :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
435
+ Cannot be used together with the `proxies` parameter.
436
+ :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
437
+ :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
438
+ :param verify: Whether to verify HTTPS certificates.
439
+ :param cert: Tuple of (cert, key) filenames for the client certificate.
440
+ :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
441
+ :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
442
+ :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
443
+ :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
444
+ :return: A `Response` object.
445
+ """
446
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
447
+ method_args.update(kwargs)
448
+ # Reference the parameters explicitly so type checkers do not flag them as "not accessed"
449
+ _ = (
450
+ url,
451
+ params,
452
+ headers,
453
+ data,
454
+ json,
455
+ cookies,
456
+ timeout,
457
+ follow_redirects,
458
+ max_redirects,
459
+ retries,
460
+ retry_delay,
461
+ proxies,
462
+ proxy,
463
+ proxy_auth,
464
+ auth,
465
+ verify,
466
+ cert,
467
+ impersonate,
468
+ http3,
469
+ )
470
+ return self.__make_request("PUT", stealth=stealthy_headers, **method_args)
471
+
472
def delete(
    self,
    url: str,
    data: Optional[Dict | str] = None,
    json: Optional[Dict | List] = None,
    headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
    params: Optional[Dict | List | Tuple] = None,
    cookies: Optional[CookieTypes] = None,
    timeout: Optional[int | float] = _UNSET,
    follow_redirects: Optional[bool] = _UNSET,
    max_redirects: Optional[int] = _UNSET,
    retries: Optional[int] = _UNSET,
    retry_delay: Optional[int] = _UNSET,
    proxies: Optional[ProxySpec] = _UNSET,
    proxy: Optional[str] = _UNSET,
    proxy_auth: Optional[Tuple[str, str]] = _UNSET,
    auth: Optional[Tuple[str, str]] = None,
    verify: Optional[bool] = _UNSET,
    cert: Optional[str | Tuple[str, str]] = _UNSET,
    impersonate: Optional[BrowserTypeLiteral] = _UNSET,
    http3: Optional[bool] = _UNSET,
    stealthy_headers: Optional[bool] = _UNSET,
    **kwargs,
) -> Response:
    """
    Send a DELETE request through this session.

    Every named argument except `stealthy_headers` is forwarded by keyword to the private
    request helper under its own parameter name; `stealthy_headers` is forwarded as `stealth`.
    Arguments left at `_UNSET` are passed through unchanged (presumably so the session-level
    defaults can be applied downstream -- that resolution is not done in this method).

    :param url: Target URL for the request.
    :param data: Form data to include in the request body.
    :param json: A JSON serializable object to include in the body of the request.
    :param headers: Headers to include in the request.
    :param params: Query string parameters for the request.
    :param cookies: Cookies to use in the request.
    :param timeout: Number of seconds to wait before timing out.
    :param follow_redirects: Whether to follow redirects.
    :param max_redirects: Maximum number of redirects, use -1 for unlimited.
    :param retries: Number of retry attempts.
    :param retry_delay: Number of seconds to wait between retry attempts.
    :param proxies: Dict of proxies to use.
    :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
        Cannot be used together with the `proxies` parameter.
    :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
    :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
    :param verify: Whether to verify HTTPS certificates.
    :param cert: Tuple of (cert, key) filenames for the client certificate.
    :param impersonate: Browser version to impersonate.
    :param http3: Whether to use HTTP3. It might be problematic when combined with `impersonate`.
    :param stealthy_headers: Whether real browser headers (and a search-engine-like referer) are generated for this request.
    :param kwargs: Additional keyword arguments merged on top of the named ones and forwarded to the underlying `request()` call.
    :return: A `Response` object.
    """
    # Careful of sending a body in a DELETE request, it might cause some websites to reject the request
    # as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
    # but some websites accept it, it depends on the implementation used.
    request_options = {
        "url": url,
        "data": data,
        "json": json,
        "headers": headers,
        "params": params,
        "cookies": cookies,
        "timeout": timeout,
        "follow_redirects": follow_redirects,
        "max_redirects": max_redirects,
        "retries": retries,
        "retry_delay": retry_delay,
        "proxies": proxies,
        "proxy": proxy,
        "proxy_auth": proxy_auth,
        "auth": auth,
        "verify": verify,
        "cert": cert,
        "impersonate": impersonate,
        "http3": http3,
    }
    # Extra keyword arguments are layered on last, exactly like the named ones.
    request_options.update(kwargs)
    return self.__make_request("DELETE", stealth=stealthy_headers, **request_options)
550
+
551
+
552
+ class _ASyncSessionLogic(_ConfigurationLogic):
553
def __init__(
    self,
    impersonate: Optional[BrowserTypeLiteral] = "chrome",
    http3: Optional[bool] = False,
    stealthy_headers: Optional[bool] = True,
    proxies: Optional[Dict[str, str]] = None,
    proxy: Optional[str] = None,
    proxy_auth: Optional[Tuple[str, str]] = None,
    timeout: Optional[int | float] = 30,
    headers: Optional[Dict[str, str]] = None,
    retries: Optional[int] = 3,
    retry_delay: Optional[int] = 1,
    follow_redirects: bool = True,
    max_redirects: int = 30,
    verify: bool = True,
    cert: Optional[str | Tuple[str, str]] = None,
    selector_config: Optional[Dict] = None,
):
    """
    Set up the asynchronous session logic with its session-wide defaults.

    All arguments are handed positionally, in declaration order, to the parent
    initializer; this class only adds the (initially empty) async curl session slot.
    """
    # Order matters here: the parent initializer receives these positionally.
    parent_args = (
        impersonate,
        http3,
        stealthy_headers,
        proxies,
        proxy,
        proxy_auth,
        timeout,
        headers,
        retries,
        retry_delay,
        follow_redirects,
        max_redirects,
        verify,
        cert,
        selector_config,
    )
    super().__init__(*parent_args)
    # No asynchronous session exists until `__aenter__` creates one.
    self._async_curl_session: Optional[AsyncCurlSession] = None
589
+
590
async def __aenter__(self):
    """
    Create a new `AsyncCurlSession`, store it on this instance, and return the instance.

    :raises RuntimeError: If this instance already holds an asynchronous session.
    """
    if not self._async_curl_session:
        self._async_curl_session = AsyncCurlSession()
        return self

    raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")
597
+
598
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """
    Close the asynchronous session held by this instance, if any.

    The stored session reference is always cleared, even when closing it raises,
    so a failed close cannot leave the instance stuck in an "already active" state
    that would make every later `__aenter__` fail.

    :param exc_type: Exception type from the `async with` body (unused).
    :param exc_val: Exception value from the `async with` body (unused).
    :param exc_tb: Traceback from the `async with` body (unused).
    :return: None, so exceptions raised inside the `async with` body are not suppressed.
    """
    # For type checking (not accessed error)
    _ = (exc_type, exc_val, exc_tb)

    active_session = self._async_curl_session
    if active_session:
        try:
            await active_session.close()
        finally:
            # Drop the handle regardless of whether close() succeeded.
            self._async_curl_session = None
609
+
610
+ async def __make_request(
611
+ self,
612
+ method: SUPPORTED_HTTP_METHODS,
613
+ stealth: Optional[bool] = None,
614
+ **kwargs,
277
615
  ) -> Response:
278
616
  """
279
617
  Perform an HTTP request using the configured session.
280
-
281
- :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
282
- :param request_args: Arguments to be passed to the session's `request()` method.
283
- :param max_retries: Maximum number of retries for the request.
284
- :param retry_delay: Number of seconds to wait between retries.
285
- :param selector_config: Arguments passed when creating the final Selector class.
286
- :return: A `Response` object for synchronous requests or an awaitable for asynchronous.
287
618
  """
619
+ stealth = self._stealth if stealth is None else stealth
620
+
621
+ selector_config = kwargs.pop("selector_config", {}) or self.selector_config
622
+ max_retries = self._get_with_precedence(kwargs.pop("retries"), self._default_retries)
623
+ retry_delay = self._get_with_precedence(kwargs.pop("retry_delay"), self._default_retry_delay)
624
+ request_args = self._merge_request_args(stealth=stealth, **kwargs)
625
+
288
626
  session = self._async_curl_session
289
- if session is True and not any((self.__enter__, self.__exit__, self.__aenter__, self.__aexit__)):
627
+ one_off_request = False
628
+ if session is _NO_SESSION and self.__aenter__ is None:
290
629
  # For usage inside the ` AsyncFetcherClient ` class, and that's for several reasons
291
630
  # 1. It turns out `curl_cffi` caches impersonation state, so if you turned it off, then on then off, it won't be off on the last time.
292
631
  # 2. `curl_cffi` doesn't support making async requests without sessions
293
632
  # 3. Using a single session for many requests at the same time in async doesn't sit well with curl_cffi.
294
633
  session = AsyncCurlSession()
634
+ one_off_request = True
295
635
 
296
636
  if session:
297
637
  for attempt in range(max_retries):
298
638
  try:
299
639
  response = await session.request(method, **request_args)
300
- # response.raise_for_status() # Retry responses with a status code between 200-400
301
- return ResponseFactory.from_http_request(response, selector_config)
640
+ result = ResponseFactory.from_http_request(response, selector_config)
641
+ return result
302
642
  except CurlError as e: # pragma: no cover
303
643
  if attempt < max_retries - 1:
304
644
  log.error(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
@@ -306,44 +646,12 @@ class FetcherSession:
306
646
  else:
307
647
  log.error(f"Failed after {max_retries} attempts: {e}")
308
648
  raise # Raise the exception if all retries fail
649
+ finally:
650
+ if session and one_off_request:
651
+ await session.close()
309
652
 
310
653
  raise RuntimeError("No active session available.") # pragma: no cover
311
654
 
312
- @staticmethod
313
- def get_with_precedence(kwargs, key, default_value):
314
- """Get value with request-level priority over session-level"""
315
- request_value = kwargs.pop(key, _UNSET)
316
- return request_value if request_value is not _UNSET else default_value
317
-
318
- def __prepare_and_dispatch(
319
- self,
320
- method: SUPPORTED_HTTP_METHODS,
321
- stealth: Optional[bool] = None,
322
- **kwargs,
323
- ) -> Response | Awaitable[Response]:
324
- """
325
- Internal dispatcher. Prepares arguments and calls sync or async request helper.
326
-
327
- :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
328
- :param stealth: Whether to enable the `stealthy_headers` argument to this request or not. If `None`, it defaults to the session default value.
329
- :param url: Target URL for the request.
330
- :param kwargs: Additional request-specific arguments.
331
- :return: A `Response` object for synchronous requests or an awaitable for asynchronous.
332
- """
333
- stealth = self.stealth if stealth is None else stealth
334
-
335
- selector_config = kwargs.pop("selector_config", {}) or self.selector_config
336
- max_retries = self.get_with_precedence(kwargs, "retries", self.default_retries)
337
- retry_delay = self.get_with_precedence(kwargs, "retry_delay", self.default_retry_delay)
338
- request_args = self._merge_request_args(stealth=stealth, **kwargs)
339
- if self._curl_session:
340
- return self.__make_request(method, request_args, max_retries, retry_delay, selector_config)
341
- elif self._async_curl_session:
342
- # The returned value is a Coroutine
343
- return self.__make_async_request(method, request_args, max_retries, retry_delay, selector_config)
344
-
345
- raise RuntimeError("No active session available.")
346
-
347
655
  def get(
348
656
  self,
349
657
  url: str,
@@ -365,7 +673,7 @@ class FetcherSession:
365
673
  http3: Optional[bool] = _UNSET,
366
674
  stealthy_headers: Optional[bool] = _UNSET,
367
675
  **kwargs,
368
- ) -> Response | Awaitable[Response]:
676
+ ) -> Awaitable[Response]:
369
677
  """
370
678
  Perform a GET request.
371
679
 
@@ -389,29 +697,31 @@ class FetcherSession:
389
697
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
390
698
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
391
699
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
392
- :return: A `Response` object or an awaitable for async.
700
+ :return: A `Response` object.
393
701
  """
394
- request_args = {
395
- "url": url,
396
- "params": params,
397
- "headers": headers,
398
- "cookies": cookies,
399
- "timeout": timeout,
400
- "retry_delay": retry_delay,
401
- "allow_redirects": follow_redirects,
402
- "max_redirects": max_redirects,
403
- "retries": retries,
404
- "proxies": proxies,
405
- "proxy": proxy,
406
- "proxy_auth": proxy_auth,
407
- "auth": auth,
408
- "verify": verify,
409
- "cert": cert,
410
- "impersonate": impersonate,
411
- "http3": http3,
412
- **kwargs,
413
- }
414
- return self.__prepare_and_dispatch("GET", stealth=stealthy_headers, **request_args)
702
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
703
+ method_args.update(kwargs)
704
+ # For type checking (not accessed error)
705
+ _ = (
706
+ url,
707
+ params,
708
+ headers,
709
+ cookies,
710
+ timeout,
711
+ follow_redirects,
712
+ max_redirects,
713
+ retries,
714
+ retry_delay,
715
+ proxies,
716
+ proxy,
717
+ proxy_auth,
718
+ auth,
719
+ verify,
720
+ cert,
721
+ impersonate,
722
+ http3,
723
+ )
724
+ return self.__make_request("GET", stealth=stealthy_headers, **method_args)
415
725
 
416
726
  def post(
417
727
  self,
@@ -436,57 +746,59 @@ class FetcherSession:
436
746
  http3: Optional[bool] = _UNSET,
437
747
  stealthy_headers: Optional[bool] = _UNSET,
438
748
  **kwargs,
439
- ) -> Response | Awaitable[Response]:
749
+ ) -> Awaitable[Response]:
440
750
  """
441
751
  Perform a POST request.
442
752
 
443
753
  :param url: Target URL for the request.
444
754
  :param data: Form data to include in the request body.
445
755
  :param json: A JSON serializable object to include in the body of the request.
446
- :param headers: Headers to include in the request.
447
756
  :param params: Query string parameters for the request.
757
+ :param headers: Headers to include in the request.
448
758
  :param cookies: Cookies to use in the request.
449
759
  :param timeout: Number of seconds to wait before timing out.
450
760
  :param follow_redirects: Whether to follow redirects. Defaults to True.
451
761
  :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
452
762
  :param retries: Number of retry attempts. Defaults to 3.
453
763
  :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
454
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
764
+ :param proxies: Dict of proxies to use.
455
765
  :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
456
766
  Cannot be used together with the `proxies` parameter.
457
767
  :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
458
768
  :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
459
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
769
+ :param verify: Whether to verify HTTPS certificates.
460
770
  :param cert: Tuple of (cert, key) filenames for the client certificate.
461
771
  :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
462
772
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
463
773
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
464
774
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
465
- :return: A `Response` object or an awaitable for async.
775
+ :return: A `Response` object.
466
776
  """
467
- request_args = {
468
- "url": url,
469
- "data": data,
470
- "json": json,
471
- "headers": headers,
472
- "params": params,
473
- "cookies": cookies,
474
- "timeout": timeout,
475
- "retry_delay": retry_delay,
476
- "proxy": proxy,
477
- "impersonate": impersonate,
478
- "allow_redirects": follow_redirects,
479
- "max_redirects": max_redirects,
480
- "retries": retries,
481
- "proxies": proxies,
482
- "proxy_auth": proxy_auth,
483
- "auth": auth,
484
- "verify": verify,
485
- "cert": cert,
486
- "http3": http3,
487
- **kwargs,
488
- }
489
- return self.__prepare_and_dispatch("POST", stealth=stealthy_headers, **request_args)
777
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
778
+ method_args.update(kwargs)
779
+ # For type checking (not accessed error)
780
+ _ = (
781
+ url,
782
+ params,
783
+ headers,
784
+ data,
785
+ json,
786
+ cookies,
787
+ timeout,
788
+ follow_redirects,
789
+ max_redirects,
790
+ retries,
791
+ retry_delay,
792
+ proxies,
793
+ proxy,
794
+ proxy_auth,
795
+ auth,
796
+ verify,
797
+ cert,
798
+ impersonate,
799
+ http3,
800
+ )
801
+ return self.__make_request("POST", stealth=stealthy_headers, **method_args)
490
802
 
491
803
  def put(
492
804
  self,
@@ -511,57 +823,59 @@ class FetcherSession:
511
823
  http3: Optional[bool] = _UNSET,
512
824
  stealthy_headers: Optional[bool] = _UNSET,
513
825
  **kwargs,
514
- ) -> Response | Awaitable[Response]:
826
+ ) -> Awaitable[Response]:
515
827
  """
516
828
  Perform a PUT request.
517
829
 
518
830
  :param url: Target URL for the request.
519
831
  :param data: Form data to include in the request body.
520
832
  :param json: A JSON serializable object to include in the body of the request.
521
- :param headers: Headers to include in the request.
522
833
  :param params: Query string parameters for the request.
834
+ :param headers: Headers to include in the request.
523
835
  :param cookies: Cookies to use in the request.
524
836
  :param timeout: Number of seconds to wait before timing out.
525
837
  :param follow_redirects: Whether to follow redirects. Defaults to True.
526
838
  :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
527
839
  :param retries: Number of retry attempts. Defaults to 3.
528
840
  :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
529
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
841
+ :param proxies: Dict of proxies to use.
530
842
  :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
531
843
  Cannot be used together with the `proxies` parameter.
532
844
  :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
533
845
  :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
534
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
846
+ :param verify: Whether to verify HTTPS certificates.
535
847
  :param cert: Tuple of (cert, key) filenames for the client certificate.
536
848
  :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
537
849
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
538
850
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
539
851
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
540
- :return: A `Response` object or an awaitable for async.
852
+ :return: A `Response` object.
541
853
  """
542
- request_args = {
543
- "url": url,
544
- "data": data,
545
- "json": json,
546
- "headers": headers,
547
- "params": params,
548
- "cookies": cookies,
549
- "timeout": timeout,
550
- "retry_delay": retry_delay,
551
- "proxy": proxy,
552
- "impersonate": impersonate,
553
- "allow_redirects": follow_redirects,
554
- "max_redirects": max_redirects,
555
- "retries": retries,
556
- "proxies": proxies,
557
- "proxy_auth": proxy_auth,
558
- "auth": auth,
559
- "verify": verify,
560
- "cert": cert,
561
- "http3": http3,
562
- **kwargs,
563
- }
564
- return self.__prepare_and_dispatch("PUT", stealth=stealthy_headers, **request_args)
854
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
855
+ method_args.update(kwargs)
856
+ # For type checking (not accessed error)
857
+ _ = (
858
+ url,
859
+ params,
860
+ headers,
861
+ data,
862
+ json,
863
+ cookies,
864
+ timeout,
865
+ follow_redirects,
866
+ max_redirects,
867
+ retries,
868
+ retry_delay,
869
+ proxies,
870
+ proxy,
871
+ proxy_auth,
872
+ auth,
873
+ verify,
874
+ cert,
875
+ impersonate,
876
+ http3,
877
+ )
878
+ return self.__make_request("PUT", stealth=stealthy_headers, **method_args)
565
879
 
566
880
  def delete(
567
881
  self,
@@ -586,76 +900,175 @@ class FetcherSession:
586
900
  http3: Optional[bool] = _UNSET,
587
901
  stealthy_headers: Optional[bool] = _UNSET,
588
902
  **kwargs,
589
- ) -> Response | Awaitable[Response]:
903
+ ) -> Awaitable[Response]:
590
904
  """
591
905
  Perform a DELETE request.
592
906
 
593
907
  :param url: Target URL for the request.
594
908
  :param data: Form data to include in the request body.
595
909
  :param json: A JSON serializable object to include in the body of the request.
596
- :param headers: Headers to include in the request.
597
910
  :param params: Query string parameters for the request.
911
+ :param headers: Headers to include in the request.
598
912
  :param cookies: Cookies to use in the request.
599
913
  :param timeout: Number of seconds to wait before timing out.
600
914
  :param follow_redirects: Whether to follow redirects. Defaults to True.
601
915
  :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
602
916
  :param retries: Number of retry attempts. Defaults to 3.
603
917
  :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
604
- :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
918
+ :param proxies: Dict of proxies to use.
605
919
  :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
606
920
  Cannot be used together with the `proxies` parameter.
607
921
  :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
608
922
  :param auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
609
- :param verify: Whether to verify HTTPS certificates. Defaults to True.
923
+ :param verify: Whether to verify HTTPS certificates.
610
924
  :param cert: Tuple of (cert, key) filenames for the client certificate.
611
925
  :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
612
926
  :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used it with `impersonate`.
613
927
  :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
614
928
  :param kwargs: Additional keyword arguments to pass to the [`curl_cffi.requests.Session().request()`, `curl_cffi.requests.AsyncSession().request()`] method.
615
- :return: A `Response` object or an awaitable for async.
929
+ :return: A `Response` object.
616
930
  """
617
- request_args = {
618
- "url": url,
619
- # Careful of sending a body in a DELETE request, it might cause some websites to reject the request as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
620
- # But some websites accept it, it depends on the implementation used.
621
- "data": data,
622
- "json": json,
623
- "headers": headers,
624
- "params": params,
625
- "cookies": cookies,
626
- "timeout": timeout,
627
- "retry_delay": retry_delay,
628
- "proxy": proxy,
629
- "impersonate": impersonate,
630
- "allow_redirects": follow_redirects,
631
- "max_redirects": max_redirects,
632
- "retries": retries,
633
- "proxies": proxies,
634
- "proxy_auth": proxy_auth,
635
- "auth": auth,
636
- "verify": verify,
637
- "cert": cert,
638
- "http3": http3,
639
- **kwargs,
640
- }
641
- return self.__prepare_and_dispatch("DELETE", stealth=stealthy_headers, **request_args)
931
+ # Careful of sending a body in a DELETE request, it might cause some websites to reject the request as per https://www.rfc-editor.org/rfc/rfc7231#section-4.3.5,
932
+ # But some websites accept it, it depends on the implementation used.
933
+ method_args = {k: v for k, v in locals().items() if k not in ("self", "stealthy_headers", "kwargs")}
934
+ method_args.update(kwargs)
935
+ # For type checking (not accessed error)
936
+ _ = (
937
+ url,
938
+ params,
939
+ headers,
940
+ data,
941
+ json,
942
+ cookies,
943
+ timeout,
944
+ follow_redirects,
945
+ max_redirects,
946
+ retries,
947
+ retry_delay,
948
+ proxies,
949
+ proxy,
950
+ proxy_auth,
951
+ auth,
952
+ verify,
953
+ cert,
954
+ impersonate,
955
+ http3,
956
+ )
957
+ return self.__make_request("DELETE", stealth=stealthy_headers, **method_args)
958
+
959
+
960
class FetcherSession:
    """
    A factory context manager that provides configured Fetcher sessions.

    When this manager is used in a 'with' or 'async with' block,
    it yields a new session configured with the manager's defaults.
    A single instance of this manager should ideally be used for one active
    session at a time (or sequentially). Re-entering a context with the
    same manager instance while a session is already active is disallowed.
    """

    def __init__(
        self,
        impersonate: Optional[BrowserTypeLiteral] = "chrome",
        http3: Optional[bool] = False,
        stealthy_headers: Optional[bool] = True,
        proxies: Optional[Dict[str, str]] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[Tuple[str, str]] = None,
        timeout: Optional[int | float] = 30,
        headers: Optional[Dict[str, str]] = None,
        retries: Optional[int] = 3,
        retry_delay: Optional[int] = 1,
        follow_redirects: bool = True,
        max_redirects: int = 30,
        verify: bool = True,
        cert: Optional[str | Tuple[str, str]] = None,
        selector_config: Optional[Dict] = None,
    ):
        """
        :param impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
        :param http3: Whether to use HTTP3. Defaults to False. It might be problematic when used together with `impersonate`.
        :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
        :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
        :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            Cannot be used together with the `proxies` parameter.
        :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
        :param timeout: Number of seconds to wait before timing out.
        :param headers: Headers to include in the session with every request.
        :param retries: Number of retry attempts. Defaults to 3.
        :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
        :param follow_redirects: Whether to follow redirects. Defaults to True.
        :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
        :param verify: Whether to verify HTTPS certificates. Defaults to True.
        :param cert: Tuple of (cert, key) filenames for the client certificate.
        :param selector_config: Arguments passed when creating the final Selector class.
        """
        # Every attribute named `_default_<name>` is later turned into the keyword
        # argument `<name>` when a session-logic object is created.
        self._default_impersonate: Optional[BrowserTypeLiteral] = impersonate
        self._stealth = stealthy_headers
        self._default_proxies = proxies or {}
        self._default_proxy = proxy or None
        self._default_proxy_auth = proxy_auth or None
        self._default_timeout = timeout
        self._default_headers = headers or {}
        self._default_retries = retries
        self._default_retry_delay = retry_delay
        self._default_follow_redirects = follow_redirects
        self._default_max_redirects = max_redirects
        self._default_verify = verify
        self._default_cert = cert
        self._default_http3 = http3
        self.selector_config = selector_config or {}
        # The session-logic object currently handed out, or None when no context is open.
        self._client: _SyncSessionLogic | _ASyncSessionLogic | None = None

    def _build_client_config(self) -> Dict[str, Any]:
        """Collect the stored defaults as keyword arguments for a session-logic class."""
        prefix = "_default_"
        config = {name[len(prefix):]: value for name, value in vars(self).items() if name.startswith(prefix)}
        config["stealthy_headers"] = self._stealth
        config["selector_config"] = self.selector_config
        return config

    def __enter__(self) -> _SyncSessionLogic:
        """
        Create and return a new synchronous Fetcher session.

        :raises RuntimeError: If this manager already has an active session.
        """
        if self._client is not None:
            raise RuntimeError("This FetcherSession instance already has an active synchronous session.")

        client = _SyncSessionLogic(**self._build_client_config())
        session = client.__enter__()
        # Record the client only after it opened successfully, so a failed open
        # does not leave this manager permanently marked as "active".
        self._client = client
        return session

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Close the active synchronous session.

        :raises RuntimeError: If no synchronous session was opened through this manager.
        """
        client = self._client
        if not isinstance(client, _SyncSessionLogic):
            raise RuntimeError("Cannot exit invalid session")
        try:
            client.__exit__(exc_type, exc_val, exc_tb)
        finally:
            # Always release the slot, even if closing raised, so the manager can be reused.
            self._client = None

    async def __aenter__(self) -> _ASyncSessionLogic:
        """
        Create and return a new asynchronous Fetcher session.

        :raises RuntimeError: If this manager already has an active session.
        """
        if self._client is not None:
            raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")

        client = _ASyncSessionLogic(**self._build_client_config())
        session = await client.__aenter__()
        # Record the client only after it opened successfully (see `__enter__`).
        self._client = client
        return session

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """
        Close the active asynchronous session.

        :raises RuntimeError: If no asynchronous session was opened through this manager.
        """
        client = self._client
        if not isinstance(client, _ASyncSessionLogic):
            raise RuntimeError("Cannot exit invalid session")
        try:
            await client.__aexit__(exc_type, exc_val, exc_tb)
        finally:
            # Always release the slot, even if closing raised, so the manager can be reused.
            self._client = None
642
1059
 
643
1060
 
644
- class FetcherClient(FetcherSession):
1061
+ class FetcherClient(_SyncSessionLogic):
645
1062
  def __init__(self, *args, **kwargs):
646
1063
  super().__init__(*args, **kwargs)
647
- self.__enter__ = None
648
- self.__exit__ = None
649
- self.__aenter__ = None
650
- self.__aexit__ = None
651
- self._curl_session = True
1064
+ self.__enter__: Any = None
1065
+ self.__exit__: Any = None
1066
+ self._curl_session: Any = _NO_SESSION
652
1067
 
653
1068
 
654
- class AsyncFetcherClient(FetcherSession):
1069
+ class AsyncFetcherClient(_ASyncSessionLogic):
655
1070
  def __init__(self, *args, **kwargs):
656
1071
  super().__init__(*args, **kwargs)
657
- self.__enter__ = None
658
- self.__exit__ = None
659
- self.__aenter__ = None
660
- self.__aexit__ = None
661
- self._async_curl_session = True
1072
+ self.__aenter__: Any = None
1073
+ self.__aexit__: Any = None
1074
+ self._async_curl_session: Any = _NO_SESSION