scrapling 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
Files changed (35)
  1. scrapling/__init__.py +29 -19
  2. scrapling/cli.py +21 -4
  3. scrapling/core/_types.py +3 -2
  4. scrapling/core/ai.py +24 -15
  5. scrapling/core/custom_types.py +20 -27
  6. scrapling/core/mixins.py +15 -9
  7. scrapling/core/shell.py +6 -4
  8. scrapling/core/storage.py +7 -6
  9. scrapling/core/translator.py +13 -8
  10. scrapling/core/utils/__init__.py +0 -1
  11. scrapling/engines/_browsers/__init__.py +0 -2
  12. scrapling/engines/_browsers/_base.py +45 -21
  13. scrapling/engines/_browsers/_camoufox.py +98 -43
  14. scrapling/engines/_browsers/_config_tools.py +1 -1
  15. scrapling/engines/_browsers/_controllers.py +34 -13
  16. scrapling/engines/_browsers/_validators.py +31 -10
  17. scrapling/engines/constants.py +0 -15
  18. scrapling/engines/static.py +749 -336
  19. scrapling/engines/toolbelt/convertor.py +13 -15
  20. scrapling/engines/toolbelt/custom.py +6 -9
  21. scrapling/engines/toolbelt/fingerprints.py +17 -10
  22. scrapling/engines/toolbelt/navigation.py +11 -3
  23. scrapling/fetchers/__init__.py +46 -0
  24. scrapling/fetchers/chrome.py +210 -0
  25. scrapling/fetchers/firefox.py +212 -0
  26. scrapling/fetchers/requests.py +28 -0
  27. scrapling/parser.py +109 -84
  28. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/METADATA +17 -16
  29. scrapling-0.3.7.dist-info/RECORD +47 -0
  30. scrapling/fetchers.py +0 -444
  31. scrapling-0.3.5.dist-info/RECORD +0 -44
  32. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/WHEEL +0 -0
  33. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/entry_points.txt +0 -0
  34. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/licenses/LICENSE +0 -0
  35. {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/top_level.txt +0 -0
@@ -24,15 +24,15 @@ class ResponseFactory:
 
  @classmethod
  @lru_cache(maxsize=16)
- def __extract_browser_encoding(cls, content_type: str | None) -> Optional[str]:
+ def __extract_browser_encoding(cls, content_type: str | None, default: str = "utf-8") -> str:
  """Extract browser encoding from headers.
  Ex: from header "content-type: text/html; charset=utf-8" -> "utf-8
  """
  if content_type:
  # Because Playwright can't do that by themselves like all libraries for some reason :3
  match = __CHARSET_RE__.search(content_type)
- return match.group(1) if match else None
- return None
+ return match.group(1) if match else default
+ return default
 
  @classmethod
  def _process_response_history(cls, first_response: SyncResponse, parser_arguments: Dict) -> list[Response]:
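The hunk above moves the "utf-8" fallback into the helper itself, so callers always get a string back instead of an Optional[str]. A minimal sketch of the new behaviour, assuming a charset pattern roughly like the stand-in below (the real __CHARSET_RE__ lives in the upstream module):

    import re
    from functools import lru_cache

    # Hypothetical stand-in for the module-level __CHARSET_RE__ pattern
    _CHARSET_RE = re.compile(r"charset=([\w-]+)", re.IGNORECASE)

    @lru_cache(maxsize=16)
    def extract_browser_encoding(content_type: str | None, default: str = "utf-8") -> str:
        # Mirrors the 0.3.7 helper: fall back to `default` instead of returning None
        if content_type:
            match = _CHARSET_RE.search(content_type)
            return match.group(1) if match else default
        return default

    print(extract_browser_encoding("text/html; charset=ISO-8859-1"))  # ISO-8859-1
    print(extract_browser_encoding(None))                             # utf-8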
@@ -58,7 +58,8 @@ class ResponseFactory:
  "encoding": cls.__extract_browser_encoding(
  current_response.headers.get("content-type", "")
  )
- or "utf-8",
+ if current_response
+ else "utf-8",
  "cookies": tuple(),
  "headers": current_response.all_headers() if current_response else {},
  "request_headers": current_request.all_headers(),
@@ -107,15 +108,13 @@ class ResponseFactory:
  if not final_response:
  raise ValueError("Failed to get a response from the page")
 
- encoding = (
- cls.__extract_browser_encoding(final_response.headers.get("content-type", "")) or "utf-8"
- ) # default encoding
+ encoding = cls.__extract_browser_encoding(final_response.headers.get("content-type", ""))
  # PlayWright API sometimes give empty status text for some reason!
  status_text = final_response.status_text or StatusText.get(final_response.status)
 
  history = cls._process_response_history(first_response, parser_arguments)
  try:
- page_content = page.content()
+ page_content = final_response.text()
  except Exception as e: # pragma: no cover
  log.error(f"Error getting page content: {e}")
  page_content = ""
@@ -161,7 +160,8 @@ class ResponseFactory:
  "encoding": cls.__extract_browser_encoding(
  current_response.headers.get("content-type", "")
  )
- or "utf-8",
+ if current_response
+ else "utf-8",
  "cookies": tuple(),
  "headers": await current_response.all_headers() if current_response else {},
  "request_headers": await current_request.all_headers(),
@@ -210,15 +210,13 @@ class ResponseFactory:
  if not final_response:
  raise ValueError("Failed to get a response from the page")
 
- encoding = (
- cls.__extract_browser_encoding(final_response.headers.get("content-type", "")) or "utf-8"
- ) # default encoding
+ encoding = cls.__extract_browser_encoding(final_response.headers.get("content-type", ""))
  # PlayWright API sometimes give empty status text for some reason!
  status_text = final_response.status_text or StatusText.get(final_response.status)
 
  history = await cls._async_process_response_history(first_response, parser_arguments)
  try:
- page_content = await page.content()
+ page_content = await final_response.text()
  except Exception as e: # pragma: no cover
  log.error(f"Error getting page content in async: {e}")
  page_content = ""
@@ -255,8 +253,8 @@ class ResponseFactory:
  "encoding": response.encoding or "utf-8",
  "cookies": dict(response.cookies),
  "headers": dict(response.headers),
- "request_headers": dict(response.request.headers),
- "method": response.request.method,
+ "request_headers": dict(response.request.headers) if response.request else {},
+ "method": response.request.method if response.request else "GET",
  "history": response.history, # https://github.com/lexiforest/curl_cffi/issues/82
  **parser_arguments,
  }
@@ -8,6 +8,7 @@ from scrapling.core.utils import log
  from scrapling.core._types import (
  Any,
  Dict,
+ cast,
  List,
  Optional,
  Tuple,
@@ -30,10 +31,10 @@ class Response(Selector):
  request_headers: Dict,
  encoding: str = "utf-8",
  method: str = "GET",
- history: List = None,
- **selector_config: Dict,
+ history: List | None = None,
+ **selector_config: Any,
  ):
- adaptive_domain = selector_config.pop("adaptive_domain", None)
+ adaptive_domain: str = cast(str, selector_config.pop("adaptive_domain", ""))
  self.status = status
  self.reason = reason
  self.cookies = cookies
@@ -58,7 +59,7 @@ class BaseFetcher:
  keep_cdata: Optional[bool] = False
  storage_args: Optional[Dict] = None
  keep_comments: Optional[bool] = False
- adaptive_domain: Optional[str] = None
+ adaptive_domain: str = ""
  parser_keywords: Tuple = (
  "huge_tree",
  "adaptive",
@@ -124,12 +125,8 @@ class BaseFetcher:
  adaptive=cls.adaptive,
  storage=cls.storage,
  storage_args=cls.storage_args,
+ adaptive_domain=cls.adaptive_domain,
  )
- if cls.adaptive_domain:
- if not isinstance(cls.adaptive_domain, str):
- log.warning('[Ignored] The argument "adaptive_domain" must be of string type')
- else:
- parser_arguments.update({"adaptive_domain": cls.adaptive_domain})
 
  return parser_arguments
 
@@ -8,9 +8,10 @@ from platform import system as platform_system
  from tldextract import extract
  from browserforge.headers import Browser, HeaderGenerator
 
- from scrapling.core._types import Dict, Optional
+ from scrapling.core._types import Dict, Literal
 
  __OS_NAME__ = platform_system()
+ OSName = Literal["linux", "macos", "windows"]
 
 
  @lru_cache(10, typed=True)
@@ -28,16 +29,20 @@ def generate_convincing_referer(url: str) -> str:
 
 
  @lru_cache(1, typed=True)
- def get_os_name() -> Optional[str]:
- """Get the current OS name in the same format needed for browserforge
+ def get_os_name() -> OSName | None:
+ """Get the current OS name in the same format needed for browserforge, if the OS is Unknown, return None so browserforge uses all.
 
  :return: Current OS name or `None` otherwise
  """
- return {
- "Linux": "linux",
- "Darwin": "macos",
- "Windows": "windows",
- }.get(__OS_NAME__)
+ match __OS_NAME__:
+ case "Linux":
+ return "linux"
+ case "Darwin":
+ return "macos"
+ case "Windows":
+ return "windows"
+ case _:
+ return None
 
 
  def generate_headers(browser_mode: bool = False) -> Dict:
@@ -58,8 +63,10 @@ def generate_headers(browser_mode: bool = False) -> Dict:
  Browser(name="edge", min_version=130),
  ]
  )
-
- return HeaderGenerator(browser=browsers, os=os_name, device="desktop").generate()
+ if os_name:
+ return HeaderGenerator(browser=browsers, os=os_name, device="desktop").generate()
+ else:
+ return HeaderGenerator(browser=browsers, device="desktop").generate()
 
 
  __default_useragent__ = generate_headers(browser_mode=False).get("User-Agent")
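Taken together, the two fingerprint hunks make header generation degrade gracefully: a typed OS name is passed to browserforge when the platform is recognized, and the os argument is omitted entirely otherwise. A trimmed sketch of that branch (the full browser list and versions are in the upstream function):

    from browserforge.headers import Browser, HeaderGenerator

    def make_headers(os_name: str | None) -> dict:
        # Trimmed browser list for illustration; the upstream hunk shows edge pinned to min_version=130
        browsers = [Browser(name="chrome", min_version=130)]
        if os_name:
            # Known platform: constrain browserforge to it
            return HeaderGenerator(browser=browsers, os=os_name, device="desktop").generate()
        # Unknown platform: let browserforge choose across all operating systems
        return HeaderGenerator(browser=browsers, device="desktop").generate()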
@@ -11,7 +11,7 @@ from msgspec import Struct, structs, convert, ValidationError
  from playwright.sync_api import Route
 
  from scrapling.core.utils import log
- from scrapling.core._types import Dict, Optional, Tuple
+ from scrapling.core._types import Dict, Tuple, overload, Literal
  from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
 
  __BYPASSES_DIR__ = Path(__file__).parent / "bypasses"
@@ -49,7 +49,15 @@ async def async_intercept_route(route: async_Route):
  await route.continue_()
 
 
- def construct_proxy_dict(proxy_string: str | Dict[str, str], as_tuple=False) -> Optional[Dict | Tuple]:
+ @overload
+ def construct_proxy_dict(proxy_string: str | Dict[str, str] | Tuple, as_tuple: Literal[True]) -> Tuple: ...
+
+
+ @overload
+ def construct_proxy_dict(proxy_string: str | Dict[str, str] | Tuple, as_tuple: Literal[False] = False) -> Dict: ...
+
+
+ def construct_proxy_dict(proxy_string: str | Dict[str, str] | Tuple, as_tuple: bool = False) -> Dict | Tuple:
  """Validate a proxy and return it in the acceptable format for Playwright
  Reference: https://playwright.dev/python/docs/network#http-proxy
 
@@ -83,7 +91,7 @@ def construct_proxy_dict(proxy_string: str | Dict[str, str], as_tuple=False) ->
  except ValidationError as e:
  raise TypeError(f"Invalid proxy dictionary: {e}")
 
- return None
+ raise TypeError(f"Invalid proxy string: {proxy_string}")
 
 
  @lru_cache(10, typed=True)
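The @overload pair lets type checkers narrow construct_proxy_dict's return type by the value of as_tuple (Literal[True] yields a Tuple, the default yields a Dict), and the hunk just above replaces the old silent None return with a TypeError for unparseable proxy strings. A caller-side sketch of the narrowed types, assuming the function is imported from scrapling.engines.toolbelt.navigation (per the file list above); the proxy value is a placeholder:

    from scrapling.engines.toolbelt.navigation import construct_proxy_dict

    proxy = "http://user:pass@127.0.0.1:8080"              # placeholder proxy string
    as_dict = construct_proxy_dict(proxy)                   # type checkers see: Dict
    as_pair = construct_proxy_dict(proxy, as_tuple=True)    # type checkers see: Tuple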
@@ -0,0 +1,46 @@
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+ from scrapling.fetchers.requests import Fetcher, AsyncFetcher, FetcherSession
+ from scrapling.fetchers.chrome import DynamicFetcher, DynamicSession, AsyncDynamicSession
+ from scrapling.fetchers.firefox import StealthyFetcher, StealthySession, AsyncStealthySession
+
+
+ # Lazy import mapping
+ _LAZY_IMPORTS = {
+ "Fetcher": ("scrapling.fetchers.requests", "Fetcher"),
+ "AsyncFetcher": ("scrapling.fetchers.requests", "AsyncFetcher"),
+ "FetcherSession": ("scrapling.fetchers.requests", "FetcherSession"),
+ "DynamicFetcher": ("scrapling.fetchers.chrome", "DynamicFetcher"),
+ "DynamicSession": ("scrapling.fetchers.chrome", "DynamicSession"),
+ "AsyncDynamicSession": ("scrapling.fetchers.chrome", "AsyncDynamicSession"),
+ "StealthyFetcher": ("scrapling.fetchers.firefox", "StealthyFetcher"),
+ "StealthySession": ("scrapling.fetchers.firefox", "StealthySession"),
+ "AsyncStealthySession": ("scrapling.fetchers.firefox", "AsyncStealthySession"),
+ }
+
+ __all__ = [
+ "Fetcher",
+ "AsyncFetcher",
+ "FetcherSession",
+ "DynamicFetcher",
+ "DynamicSession",
+ "AsyncDynamicSession",
+ "StealthyFetcher",
+ "StealthySession",
+ "AsyncStealthySession",
+ ]
+
+
+ def __getattr__(name: str) -> Any:
+ if name in _LAZY_IMPORTS:
+ module_path, class_name = _LAZY_IMPORTS[name]
+ module = __import__(module_path, fromlist=[class_name])
+ return getattr(module, class_name)
+ else:
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+ def __dir__() -> list[str]:
+ """Support for dir() and autocomplete."""
+ return sorted(list(_LAZY_IMPORTS.keys()))
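The new scrapling/fetchers/__init__.py uses module-level __getattr__ and __dir__ (PEP 562), so the fetcher submodules are only imported the first time one of the nine public names is accessed, presumably to keep importing the package cheap when only part of it is needed. A short usage sketch of that mechanism:

    import scrapling.fetchers as fetchers

    # At this point none of scrapling.fetchers.* has been imported at runtime;
    # the TYPE_CHECKING block above only matters to static type checkers.
    print(dir(fetchers))   # __dir__ lists the nine lazy names for autocomplete

    # First attribute access goes through __getattr__, which imports the owning
    # submodule (scrapling.fetchers.requests here) and returns the class from it.
    Fetcher = fetchers.Fetcher

    # "from scrapling.fetchers import StealthyFetcher" triggers the same path.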
@@ -0,0 +1,210 @@
+ from scrapling.core._types import (
+ Callable,
+ List,
+ Dict,
+ Optional,
+ SelectorWaitStates,
+ )
+ from scrapling.engines.toolbelt.custom import BaseFetcher, Response
+ from scrapling.engines._browsers._controllers import DynamicSession, AsyncDynamicSession
+
+
+ class DynamicFetcher(BaseFetcher):
+ """A `Fetcher` class type that provide many options, all of them are based on PlayWright.
+
+ Using this Fetcher class, you can do requests with:
+ - Vanilla Playwright without any modifications other than the ones you chose.
+ - Stealthy Playwright with the stealth mode I wrote for it. It's still a work in progress, but it bypasses many online tests like bot.sannysoft.com
+ Some of the things stealth mode does include:
+ 1) Patches the CDP runtime fingerprint.
+ 2) Mimics some of the real browsers' properties by injecting several JS files and using custom options.
+ 3) Using custom flags on launch to hide Playwright even more and make it faster.
+ 4) Generates real browser's headers of the same type and same user OS, then append it to the request.
+ - Real browsers by passing the `real_chrome` argument or the CDP URL of your browser to be controlled by the Fetcher, and most of the options can be enabled on it.
+
+ > Note that these are the main options with PlayWright, but it can be mixed.
+ """
+
+ @classmethod
+ def fetch(
+ cls,
+ url: str,
+ headless: bool = True,
+ google_search: bool = True,
+ hide_canvas: bool = False,
+ disable_webgl: bool = False,
+ real_chrome: bool = False,
+ stealth: bool = False,
+ wait: int | float = 0,
+ page_action: Optional[Callable] = None,
+ proxy: Optional[str | Dict[str, str]] = None,
+ locale: str = "en-US",
+ extra_headers: Optional[Dict[str, str]] = None,
+ useragent: Optional[str] = None,
+ cdp_url: Optional[str] = None,
+ timeout: int | float = 30000,
+ disable_resources: bool = False,
+ wait_selector: Optional[str] = None,
+ init_script: Optional[str] = None,
+ cookies: Optional[List[Dict]] = None,
+ network_idle: bool = False,
+ load_dom: bool = True,
+ wait_selector_state: SelectorWaitStates = "attached",
+ additional_args: Optional[Dict] = None,
+ custom_config: Optional[Dict] = None,
+ ) -> Response:
+ """Opens up a browser and do your request based on your chosen options below.
+
+ :param url: Target url.
+ :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
+ :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
+ Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
+ This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+ :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
+ :param cookies: Set cookies for the next request.
+ :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
+ :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
+ :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
+ :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
+ :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
+ :param wait_selector: Wait for a specific CSS selector to be in a specific state.
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
+ :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
+ :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+ :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
+ :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+ :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+ :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
+ :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
+ :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
+ :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
+ :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+ :param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
+ :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
+ :return: A `Response` object.
+ """
+ if not custom_config:
+ custom_config = {}
+ elif not isinstance(custom_config, dict):
+ raise ValueError(f"The custom parser config must be of type dictionary, got {cls.__class__}")
+
+ with DynamicSession(
+ wait=wait,
+ proxy=proxy,
+ locale=locale,
+ timeout=timeout,
+ stealth=stealth,
+ cdp_url=cdp_url,
+ cookies=cookies,
+ headless=headless,
+ load_dom=load_dom,
+ useragent=useragent,
+ real_chrome=real_chrome,
+ page_action=page_action,
+ hide_canvas=hide_canvas,
+ init_script=init_script,
+ network_idle=network_idle,
+ google_search=google_search,
+ extra_headers=extra_headers,
+ wait_selector=wait_selector,
+ disable_webgl=disable_webgl,
+ additional_args=additional_args,
+ disable_resources=disable_resources,
+ wait_selector_state=wait_selector_state,
+ selector_config={**cls._generate_parser_arguments(), **custom_config},
+ ) as session:
+ return session.fetch(url)
+
+ @classmethod
+ async def async_fetch(
+ cls,
+ url: str,
+ headless: bool = True,
+ google_search: bool = True,
+ hide_canvas: bool = False,
+ disable_webgl: bool = False,
+ real_chrome: bool = False,
+ stealth: bool = False,
+ wait: int | float = 0,
+ page_action: Optional[Callable] = None,
+ proxy: Optional[str | Dict[str, str]] = None,
+ locale: str = "en-US",
+ extra_headers: Optional[Dict[str, str]] = None,
+ useragent: Optional[str] = None,
+ cdp_url: Optional[str] = None,
+ timeout: int | float = 30000,
+ disable_resources: bool = False,
+ wait_selector: Optional[str] = None,
+ init_script: Optional[str] = None,
+ cookies: Optional[List[Dict]] = None,
+ network_idle: bool = False,
+ load_dom: bool = True,
+ wait_selector_state: SelectorWaitStates = "attached",
+ additional_args: Optional[Dict] = None,
+ custom_config: Optional[Dict] = None,
+ ) -> Response:
+ """Opens up a browser and do your request based on your chosen options below.
+
+ :param url: Target url.
+ :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
+ :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
+ Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
+ This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
+ :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
+ :param cookies: Set cookies for the next request.
+ :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
+ :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
+ :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
+ :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
+ :param page_action: Added for automation. A function that takes the `page` object and does the automation you need.
+ :param wait_selector: Wait for a specific CSS selector to be in a specific state.
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
+ :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
+ :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
+ :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
+ :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
+ :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
+ :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
+ :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
+ :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
+ :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
+ :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
+ :param custom_config: A dictionary of custom parser arguments to use with this request. Any argument passed will override any class parameters values.
+ :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
+ :return: A `Response` object.
+ """
+ if not custom_config:
+ custom_config = {}
+ elif not isinstance(custom_config, dict):
+ raise ValueError(f"The custom parser config must be of type dictionary, got {cls.__class__}")
+
+ async with AsyncDynamicSession(
+ wait=wait,
+ max_pages=1,
+ proxy=proxy,
+ locale=locale,
+ timeout=timeout,
+ stealth=stealth,
+ cdp_url=cdp_url,
+ cookies=cookies,
+ headless=headless,
+ load_dom=load_dom,
+ useragent=useragent,
+ real_chrome=real_chrome,
+ page_action=page_action,
+ hide_canvas=hide_canvas,
+ init_script=init_script,
+ network_idle=network_idle,
+ google_search=google_search,
+ extra_headers=extra_headers,
+ wait_selector=wait_selector,
+ disable_webgl=disable_webgl,
+ additional_args=additional_args,
+ disable_resources=disable_resources,
+ wait_selector_state=wait_selector_state,
+ selector_config={**cls._generate_parser_arguments(), **custom_config},
+ ) as session:
+ return await session.fetch(url)
+
+
+ PlayWrightFetcher = DynamicFetcher # For backward-compatibility
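The split of the old scrapling/fetchers.py into requests.py, chrome.py, and firefox.py keeps the public API the same: DynamicFetcher.fetch is a one-shot classmethod that opens a DynamicSession (or AsyncDynamicSession for async_fetch) for the single request and closes it on exit. A minimal usage sketch based on the signatures above (the URL is a placeholder):

    from scrapling.fetchers import DynamicFetcher

    page = DynamicFetcher.fetch(
        "https://example.com",   # placeholder target
        headless=True,
        stealth=True,
        network_idle=True,
    )
    print(page.status, page.reason)   # attributes set on the Response shown earlier

    # The old name is kept as an alias in the same module:
    from scrapling.fetchers.chrome import PlayWrightFetcher
    assert PlayWrightFetcher is DynamicFetcher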