scrapling 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +5 -4
- scrapling/core/_types.py +2 -3
- scrapling/core/custom_types.py +93 -11
- scrapling/core/storage_adaptors.py +9 -10
- scrapling/core/translator.py +6 -7
- scrapling/core/utils.py +35 -30
- scrapling/defaults.py +2 -1
- scrapling/engines/__init__.py +2 -2
- scrapling/engines/camo.py +96 -26
- scrapling/engines/constants.py +4 -4
- scrapling/engines/pw.py +166 -96
- scrapling/engines/static.py +94 -50
- scrapling/engines/toolbelt/__init__.py +6 -20
- scrapling/engines/toolbelt/custom.py +22 -23
- scrapling/engines/toolbelt/fingerprints.py +7 -7
- scrapling/engines/toolbelt/navigation.py +25 -12
- scrapling/fetchers.py +233 -17
- scrapling/parser.py +63 -28
- {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/METADATA +41 -25
- scrapling-0.2.9.dist-info/RECORD +47 -0
- tests/fetchers/async/__init__.py +0 -0
- tests/fetchers/async/test_camoufox.py +95 -0
- tests/fetchers/async/test_httpx.py +83 -0
- tests/fetchers/async/test_playwright.py +99 -0
- tests/fetchers/sync/__init__.py +0 -0
- tests/fetchers/sync/test_camoufox.py +68 -0
- tests/fetchers/sync/test_httpx.py +82 -0
- tests/fetchers/sync/test_playwright.py +87 -0
- tests/fetchers/test_utils.py +90 -122
- tests/parser/test_automatch.py +64 -9
- tests/parser/test_general.py +263 -219
- scrapling-0.2.7.dist-info/RECORD +0 -42
- tests/fetchers/test_camoufox.py +0 -64
- tests/fetchers/test_httpx.py +0 -67
- tests/fetchers/test_playwright.py +0 -76
- {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/LICENSE +0 -0
- {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/WHEEL +0 -0
- {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/top_level.txt +0 -0
| @@ -2,13 +2,13 @@ | |
| 2 2 | 
             
            Functions related to custom types or type checking
         | 
| 3 3 | 
             
            """
         | 
| 4 4 | 
             
            import inspect
         | 
| 5 | 
            -
            import logging
         | 
| 6 5 | 
             
            from email.message import Message
         | 
| 7 6 |  | 
| 7 | 
            +
            from scrapling.core._types import (Any, Callable, Dict, List, Optional, Tuple,
         | 
| 8 | 
            +
                                               Type, Union)
         | 
| 8 9 | 
             
            from scrapling.core.custom_types import MappingProxyType
         | 
| 10 | 
            +
            from scrapling.core.utils import log, lru_cache
         | 
| 9 11 | 
             
            from scrapling.parser import Adaptor, SQLiteStorageSystem
         | 
| 10 | 
            -
            from scrapling.core.utils import setup_basic_logging, cache
         | 
| 11 | 
            -
            from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable, Tuple
         | 
| 12 12 |  | 
| 13 13 |  | 
| 14 14 | 
             
            class ResponseEncoding:
         | 
| @@ -16,7 +16,7 @@ class ResponseEncoding: | |
| 16 16 | 
             
                __ISO_8859_1_CONTENT_TYPES = {"text/plain", "text/html", "text/css", "text/javascript"}
         | 
| 17 17 |  | 
| 18 18 | 
             
                @classmethod
         | 
| 19 | 
            -
                @ | 
| 19 | 
            +
                @lru_cache(maxsize=None)
         | 
| 20 20 | 
             
                def __parse_content_type(cls, header_value: str) -> Tuple[str, Dict[str, str]]:
         | 
| 21 21 | 
             
                    """Parse content type and parameters from a content-type header value.
         | 
| 22 22 |  | 
| @@ -38,7 +38,7 @@ class ResponseEncoding: | |
| 38 38 | 
             
                    return content_type, params
         | 
| 39 39 |  | 
| 40 40 | 
             
                @classmethod
         | 
| 41 | 
            -
                @ | 
| 41 | 
            +
                @lru_cache(maxsize=None)
         | 
| 42 42 | 
             
                def get_value(cls, content_type: Optional[str], text: Optional[str] = 'test') -> str:
         | 
| 43 43 | 
             
                    """Determine the appropriate character encoding from a content-type header.
         | 
| 44 44 |  | 
| @@ -84,7 +84,10 @@ class ResponseEncoding: | |
| 84 84 | 
             
            class Response(Adaptor):
         | 
| 85 85 | 
             
                """This class is returned by all engines as a way to unify response type between different libraries."""
         | 
| 86 86 |  | 
| 87 | 
            -
                 | 
| 87 | 
            +
                _is_response_result_logged = False  # Class-level flag, initialized to False
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict,
         | 
| 90 | 
            +
                             encoding: str = 'utf-8', method: str = 'GET', **adaptor_arguments: Dict):
         | 
| 88 91 | 
             
                    automatch_domain = adaptor_arguments.pop('automatch_domain', None)
         | 
| 89 92 | 
             
                    self.status = status
         | 
| 90 93 | 
             
                    self.reason = reason
         | 
| @@ -95,6 +98,10 @@ class Response(Adaptor): | |
| 95 98 | 
             
                    super().__init__(text=text, body=body, url=automatch_domain or url, encoding=encoding, **adaptor_arguments)
         | 
| 96 99 | 
             
                    # For backward compatibility
         | 
| 97 100 | 
             
                    self.adaptor = self
         | 
| 101 | 
            +
                    # For easier debugging while working from a Python shell
         | 
| 102 | 
            +
                    if not Response._is_response_result_logged:
         | 
| 103 | 
            +
                        log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')
         | 
| 104 | 
            +
                        Response._is_response_result_logged = True
         | 
| 98 105 |  | 
| 99 106 | 
             
                # def __repr__(self):
         | 
| 100 107 | 
             
                #     return f'<{self.__class__.__name__} [{self.status} {self.reason}]>'
         | 
| @@ -103,8 +110,8 @@ class Response(Adaptor): | |
| 103 110 | 
             
            class BaseFetcher:
         | 
| 104 111 | 
             
                def __init__(
         | 
| 105 112 | 
             
                        self, huge_tree: bool = True, keep_comments: Optional[bool] = False, auto_match: Optional[bool] = True,
         | 
| 106 | 
            -
                        storage: Any = SQLiteStorageSystem, storage_args: Optional[Dict] = None, | 
| 107 | 
            -
                        automatch_domain: Optional[str] = None,
         | 
| 113 | 
            +
                        storage: Any = SQLiteStorageSystem, storage_args: Optional[Dict] = None,
         | 
| 114 | 
            +
                        automatch_domain: Optional[str] = None, keep_cdata: Optional[bool] = False,
         | 
| 108 115 | 
             
                ):
         | 
| 109 116 | 
             
                    """Arguments below are the same from the Adaptor class so you can pass them directly, the rest of Adaptor's arguments
         | 
| 110 117 | 
             
                    are detected and passed automatically from the Fetcher based on the response for accessibility.
         | 
| @@ -112,6 +119,7 @@ class BaseFetcher: | |
| 112 119 | 
             
                    :param huge_tree: Enabled by default, should always be enabled when parsing large HTML documents. This controls
         | 
| 113 120 | 
             
                        libxml2 feature that forbids parsing certain large documents to protect from possible memory exhaustion.
         | 
| 114 121 | 
             
                    :param keep_comments: While parsing the HTML body, drop comments or not. Disabled by default for obvious reasons
         | 
| 122 | 
            +
                    :param keep_cdata: While parsing the HTML body, drop cdata or not. Disabled by default for cleaner HTML.
         | 
| 115 123 | 
             
                    :param auto_match: Globally turn off the auto-match feature in all functions, this argument takes higher
         | 
| 116 124 | 
             
                        priority over all auto-match related arguments/functions in the class.
         | 
| 117 125 | 
             
                    :param storage: The storage class to be passed for auto-matching functionalities, see ``Docs`` for more info.
         | 
| @@ -119,23 +127,20 @@ class BaseFetcher: | |
| 119 127 | 
             
                        If empty, default values will be used.
         | 
| 120 128 | 
             
                    :param automatch_domain: For cases where you want to automatch selectors across different websites as if they were on the same website, use this argument to unify them.
         | 
| 121 129 | 
             
                        Otherwise, the domain of the request is used by default.
         | 
| 122 | 
            -
                    :param debug: Enable debug mode
         | 
| 123 130 | 
             
                    """
         | 
| 124 131 | 
             
                    # Adaptor class parameters
         | 
| 125 132 | 
             
                    # I won't validate Adaptor's class parameters here again, I will leave it to be validated later
         | 
| 126 133 | 
             
                    self.adaptor_arguments = dict(
         | 
| 127 134 | 
             
                        huge_tree=huge_tree,
         | 
| 128 135 | 
             
                        keep_comments=keep_comments,
         | 
| 136 | 
            +
                        keep_cdata=keep_cdata,
         | 
| 129 137 | 
             
                        auto_match=auto_match,
         | 
| 130 138 | 
             
                        storage=storage,
         | 
| 131 | 
            -
                        storage_args=storage_args | 
| 132 | 
            -
                        debug=debug,
         | 
| 139 | 
            +
                        storage_args=storage_args
         | 
| 133 140 | 
             
                    )
         | 
| 134 | 
            -
                    # If the user used fetchers first, then configure the logger from here instead of the `Adaptor` class
         | 
| 135 | 
            -
                    setup_basic_logging(level='debug' if debug else 'info')
         | 
| 136 141 | 
             
                    if automatch_domain:
         | 
| 137 142 | 
             
                        if type(automatch_domain) is not str:
         | 
| 138 | 
            -
                             | 
| 143 | 
            +
                            log.warning('[Ignored] The argument "automatch_domain" must be of string type')
         | 
| 139 144 | 
             
                        else:
         | 
| 140 145 | 
             
                            self.adaptor_arguments.update({'automatch_domain': automatch_domain})
         | 
| 141 146 |  | 
| @@ -211,7 +216,7 @@ class StatusText: | |
| 211 216 | 
             
                })
         | 
| 212 217 |  | 
| 213 218 | 
             
                @classmethod
         | 
| 214 | 
            -
                @ | 
| 219 | 
            +
                @lru_cache(maxsize=128)
         | 
| 215 220 | 
             
                def get(cls, status_code: int) -> str:
         | 
| 216 221 | 
             
                    """Get the phrase for a given HTTP status code."""
         | 
| 217 222 | 
             
                    return cls._phrases.get(status_code, "Unknown Status Code")
         | 
| @@ -278,7 +283,7 @@ def check_type_validity(variable: Any, valid_types: Union[List[Type], None], def | |
| 278 283 | 
             
                    error_msg = f'Argument "{var_name}" cannot be None'
         | 
| 279 284 | 
             
                    if critical:
         | 
| 280 285 | 
             
                        raise TypeError(error_msg)
         | 
| 281 | 
            -
                     | 
| 286 | 
            +
                    log.error(f'[Ignored] {error_msg}')
         | 
| 282 287 | 
             
                    return default_value
         | 
| 283 288 |  | 
| 284 289 | 
             
                # If no valid_types specified and variable has a value, return it
         | 
| @@ -291,13 +296,7 @@ def check_type_validity(variable: Any, valid_types: Union[List[Type], None], def | |
| 291 296 | 
             
                    error_msg = f'Argument "{var_name}" must be of type {" or ".join(type_names)}'
         | 
| 292 297 | 
             
                    if critical:
         | 
| 293 298 | 
             
                        raise TypeError(error_msg)
         | 
| 294 | 
            -
                     | 
| 299 | 
            +
                    log.error(f'[Ignored] {error_msg}')
         | 
| 295 300 | 
             
                    return default_value
         | 
| 296 301 |  | 
| 297 302 | 
             
                return variable
         | 
| 298 | 
            -
             | 
| 299 | 
            -
             | 
| 300 | 
            -
            # Pew Pew
         | 
| 301 | 
            -
            def do_nothing(page):
         | 
| 302 | 
            -
                # Just works as a filler for `page_action` argument in browser engines
         | 
| 303 | 
            -
                return page
         | 
| @@ -4,15 +4,15 @@ Functions related to generating headers and fingerprints generally | |
| 4 4 |  | 
| 5 5 | 
             
            import platform
         | 
| 6 6 |  | 
| 7 | 
            -
            from  | 
| 8 | 
            -
            from  | 
| 9 | 
            -
             | 
| 7 | 
            +
            from browserforge.fingerprints import Fingerprint, FingerprintGenerator
         | 
| 8 | 
            +
            from browserforge.headers import Browser, HeaderGenerator
         | 
| 10 9 | 
             
            from tldextract import extract
         | 
| 11 | 
            -
             | 
| 12 | 
            -
            from  | 
| 10 | 
            +
             | 
| 11 | 
            +
            from scrapling.core._types import Dict, Union
         | 
| 12 | 
            +
            from scrapling.core.utils import lru_cache
         | 
| 13 13 |  | 
| 14 14 |  | 
| 15 | 
            -
            @ | 
| 15 | 
            +
            @lru_cache(None, typed=True)
         | 
| 16 16 | 
             
            def generate_convincing_referer(url: str) -> str:
         | 
| 17 17 | 
             
                """Takes the domain from the URL without the subdomain/suffix and make it look like you were searching google for this website
         | 
| 18 18 |  | 
| @@ -26,7 +26,7 @@ def generate_convincing_referer(url: str) -> str: | |
| 26 26 | 
             
                return f'https://www.google.com/search?q={website_name}'
         | 
| 27 27 |  | 
| 28 28 |  | 
| 29 | 
            -
            @ | 
| 29 | 
            +
            @lru_cache(None, typed=True)
         | 
| 30 30 | 
             
            def get_os_name() -> Union[str, None]:
         | 
| 31 31 | 
             
                """Get the current OS name in the same format needed for browserforge
         | 
| 32 32 |  | 
| @@ -1,28 +1,41 @@ | |
| 1 1 | 
             
            """
         | 
| 2 2 | 
             
            Functions related to files and URLs
         | 
| 3 3 | 
             
            """
         | 
| 4 | 
            -
             | 
| 5 4 | 
             
            import os
         | 
| 6 | 
            -
            import  | 
| 7 | 
            -
             | 
| 5 | 
            +
            from urllib.parse import urlencode, urlparse
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            from playwright.async_api import Route as async_Route
         | 
| 8 | 
            +
            from playwright.sync_api import Route
         | 
| 8 9 |  | 
| 9 | 
            -
            from scrapling.core. | 
| 10 | 
            -
            from scrapling.core. | 
| 10 | 
            +
            from scrapling.core._types import Dict, Optional, Union
         | 
| 11 | 
            +
            from scrapling.core.utils import log, lru_cache
         | 
| 11 12 | 
             
            from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
         | 
| 12 13 |  | 
| 13 | 
            -
             | 
| 14 | 
            +
             | 
| 15 | 
            +
            def intercept_route(route: Route):
         | 
| 16 | 
            +
                """This is just a route handler but it drops requests that its type falls in `DEFAULT_DISABLED_RESOURCES`
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                :param route: PlayWright `Route` object of the current page
         | 
| 19 | 
            +
                :return: PlayWright `Route` object
         | 
| 20 | 
            +
                """
         | 
| 21 | 
            +
                if route.request.resource_type in DEFAULT_DISABLED_RESOURCES:
         | 
| 22 | 
            +
                    log.debug(f'Blocking background resource "{route.request.url}" of type "{route.request.resource_type}"')
         | 
| 23 | 
            +
                    route.abort()
         | 
| 24 | 
            +
                else:
         | 
| 25 | 
            +
                    route.continue_()
         | 
| 14 26 |  | 
| 15 27 |  | 
| 16 | 
            -
            def  | 
| 28 | 
            +
            async def async_intercept_route(route: async_Route):
         | 
| 17 29 | 
             
                """This is just a route handler but it drops requests that its type falls in `DEFAULT_DISABLED_RESOURCES`
         | 
| 18 30 |  | 
| 19 31 | 
             
                :param route: PlayWright `Route` object of the current page
         | 
| 20 32 | 
             
                :return: PlayWright `Route` object
         | 
| 21 33 | 
             
                """
         | 
| 22 34 | 
             
                if route.request.resource_type in DEFAULT_DISABLED_RESOURCES:
         | 
| 23 | 
            -
                     | 
| 24 | 
            -
                     | 
| 25 | 
            -
                 | 
| 35 | 
            +
                    log.debug(f'Blocking background resource "{route.request.url}" of type "{route.request.resource_type}"')
         | 
| 36 | 
            +
                    await route.abort()
         | 
| 37 | 
            +
                else:
         | 
| 38 | 
            +
                    await route.continue_()
         | 
| 26 39 |  | 
| 27 40 |  | 
| 28 41 | 
             
            def construct_proxy_dict(proxy_string: Union[str, Dict[str, str]]) -> Union[Dict, None]:
         | 
| @@ -43,7 +56,7 @@ def construct_proxy_dict(proxy_string: Union[str, Dict[str, str]]) -> Union[Dict | |
| 43 56 | 
             
                            }
         | 
| 44 57 | 
             
                        except ValueError:
         | 
| 45 58 | 
             
                            # Urllib will say that one of the parameters above can't be cast to the correct type like `int` for port etc...
         | 
| 46 | 
            -
                            raise TypeError( | 
| 59 | 
            +
                            raise TypeError('The proxy argument\'s string is in invalid format!')
         | 
| 47 60 |  | 
| 48 61 | 
             
                    elif isinstance(proxy_string, dict):
         | 
| 49 62 | 
             
                        valid_keys = ('server', 'username', 'password', )
         | 
| @@ -97,7 +110,7 @@ def construct_cdp_url(cdp_url: str, query_params: Optional[Dict] = None) -> str: | |
| 97 110 | 
             
                    raise ValueError(f"Invalid CDP URL: {str(e)}")
         | 
| 98 111 |  | 
| 99 112 |  | 
| 100 | 
            -
            @ | 
| 113 | 
            +
            @lru_cache(None, typed=True)
         | 
| 101 114 | 
             
            def js_bypass_path(filename: str) -> str:
         | 
| 102 115 | 
             
                """Takes the base filename of JS file inside the `bypasses` folder then return the full path of it
         | 
| 103 116 |  | 
    
        scrapling/fetchers.py
    CHANGED
    
    | @@ -1,7 +1,8 @@ | |
| 1 | 
            -
            from scrapling.core._types import  | 
| 2 | 
            -
             | 
| 3 | 
            -
            from scrapling.engines | 
| 4 | 
            -
             | 
| 1 | 
            +
            from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
         | 
| 2 | 
            +
                                               Union)
         | 
| 3 | 
            +
            from scrapling.engines import (CamoufoxEngine, PlaywrightEngine, StaticEngine,
         | 
| 4 | 
            +
                                           check_if_engine_usable)
         | 
| 5 | 
            +
            from scrapling.engines.toolbelt import BaseFetcher, Response
         | 
| 5 6 |  | 
| 6 7 |  | 
| 7 8 | 
             
            class Fetcher(BaseFetcher):
         | 
| @@ -9,7 +10,9 @@ class Fetcher(BaseFetcher): | |
| 9 10 |  | 
| 10 11 | 
             
                Any additional keyword arguments passed to the methods below are passed to the respective httpx's method directly.
         | 
| 11 12 | 
             
                """
         | 
| 12 | 
            -
                def get( | 
| 13 | 
            +
                def get(
         | 
| 14 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 15 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 13 16 | 
             
                    """Make basic HTTP GET request for you but with some added flavors.
         | 
| 14 17 |  | 
| 15 18 | 
             
                    :param url: Target url.
         | 
| @@ -18,13 +21,17 @@ class Fetcher(BaseFetcher): | |
| 18 21 | 
             
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 19 22 | 
             
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 20 23 | 
             
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 24 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 21 25 | 
             
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.get()` function so check httpx documentation for details.
         | 
| 22 26 | 
             
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 23 27 | 
             
                    """
         | 
| 24 | 
            -
                     | 
| 28 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 29 | 
            +
                    response_object = StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries, adaptor_arguments=adaptor_arguments).get(**kwargs)
         | 
| 25 30 | 
             
                    return response_object
         | 
| 26 31 |  | 
| 27 | 
            -
                def post( | 
| 32 | 
            +
                def post(
         | 
| 33 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 34 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 28 35 | 
             
                    """Make basic HTTP POST request for you but with some added flavors.
         | 
| 29 36 |  | 
| 30 37 | 
             
                    :param url: Target url.
         | 
| @@ -33,13 +40,17 @@ class Fetcher(BaseFetcher): | |
| 33 40 | 
             
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 34 41 | 
             
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 35 42 | 
             
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 43 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 36 44 | 
             
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.post()` function so check httpx documentation for details.
         | 
| 37 45 | 
             
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 38 46 | 
             
                    """
         | 
| 39 | 
            -
                     | 
| 47 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 48 | 
            +
                    response_object = StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries, adaptor_arguments=adaptor_arguments).post(**kwargs)
         | 
| 40 49 | 
             
                    return response_object
         | 
| 41 50 |  | 
| 42 | 
            -
                def put( | 
| 51 | 
            +
                def put(
         | 
| 52 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 53 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 43 54 | 
             
                    """Make basic HTTP PUT request for you but with some added flavors.
         | 
| 44 55 |  | 
| 45 56 | 
             
                    :param url: Target url
         | 
| @@ -48,14 +59,96 @@ class Fetcher(BaseFetcher): | |
| 48 59 | 
             
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 49 60 | 
             
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 50 61 | 
             
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 62 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 51 63 | 
             
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.put()` function so check httpx documentation for details.
         | 
| 52 64 |  | 
| 53 65 | 
             
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 54 66 | 
             
                    """
         | 
| 55 | 
            -
                     | 
| 67 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 68 | 
            +
                    response_object = StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries, adaptor_arguments=adaptor_arguments).put(**kwargs)
         | 
| 69 | 
            +
                    return response_object
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                def delete(
         | 
| 72 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 73 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 74 | 
            +
                    """Make basic HTTP DELETE request for you but with some added flavors.
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                    :param url: Target url
         | 
| 77 | 
            +
                    :param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
         | 
| 78 | 
            +
                    :param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
         | 
| 79 | 
            +
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 80 | 
            +
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 81 | 
            +
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 82 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 83 | 
            +
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.delete()` function so check httpx documentation for details.
         | 
| 84 | 
            +
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 85 | 
            +
                    """
         | 
| 86 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 87 | 
            +
                    response_object = StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries, adaptor_arguments=adaptor_arguments).delete(**kwargs)
         | 
| 88 | 
            +
                    return response_object
         | 
| 89 | 
            +
             | 
| 90 | 
            +
             | 
| 91 | 
            +
            class AsyncFetcher(Fetcher):
         | 
| 92 | 
            +
                async def get(
         | 
| 93 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 94 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 95 | 
            +
                    """Make basic HTTP GET request for you but with some added flavors.
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                    :param url: Target url.
         | 
| 98 | 
            +
                    :param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
         | 
| 99 | 
            +
                    :param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
         | 
| 100 | 
            +
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 101 | 
            +
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 102 | 
            +
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 103 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 104 | 
            +
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.get()` function so check httpx documentation for details.
         | 
| 105 | 
            +
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 106 | 
            +
                    """
         | 
| 107 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 108 | 
            +
                    response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_get(**kwargs)
         | 
| 109 | 
            +
                    return response_object
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                async def post(
         | 
| 112 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 113 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 114 | 
            +
                    """Make basic HTTP POST request for you but with some added flavors.
         | 
| 115 | 
            +
             | 
| 116 | 
            +
                    :param url: Target url.
         | 
| 117 | 
            +
                    :param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
         | 
| 118 | 
            +
                    :param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
         | 
| 119 | 
            +
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 120 | 
            +
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 121 | 
            +
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 122 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 123 | 
            +
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.post()` function so check httpx documentation for details.
         | 
| 124 | 
            +
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 125 | 
            +
                    """
         | 
| 126 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 127 | 
            +
                    response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_post(**kwargs)
         | 
| 128 | 
            +
                    return response_object
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                async def put(
         | 
| 131 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 132 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 133 | 
            +
                    """Make basic HTTP PUT request for you but with some added flavors.
         | 
| 134 | 
            +
             | 
| 135 | 
            +
                    :param url: Target url
         | 
| 136 | 
            +
                    :param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
         | 
| 137 | 
            +
                    :param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
         | 
| 138 | 
            +
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 139 | 
            +
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 140 | 
            +
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 141 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 142 | 
            +
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.put()` function so check httpx documentation for details.
         | 
| 143 | 
            +
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 144 | 
            +
                    """
         | 
| 145 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 146 | 
            +
                    response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_post(**kwargs)
         | 
| 56 147 | 
             
                    return response_object
         | 
| 57 148 |  | 
| 58 | 
            -
                def delete( | 
| 149 | 
            +
                async def delete(
         | 
| 150 | 
            +
                        self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True,
         | 
| 151 | 
            +
                        proxy: Optional[str] = None, retries: Optional[int] = 3, **kwargs: Dict) -> Response:
         | 
| 59 152 | 
             
                    """Make basic HTTP DELETE request for you but with some added flavors.
         | 
| 60 153 |  | 
| 61 154 | 
             
                    :param url: Target url
         | 
| @@ -64,10 +157,12 @@ class Fetcher(BaseFetcher): | |
| 64 157 | 
             
                    :param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
         | 
| 65 158 | 
             
                        create a referer header as if this request came from Google's search of this URL's domain.
         | 
| 66 159 | 
             
                    :param proxy: A string of a proxy to use for http and https requests, the format accepted is `http://username:password@localhost:8030`
         | 
| 160 | 
            +
                    :param retries: The number of retries to do through httpx if the request failed for any reason. The default is 3 retries.
         | 
| 67 161 | 
             
                    :param kwargs: Any additional keyword arguments are passed directly to `httpx.delete()` function so check httpx documentation for details.
         | 
| 68 162 | 
             
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 69 163 | 
             
                    """
         | 
| 70 | 
            -
                     | 
| 164 | 
            +
                    adaptor_arguments = tuple(self.adaptor_arguments.items())
         | 
| 165 | 
            +
                    response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_delete(**kwargs)
         | 
| 71 166 | 
             
                    return response_object
         | 
| 72 167 |  | 
| 73 168 |  | 
| @@ -79,10 +174,10 @@ class StealthyFetcher(BaseFetcher): | |
| 79 174 | 
             
                """
         | 
| 80 175 | 
             
                def fetch(
         | 
| 81 176 | 
             
                        self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
         | 
| 82 | 
            -
                        block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] =  | 
| 83 | 
            -
                        timeout: Optional[float] = 30000, page_action: Callable =  | 
| 177 | 
            +
                        block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
         | 
| 178 | 
            +
                        timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
         | 
| 84 179 | 
             
                        wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None, proxy: Optional[Union[str, Dict[str, str]]] = None,
         | 
| 85 | 
            -
                        os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
         | 
| 180 | 
            +
                        os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
         | 
| 86 181 | 
             
                ) -> Response:
         | 
| 87 182 | 
             
                    """
         | 
| 88 183 | 
             
                    Opens up a browser and do your request based on your chosen options below.
         | 
| @@ -98,7 +193,9 @@ class StealthyFetcher(BaseFetcher): | |
| 98 193 | 
             
                    :param addons: List of Firefox addons to use. Must be paths to extracted addons.
         | 
| 99 194 | 
             
                    :param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         | 
| 100 195 | 
             
                    :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         | 
| 101 | 
            -
                    :param allow_webgl:  | 
| 196 | 
            +
                    :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         | 
| 197 | 
            +
                    :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
         | 
| 198 | 
            +
                        It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
         | 
| 102 199 | 
             
                    :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         | 
| 103 200 | 
             
                    :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
         | 
| 104 201 | 
             
                    :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
         | 
| @@ -112,6 +209,7 @@ class StealthyFetcher(BaseFetcher): | |
| 112 209 | 
             
                    """
         | 
| 113 210 | 
             
                    engine = CamoufoxEngine(
         | 
| 114 211 | 
             
                        proxy=proxy,
         | 
| 212 | 
            +
                        geoip=geoip,
         | 
| 115 213 | 
             
                        addons=addons,
         | 
| 116 214 | 
             
                        timeout=timeout,
         | 
| 117 215 | 
             
                        headless=headless,
         | 
| @@ -132,6 +230,64 @@ class StealthyFetcher(BaseFetcher): | |
| 132 230 | 
             
                    )
         | 
| 133 231 | 
             
                    return engine.fetch(url)
         | 
| 134 232 |  | 
| 233 | 
            +
                async def async_fetch(
         | 
| 234 | 
            +
                        self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
         | 
| 235 | 
            +
                        block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
         | 
| 236 | 
            +
                        timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
         | 
| 237 | 
            +
                        wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None, proxy: Optional[Union[str, Dict[str, str]]] = None,
         | 
| 238 | 
            +
                        os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
         | 
| 239 | 
            +
                ) -> Response:
         | 
| 240 | 
            +
                    """
         | 
| 241 | 
            +
                    Opens up a browser and do your request based on your chosen options below.
         | 
| 242 | 
            +
             | 
| 243 | 
            +
                    :param url: Target url.
         | 
| 244 | 
            +
                    :param headless: Run the browser in headless/hidden (default), 'virtual' screen mode, or headful/visible mode.
         | 
| 245 | 
            +
                    :param block_images: Prevent the loading of images through Firefox preferences.
         | 
| 246 | 
            +
                        This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         | 
| 247 | 
            +
                    :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends but it made requests ~25% faster in my tests for some websites.
         | 
| 248 | 
            +
                        Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
         | 
| 249 | 
            +
                        This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         | 
| 250 | 
            +
                    :param block_webrtc: Blocks WebRTC entirely.
         | 
| 251 | 
            +
                    :param addons: List of Firefox addons to use. Must be paths to extracted addons.
         | 
| 252 | 
            +
                    :param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         | 
| 253 | 
            +
                    :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         | 
| 254 | 
            +
                    :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         | 
| 255 | 
            +
                    :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
         | 
| 256 | 
            +
                        It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
         | 
| 257 | 
            +
                    :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         | 
| 258 | 
            +
                    :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
         | 
| 259 | 
            +
                    :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
         | 
| 260 | 
            +
                    :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
         | 
| 261 | 
            +
                    :param wait_selector: Wait for a specific css selector to be in a specific state.
         | 
| 262 | 
            +
                    :param wait_selector_state: The state to wait for the selector given with `wait_selector`. Default state is `attached`.
         | 
| 263 | 
            +
                    :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
         | 
| 264 | 
            +
                    :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         | 
| 265 | 
            +
                    :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
         | 
| 266 | 
            +
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 267 | 
            +
                    """
         | 
| 268 | 
            +
                    engine = CamoufoxEngine(
         | 
| 269 | 
            +
                        proxy=proxy,
         | 
| 270 | 
            +
                        geoip=geoip,
         | 
| 271 | 
            +
                        addons=addons,
         | 
| 272 | 
            +
                        timeout=timeout,
         | 
| 273 | 
            +
                        headless=headless,
         | 
| 274 | 
            +
                        humanize=humanize,
         | 
| 275 | 
            +
                        disable_ads=disable_ads,
         | 
| 276 | 
            +
                        allow_webgl=allow_webgl,
         | 
| 277 | 
            +
                        page_action=page_action,
         | 
| 278 | 
            +
                        network_idle=network_idle,
         | 
| 279 | 
            +
                        block_images=block_images,
         | 
| 280 | 
            +
                        block_webrtc=block_webrtc,
         | 
| 281 | 
            +
                        os_randomize=os_randomize,
         | 
| 282 | 
            +
                        wait_selector=wait_selector,
         | 
| 283 | 
            +
                        google_search=google_search,
         | 
| 284 | 
            +
                        extra_headers=extra_headers,
         | 
| 285 | 
            +
                        disable_resources=disable_resources,
         | 
| 286 | 
            +
                        wait_selector_state=wait_selector_state,
         | 
| 287 | 
            +
                        adaptor_arguments=self.adaptor_arguments,
         | 
| 288 | 
            +
                    )
         | 
| 289 | 
            +
                    return await engine.async_fetch(url)
         | 
| 290 | 
            +
             | 
| 135 291 |  | 
| 136 292 | 
             
            class PlayWrightFetcher(BaseFetcher):
         | 
| 137 293 | 
             
                """A `Fetcher` class type that provide many options, all of them are based on PlayWright.
         | 
| @@ -152,7 +308,7 @@ class PlayWrightFetcher(BaseFetcher): | |
| 152 308 | 
             
                def fetch(
         | 
| 153 309 | 
             
                        self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
         | 
| 154 310 | 
             
                        useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
         | 
| 155 | 
            -
                        page_action: Optional[Callable] =  | 
| 311 | 
            +
                        page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: Optional[str] = 'attached',
         | 
| 156 312 | 
             
                        hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
         | 
| 157 313 | 
             
                        proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
         | 
| 158 314 | 
             
                        stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
         | 
| @@ -209,6 +365,66 @@ class PlayWrightFetcher(BaseFetcher): | |
| 209 365 | 
             
                    )
         | 
| 210 366 | 
             
                    return engine.fetch(url)
         | 
| 211 367 |  | 
| 368 | 
            +
                async def async_fetch(
         | 
| 369 | 
            +
                        self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
         | 
| 370 | 
            +
                        useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
         | 
| 371 | 
            +
                        page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: Optional[str] = 'attached',
         | 
| 372 | 
            +
                        hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
         | 
| 373 | 
            +
                        proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
         | 
| 374 | 
            +
                        stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
         | 
| 375 | 
            +
                        cdp_url: Optional[str] = None,
         | 
| 376 | 
            +
                        nstbrowser_mode: Optional[bool] = False, nstbrowser_config: Optional[Dict] = None,
         | 
| 377 | 
            +
                ) -> Response:
         | 
| 378 | 
            +
                    """Opens up a browser and do your request based on your chosen options below.
         | 
| 379 | 
            +
             | 
| 380 | 
            +
                    :param url: Target url.
         | 
| 381 | 
            +
                    :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
         | 
| 382 | 
            +
                    :param disable_resources: Drop requests of unnecessary resources for speed boost. It depends but it made requests ~25% faster in my tests for some websites.
         | 
| 383 | 
            +
                        Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
         | 
| 384 | 
            +
                        This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         | 
| 385 | 
            +
                    :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
         | 
| 386 | 
            +
                    :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         | 
| 387 | 
            +
                    :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
         | 
| 388 | 
            +
                    :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
         | 
| 389 | 
            +
                    :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
         | 
| 390 | 
            +
                    :param wait_selector: Wait for a specific css selector to be in a specific state.
         | 
| 391 | 
            +
                    :param wait_selector_state: The state to wait for the selector given with `wait_selector`. Default state is `attached`.
         | 
| 392 | 
            +
                    :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
         | 
| 393 | 
            +
                    :param real_chrome: If you have chrome browser installed on your device, enable this and the Fetcher will launch an instance of your browser and use it.
         | 
| 394 | 
            +
                    :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
         | 
| 395 | 
            +
                    :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
         | 
| 396 | 
            +
                    :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
         | 
| 397 | 
            +
                    :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         | 
| 398 | 
            +
                    :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
         | 
| 399 | 
            +
                    :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers/NSTBrowser through CDP.
         | 
| 400 | 
            +
                    :param nstbrowser_mode: Enables NSTBrowser mode, it has to be used with `cdp_url` argument or it will get completely ignored.
         | 
| 401 | 
            +
                    :param nstbrowser_config: The config you want to send with requests to the NSTBrowser. If left empty, Scrapling defaults to an optimized NSTBrowser's docker browserless config.
         | 
| 402 | 
            +
                    :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         | 
| 403 | 
            +
                    """
         | 
| 404 | 
            +
                    engine = PlaywrightEngine(
         | 
| 405 | 
            +
                        proxy=proxy,
         | 
| 406 | 
            +
                        locale=locale,
         | 
| 407 | 
            +
                        timeout=timeout,
         | 
| 408 | 
            +
                        stealth=stealth,
         | 
| 409 | 
            +
                        cdp_url=cdp_url,
         | 
| 410 | 
            +
                        headless=headless,
         | 
| 411 | 
            +
                        useragent=useragent,
         | 
| 412 | 
            +
                        real_chrome=real_chrome,
         | 
| 413 | 
            +
                        page_action=page_action,
         | 
| 414 | 
            +
                        hide_canvas=hide_canvas,
         | 
| 415 | 
            +
                        network_idle=network_idle,
         | 
| 416 | 
            +
                        google_search=google_search,
         | 
| 417 | 
            +
                        extra_headers=extra_headers,
         | 
| 418 | 
            +
                        wait_selector=wait_selector,
         | 
| 419 | 
            +
                        disable_webgl=disable_webgl,
         | 
| 420 | 
            +
                        nstbrowser_mode=nstbrowser_mode,
         | 
| 421 | 
            +
                        nstbrowser_config=nstbrowser_config,
         | 
| 422 | 
            +
                        disable_resources=disable_resources,
         | 
| 423 | 
            +
                        wait_selector_state=wait_selector_state,
         | 
| 424 | 
            +
                        adaptor_arguments=self.adaptor_arguments,
         | 
| 425 | 
            +
                    )
         | 
| 426 | 
            +
                    return await engine.async_fetch(url)
         | 
| 427 | 
            +
             | 
| 212 428 |  | 
| 213 429 | 
             
            class CustomFetcher(BaseFetcher):
         | 
| 214 430 | 
             
                def fetch(self, url: str, browser_engine, **kwargs) -> Response:
         |