scrapling-0.2.7-py3-none-any.whl → scrapling-0.2.9-py3-none-any.whl

Files changed (38)
  1. scrapling/__init__.py +5 -4
  2. scrapling/core/_types.py +2 -3
  3. scrapling/core/custom_types.py +93 -11
  4. scrapling/core/storage_adaptors.py +9 -10
  5. scrapling/core/translator.py +6 -7
  6. scrapling/core/utils.py +35 -30
  7. scrapling/defaults.py +2 -1
  8. scrapling/engines/__init__.py +2 -2
  9. scrapling/engines/camo.py +96 -26
  10. scrapling/engines/constants.py +4 -4
  11. scrapling/engines/pw.py +166 -96
  12. scrapling/engines/static.py +94 -50
  13. scrapling/engines/toolbelt/__init__.py +6 -20
  14. scrapling/engines/toolbelt/custom.py +22 -23
  15. scrapling/engines/toolbelt/fingerprints.py +7 -7
  16. scrapling/engines/toolbelt/navigation.py +25 -12
  17. scrapling/fetchers.py +233 -17
  18. scrapling/parser.py +63 -28
  19. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/METADATA +41 -25
  20. scrapling-0.2.9.dist-info/RECORD +47 -0
  21. tests/fetchers/async/__init__.py +0 -0
  22. tests/fetchers/async/test_camoufox.py +95 -0
  23. tests/fetchers/async/test_httpx.py +83 -0
  24. tests/fetchers/async/test_playwright.py +99 -0
  25. tests/fetchers/sync/__init__.py +0 -0
  26. tests/fetchers/sync/test_camoufox.py +68 -0
  27. tests/fetchers/sync/test_httpx.py +82 -0
  28. tests/fetchers/sync/test_playwright.py +87 -0
  29. tests/fetchers/test_utils.py +90 -122
  30. tests/parser/test_automatch.py +64 -9
  31. tests/parser/test_general.py +263 -219
  32. scrapling-0.2.7.dist-info/RECORD +0 -42
  33. tests/fetchers/test_camoufox.py +0 -64
  34. tests/fetchers/test_httpx.py +0 -67
  35. tests/fetchers/test_playwright.py +0 -76
  36. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/LICENSE +0 -0
  37. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/WHEEL +0 -0
  38. {scrapling-0.2.7.dist-info → scrapling-0.2.9.dist-info}/top_level.txt +0 -0
scrapling/__init__.py CHANGED
@@ -1,11 +1,12 @@
 # Declare top-level shortcuts
-from scrapling.fetchers import Fetcher, StealthyFetcher, PlayWrightFetcher, CustomFetcher
+from scrapling.core.custom_types import AttributesHandler, TextHandler
+from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
+                                PlayWrightFetcher, StealthyFetcher)
 from scrapling.parser import Adaptor, Adaptors
-from scrapling.core.custom_types import TextHandler, AttributesHandler
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.7"
+__version__ = "0.2.9"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
-__all__ = ['Adaptor', 'Fetcher', 'StealthyFetcher', 'PlayWrightFetcher']
+__all__ = ['Adaptor', 'Fetcher', 'AsyncFetcher', 'StealthyFetcher', 'PlayWrightFetcher']
scrapling/core/_types.py CHANGED
@@ -2,9 +2,8 @@
 Type definitions for type checking purposes.
 """
 
-from typing import (
-    Dict, Optional, Union, Callable, Any, List, Tuple, Pattern, Generator, Iterable, Type, TYPE_CHECKING, Literal
-)
+from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
+                    List, Literal, Optional, Pattern, Tuple, Type, Union)
 
 try:
     from typing import Protocol
scrapling/core/custom_types.py CHANGED
@@ -1,24 +1,83 @@
 import re
-from types import MappingProxyType
 from collections.abc import Mapping
+from types import MappingProxyType
 
-from scrapling.core.utils import _is_iterable, flatten
-from scrapling.core._types import Dict, List, Union, Pattern, SupportsIndex
-
-from orjson import loads, dumps
+from orjson import dumps, loads
 from w3lib.html import replace_entities as _replace_entities
 
+from scrapling.core._types import Dict, List, Pattern, SupportsIndex, Union
+from scrapling.core.utils import _is_iterable, flatten
+
 
 class TextHandler(str):
     """Extends standard Python string by adding more functionality"""
     __slots__ = ()
 
     def __new__(cls, string):
-        # Because str is immutable and we can't override __init__
-        if type(string) is str:
+        if isinstance(string, str):
             return super().__new__(cls, string)
-        else:
-            return super().__new__(cls, '')
+        return super().__new__(cls, '')
+
+    # Make methods from original `str` class return `TextHandler` instead of returning `str` again
+    # Of course, this stupid workaround is only so we can keep the auto-completion working without issues in your IDE
+    # and I made sonnet write it for me :)
+    def strip(self, chars=None):
+        return TextHandler(super().strip(chars))
+
+    def lstrip(self, chars=None):
+        return TextHandler(super().lstrip(chars))
+
+    def rstrip(self, chars=None):
+        return TextHandler(super().rstrip(chars))
+
+    def capitalize(self):
+        return TextHandler(super().capitalize())
+
+    def casefold(self):
+        return TextHandler(super().casefold())
+
+    def center(self, width, fillchar=' '):
+        return TextHandler(super().center(width, fillchar))
+
+    def expandtabs(self, tabsize=8):
+        return TextHandler(super().expandtabs(tabsize))
+
+    def format(self, *args, **kwargs):
+        return TextHandler(super().format(*args, **kwargs))
+
+    def format_map(self, mapping):
+        return TextHandler(super().format_map(mapping))
+
+    def join(self, iterable):
+        return TextHandler(super().join(iterable))
+
+    def ljust(self, width, fillchar=' '):
+        return TextHandler(super().ljust(width, fillchar))
+
+    def rjust(self, width, fillchar=' '):
+        return TextHandler(super().rjust(width, fillchar))
+
+    def swapcase(self):
+        return TextHandler(super().swapcase())
+
+    def title(self):
+        return TextHandler(super().title())
+
+    def translate(self, table):
+        return TextHandler(super().translate(table))
+
+    def zfill(self, width):
+        return TextHandler(super().zfill(width))
+
+    def replace(self, old, new, count=-1):
+        return TextHandler(super().replace(old, new, count))
+
+    def upper(self):
+        return TextHandler(super().upper())
+
+    def lower(self):
+        return TextHandler(super().lower())
+    ##############
 
     def sort(self, reverse: bool = False) -> str:
         """Return a sorted version of the string"""
@@ -30,11 +89,21 @@ class TextHandler(str):
         data = re.sub(' +', ' ', data)
         return self.__class__(data.strip())
 
+    # For easy copy-paste from Scrapy/parsel code when needed :)
+    def get(self, default=None):
+        return self
+
+    def get_all(self):
+        return self
+
+    extract = get_all
+    extract_first = get
+
     def json(self) -> Dict:
         """Return json response if the response is jsonable otherwise throw error"""
-        # Using __str__ function as a workaround for orjson issue with subclasses of str
+        # Using str function as a workaround for orjson issue with subclasses of str
         # Check this out: https://github.com/ijl/orjson/issues/445
-        return loads(self.__str__())
+        return loads(str(self))
 
     def re(
             self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
@@ -127,6 +196,19 @@ class TextHandlers(List[TextHandler]):
                 return result
         return default
 
+    # For easy copy-paste from Scrapy/parsel code when needed :)
+    def get(self, default=None):
+        """Returns the first item of the current list
+        :param default: the default value to return if the current list is empty
+        """
+        return self[0] if len(self) > 0 else default
+
+    def extract(self):
+        return self
+
+    extract_first = get
+    get_all = extract
+
 
 class AttributesHandler(Mapping):
     """A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
scrapling/core/storage_adaptors.py CHANGED
@@ -1,16 +1,15 @@
-import orjson
 import sqlite3
-import logging
 import threading
-from hashlib import sha256
 from abc import ABC, abstractmethod
+from hashlib import sha256
 
-from scrapling.core._types import Dict, Optional, Union
-from scrapling.core.utils import _StorageTools, cache
-
+import orjson
 from lxml import html
 from tldextract import extract as tld
 
+from scrapling.core._types import Dict, Optional, Union
+from scrapling.core.utils import _StorageTools, log, lru_cache
+
 
 class StorageSystemMixin(ABC):
     # If you want to make your own storage system, you have to inherit from this
@@ -20,7 +19,7 @@ class StorageSystemMixin(ABC):
         """
         self.url = url
 
-    @cache(None, typed=True)
+    @lru_cache(None, typed=True)
     def _get_base_url(self, default_value: str = 'default') -> str:
         if not self.url or type(self.url) is not str:
             return default_value
@@ -52,7 +51,7 @@ class StorageSystemMixin(ABC):
         raise NotImplementedError('Storage system must implement `save` method')
 
     @staticmethod
-    @cache(None, typed=True)
+    @lru_cache(None, typed=True)
    def _get_hash(identifier: str) -> str:
        """If you want to hash identifier in your storage system, use this safer"""
        identifier = identifier.lower().strip()
@@ -64,7 +63,7 @@ class StorageSystemMixin(ABC):
         return f"{hash_value}_{len(identifier)}"  # Length to reduce collision chance
 
 
-@cache(None, typed=True)
+@lru_cache(None, typed=True)
 class SQLiteStorageSystem(StorageSystemMixin):
     """The recommended system to use, it's race condition safe and thread safe.
     Mainly built so the library can run in threaded frameworks like scrapy or threaded tools
@@ -86,7 +85,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
         self.connection.execute("PRAGMA journal_mode=WAL")
         self.cursor = self.connection.cursor()
         self._setup_database()
-        logging.debug(
+        log.debug(
             f'Storage system loaded with arguments (storage_file="{storage_file}", url="{url}")'
         )
 
scrapling/core/translator.py CHANGED
@@ -10,15 +10,14 @@ So you don't have to learn a new selectors/api method like what bs4 done with so
 
 import re
 
-from w3lib.html import HTML5_WHITESPACE
-from scrapling.core.utils import cache
-from scrapling.core._types import Any, Optional, Protocol, Self
-
-from cssselect.xpath import ExpressionError
-from cssselect.xpath import XPathExpr as OriginalXPathExpr
 from cssselect import HTMLTranslator as OriginalHTMLTranslator
 from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement
+from cssselect.xpath import ExpressionError
+from cssselect.xpath import XPathExpr as OriginalXPathExpr
+from w3lib.html import HTML5_WHITESPACE
 
+from scrapling.core._types import Any, Optional, Protocol, Self
+from scrapling.core.utils import lru_cache
 
 regex = f"[{HTML5_WHITESPACE}]+"
 replace_html5_whitespaces = re.compile(regex).sub
@@ -140,6 +139,6 @@ class TranslatorMixin:
 
 
 class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
-    @cache(maxsize=256)
+    @lru_cache(maxsize=256)
     def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
         return super().css_to_xpath(css, prefix)
scrapling/core/utils.py CHANGED
@@ -1,23 +1,45 @@
-import re
 import logging
+import re
 from itertools import chain
-# Using cache on top of a class is brilliant way to achieve Singleton design pattern without much code
-from functools import lru_cache as cache  # functools.cache is available on Python 3.9+ only so let's keep lru_cache
-
-from scrapling.core._types import Dict, Iterable, Any, Union
 
 import orjson
 from lxml import html
 
+from scrapling.core._types import Any, Dict, Iterable, Union
+
+# Using cache on top of a class is brilliant way to achieve Singleton design pattern without much code
+# functools.cache is available on Python 3.9+ only so let's keep lru_cache
+from functools import lru_cache  # isort:skip
+
 html_forbidden = {html.HtmlComment, }
-logging.basicConfig(
-    level=logging.ERROR,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler()
-    ]
+
+
+@lru_cache(1, typed=True)
+def setup_logger():
+    """Create and configure a logger with a standard format.
+
+    :returns: logging.Logger: Configured logger instance
+    """
+    logger = logging.getLogger('scrapling')
+    logger.setLevel(logging.INFO)
+
+    formatter = logging.Formatter(
+        fmt="[%(asctime)s] %(levelname)s: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S"
     )
 
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(formatter)
+
+    # Add handler to logger (if not already added)
+    if not logger.handlers:
+        logger.addHandler(console_handler)
+
+    return logger
+
+
+log = setup_logger()
+
 
 def is_jsonable(content: Union[bytes, str]) -> bool:
     if type(content) is bytes:
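Because logging now goes through a named `scrapling` logger instead of the root logger, applications can tune its verbosity the standard way. A short sketch of the intended usage:

    import logging
    from scrapling.core.utils import log

    log.info("visible by default (the 'scrapling' logger level is INFO)")
    logging.getLogger('scrapling').setLevel(logging.DEBUG)  # opt in to debug output
    log.debug("now visible too")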
@@ -30,23 +52,6 @@ def is_jsonable(content: Union[bytes, str]) -> bool:
     return False
 
 
-@cache(None, typed=True)
-def setup_basic_logging(level: str = 'debug'):
-    levels = {
-        'debug': logging.DEBUG,
-        'info': logging.INFO,
-        'warning': logging.WARNING,
-        'error': logging.ERROR,
-        'critical': logging.CRITICAL
-    }
-    formatter = logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S")
-    lvl = levels[level.lower()]
-    handler = logging.StreamHandler()
-    handler.setFormatter(formatter)
-    # Configure the root logger
-    logging.basicConfig(level=lvl, handlers=[handler])
-
-
 def flatten(lst: Iterable):
     return list(chain.from_iterable(lst))
@@ -94,7 +99,7 @@ class _StorageTools:
         parent = element.getparent()
         return tuple(
             (element.tag,) if parent is None else (
-            cls._get_element_path(parent) + (element.tag,)
+                cls._get_element_path(parent) + (element.tag,)
             )
         )
 
@@ -110,7 +115,7 @@ class _StorageTools:
     #     return _impl
 
 
-@cache(None, typed=True)
+@lru_cache(None, typed=True)
 def clean_spaces(string):
     string = string.replace('\t', ' ')
     string = re.sub('[\n|\r]', '', string)
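The `lru_cache`-on-a-class trick called out in the comment near the top of this file (and applied to `SQLiteStorageSystem` above) works because the decorator caches the class call itself, so repeated construction with the same arguments yields the same instance. A standalone sketch with a hypothetical `Config` class:

    from functools import lru_cache

    @lru_cache(None, typed=True)
    class Config:
        def __init__(self, name):
            self.name = name

    a = Config('x')
    b = Config('x')
    assert a is b  # the second call hit the cache, so it is the same object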
scrapling/defaults.py CHANGED
@@ -1,6 +1,7 @@
-from .fetchers import Fetcher, StealthyFetcher, PlayWrightFetcher
+from .fetchers import AsyncFetcher, Fetcher, PlayWrightFetcher, StealthyFetcher
 
 # If you are going to use Fetchers with the default settings, import them from this file instead for a cleaner looking code
 Fetcher = Fetcher()
+AsyncFetcher = AsyncFetcher()
 StealthyFetcher = StealthyFetcher()
 PlayWrightFetcher = PlayWrightFetcher()
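The new `AsyncFetcher` instance exported here is used like the other preconfigured fetchers, just awaited. A hedged sketch (method name per the 0.2.9 fetchers API):

    import asyncio
    from scrapling.defaults import AsyncFetcher  # already instantiated in this module

    async def main():
        page = await AsyncFetcher.get('https://example.com')
        print(page.status, page.css_first('title::text'))

    asyncio.run(main())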
scrapling/engines/__init__.py CHANGED
@@ -1,7 +1,7 @@
 from .camo import CamoufoxEngine
-from .static import StaticEngine
-from .pw import PlaywrightEngine
 from .constants import DEFAULT_DISABLED_RESOURCES, DEFAULT_STEALTH_FLAGS
+from .pw import PlaywrightEngine
+from .static import StaticEngine
 from .toolbelt import check_if_engine_usable
 
 __all__ = ['CamoufoxEngine', 'PlaywrightEngine']
scrapling/engines/camo.py CHANGED
@@ -1,28 +1,26 @@
-import logging
-from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
-
-from scrapling.engines.toolbelt import (
-    Response,
-    do_nothing,
-    StatusText,
-    get_os_name,
-    intercept_route,
-    check_type_validity,
-    construct_proxy_dict,
-    generate_convincing_referer,
-)
-
 from camoufox import DefaultAddons
+from camoufox.async_api import AsyncCamoufox
 from camoufox.sync_api import Camoufox
 
+from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
+                                   Union)
+from scrapling.core.utils import log
+from scrapling.engines.toolbelt import (Response, StatusText,
+                                        async_intercept_route,
+                                        check_type_validity,
+                                        construct_proxy_dict,
+                                        generate_convincing_referer,
+                                        get_os_name, intercept_route)
+
 
 class CamoufoxEngine:
     def __init__(
             self, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
-            block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = False, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
-            timeout: Optional[float] = 30000, page_action: Callable = do_nothing, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
+            block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
+            timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
             wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
             proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
+            geoip: Optional[bool] = False,
             adaptor_arguments: Dict = None,
     ):
         """An engine that utilizes Camoufox library, check the `StealthyFetcher` class for more documentation.
@@ -36,13 +34,15 @@ class CamoufoxEngine:
         :param block_webrtc: Blocks WebRTC entirely.
         :param addons: List of Firefox addons to use. Must be paths to extracted addons.
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
-        :param allow_webgl: Whether to allow WebGL. To prevent leaks, only use this for special cases.
+        :param allow_webgl: Enabled by default. Disabling WebGL is not recommended, as many WAFs now check whether WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
         :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
         :param wait_selector: Wait for a specific css selector to be in a specific state.
+        :param geoip: Recommended to use with proxies; automatically uses the IP's longitude, latitude, timezone, country, and locale, and spoofs the WebRTC IP address.
+            It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. Default state is `attached`.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
@@ -58,16 +58,20 @@ class CamoufoxEngine:
         self.google_search = bool(google_search)
         self.os_randomize = bool(os_randomize)
         self.disable_ads = bool(disable_ads)
+        self.geoip = bool(geoip)
         self.extra_headers = extra_headers or {}
         self.proxy = construct_proxy_dict(proxy)
         self.addons = addons or []
         self.humanize = humanize
         self.timeout = check_type_validity(timeout, [int, float], 30000)
-        if callable(page_action):
-            self.page_action = page_action
+        if page_action is not None:
+            if callable(page_action):
+                self.page_action = page_action
+            else:
+                self.page_action = None
+                log.error('[Ignored] Argument "page_action" must be callable')
         else:
-            self.page_action = do_nothing
-            logging.error('[Ignored] Argument "page_action" must be callable')
+            self.page_action = None
 
         self.wait_selector = wait_selector
         self.wait_selector_state = wait_selector_state
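The reworked validation still accepts any callable as `page_action`. For illustration, a hypothetical `scroll_down` action passed through `StealthyFetcher`, which forwards it to this engine:

    from scrapling import StealthyFetcher

    def scroll_down(page):
        page.mouse.wheel(0, 1000)  # the callable receives the Playwright page object...
        return page                # ...and must return it

    page = StealthyFetcher().fetch('https://example.com', page_action=scroll_down)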
@@ -81,6 +85,7 @@ class CamoufoxEngine:
         """
         addons = [] if self.disable_ads else [DefaultAddons.UBO]
         with Camoufox(
+                geoip=self.geoip,
                 proxy=self.proxy,
                 addons=self.addons,
                 exclude_addons=addons,
@@ -106,7 +111,8 @@ class CamoufoxEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
 
-            page = self.page_action(page)
+            if self.page_action is not None:
+                page = self.page_action(page)
 
             if self.wait_selector and type(self.wait_selector) is str:
                 waiter = page.locator(self.wait_selector)
@@ -119,11 +125,8 @@ class CamoufoxEngine:
 
             # This will be parsed inside `Response`
             encoding = res.headers.get('content-type', '') or 'utf-8'  # default encoding
-
-            status_text = res.status_text
             # PlayWright API sometimes give empty status text for some reason!
-            if not status_text:
-                status_text = StatusText.get(res.status)
+            status_text = res.status_text or StatusText.get(res.status)
 
             response = Response(
                 url=res.url,
@@ -140,3 +143,70 @@ class CamoufoxEngine:
             page.close()
 
         return response
+
+    async def async_fetch(self, url: str) -> Response:
+        """Opens up the browser and does your request based on your chosen options.
+
+        :param url: Target url.
+        :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
+        """
+        addons = [] if self.disable_ads else [DefaultAddons.UBO]
+        async with AsyncCamoufox(
+                geoip=self.geoip,
+                proxy=self.proxy,
+                addons=self.addons,
+                exclude_addons=addons,
+                headless=self.headless,
+                humanize=self.humanize,
+                i_know_what_im_doing=True,  # To turn warnings off with the user configurations
+                allow_webgl=self.allow_webgl,
+                block_webrtc=self.block_webrtc,
+                block_images=self.block_images,  # Careful! it makes some websites not finish loading at all, like stackoverflow, even in headful mode
+                os=None if self.os_randomize else get_os_name(),
+        ) as browser:
+            page = await browser.new_page()
+            page.set_default_navigation_timeout(self.timeout)
+            page.set_default_timeout(self.timeout)
+            if self.disable_resources:
+                await page.route("**/*", async_intercept_route)
+
+            if self.extra_headers:
+                await page.set_extra_http_headers(self.extra_headers)
+
+            res = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
+            await page.wait_for_load_state(state="domcontentloaded")
+            if self.network_idle:
+                await page.wait_for_load_state('networkidle')
+
+            if self.page_action is not None:
+                page = await self.page_action(page)
+
+            if self.wait_selector and type(self.wait_selector) is str:
+                waiter = page.locator(self.wait_selector)
+                await waiter.first.wait_for(state=self.wait_selector_state)
+                # Wait again after waiting for the selector, helpful with protections like Cloudflare
+                await page.wait_for_load_state(state="load")
+                await page.wait_for_load_state(state="domcontentloaded")
+                if self.network_idle:
+                    await page.wait_for_load_state('networkidle')
+
+            # This will be parsed inside `Response`
+            encoding = res.headers.get('content-type', '') or 'utf-8'  # default encoding
+            # PlayWright API sometimes give empty status text for some reason!
+            status_text = res.status_text or StatusText.get(res.status)
+
+            response = Response(
+                url=res.url,
+                text=await page.content(),
+                body=(await page.content()).encode('utf-8'),
+                status=res.status,
+                reason=status_text,
+                encoding=encoding,
+                cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
+                headers=await res.all_headers(),
+                request_headers=await res.request.all_headers(),
+                **self.adaptor_arguments
+            )
+            await page.close()
+
+        return response
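The new coroutine mirrors the sync `fetch` step for step. A hedged usage sketch through `StealthyFetcher.async_fetch`, which delegates to this engine in 0.2.9:

    import asyncio
    from scrapling import StealthyFetcher

    async def main():
        fetcher = StealthyFetcher()
        page = await fetcher.async_fetch('https://example.com', geoip=False)
        print(page.status, page.reason)

    asyncio.run(main())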
scrapling/engines/constants.py CHANGED
@@ -1,5 +1,5 @@
 # Disable loading these resources for speed
-DEFAULT_DISABLED_RESOURCES = [
+DEFAULT_DISABLED_RESOURCES = {
     'font',
     'image',
     'media',
@@ -10,9 +10,9 @@ DEFAULT_DISABLED_RESOURCES = [
     'websocket',
     'csp_report',
     'stylesheet',
-]
+}
 
-DEFAULT_STEALTH_FLAGS = [
+DEFAULT_STEALTH_FLAGS = (
     # Explanation: https://peter.sh/experiments/chromium-command-line-switches/
     # Generally this will make the browser faster and less detectable
     '--no-pings',
@@ -87,7 +87,7 @@ DEFAULT_STEALTH_FLAGS = [
     '--enable-features=NetworkService,NetworkServiceInProcess,TrustTokens,TrustTokensAlwaysAllowIssuance',
     '--blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4',
     '--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process,TranslateUI,BlinkGenPropertyTrees',
-]
+)
 
 # Defaulting to the docker mode, token doesn't matter in it as it's passed for the container
 NSTBROWSER_DEFAULT_QUERY = {