scrapling 0.2.92__py3-none-any.whl → 0.2.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +1 -1
- scrapling/core/_types.py +2 -1
- scrapling/core/custom_types.py +91 -39
- scrapling/core/translator.py +1 -1
- scrapling/defaults.py +8 -5
- scrapling/engines/camo.py +6 -2
- scrapling/engines/pw.py +1 -1
- scrapling/fetchers.py +5 -5
- scrapling/parser.py +153 -189
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/METADATA +58 -32
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/RECORD +17 -17
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/WHEEL +1 -1
- tests/fetchers/async/test_playwright.py +1 -1
- tests/fetchers/sync/test_playwright.py +1 -1
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/LICENSE +0 -0
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/entry_points.txt +0 -0
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
|
|
5
5
|
from scrapling.parser import Adaptor, Adaptors
|
6
6
|
|
7
7
|
__author__ = "Karim Shoair (karim.shoair@pm.me)"
|
8
|
-
__version__ = "0.2.92"
|
8
|
+
__version__ = "0.2.93"
|
9
9
|
__copyright__ = "Copyright (c) 2024 Karim Shoair"
|
10
10
|
|
11
11
|
|
scrapling/core/_types.py
CHANGED
@@ -3,7 +3,8 @@ Type definitions for type checking purposes.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
|
6
|
-
List, Literal, Optional, Pattern, Tuple, Type,
|
6
|
+
List, Literal, Optional, Pattern, Tuple, Type, TypeVar,
|
7
|
+
Union)
|
7
8
|
|
8
9
|
SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]
|
9
10
|
|
scrapling/core/custom_types.py
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
import re
|
2
|
+
import typing
|
2
3
|
from collections.abc import Mapping
|
3
4
|
from types import MappingProxyType
|
4
5
|
|
5
6
|
from orjson import dumps, loads
|
6
7
|
from w3lib.html import replace_entities as _replace_entities
|
7
8
|
|
8
|
-
from scrapling.core._types import Dict, List,
|
9
|
+
from scrapling.core._types import (Dict, Iterable, List, Literal, Optional,
|
10
|
+
Pattern, SupportsIndex, TypeVar, Union)
|
9
11
|
from scrapling.core.utils import _is_iterable, flatten
|
10
12
|
|
13
|
+
# Define type variable for AttributeHandler value type
|
14
|
+
_TextHandlerType = TypeVar('_TextHandlerType', bound='TextHandler')
|
15
|
+
|
11
16
|
|
12
17
|
class TextHandler(str):
|
13
18
|
"""Extends standard Python string by adding more functionality"""
|
@@ -18,72 +23,89 @@ class TextHandler(str):
|
|
18
23
|
return super().__new__(cls, string)
|
19
24
|
return super().__new__(cls, '')
|
20
25
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
26
|
+
@typing.overload
|
27
|
+
def __getitem__(self, key: SupportsIndex) -> 'TextHandler':
|
28
|
+
pass
|
29
|
+
|
30
|
+
@typing.overload
|
31
|
+
def __getitem__(self, key: slice) -> "TextHandlers":
|
32
|
+
pass
|
33
|
+
|
34
|
+
def __getitem__(self, key: Union[SupportsIndex, slice]) -> Union["TextHandler", "TextHandlers"]:
|
35
|
+
lst = super().__getitem__(key)
|
36
|
+
if isinstance(key, slice):
|
37
|
+
lst = [TextHandler(s) for s in lst]
|
38
|
+
return TextHandlers(typing.cast(List[_TextHandlerType], lst))
|
39
|
+
return typing.cast(_TextHandlerType, TextHandler(lst))
|
40
|
+
|
41
|
+
def split(self, sep: str = None, maxsplit: SupportsIndex = -1) -> 'TextHandlers':
|
42
|
+
return TextHandlers(
|
43
|
+
typing.cast(List[_TextHandlerType], [TextHandler(s) for s in super().split(sep, maxsplit)])
|
44
|
+
)
|
45
|
+
|
46
|
+
def strip(self, chars: str = None) -> Union[str, 'TextHandler']:
|
25
47
|
return TextHandler(super().strip(chars))
|
26
48
|
|
27
|
-
def lstrip(self, chars=None):
|
49
|
+
def lstrip(self, chars: str = None) -> Union[str, 'TextHandler']:
|
28
50
|
return TextHandler(super().lstrip(chars))
|
29
51
|
|
30
|
-
def rstrip(self, chars=None):
|
52
|
+
def rstrip(self, chars: str = None) -> Union[str, 'TextHandler']:
|
31
53
|
return TextHandler(super().rstrip(chars))
|
32
54
|
|
33
|
-
def capitalize(self):
|
55
|
+
def capitalize(self) -> Union[str, 'TextHandler']:
|
34
56
|
return TextHandler(super().capitalize())
|
35
57
|
|
36
|
-
def casefold(self):
|
58
|
+
def casefold(self) -> Union[str, 'TextHandler']:
|
37
59
|
return TextHandler(super().casefold())
|
38
60
|
|
39
|
-
def center(self, width, fillchar=' '):
|
61
|
+
def center(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
|
40
62
|
return TextHandler(super().center(width, fillchar))
|
41
63
|
|
42
|
-
def expandtabs(self, tabsize=8):
|
64
|
+
def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, 'TextHandler']:
|
43
65
|
return TextHandler(super().expandtabs(tabsize))
|
44
66
|
|
45
|
-
def format(self, *args, **kwargs):
|
67
|
+
def format(self, *args: str, **kwargs: str) -> Union[str, 'TextHandler']:
|
46
68
|
return TextHandler(super().format(*args, **kwargs))
|
47
69
|
|
48
|
-
def format_map(self, mapping):
|
70
|
+
def format_map(self, mapping) -> Union[str, 'TextHandler']:
|
49
71
|
return TextHandler(super().format_map(mapping))
|
50
72
|
|
51
|
-
def join(self, iterable):
|
73
|
+
def join(self, iterable: Iterable[str]) -> Union[str, 'TextHandler']:
|
52
74
|
return TextHandler(super().join(iterable))
|
53
75
|
|
54
|
-
def ljust(self, width, fillchar=' '):
|
76
|
+
def ljust(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
|
55
77
|
return TextHandler(super().ljust(width, fillchar))
|
56
78
|
|
57
|
-
def rjust(self, width, fillchar=' '):
|
79
|
+
def rjust(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
|
58
80
|
return TextHandler(super().rjust(width, fillchar))
|
59
81
|
|
60
|
-
def swapcase(self):
|
82
|
+
def swapcase(self) -> Union[str, 'TextHandler']:
|
61
83
|
return TextHandler(super().swapcase())
|
62
84
|
|
63
|
-
def title(self):
|
85
|
+
def title(self) -> Union[str, 'TextHandler']:
|
64
86
|
return TextHandler(super().title())
|
65
87
|
|
66
|
-
def translate(self, table):
|
88
|
+
def translate(self, table) -> Union[str, 'TextHandler']:
|
67
89
|
return TextHandler(super().translate(table))
|
68
90
|
|
69
|
-
def zfill(self, width):
|
91
|
+
def zfill(self, width: SupportsIndex) -> Union[str, 'TextHandler']:
|
70
92
|
return TextHandler(super().zfill(width))
|
71
93
|
|
72
|
-
def replace(self, old, new, count
|
94
|
+
def replace(self, old: str, new: str, count: SupportsIndex = -1) -> Union[str, 'TextHandler']:
|
73
95
|
return TextHandler(super().replace(old, new, count))
|
74
96
|
|
75
|
-
def upper(self):
|
97
|
+
def upper(self) -> Union[str, 'TextHandler']:
|
76
98
|
return TextHandler(super().upper())
|
77
99
|
|
78
|
-
def lower(self):
|
100
|
+
def lower(self) -> Union[str, 'TextHandler']:
|
79
101
|
return TextHandler(super().lower())
|
80
102
|
##############
|
81
103
|
|
82
|
-
def sort(self, reverse: bool = False) -> str:
|
104
|
+
def sort(self, reverse: bool = False) -> Union[str, 'TextHandler']:
|
83
105
|
"""Return a sorted version of the string"""
|
84
106
|
return self.__class__("".join(sorted(self, reverse=reverse)))
|
85
107
|
|
86
|
-
def clean(self) -> str:
|
108
|
+
def clean(self) -> Union[str, 'TextHandler']:
|
87
109
|
"""Return a new version of the string after removing all white spaces and consecutive spaces"""
|
88
110
|
data = re.sub(r'[\t|\r|\n]', '', self)
|
89
111
|
data = re.sub(' +', ' ', data)
|
@@ -105,10 +127,32 @@ class TextHandler(str):
|
|
105
127
|
# Check this out: https://github.com/ijl/orjson/issues/445
|
106
128
|
return loads(str(self))
|
107
129
|
|
130
|
+
@typing.overload
|
131
|
+
def re(
|
132
|
+
self,
|
133
|
+
regex: Union[str, Pattern[str]],
|
134
|
+
check_match: Literal[True],
|
135
|
+
replace_entities: bool = True,
|
136
|
+
clean_match: bool = False,
|
137
|
+
case_sensitive: bool = False,
|
138
|
+
) -> bool:
|
139
|
+
...
|
140
|
+
|
141
|
+
@typing.overload
|
142
|
+
def re(
|
143
|
+
self,
|
144
|
+
regex: Union[str, Pattern[str]],
|
145
|
+
replace_entities: bool = True,
|
146
|
+
clean_match: bool = False,
|
147
|
+
case_sensitive: bool = False,
|
148
|
+
check_match: Literal[False] = False,
|
149
|
+
) -> "TextHandlers[TextHandler]":
|
150
|
+
...
|
151
|
+
|
108
152
|
def re(
|
109
153
|
self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
|
110
154
|
case_sensitive: bool = False, check_match: bool = False
|
111
|
-
) -> Union[
|
155
|
+
) -> Union["TextHandlers[TextHandler]", bool]:
|
112
156
|
"""Apply the given regex to the current text and return a list of strings with the matches.
|
113
157
|
|
114
158
|
:param regex: Can be either a compiled regular expression or a string.
|
@@ -133,12 +177,12 @@ class TextHandler(str):
|
|
133
177
|
results = flatten(results)
|
134
178
|
|
135
179
|
if not replace_entities:
|
136
|
-
return [TextHandler(string) for string in results]
|
180
|
+
return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(string) for string in results]))
|
137
181
|
|
138
|
-
return [TextHandler(_replace_entities(s)) for s in results]
|
182
|
+
return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(_replace_entities(s)) for s in results]))
|
139
183
|
|
140
184
|
def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
|
141
|
-
clean_match: bool = False, case_sensitive: bool = False) ->
|
185
|
+
clean_match: bool = False, case_sensitive: bool = False) -> "TextHandler":
|
142
186
|
"""Apply the given regex to text and return the first match if found, otherwise return the default value.
|
143
187
|
|
144
188
|
:param regex: Can be either a compiled regular expression or a string.
|
@@ -158,15 +202,23 @@ class TextHandlers(List[TextHandler]):
|
|
158
202
|
"""
|
159
203
|
__slots__ = ()
|
160
204
|
|
161
|
-
|
205
|
+
@typing.overload
|
206
|
+
def __getitem__(self, pos: SupportsIndex) -> TextHandler:
|
207
|
+
pass
|
208
|
+
|
209
|
+
@typing.overload
|
210
|
+
def __getitem__(self, pos: slice) -> "TextHandlers":
|
211
|
+
pass
|
212
|
+
|
213
|
+
def __getitem__(self, pos: Union[SupportsIndex, slice]) -> Union[TextHandler, "TextHandlers"]:
|
162
214
|
lst = super().__getitem__(pos)
|
163
215
|
if isinstance(pos, slice):
|
164
|
-
|
165
|
-
|
166
|
-
|
216
|
+
lst = [TextHandler(s) for s in lst]
|
217
|
+
return TextHandlers(typing.cast(List[_TextHandlerType], lst))
|
218
|
+
return typing.cast(_TextHandlerType, TextHandler(lst))
|
167
219
|
|
168
220
|
def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
|
169
|
-
case_sensitive: bool = False) -> '
|
221
|
+
case_sensitive: bool = False) -> 'TextHandlers[TextHandler]':
|
170
222
|
"""Call the ``.re()`` method for each element in this list and return
|
171
223
|
their results flattened as TextHandlers.
|
172
224
|
|
@@ -178,10 +230,10 @@ class TextHandlers(List[TextHandler]):
|
|
178
230
|
results = [
|
179
231
|
n.re(regex, replace_entities, clean_match, case_sensitive) for n in self
|
180
232
|
]
|
181
|
-
return flatten(results)
|
233
|
+
return TextHandlers(flatten(results))
|
182
234
|
|
183
235
|
def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
|
184
|
-
clean_match: bool = False, case_sensitive: bool = False) ->
|
236
|
+
clean_match: bool = False, case_sensitive: bool = False) -> TextHandler:
|
185
237
|
"""Call the ``.re_first()`` method for each element in this list and return
|
186
238
|
the first result or the default value otherwise.
|
187
239
|
|
@@ -210,7 +262,7 @@ class TextHandlers(List[TextHandler]):
|
|
210
262
|
get_all = extract
|
211
263
|
|
212
264
|
|
213
|
-
class AttributesHandler(Mapping):
|
265
|
+
class AttributesHandler(Mapping[str, _TextHandlerType]):
|
214
266
|
"""A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
|
215
267
|
If standard dictionary is needed, just convert this class to dictionary with `dict` function
|
216
268
|
"""
|
@@ -231,7 +283,7 @@ class AttributesHandler(Mapping):
|
|
231
283
|
# Fastest read-only mapping type
|
232
284
|
self._data = MappingProxyType(mapping)
|
233
285
|
|
234
|
-
def get(self, key, default=None):
|
286
|
+
def get(self, key: str, default: Optional[str] = None) -> Union[_TextHandlerType, None]:
|
235
287
|
"""Acts like standard dictionary `.get()` method"""
|
236
288
|
return self._data.get(key, default)
|
237
289
|
|
@@ -253,7 +305,7 @@ class AttributesHandler(Mapping):
|
|
253
305
|
"""Convert current attributes to JSON string if the attributes are JSON serializable otherwise throws error"""
|
254
306
|
return dumps(dict(self._data))
|
255
307
|
|
256
|
-
def __getitem__(self, key):
|
308
|
+
def __getitem__(self, key: str) -> _TextHandlerType:
|
257
309
|
return self._data[key]
|
258
310
|
|
259
311
|
def __iter__(self):
|
scrapling/core/translator.py
CHANGED
@@ -139,6 +139,6 @@ class TranslatorMixin:
|
|
139
139
|
|
140
140
|
|
141
141
|
class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
|
142
|
-
@lru_cache(maxsize=
|
142
|
+
@lru_cache(maxsize=2048)
|
143
143
|
def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
|
144
144
|
return super().css_to_xpath(css, prefix)
|
scrapling/defaults.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1
|
-
from .fetchers import AsyncFetcher
|
1
|
+
from .fetchers import AsyncFetcher as _AsyncFetcher
|
2
|
+
from .fetchers import Fetcher as _Fetcher
|
3
|
+
from .fetchers import PlayWrightFetcher as _PlayWrightFetcher
|
4
|
+
from .fetchers import StealthyFetcher as _StealthyFetcher
|
2
5
|
|
3
6
|
# If you are going to use Fetchers with the default settings, import them from this file instead for a cleaner looking code
|
4
|
-
Fetcher =
|
5
|
-
AsyncFetcher =
|
6
|
-
StealthyFetcher =
|
7
|
-
PlayWrightFetcher =
|
7
|
+
Fetcher = _Fetcher()
|
8
|
+
AsyncFetcher = _AsyncFetcher()
|
9
|
+
StealthyFetcher = _StealthyFetcher()
|
10
|
+
PlayWrightFetcher = _PlayWrightFetcher()
|
scrapling/engines/camo.py
CHANGED
@@ -19,7 +19,7 @@ class CamoufoxEngine:
|
|
19
19
|
block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
|
20
20
|
timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
|
21
21
|
wait_selector_state: Optional[SelectorWaitStates] = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
|
22
|
-
proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] =
|
22
|
+
proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False,
|
23
23
|
geoip: Optional[bool] = False,
|
24
24
|
adaptor_arguments: Dict = None,
|
25
25
|
):
|
@@ -36,7 +36,7 @@ class CamoufoxEngine:
|
|
36
36
|
:param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
|
37
37
|
:param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
|
38
38
|
:param network_idle: Wait for the page until there are no network connections for at least 500 ms.
|
39
|
-
:param disable_ads:
|
39
|
+
:param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
|
40
40
|
:param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
|
41
41
|
:param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
|
42
42
|
:param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
|
@@ -95,6 +95,8 @@ class CamoufoxEngine:
|
|
95
95
|
with Camoufox(
|
96
96
|
geoip=self.geoip,
|
97
97
|
proxy=self.proxy,
|
98
|
+
disable_coop=True,
|
99
|
+
enable_cache=True,
|
98
100
|
addons=self.addons,
|
99
101
|
exclude_addons=addons,
|
100
102
|
headless=self.headless,
|
@@ -174,6 +176,8 @@ class CamoufoxEngine:
|
|
174
176
|
async with AsyncCamoufox(
|
175
177
|
geoip=self.geoip,
|
176
178
|
proxy=self.proxy,
|
179
|
+
disable_coop=True,
|
180
|
+
enable_cache=True,
|
177
181
|
addons=self.addons,
|
178
182
|
exclude_addons=addons,
|
179
183
|
headless=self.headless,
|
scrapling/engines/pw.py
CHANGED
@@ -105,7 +105,7 @@ class PlaywrightEngine:
|
|
105
105
|
"""
|
106
106
|
cdp_url = self.cdp_url
|
107
107
|
if self.nstbrowser_mode:
|
108
|
-
if self.nstbrowser_config and
|
108
|
+
if self.nstbrowser_config and isinstance(self.nstbrowser_config, dict):
|
109
109
|
config = self.nstbrowser_config
|
110
110
|
else:
|
111
111
|
query = NSTBROWSER_DEFAULT_QUERY.copy()
|
scrapling/fetchers.py
CHANGED
@@ -143,7 +143,7 @@ class AsyncFetcher(Fetcher):
|
|
143
143
|
:return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
|
144
144
|
"""
|
145
145
|
adaptor_arguments = tuple(self.adaptor_arguments.items())
|
146
|
-
response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).
|
146
|
+
response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_put(**kwargs)
|
147
147
|
return response_object
|
148
148
|
|
149
149
|
async def delete(
|
@@ -177,7 +177,7 @@ class StealthyFetcher(BaseFetcher):
|
|
177
177
|
block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
|
178
178
|
timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
|
179
179
|
wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
|
180
|
-
proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] =
|
180
|
+
proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False, geoip: Optional[bool] = False,
|
181
181
|
) -> Response:
|
182
182
|
"""
|
183
183
|
Opens up a browser and do your request based on your chosen options below.
|
@@ -191,7 +191,7 @@ class StealthyFetcher(BaseFetcher):
|
|
191
191
|
This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
|
192
192
|
:param block_webrtc: Blocks WebRTC entirely.
|
193
193
|
:param addons: List of Firefox addons to use. Must be paths to extracted addons.
|
194
|
-
:param disable_ads:
|
194
|
+
:param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
|
195
195
|
:param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
|
196
196
|
:param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
|
197
197
|
:param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
|
@@ -235,7 +235,7 @@ class StealthyFetcher(BaseFetcher):
|
|
235
235
|
block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
|
236
236
|
timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
|
237
237
|
wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
|
238
|
-
proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] =
|
238
|
+
proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False, geoip: Optional[bool] = False,
|
239
239
|
) -> Response:
|
240
240
|
"""
|
241
241
|
Opens up a browser and do your request based on your chosen options below.
|
@@ -249,7 +249,7 @@ class StealthyFetcher(BaseFetcher):
|
|
249
249
|
This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
|
250
250
|
:param block_webrtc: Blocks WebRTC entirely.
|
251
251
|
:param addons: List of Firefox addons to use. Must be paths to extracted addons.
|
252
|
-
:param disable_ads:
|
252
|
+
:param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
|
253
253
|
:param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
|
254
254
|
:param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
|
255
255
|
:param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
|