scrapling 0.2.92__py3-none-any.whl → 0.2.93__py3-none-any.whl
- scrapling/__init__.py +1 -1
- scrapling/core/_types.py +2 -1
- scrapling/core/custom_types.py +91 -39
- scrapling/core/translator.py +1 -1
- scrapling/defaults.py +8 -5
- scrapling/engines/camo.py +6 -2
- scrapling/engines/pw.py +1 -1
- scrapling/fetchers.py +5 -5
- scrapling/parser.py +153 -189
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/METADATA +58 -32
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/RECORD +17 -17
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/WHEEL +1 -1
- tests/fetchers/async/test_playwright.py +1 -1
- tests/fetchers/sync/test_playwright.py +1 -1
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/LICENSE +0 -0
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/entry_points.txt +0 -0
- {scrapling-0.2.92.dist-info → scrapling-0.2.93.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
 from scrapling.parser import Adaptor, Adaptors
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.92"
+__version__ = "0.2.93"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
scrapling/core/_types.py
CHANGED
@@ -3,7 +3,8 @@ Type definitions for type checking purposes.
 """
 
 from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
-                    List, Literal, Optional, Pattern, Tuple, Type, Union)
+                    List, Literal, Optional, Pattern, Tuple, Type, TypeVar,
+                    Union)
 
 SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]
 
scrapling/core/custom_types.py
CHANGED
@@ -1,13 +1,18 @@
 import re
+import typing
 from collections.abc import Mapping
 from types import MappingProxyType
 
 from orjson import dumps, loads
 from w3lib.html import replace_entities as _replace_entities
 
-from scrapling.core._types import Dict, List,
+from scrapling.core._types import (Dict, Iterable, List, Literal, Optional,
+                                   Pattern, SupportsIndex, TypeVar, Union)
 from scrapling.core.utils import _is_iterable, flatten
 
+# Define type variable for AttributeHandler value type
+_TextHandlerType = TypeVar('_TextHandlerType', bound='TextHandler')
+
 
 class TextHandler(str):
     """Extends standard Python string by adding more functionality"""
@@ -18,72 +23,89 @@ class TextHandler(str):
             return super().__new__(cls, string)
         return super().__new__(cls, '')
 
-
-
-
-
+    @typing.overload
+    def __getitem__(self, key: SupportsIndex) -> 'TextHandler':
+        pass
+
+    @typing.overload
+    def __getitem__(self, key: slice) -> "TextHandlers":
+        pass
+
+    def __getitem__(self, key: Union[SupportsIndex, slice]) -> Union["TextHandler", "TextHandlers"]:
+        lst = super().__getitem__(key)
+        if isinstance(key, slice):
+            lst = [TextHandler(s) for s in lst]
+            return TextHandlers(typing.cast(List[_TextHandlerType], lst))
+        return typing.cast(_TextHandlerType, TextHandler(lst))
+
+    def split(self, sep: str = None, maxsplit: SupportsIndex = -1) -> 'TextHandlers':
+        return TextHandlers(
+            typing.cast(List[_TextHandlerType], [TextHandler(s) for s in super().split(sep, maxsplit)])
+        )
+
+    def strip(self, chars: str = None) -> Union[str, 'TextHandler']:
         return TextHandler(super().strip(chars))
 
-    def lstrip(self, chars=None):
+    def lstrip(self, chars: str = None) -> Union[str, 'TextHandler']:
         return TextHandler(super().lstrip(chars))
 
-    def rstrip(self, chars=None):
+    def rstrip(self, chars: str = None) -> Union[str, 'TextHandler']:
         return TextHandler(super().rstrip(chars))
 
-    def capitalize(self):
+    def capitalize(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().capitalize())
 
-    def casefold(self):
+    def casefold(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().casefold())
 
-    def center(self, width, fillchar=' '):
+    def center(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
         return TextHandler(super().center(width, fillchar))
 
-    def expandtabs(self, tabsize=8):
+    def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, 'TextHandler']:
         return TextHandler(super().expandtabs(tabsize))
 
-    def format(self, *args, **kwargs):
+    def format(self, *args: str, **kwargs: str) -> Union[str, 'TextHandler']:
         return TextHandler(super().format(*args, **kwargs))
 
-    def format_map(self, mapping):
+    def format_map(self, mapping) -> Union[str, 'TextHandler']:
         return TextHandler(super().format_map(mapping))
 
-    def join(self, iterable):
+    def join(self, iterable: Iterable[str]) -> Union[str, 'TextHandler']:
         return TextHandler(super().join(iterable))
 
-    def ljust(self, width, fillchar=' '):
+    def ljust(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
         return TextHandler(super().ljust(width, fillchar))
 
-    def rjust(self, width, fillchar=' '):
+    def rjust(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
         return TextHandler(super().rjust(width, fillchar))
 
-    def swapcase(self):
+    def swapcase(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().swapcase())
 
-    def title(self):
+    def title(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().title())
 
-    def translate(self, table):
+    def translate(self, table) -> Union[str, 'TextHandler']:
         return TextHandler(super().translate(table))
 
-    def zfill(self, width):
+    def zfill(self, width: SupportsIndex) -> Union[str, 'TextHandler']:
         return TextHandler(super().zfill(width))
 
-    def replace(self, old, new, count
+    def replace(self, old: str, new: str, count: SupportsIndex = -1) -> Union[str, 'TextHandler']:
         return TextHandler(super().replace(old, new, count))
 
-    def upper(self):
+    def upper(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().upper())
 
-    def lower(self):
+    def lower(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().lower())
     ##############
 
-    def sort(self, reverse: bool = False) -> str:
+    def sort(self, reverse: bool = False) -> Union[str, 'TextHandler']:
         """Return a sorted version of the string"""
         return self.__class__("".join(sorted(self, reverse=reverse)))
 
-    def clean(self) -> str:
+    def clean(self) -> Union[str, 'TextHandler']:
         """Return a new version of the string after removing all white spaces and consecutive spaces"""
         data = re.sub(r'[\t|\r|\n]', '', self)
         data = re.sub(' +', ' ', data)
@@ -105,10 +127,32 @@ class TextHandler(str):
         # Check this out: https://github.com/ijl/orjson/issues/445
         return loads(str(self))
 
+    @typing.overload
+    def re(
+        self,
+        regex: Union[str, Pattern[str]],
+        check_match: Literal[True],
+        replace_entities: bool = True,
+        clean_match: bool = False,
+        case_sensitive: bool = False,
+    ) -> bool:
+        ...
+
+    @typing.overload
+    def re(
+        self,
+        regex: Union[str, Pattern[str]],
+        replace_entities: bool = True,
+        clean_match: bool = False,
+        case_sensitive: bool = False,
+        check_match: Literal[False] = False,
+    ) -> "TextHandlers[TextHandler]":
+        ...
+
     def re(
             self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
             case_sensitive: bool = False, check_match: bool = False
-    ) -> Union[
+    ) -> Union["TextHandlers[TextHandler]", bool]:
         """Apply the given regex to the current text and return a list of strings with the matches.
 
         :param regex: Can be either a compiled regular expression or a string.
@@ -133,12 +177,12 @@ class TextHandler(str):
         results = flatten(results)
 
         if not replace_entities:
-            return [TextHandler(string) for string in results]
+            return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(string) for string in results]))
 
-        return [TextHandler(_replace_entities(s)) for s in results]
+        return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(_replace_entities(s)) for s in results]))
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) ->
+                 clean_match: bool = False, case_sensitive: bool = False) -> "TextHandler":
         """Apply the given regex to text and return the first match if found, otherwise return the default value.
 
         :param regex: Can be either a compiled regular expression or a string.
@@ -158,15 +202,23 @@ class TextHandlers(List[TextHandler]):
     """
     __slots__ = ()
 
-
+    @typing.overload
+    def __getitem__(self, pos: SupportsIndex) -> TextHandler:
+        pass
+
+    @typing.overload
+    def __getitem__(self, pos: slice) -> "TextHandlers":
+        pass
+
+    def __getitem__(self, pos: Union[SupportsIndex, slice]) -> Union[TextHandler, "TextHandlers"]:
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
-
-
-
+            lst = [TextHandler(s) for s in lst]
+            return TextHandlers(typing.cast(List[_TextHandlerType], lst))
+        return typing.cast(_TextHandlerType, TextHandler(lst))
 
     def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
-           case_sensitive: bool = False) -> '
+           case_sensitive: bool = False) -> 'TextHandlers[TextHandler]':
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as TextHandlers.
 
@@ -178,10 +230,10 @@ class TextHandlers(List[TextHandler]):
         results = [
            n.re(regex, replace_entities, clean_match, case_sensitive) for n in self
         ]
-        return flatten(results)
+        return TextHandlers(flatten(results))
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) ->
+                 clean_match: bool = False, case_sensitive: bool = False) -> TextHandler:
         """Call the ``.re_first()`` method for each element in this list and return
         the first result or the default value otherwise.
 
@@ -210,7 +262,7 @@ class TextHandlers(List[TextHandler]):
     get_all = extract
 
 
-class AttributesHandler(Mapping):
+class AttributesHandler(Mapping[str, _TextHandlerType]):
     """A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
     If standard dictionary is needed, just convert this class to dictionary with `dict` function
     """
@@ -231,7 +283,7 @@ class AttributesHandler(Mapping):
         # Fastest read-only mapping type
         self._data = MappingProxyType(mapping)
 
-    def get(self, key, default=None):
+    def get(self, key: str, default: Optional[str] = None) -> Union[_TextHandlerType, None]:
         """Acts like standard dictionary `.get()` method"""
         return self._data.get(key, default)
 
@@ -253,7 +305,7 @@ class AttributesHandler(Mapping):
         """Convert current attributes to JSON string if the attributes are JSON serializable otherwise throws error"""
         return dumps(dict(self._data))
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> _TextHandlerType:
         return self._data[key]
 
     def __iter__(self):
scrapling/core/translator.py
CHANGED
@@ -139,6 +139,6 @@ class TranslatorMixin:
 
 
 class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
-    @lru_cache(maxsize=
+    @lru_cache(maxsize=2048)
     def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
         return super().css_to_xpath(css, prefix)
scrapling/defaults.py
CHANGED
@@ -1,7 +1,10 @@
-from .fetchers import AsyncFetcher
+from .fetchers import AsyncFetcher as _AsyncFetcher
+from .fetchers import Fetcher as _Fetcher
+from .fetchers import PlayWrightFetcher as _PlayWrightFetcher
+from .fetchers import StealthyFetcher as _StealthyFetcher
 
 # If you are going to use Fetchers with the default settings, import them from this file instead for a cleaner looking code
-Fetcher =
-AsyncFetcher =
-StealthyFetcher =
-PlayWrightFetcher =
+Fetcher = _Fetcher()
+AsyncFetcher = _AsyncFetcher()
+StealthyFetcher = _StealthyFetcher()
+PlayWrightFetcher = _PlayWrightFetcher()
scrapling/engines/camo.py
CHANGED
@@ -19,7 +19,7 @@ class CamoufoxEngine:
                  block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
                  timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
                  wait_selector_state: Optional[SelectorWaitStates] = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
-                 proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] =
+                 proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False,
                  geoip: Optional[bool] = False,
                  adaptor_arguments: Dict = None,
                  ):
@@ -36,7 +36,7 @@ class CamoufoxEngine:
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
-        :param disable_ads:
+        :param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
         :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
@@ -95,6 +95,8 @@ class CamoufoxEngine:
         with Camoufox(
             geoip=self.geoip,
             proxy=self.proxy,
+            disable_coop=True,
+            enable_cache=True,
             addons=self.addons,
             exclude_addons=addons,
             headless=self.headless,
@@ -174,6 +176,8 @@ class CamoufoxEngine:
         async with AsyncCamoufox(
             geoip=self.geoip,
             proxy=self.proxy,
+            disable_coop=True,
+            enable_cache=True,
             addons=self.addons,
             exclude_addons=addons,
             headless=self.headless,
scrapling/engines/pw.py
CHANGED
@@ -105,7 +105,7 @@ class PlaywrightEngine:
         """
         cdp_url = self.cdp_url
         if self.nstbrowser_mode:
-            if self.nstbrowser_config and
+            if self.nstbrowser_config and isinstance(self.nstbrowser_config, dict):
                 config = self.nstbrowser_config
             else:
                 query = NSTBROWSER_DEFAULT_QUERY.copy()
scrapling/fetchers.py
CHANGED
@@ -143,7 +143,7 @@ class AsyncFetcher(Fetcher):
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         adaptor_arguments = tuple(self.adaptor_arguments.items())
-        response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).
+        response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_put(**kwargs)
         return response_object
 
     async def delete(
@@ -177,7 +177,7 @@ class StealthyFetcher(BaseFetcher):
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
             wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
-            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] =
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -191,7 +191,7 @@ class StealthyFetcher(BaseFetcher):
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param block_webrtc: Blocks WebRTC entirely.
         :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param disable_ads:
+        :param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
         :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
@@ -235,7 +235,7 @@ class StealthyFetcher(BaseFetcher):
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
             wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
-            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] =
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -249,7 +249,7 @@ class StealthyFetcher(BaseFetcher):
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param block_webrtc: Blocks WebRTC entirely.
         :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param disable_ads:
+        :param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
         :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.