scrapling-0.2.91-py3-none-any.whl → scrapling-0.2.93-py3-none-any.whl

scrapling/__init__.py CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
 from scrapling.parser import Adaptor, Adaptors
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.91"
+__version__ = "0.2.93"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
scrapling/cli.py ADDED
@@ -0,0 +1,37 @@
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import click
+
+
+def get_package_dir():
+    return Path(os.path.dirname(__file__))
+
+
+def run_command(command, line):
+    print(f"Installing {line}...")
+    _ = subprocess.check_call(command, shell=True)
+    # I meant to not use try except here
+
+
+@click.command(help="Install all Scrapling's Fetchers dependencies")
+def install():
+    if not get_package_dir().joinpath(".scrapling_dependencies_installed").exists():
+        run_command([sys.executable, "-m", "playwright", "install", 'chromium'], 'Playwright browsers')
+        run_command([sys.executable, "-m", "playwright", "install-deps", 'chromium', 'firefox'], 'Playwright dependencies')
+        run_command([sys.executable, "-m", "camoufox", "fetch", '--browserforge'], 'Camoufox browser and databases')
+        # if no errors raised by above commands, then we add below file
+        get_package_dir().joinpath(".scrapling_dependencies_installed").touch()
+    else:
+        print('The dependencies are already installed')
+
+
+@click.group()
+def main():
+    pass
+
+
+# Adding commands
+main.add_command(install)
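As a quick orientation for the new module, here is a hedged sketch of exercising the command; `CliRunner` is click's own testing harness, and the assumption that the wheel also registers `main` as a `scrapling` console script is ours, not something stated in this diff.

# Sketch: drive the new click group programmatically.
# Equivalent to `scrapling install` in a shell, assuming `main` is
# registered as the console-script entry point (an assumption here).
from click.testing import CliRunner

from scrapling.cli import main

runner = CliRunner()
result = runner.invoke(main, ["install"])  # first run downloads browsers; later runs are no-ops
print(result.exit_code, result.output)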
scrapling/core/_types.py CHANGED
@@ -3,7 +3,8 @@ Type definitions for type checking purposes.
 """
 
 from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
-                    List, Literal, Optional, Pattern, Tuple, Type, Union)
+                    List, Literal, Optional, Pattern, Tuple, Type, TypeVar,
+                    Union)
 
 SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]
 
scrapling/core/custom_types.py CHANGED
@@ -1,13 +1,18 @@
 import re
+import typing
 from collections.abc import Mapping
 from types import MappingProxyType
 
 from orjson import dumps, loads
 from w3lib.html import replace_entities as _replace_entities
 
-from scrapling.core._types import Dict, List, Pattern, SupportsIndex, Union
+from scrapling.core._types import (Dict, Iterable, List, Literal, Optional,
+                                   Pattern, SupportsIndex, TypeVar, Union)
 from scrapling.core.utils import _is_iterable, flatten
 
+# Define type variable for AttributeHandler value type
+_TextHandlerType = TypeVar('_TextHandlerType', bound='TextHandler')
+
 
 class TextHandler(str):
     """Extends standard Python string by adding more functionality"""
@@ -18,72 +23,89 @@ class TextHandler(str):
             return super().__new__(cls, string)
         return super().__new__(cls, '')
 
-    # Make methods from original `str` class return `TextHandler` instead of returning `str` again
-    # Of course, this stupid workaround is only so we can keep the auto-completion working without issues in your IDE
-    # and I made sonnet write it for me :)
-    def strip(self, chars=None):
+    @typing.overload
+    def __getitem__(self, key: SupportsIndex) -> 'TextHandler':
+        pass
+
+    @typing.overload
+    def __getitem__(self, key: slice) -> "TextHandlers":
+        pass
+
+    def __getitem__(self, key: Union[SupportsIndex, slice]) -> Union["TextHandler", "TextHandlers"]:
+        lst = super().__getitem__(key)
+        if isinstance(key, slice):
+            lst = [TextHandler(s) for s in lst]
+            return TextHandlers(typing.cast(List[_TextHandlerType], lst))
+        return typing.cast(_TextHandlerType, TextHandler(lst))
+
+    def split(self, sep: str = None, maxsplit: SupportsIndex = -1) -> 'TextHandlers':
+        return TextHandlers(
+            typing.cast(List[_TextHandlerType], [TextHandler(s) for s in super().split(sep, maxsplit)])
+        )
+
+    def strip(self, chars: str = None) -> Union[str, 'TextHandler']:
         return TextHandler(super().strip(chars))
 
-    def lstrip(self, chars=None):
+    def lstrip(self, chars: str = None) -> Union[str, 'TextHandler']:
         return TextHandler(super().lstrip(chars))
 
-    def rstrip(self, chars=None):
+    def rstrip(self, chars: str = None) -> Union[str, 'TextHandler']:
         return TextHandler(super().rstrip(chars))
 
-    def capitalize(self):
+    def capitalize(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().capitalize())
 
-    def casefold(self):
+    def casefold(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().casefold())
 
-    def center(self, width, fillchar=' '):
+    def center(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
         return TextHandler(super().center(width, fillchar))
 
-    def expandtabs(self, tabsize=8):
+    def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, 'TextHandler']:
         return TextHandler(super().expandtabs(tabsize))
 
-    def format(self, *args, **kwargs):
+    def format(self, *args: str, **kwargs: str) -> Union[str, 'TextHandler']:
         return TextHandler(super().format(*args, **kwargs))
 
-    def format_map(self, mapping):
+    def format_map(self, mapping) -> Union[str, 'TextHandler']:
         return TextHandler(super().format_map(mapping))
 
-    def join(self, iterable):
+    def join(self, iterable: Iterable[str]) -> Union[str, 'TextHandler']:
         return TextHandler(super().join(iterable))
 
-    def ljust(self, width, fillchar=' '):
+    def ljust(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
         return TextHandler(super().ljust(width, fillchar))
 
-    def rjust(self, width, fillchar=' '):
+    def rjust(self, width: SupportsIndex, fillchar: str = ' ') -> Union[str, 'TextHandler']:
         return TextHandler(super().rjust(width, fillchar))
 
-    def swapcase(self):
+    def swapcase(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().swapcase())
 
-    def title(self):
+    def title(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().title())
 
-    def translate(self, table):
+    def translate(self, table) -> Union[str, 'TextHandler']:
         return TextHandler(super().translate(table))
 
-    def zfill(self, width):
+    def zfill(self, width: SupportsIndex) -> Union[str, 'TextHandler']:
         return TextHandler(super().zfill(width))
 
-    def replace(self, old, new, count=-1):
+    def replace(self, old: str, new: str, count: SupportsIndex = -1) -> Union[str, 'TextHandler']:
         return TextHandler(super().replace(old, new, count))
 
-    def upper(self):
+    def upper(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().upper())
 
-    def lower(self):
+    def lower(self) -> Union[str, 'TextHandler']:
         return TextHandler(super().lower())
     ##############
 
-    def sort(self, reverse: bool = False) -> str:
+    def sort(self, reverse: bool = False) -> Union[str, 'TextHandler']:
         """Return a sorted version of the string"""
         return self.__class__("".join(sorted(self, reverse=reverse)))
 
-    def clean(self) -> str:
+    def clean(self) -> Union[str, 'TextHandler']:
         """Return a new version of the string after removing all white spaces and consecutive spaces"""
         data = re.sub(r'[\t|\r|\n]', '', self)
         data = re.sub(' +', ' ', data)
@@ -105,10 +127,32 @@ class TextHandler(str):
         # Check this out: https://github.com/ijl/orjson/issues/445
         return loads(str(self))
 
+    @typing.overload
+    def re(
+        self,
+        regex: Union[str, Pattern[str]],
+        check_match: Literal[True],
+        replace_entities: bool = True,
+        clean_match: bool = False,
+        case_sensitive: bool = False,
+    ) -> bool:
+        ...
+
+    @typing.overload
+    def re(
+        self,
+        regex: Union[str, Pattern[str]],
+        replace_entities: bool = True,
+        clean_match: bool = False,
+        case_sensitive: bool = False,
+        check_match: Literal[False] = False,
+    ) -> "TextHandlers[TextHandler]":
+        ...
+
     def re(
         self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
         case_sensitive: bool = False, check_match: bool = False
-    ) -> Union[List[str], bool]:
+    ) -> Union["TextHandlers[TextHandler]", bool]:
         """Apply the given regex to the current text and return a list of strings with the matches.
 
         :param regex: Can be either a compiled regular expression or a string.
@@ -133,12 +177,12 @@ class TextHandler(str):
         results = flatten(results)
 
         if not replace_entities:
-            return [TextHandler(string) for string in results]
+            return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(string) for string in results]))
 
-        return [TextHandler(_replace_entities(s)) for s in results]
+        return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(_replace_entities(s)) for s in results]))
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) -> Union[str, None]:
+                 clean_match: bool = False, case_sensitive: bool = False) -> "TextHandler":
         """Apply the given regex to text and return the first match if found, otherwise return the default value.
 
         :param regex: Can be either a compiled regular expression or a string.
@@ -158,15 +202,23 @@ class TextHandlers(List[TextHandler]):
     """
     __slots__ = ()
 
-    def __getitem__(self, pos: Union[SupportsIndex, slice]) -> Union[TextHandler, "TextHandlers[TextHandler]"]:
+    @typing.overload
+    def __getitem__(self, pos: SupportsIndex) -> TextHandler:
+        pass
+
+    @typing.overload
+    def __getitem__(self, pos: slice) -> "TextHandlers":
+        pass
+
+    def __getitem__(self, pos: Union[SupportsIndex, slice]) -> Union[TextHandler, "TextHandlers"]:
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
-            return self.__class__(lst)
-        else:
-            return lst
+            lst = [TextHandler(s) for s in lst]
+            return TextHandlers(typing.cast(List[_TextHandlerType], lst))
+        return typing.cast(_TextHandlerType, TextHandler(lst))
 
     def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
-           case_sensitive: bool = False) -> 'List[str]':
+           case_sensitive: bool = False) -> 'TextHandlers[TextHandler]':
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as TextHandlers.
 
@@ -178,10 +230,10 @@ class TextHandlers(List[TextHandler]):
         results = [
             n.re(regex, replace_entities, clean_match, case_sensitive) for n in self
         ]
-        return flatten(results)
+        return TextHandlers(flatten(results))
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) -> Union[str, None]:
+                 clean_match: bool = False, case_sensitive: bool = False) -> TextHandler:
         """Call the ``.re_first()`` method for each element in this list and return
         the first result or the default value otherwise.
 
@@ -210,7 +262,7 @@ class TextHandlers(List[TextHandler]):
     get_all = extract
 
 
-class AttributesHandler(Mapping):
+class AttributesHandler(Mapping[str, _TextHandlerType]):
    """A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
    If standard dictionary is needed, just convert this class to dictionary with `dict` function
    """
@@ -231,7 +283,7 @@ class AttributesHandler(Mapping):
         # Fastest read-only mapping type
         self._data = MappingProxyType(mapping)
 
-    def get(self, key, default=None):
+    def get(self, key: str, default: Optional[str] = None) -> Union[_TextHandlerType, None]:
         """Acts like standard dictionary `.get()` method"""
         return self._data.get(key, default)
 
@@ -253,7 +305,7 @@ class AttributesHandler(Mapping):
         """Convert current attributes to JSON string if the attributes are JSON serializable otherwise throws error"""
         return dumps(dict(self._data))
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> _TextHandlerType:
         return self._data[key]
 
     def __iter__(self):
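The net effect of the `_TextHandlerType` TypeVar and the overloads above is that slicing, `split`, and the regex helpers now stay inside Scrapling's own string types instead of decaying to plain `str`/`list`. A small behavioral sketch of what these hunks imply (assuming the classes are importable from `scrapling.core.custom_types`, as the imports above suggest):

from scrapling.core.custom_types import TextHandler, TextHandlers

text = TextHandler('foo bar foo')

# .split() now returns TextHandlers instead of a plain list of str
words = text.split()
assert isinstance(words, TextHandlers)

# .re() now returns TextHandlers of TextHandler matches (was List[str])
matches = text.re(r'foo')
assert all(isinstance(m, TextHandler) for m in matches)

# .re_first() is now annotated to return a TextHandler, so string
# helpers like .clean() remain chainable on the result
first_match = text.re_first(r'\w+')
assert isinstance(first_match, TextHandler)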
scrapling/core/translator.py CHANGED
@@ -139,6 +139,6 @@ class TranslatorMixin:
 
 
 class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
-    @lru_cache(maxsize=256)
+    @lru_cache(maxsize=2048)
     def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
         return super().css_to_xpath(css, prefix)
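Bumping `maxsize` from 256 to 2048 lets far more distinct CSS selectors keep their compiled XPath translations cached per process; beyond the old limit, the least recently used entries were evicted and re-translated. A generic sketch of the caching pattern (the function body is illustrative, not scrapling's real translation):

from functools import lru_cache

@lru_cache(maxsize=2048)  # each distinct selector is translated only once
def css_to_xpath(css: str) -> str:
    # stand-in for the real cssselect-based translation
    return f"descendant-or-self::{css}"

css_to_xpath('div.product')
css_to_xpath('div.product')        # second call is served from the cache
print(css_to_xpath.cache_info())   # CacheInfo(hits=1, misses=1, maxsize=2048, currsize=1)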
scrapling/defaults.py CHANGED
@@ -1,7 +1,10 @@
-from .fetchers import AsyncFetcher, Fetcher, PlayWrightFetcher, StealthyFetcher
+from .fetchers import AsyncFetcher as _AsyncFetcher
+from .fetchers import Fetcher as _Fetcher
+from .fetchers import PlayWrightFetcher as _PlayWrightFetcher
+from .fetchers import StealthyFetcher as _StealthyFetcher
 
 # If you are going to use Fetchers with the default settings, import them from this file instead for a cleaner looking code
-Fetcher = Fetcher()
-AsyncFetcher = AsyncFetcher()
-StealthyFetcher = StealthyFetcher()
-PlayWrightFetcher = PlayWrightFetcher()
+Fetcher = _Fetcher()
+AsyncFetcher = _AsyncFetcher()
+StealthyFetcher = _StealthyFetcher()
+PlayWrightFetcher = _PlayWrightFetcher()
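The private aliases exist because the module rebinds each class name to an instance; importing a class and then shadowing its name with an instance of itself confuses type checkers and re-imports. Usage is unchanged; a minimal sketch with the default settings:

# Ready-made instances; import these instead of instantiating the classes.
from scrapling.defaults import Fetcher

page = Fetcher.get('https://example.com')  # returns scrapling's Response object
print(page.status)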
scrapling/engines/camo.py CHANGED
@@ -19,7 +19,7 @@ class CamoufoxEngine:
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
             wait_selector_state: Optional[SelectorWaitStates] = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
-            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False,
             geoip: Optional[bool] = False,
             adaptor_arguments: Dict = None,
     ):
@@ -36,7 +36,7 @@ class CamoufoxEngine:
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
-        :param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled.
+        :param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
         :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
         :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
@@ -89,12 +89,14 @@ class CamoufoxEngine:
 
         def handle_response(finished_response):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response
 
         with Camoufox(
             geoip=self.geoip,
             proxy=self.proxy,
+            disable_coop=True,
+            enable_cache=True,
             addons=self.addons,
             exclude_addons=addons,
             headless=self.headless,
@@ -133,7 +135,6 @@ class CamoufoxEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
 
-            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -142,15 +143,15 @@ class CamoufoxEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=page.content(),
-                body=response_bytes,
+                body=page.content().encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=final_response.all_headers(),
-                request_headers=final_response.request.all_headers(),
+                headers=first_response.all_headers(),
+                request_headers=first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -169,12 +170,14 @@ class CamoufoxEngine:
 
         async def handle_response(finished_response):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response
 
         async with AsyncCamoufox(
             geoip=self.geoip,
             proxy=self.proxy,
+            disable_coop=True,
+            enable_cache=True,
             addons=self.addons,
             exclude_addons=addons,
             headless=self.headless,
@@ -213,7 +216,6 @@ class CamoufoxEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
 
-            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -222,15 +224,15 @@ class CamoufoxEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=await page.content(),
-                body=response_bytes,
+                body=(await page.content()).encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await final_response.all_headers(),
-                request_headers=await final_response.request.all_headers(),
+                headers=await first_response.all_headers(),
+                request_headers=await first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             await page.close()
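The added `is_navigation_request()` guard matters because `resource_type == "document"` alone also matches document responses that did not drive the page's navigation, which could overwrite `final_response` with the wrong response. A hedged sketch of the same pattern in plain (sync) Playwright, outside of scrapling's engine:

from playwright.sync_api import sync_playwright

def fetch_final_response(url: str):
    final_response = None

    def handle_response(finished_response):
        nonlocal final_response
        request = finished_response.request
        # "document" alone is too broad; is_navigation_request() keeps only
        # responses whose request actually drove a frame navigation.
        if request.resource_type == "document" and request.is_navigation_request():
            final_response = finished_response

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.on("response", handle_response)
        page.goto(url)
        browser.close()
    return final_response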
scrapling/engines/pw.py CHANGED
@@ -105,7 +105,7 @@ class PlaywrightEngine:
         """
         cdp_url = self.cdp_url
         if self.nstbrowser_mode:
-            if self.nstbrowser_config and type(self.nstbrowser_config) is Dict:
+            if self.nstbrowser_config and isinstance(self.nstbrowser_config, dict):
                 config = self.nstbrowser_config
             else:
                 query = NSTBROWSER_DEFAULT_QUERY.copy()
@@ -206,7 +206,7 @@ class PlaywrightEngine:
 
         def handle_response(finished_response: PlaywrightResponse):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response
 
         with sync_playwright() as p:
@@ -252,7 +252,6 @@ class PlaywrightEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
 
-            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -261,15 +260,15 @@ class PlaywrightEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=page.content(),
-                body=response_bytes,
+                body=page.content().encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=final_response.all_headers(),
-                request_headers=final_response.request.all_headers(),
+                headers=first_response.all_headers(),
+                request_headers=first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -293,7 +292,7 @@ class PlaywrightEngine:
 
         async def handle_response(finished_response: PlaywrightResponse):
             nonlocal final_response
-            if finished_response.request.resource_type == "document":
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
                 final_response = finished_response
 
         async with async_playwright() as p:
@@ -339,7 +338,6 @@ class PlaywrightEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
 
-            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
             # In case we didn't catch a document type somehow
             final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
@@ -348,15 +346,15 @@ class PlaywrightEngine:
             status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=final_response.url,
+                url=page.url,
                 text=await page.content(),
-                body=response_bytes,
+                body=(await page.content()).encode('utf-8'),
                 status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await final_response.all_headers(),
-                request_headers=await final_response.request.all_headers(),
+                headers=await first_response.all_headers(),
+                request_headers=await first_response.request.all_headers(),
                 **self.adaptor_arguments
            )
            await page.close()
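The `nstbrowser_config` change is a real bug fix, not a style cleanup: `typing.Dict` is a typing alias, never the runtime type of any object, so `type(...) is Dict` was always `False` and the configured branch could never run. A tiny demonstration:

from typing import Dict

config = {"key": "value"}

print(type(config) is Dict)      # False: typing.Dict is an alias, not a runtime type
print(type(config) is dict)      # True, but would reject dict subclasses
print(isinstance(config, dict))  # True, and accepts subclasses as well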
scrapling/fetchers.py CHANGED
@@ -143,7 +143,7 @@ class AsyncFetcher(Fetcher):
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         adaptor_arguments = tuple(self.adaptor_arguments.items())
-        response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_post(**kwargs)
+        response_object = await StaticEngine(url, proxy, stealthy_headers, follow_redirects, timeout, retries=retries, adaptor_arguments=adaptor_arguments).async_put(**kwargs)
         return response_object
 
     async def delete(
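This one-word change is also a bug fix: `AsyncFetcher.put` was delegating to `StaticEngine.async_post`, so PUT requests were silently sent as POST. A hedged usage sketch of the corrected behavior (the httpbin URL and the `data` keyword are illustrative):

import asyncio

from scrapling.defaults import AsyncFetcher

async def main():
    # As of this release, this issues a real HTTP PUT; before, it sent a POST.
    response = await AsyncFetcher.put('https://httpbin.org/put', data={'key': 'value'})
    print(response.status)

asyncio.run(main())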
@@ -177,7 +177,7 @@ class StealthyFetcher(BaseFetcher):
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
             wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
-            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -191,7 +191,7 @@ class StealthyFetcher(BaseFetcher):
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param block_webrtc: Blocks WebRTC entirely.
         :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled.
+        :param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
         :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
@@ -235,7 +235,7 @@ class StealthyFetcher(BaseFetcher):
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
             wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
-            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = False, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -249,7 +249,7 @@ class StealthyFetcher(BaseFetcher):
             This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
         :param block_webrtc: Blocks WebRTC entirely.
         :param addons: List of Firefox addons to use. Must be paths to extracted addons.
-        :param disable_ads: Enabled by default, this installs `uBlock Origin` addon on the browser if enabled.
+        :param disable_ads: Disabled by default, this installs `uBlock Origin` addon on the browser if enabled.
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
         :param allow_webgl: Enabled by default. Disabling it WebGL not recommended as many WAFs now checks if WebGL is enabled.
         :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, & spoof the WebRTC IP address.
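Since `disable_ads` now defaults to `False` in both `StealthyFetcher` and `CamoufoxEngine`, uBlock Origin is no longer installed unless explicitly requested. A minimal sketch of opting back in (the URL is illustrative):

from scrapling.defaults import StealthyFetcher

# Opt back in to the old behavior: install uBlock Origin for the session.
page = StealthyFetcher.fetch('https://example.com', disable_ads=True)
print(page.status)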