scrapling 0.2.93__tar.gz → 0.2.94__tar.gz

Files changed (56)
  1. {scrapling-0.2.93/scrapling.egg-info → scrapling-0.2.94}/PKG-INFO +3 -3
  2. {scrapling-0.2.93 → scrapling-0.2.94}/README.md +1 -1
  3. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/__init__.py +1 -1
  4. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/custom_types.py +11 -11
  5. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/camo.py +42 -2
  6. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/pw.py +42 -0
  7. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/static.py +1 -0
  8. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/custom.py +2 -1
  9. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/parser.py +9 -9
  10. {scrapling-0.2.93 → scrapling-0.2.94/scrapling.egg-info}/PKG-INFO +3 -3
  11. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling.egg-info/requires.txt +1 -1
  12. {scrapling-0.2.93 → scrapling-0.2.94}/setup.cfg +1 -1
  13. {scrapling-0.2.93 → scrapling-0.2.94}/setup.py +2 -2
  14. {scrapling-0.2.93 → scrapling-0.2.94}/LICENSE +0 -0
  15. {scrapling-0.2.93 → scrapling-0.2.94}/MANIFEST.in +0 -0
  16. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/cli.py +0 -0
  17. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/__init__.py +0 -0
  18. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/_types.py +0 -0
  19. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/mixins.py +0 -0
  20. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/storage_adaptors.py +0 -0
  21. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/translator.py +0 -0
  22. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/utils.py +0 -0
  23. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/defaults.py +0 -0
  24. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/__init__.py +0 -0
  25. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/constants.py +0 -0
  26. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/__init__.py +0 -0
  27. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/bypasses/navigator_plugins.js +0 -0
  28. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/bypasses/notification_permission.js +0 -0
  29. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -0
  30. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +0 -0
  31. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/bypasses/screen_props.js +0 -0
  32. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/bypasses/webdriver_fully.js +0 -0
  33. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/bypasses/window_chrome.js +0 -0
  34. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/fingerprints.py +0 -0
  35. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/navigation.py +0 -0
  36. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/fetchers.py +0 -0
  37. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling/py.typed +0 -0
  38. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling.egg-info/SOURCES.txt +0 -0
  39. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling.egg-info/dependency_links.txt +0 -0
  40. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling.egg-info/entry_points.txt +0 -0
  41. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling.egg-info/not-zip-safe +0 -0
  42. {scrapling-0.2.93 → scrapling-0.2.94}/scrapling.egg-info/top_level.txt +0 -0
  43. {scrapling-0.2.93 → scrapling-0.2.94}/tests/__init__.py +0 -0
  44. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/__init__.py +0 -0
  45. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/async/__init__.py +0 -0
  46. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/async/test_camoufox.py +0 -0
  47. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/async/test_httpx.py +0 -0
  48. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/async/test_playwright.py +0 -0
  49. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/sync/__init__.py +0 -0
  50. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/sync/test_camoufox.py +0 -0
  51. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/sync/test_httpx.py +0 -0
  52. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/sync/test_playwright.py +0 -0
  53. {scrapling-0.2.93 → scrapling-0.2.94}/tests/fetchers/test_utils.py +0 -0
  54. {scrapling-0.2.93 → scrapling-0.2.94}/tests/parser/__init__.py +0 -0
  55. {scrapling-0.2.93 → scrapling-0.2.94}/tests/parser/test_automatch.py +0 -0
  56. {scrapling-0.2.93 → scrapling-0.2.94}/tests/parser/test_general.py +0 -0
{scrapling-0.2.93/scrapling.egg-info → scrapling-0.2.94}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: scrapling
-Version: 0.2.93
+Version: 0.2.94
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -40,7 +40,7 @@ Requires-Dist: tldextract
 Requires-Dist: httpx[brotli,socks,zstd]
 Requires-Dist: playwright>=1.49.1
 Requires-Dist: rebrowser-playwright>=1.49.1
-Requires-Dist: camoufox[geoip]>=0.4.10
+Requires-Dist: camoufox[geoip]>=0.4.11
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -267,7 +267,7 @@ then use it right away without initializing like:
 page = StealthyFetcher.fetch('https://example.com')
 ```
 
-Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
+Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, `history`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
 > [!NOTE]
 > The `auto_match` argument is enabled by default which is the one you should care about the most as you will see later.
 ### Fetcher
{scrapling-0.2.93 → scrapling-0.2.94}/README.md

@@ -212,7 +212,7 @@ then use it right away without initializing like:
 page = StealthyFetcher.fetch('https://example.com')
 ```
 
-Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
+Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, `history`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
 > [!NOTE]
 > The `auto_match` argument is enabled by default which is the one you should care about the most as you will see later.
 ### Fetcher
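In practice, the new `history` attribute reads like this — a minimal sketch, assuming a hypothetical URL that issues at least one redirect and the no-init usage from `scrapling.defaults` shown in the README snippet above:

```python
# Minimal sketch of the `history` attribute added in 0.2.94.
from scrapling.defaults import Fetcher

# 'http://example.com/redirect' is a hypothetical redirecting URL.
page = Fetcher.get('http://example.com/redirect', follow_redirects=True)
for hop in page.history:        # earlier responses in the redirect chain, oldest first
    print(hop.status, hop.url)  # e.g. 301 http://example.com/redirect
print(page.status, page.url)    # the final response
```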
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling/__init__.py

@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
 from scrapling.parser import Adaptor, Adaptors
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.93"
+__version__ = "0.2.94"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling/core/custom_types.py

@@ -134,7 +134,7 @@ class TextHandler(str):
         check_match: Literal[True],
         replace_entities: bool = True,
         clean_match: bool = False,
-        case_sensitive: bool = False,
+        case_sensitive: bool = True,
     ) -> bool:
         ...
 
@@ -144,26 +144,26 @@ class TextHandler(str):
         regex: Union[str, Pattern[str]],
         replace_entities: bool = True,
         clean_match: bool = False,
-        case_sensitive: bool = False,
+        case_sensitive: bool = True,
         check_match: Literal[False] = False,
     ) -> "TextHandlers[TextHandler]":
         ...
 
     def re(
         self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
-        case_sensitive: bool = False, check_match: bool = False
+        case_sensitive: bool = True, check_match: bool = False
     ) -> Union["TextHandlers[TextHandler]", bool]:
         """Apply the given regex to the current text and return a list of strings with the matches.
 
         :param regex: Can be either a compiled regular expression or a string.
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
         :param check_match: used to quickly check if this regex matches or not without any operations on the results
 
         """
         if isinstance(regex, str):
-            if not case_sensitive:
+            if case_sensitive:
                 regex = re.compile(regex, re.UNICODE)
             else:
                 regex = re.compile(regex, flags=re.UNICODE | re.IGNORECASE)
@@ -182,14 +182,14 @@ class TextHandler(str):
         return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(_replace_entities(s)) for s in results]))
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) -> "TextHandler":
+                 clean_match: bool = False, case_sensitive: bool = True) -> "TextHandler":
         """Apply the given regex to text and return the first match if found, otherwise return the default value.
 
         :param regex: Can be either a compiled regular expression or a string.
         :param default: The default value to be returned if there is no match
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
 
         """
         result = self.re(regex, replace_entities, clean_match=clean_match, case_sensitive=case_sensitive)
@@ -218,14 +218,14 @@ class TextHandlers(List[TextHandler]):
         return typing.cast(_TextHandlerType, TextHandler(lst))
 
     def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
-           case_sensitive: bool = False) -> 'TextHandlers[TextHandler]':
+           case_sensitive: bool = True) -> 'TextHandlers[TextHandler]':
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as TextHandlers.
 
         :param regex: Can be either a compiled regular expression or a string.
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
         """
         results = [
             n.re(regex, replace_entities, clean_match, case_sensitive) for n in self
@@ -233,7 +233,7 @@ class TextHandlers(List[TextHandler]):
         return TextHandlers(flatten(results))
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) -> TextHandler:
+                 clean_match: bool = False, case_sensitive: bool = True) -> TextHandler:
         """Call the ``.re_first()`` method for each element in this list and return
         the first result or the default value otherwise.
 
@@ -241,7 +241,7 @@ class TextHandlers(List[TextHandler]):
         :param default: The default value to be returned if there is no match
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
         """
         for n in self:
             for result in n.re(regex, replace_entities, clean_match, case_sensitive):
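Note that flipping the `case_sensitive` default from `False` to `True` is a behavior change for callers, not just a docstring correction. A small sketch of the difference, using the signatures from the hunks above:

```python
# The 0.2.94 default: regex helpers match case-sensitively unless told otherwise.
from scrapling.core.custom_types import TextHandler

text = TextHandler('Scrapling, SCRAPLING, scrapling')
print(text.re(r'scrapling'))                        # only the lowercase occurrence now
print(text.re(r'scrapling', case_sensitive=False))  # all three, compiled with re.IGNORECASE
```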
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/camo.py

@@ -95,7 +95,6 @@ class CamoufoxEngine:
         with Camoufox(
             geoip=self.geoip,
             proxy=self.proxy,
-            disable_coop=True,
             enable_cache=True,
             addons=self.addons,
             exclude_addons=addons,
@@ -142,6 +141,26 @@ class CamoufoxEngine:
             # PlayWright API sometimes give empty status text for some reason!
             status_text = final_response.status_text or StatusText.get(final_response.status)
 
+            history = []
+            current_request = first_response.request.redirected_from
+            while current_request:
+                current_response = current_request.response()
+
+                history.insert(0, Response(
+                    url=current_request.url,
+                    # using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
+                    text='',
+                    body=b'',
+                    status=current_response.status if current_response else 301,
+                    reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
+                    encoding=current_response.headers.get('content-type', '') or 'utf-8',
+                    cookies={},
+                    headers=current_response.all_headers() if current_response else {},
+                    request_headers=current_request.all_headers(),
+                    **self.adaptor_arguments
+                ))
+                current_request = current_request.redirected_from
+
             response = Response(
                 url=page.url,
                 text=page.content(),
@@ -152,6 +171,7 @@ class CamoufoxEngine:
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
                 headers=first_response.all_headers(),
                 request_headers=first_response.request.all_headers(),
+                history=history,
                 **self.adaptor_arguments
             )
             page.close()
@@ -176,7 +196,6 @@ class CamoufoxEngine:
         async with AsyncCamoufox(
             geoip=self.geoip,
             proxy=self.proxy,
-            disable_coop=True,
             enable_cache=True,
             addons=self.addons,
             exclude_addons=addons,
@@ -223,6 +242,26 @@ class CamoufoxEngine:
             # PlayWright API sometimes give empty status text for some reason!
             status_text = final_response.status_text or StatusText.get(final_response.status)
 
+            history = []
+            current_request = first_response.request.redirected_from
+            while current_request:
+                current_response = await current_request.response()
+
+                history.insert(0, Response(
+                    url=current_request.url,
+                    # using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
+                    text='',
+                    body=b'',
+                    status=current_response.status if current_response else 301,
+                    reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
+                    encoding=current_response.headers.get('content-type', '') or 'utf-8',
+                    cookies={},
+                    headers=await current_response.all_headers() if current_response else {},
+                    request_headers=await current_request.all_headers(),
+                    **self.adaptor_arguments
+                ))
+                current_request = current_request.redirected_from
+
             response = Response(
                 url=page.url,
                 text=await page.content(),
@@ -233,6 +272,7 @@ class CamoufoxEngine:
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
                 headers=await first_response.all_headers(),
                 request_headers=await first_response.request.all_headers(),
+                history=history,
                 **self.adaptor_arguments
             )
             await page.close()
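Background for the loop above: Playwright links each request to its predecessor through `request.redirected_from`, so the walk runs from the final request back to the first, and `history.insert(0, ...)` restores chronological order. A standalone sketch of that underlying Playwright behavior (plain Playwright, not Scrapling API; the URL is hypothetical):

```python
# Walking Playwright's redirect chain, the pattern camo.py and pw.py build on.
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    final = page.goto('http://example.com/redirect')  # hypothetical redirecting URL
    request = final.request.redirected_from           # None unless a redirect happened
    while request:
        resp = request.response()                     # can be None for redirect hops
        print(request.url, '->', resp.status if resp else 'n/a')
        request = request.redirected_from             # step back toward the first request
    browser.close()
```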
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/pw.py

@@ -259,6 +259,26 @@ class PlaywrightEngine:
         # PlayWright API sometimes give empty status text for some reason!
         status_text = final_response.status_text or StatusText.get(final_response.status)
 
+        history = []
+        current_request = first_response.request.redirected_from
+        while current_request:
+            current_response = current_request.response()
+
+            history.insert(0, Response(
+                url=current_request.url,
+                # using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
+                text='',
+                body=b'',
+                status=current_response.status if current_response else 301,
+                reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
+                encoding=current_response.headers.get('content-type', '') or 'utf-8',
+                cookies={},
+                headers=current_response.all_headers() if current_response else {},
+                request_headers=current_request.all_headers(),
+                **self.adaptor_arguments
+            ))
+            current_request = current_request.redirected_from
+
         response = Response(
             url=page.url,
             text=page.content(),
@@ -269,6 +289,7 @@ class PlaywrightEngine:
             cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
             headers=first_response.all_headers(),
             request_headers=first_response.request.all_headers(),
+            history=history,
             **self.adaptor_arguments
         )
         page.close()
@@ -345,6 +366,26 @@ class PlaywrightEngine:
         # PlayWright API sometimes give empty status text for some reason!
         status_text = final_response.status_text or StatusText.get(final_response.status)
 
+        history = []
+        current_request = first_response.request.redirected_from
+        while current_request:
+            current_response = await current_request.response()
+
+            history.insert(0, Response(
+                url=current_request.url,
+                # using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
+                text='',
+                body=b'',
+                status=current_response.status if current_response else 301,
+                reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
+                encoding=current_response.headers.get('content-type', '') or 'utf-8',
+                cookies={},
+                headers=await current_response.all_headers() if current_response else {},
+                request_headers=await current_request.all_headers(),
+                **self.adaptor_arguments
+            ))
+            current_request = current_request.redirected_from
+
         response = Response(
             url=page.url,
             text=await page.content(),
@@ -355,6 +396,7 @@ class PlaywrightEngine:
             cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
             headers=await first_response.all_headers(),
             request_headers=await first_response.request.all_headers(),
+            history=history,
             **self.adaptor_arguments
         )
         await page.close()
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/static.py

@@ -72,6 +72,7 @@ class StaticEngine:
             headers=dict(response.headers),
             request_headers=dict(response.request.headers),
             method=response.request.method,
+            history=[self._prepare_response(redirection) for redirection in response.history],
             **self.adaptor_arguments
         )
 
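The static engine gets redirect history almost for free: httpx already records prior responses on `response.history`, so the one added line just maps each hop through the engine's own `_prepare_response`. A sketch of the underlying httpx behavior (hypothetical URL):

```python
# httpx tracks redirects natively; StaticEngine wraps each hop in a Scrapling Response.
import httpx

response = httpx.get('http://example.com/redirect', follow_redirects=True)  # hypothetical URL
for hop in response.history:               # earlier responses, oldest first
    print(hop.status_code, hop.headers.get('location'))
print(response.status_code, response.url)  # the final response
```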
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling/engines/toolbelt/custom.py

@@ -85,13 +85,14 @@ class Response(Adaptor):
     """This class is returned by all engines as a way to unify response type between different libraries."""
 
     def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict,
-                 encoding: str = 'utf-8', method: str = 'GET', **adaptor_arguments: Dict):
+                 encoding: str = 'utf-8', method: str = 'GET', history: List = None, **adaptor_arguments: Dict):
         automatch_domain = adaptor_arguments.pop('automatch_domain', None)
         self.status = status
         self.reason = reason
         self.cookies = cookies
         self.headers = headers
         self.request_headers = request_headers
+        self.history = history or []
         encoding = ResponseEncoding.get_value(encoding, text)
         super().__init__(text=text, body=body, url=automatch_domain or url, encoding=encoding, **adaptor_arguments)
         # For back-ward compatibility
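A design note on the new parameter: defaulting to `None` and normalizing with `history or []` sidesteps Python's shared-mutable-default pitfall. A generic illustration (not Scrapling code):

```python
# Why `history: List = None` plus `history or []` beats `history: List = []`.
class Bad:
    def __init__(self, history: list = []):
        self.history = history            # every instance shares ONE default list

class Good:
    def __init__(self, history: list = None):
        self.history = history or []      # each instance gets its own fresh list

a, b = Bad(), Bad()
a.history.append('hop')
print(b.history)        # ['hop'] -- the default leaked across instances
print(Good().history)   # []     -- isolated
```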
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling/parser.py

@@ -132,7 +132,7 @@ class Adaptor(SelectorsGeneration):
         self.__tag = None
         # No need to check if all response attributes exist or not because if `status` exist, then the rest exist (Save some CPU cycles for speed)
         self.__response_data = {
-            key: getattr(self, key) for key in ('status', 'reason', 'cookies', 'headers', 'request_headers',)
+            key: getattr(self, key) for key in ('status', 'reason', 'cookies', 'history', 'headers', 'request_headers',)
         } if hasattr(self, 'status') else {}
 
         # Node functionalities, I wanted to move to separate Mixin class but it had slight impact on performance
@@ -763,25 +763,25 @@ class Adaptor(SelectorsGeneration):
         return self.get_all_text(strip=True).json()
 
     def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True,
-           clean_match: bool = False, case_sensitive: bool = False) -> TextHandlers:
+           clean_match: bool = False, case_sensitive: bool = True) -> TextHandlers:
         """Apply the given regex to the current text and return a list of strings with the matches.
 
         :param regex: Can be either a compiled regular expression or a string.
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
         """
         return self.text.re(regex, replace_entities, clean_match, case_sensitive)
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) -> TextHandler:
+                 clean_match: bool = False, case_sensitive: bool = True) -> TextHandler:
         """Apply the given regex to text and return the first match if found, otherwise return the default value.
 
         :param regex: Can be either a compiled regular expression or a string.
         :param default: The default value to be returned if there is no match
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
         """
         return self.text.re_first(regex, default, replace_entities, clean_match, case_sensitive)
 
@@ -1009,14 +1009,14 @@ class Adaptors(List[Adaptor]):
         return self.__class__(flatten(results))
 
     def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True,
-           clean_match: bool = False, case_sensitive: bool = False) -> TextHandlers[TextHandler]:
+           clean_match: bool = False, case_sensitive: bool = True) -> TextHandlers[TextHandler]:
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as List of TextHandler.
 
         :param regex: Can be either a compiled regular expression or a string.
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
         """
         results = [
             n.text.re(regex, replace_entities, clean_match, case_sensitive) for n in self
@@ -1024,7 +1024,7 @@ class Adaptors(List[Adaptor]):
         return TextHandlers(flatten(results))
 
     def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
-                 clean_match: bool = False, case_sensitive: bool = False) -> TextHandler:
+                 clean_match: bool = False, case_sensitive: bool = True) -> TextHandler:
         """Call the ``.re_first()`` method for each element in this list and return
         the first result or the default value otherwise.
 
@@ -1032,7 +1032,7 @@ class Adaptors(List[Adaptor]):
         :param default: The default value to be returned if there is no match
         :param replace_entities: if enabled character entity references are replaced by their corresponding character
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
-        :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
+        :param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
         """
         for n in self:
             for result in n.re(regex, replace_entities, clean_match, case_sensitive):
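The same flipped default surfaces on `Adaptor` and `Adaptors`, which delegate straight to `TextHandler`. A quick parser-level sketch, assuming `Adaptor` accepts raw markup through its `text` argument (hypothetical HTML):

```python
# The case_sensitive change as seen from the parser's regex helpers.
from scrapling import Adaptor

page = Adaptor(text='<p>Total: 42 USD</p>', auto_match=False)
print(page.re_first(r'total: \d+'))                        # None: case-sensitive by default
print(page.re_first(r'total: \d+', case_sensitive=False))  # 'Total: 42'
```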
{scrapling-0.2.93 → scrapling-0.2.94/scrapling.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: scrapling
-Version: 0.2.93
+Version: 0.2.94
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -40,7 +40,7 @@ Requires-Dist: tldextract
 Requires-Dist: httpx[brotli,socks,zstd]
 Requires-Dist: playwright>=1.49.1
 Requires-Dist: rebrowser-playwright>=1.49.1
-Requires-Dist: camoufox[geoip]>=0.4.10
+Requires-Dist: camoufox[geoip]>=0.4.11
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -267,7 +267,7 @@ then use it right away without initializing like:
 page = StealthyFetcher.fetch('https://example.com')
 ```
 
-Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
+Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, `history`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
 > [!NOTE]
 > The `auto_match` argument is enabled by default which is the one you should care about the most as you will see later.
 ### Fetcher
{scrapling-0.2.93 → scrapling-0.2.94}/scrapling.egg-info/requires.txt

@@ -7,4 +7,4 @@ tldextract
 httpx[brotli,socks,zstd]
 playwright>=1.49.1
 rebrowser-playwright>=1.49.1
-camoufox[geoip]>=0.4.10
+camoufox[geoip]>=0.4.11
{scrapling-0.2.93 → scrapling-0.2.94}/setup.cfg

@@ -1,6 +1,6 @@
 [metadata]
 name = scrapling
-version = 0.2.93
+version = 0.2.94
 author = Karim Shoair
 author_email = karim.shoair@pm.me
 description = Scrapling is an undetectable, powerful, flexible, adaptive, and high-performance web scraping library for Python.
{scrapling-0.2.93 → scrapling-0.2.94}/setup.py

@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="scrapling",
-    version="0.2.93",
+    version="0.2.94",
     description="""Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 simplifies the process of extracting data from websites, even when they undergo structural changes, and offers
 impressive speed improvements over many popular scraping tools.""",
@@ -61,7 +61,7 @@ setup(
         'httpx[brotli,zstd, socks]',
         'playwright>=1.49.1',
         'rebrowser-playwright>=1.49.1',
-        'camoufox[geoip]>=0.4.10'
+        'camoufox[geoip]>=0.4.11'
     ],
     python_requires=">=3.9",
     url="https://github.com/D4Vinci/Scrapling",