scrapling 0.2.93__py3-none-any.whl → 0.2.94__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- scrapling/__init__.py +1 -1
- scrapling/core/custom_types.py +11 -11
- scrapling/engines/camo.py +42 -2
- scrapling/engines/pw.py +42 -0
- scrapling/engines/static.py +1 -0
- scrapling/engines/toolbelt/custom.py +2 -1
- scrapling/parser.py +9 -9
- {scrapling-0.2.93.dist-info → scrapling-0.2.94.dist-info}/METADATA +3 -3
- {scrapling-0.2.93.dist-info → scrapling-0.2.94.dist-info}/RECORD +13 -13
- {scrapling-0.2.93.dist-info → scrapling-0.2.94.dist-info}/LICENSE +0 -0
- {scrapling-0.2.93.dist-info → scrapling-0.2.94.dist-info}/WHEEL +0 -0
- {scrapling-0.2.93.dist-info → scrapling-0.2.94.dist-info}/entry_points.txt +0 -0
- {scrapling-0.2.93.dist-info → scrapling-0.2.94.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
|
|
5
5
|
from scrapling.parser import Adaptor, Adaptors
|
6
6
|
|
7
7
|
__author__ = "Karim Shoair (karim.shoair@pm.me)"
|
8
|
-
__version__ = "0.2.93"
|
8
|
+
__version__ = "0.2.94"
|
9
9
|
__copyright__ = "Copyright (c) 2024 Karim Shoair"
|
10
10
|
|
11
11
|
|
scrapling/core/custom_types.py
CHANGED
@@ -134,7 +134,7 @@ class TextHandler(str):
|
|
134
134
|
check_match: Literal[True],
|
135
135
|
replace_entities: bool = True,
|
136
136
|
clean_match: bool = False,
|
137
|
-
case_sensitive: bool =
|
137
|
+
case_sensitive: bool = True,
|
138
138
|
) -> bool:
|
139
139
|
...
|
140
140
|
|
@@ -144,26 +144,26 @@ class TextHandler(str):
|
|
144
144
|
regex: Union[str, Pattern[str]],
|
145
145
|
replace_entities: bool = True,
|
146
146
|
clean_match: bool = False,
|
147
|
-
case_sensitive: bool =
|
147
|
+
case_sensitive: bool = True,
|
148
148
|
check_match: Literal[False] = False,
|
149
149
|
) -> "TextHandlers[TextHandler]":
|
150
150
|
...
|
151
151
|
|
152
152
|
def re(
|
153
153
|
self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
|
154
|
-
case_sensitive: bool =
|
154
|
+
case_sensitive: bool = True, check_match: bool = False
|
155
155
|
) -> Union["TextHandlers[TextHandler]", bool]:
|
156
156
|
"""Apply the given regex to the current text and return a list of strings with the matches.
|
157
157
|
|
158
158
|
:param regex: Can be either a compiled regular expression or a string.
|
159
159
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
160
160
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
161
|
-
:param case_sensitive: if
|
161
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
162
162
|
:param check_match: used to quickly check if this regex matches or not without any operations on the results
|
163
163
|
|
164
164
|
"""
|
165
165
|
if isinstance(regex, str):
|
166
|
-
if
|
166
|
+
if case_sensitive:
|
167
167
|
regex = re.compile(regex, re.UNICODE)
|
168
168
|
else:
|
169
169
|
regex = re.compile(regex, flags=re.UNICODE | re.IGNORECASE)
|
@@ -182,14 +182,14 @@ class TextHandler(str):
|
|
182
182
|
return TextHandlers(typing.cast(List[_TextHandlerType], [TextHandler(_replace_entities(s)) for s in results]))
|
183
183
|
|
184
184
|
def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
|
185
|
-
clean_match: bool = False, case_sensitive: bool =
|
185
|
+
clean_match: bool = False, case_sensitive: bool = True) -> "TextHandler":
|
186
186
|
"""Apply the given regex to text and return the first match if found, otherwise return the default value.
|
187
187
|
|
188
188
|
:param regex: Can be either a compiled regular expression or a string.
|
189
189
|
:param default: The default value to be returned if there is no match
|
190
190
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
191
191
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
192
|
-
:param case_sensitive: if
|
192
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
193
193
|
|
194
194
|
"""
|
195
195
|
result = self.re(regex, replace_entities, clean_match=clean_match, case_sensitive=case_sensitive)
|
@@ -218,14 +218,14 @@ class TextHandlers(List[TextHandler]):
|
|
218
218
|
return typing.cast(_TextHandlerType, TextHandler(lst))
|
219
219
|
|
220
220
|
def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
|
221
|
-
case_sensitive: bool =
|
221
|
+
case_sensitive: bool = True) -> 'TextHandlers[TextHandler]':
|
222
222
|
"""Call the ``.re()`` method for each element in this list and return
|
223
223
|
their results flattened as TextHandlers.
|
224
224
|
|
225
225
|
:param regex: Can be either a compiled regular expression or a string.
|
226
226
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
227
227
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
228
|
-
:param case_sensitive: if
|
228
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
229
229
|
"""
|
230
230
|
results = [
|
231
231
|
n.re(regex, replace_entities, clean_match, case_sensitive) for n in self
|
@@ -233,7 +233,7 @@ class TextHandlers(List[TextHandler]):
|
|
233
233
|
return TextHandlers(flatten(results))
|
234
234
|
|
235
235
|
def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
|
236
|
-
clean_match: bool = False, case_sensitive: bool =
|
236
|
+
clean_match: bool = False, case_sensitive: bool = True) -> TextHandler:
|
237
237
|
"""Call the ``.re_first()`` method for each element in this list and return
|
238
238
|
the first result or the default value otherwise.
|
239
239
|
|
@@ -241,7 +241,7 @@ class TextHandlers(List[TextHandler]):
|
|
241
241
|
:param default: The default value to be returned if there is no match
|
242
242
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
243
243
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
244
|
-
:param case_sensitive: if
|
244
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
245
245
|
"""
|
246
246
|
for n in self:
|
247
247
|
for result in n.re(regex, replace_entities, clean_match, case_sensitive):
|
scrapling/engines/camo.py
CHANGED
@@ -95,7 +95,6 @@ class CamoufoxEngine:
|
|
95
95
|
with Camoufox(
|
96
96
|
geoip=self.geoip,
|
97
97
|
proxy=self.proxy,
|
98
|
-
disable_coop=True,
|
99
98
|
enable_cache=True,
|
100
99
|
addons=self.addons,
|
101
100
|
exclude_addons=addons,
|
@@ -142,6 +141,26 @@ class CamoufoxEngine:
|
|
142
141
|
# PlayWright API sometimes give empty status text for some reason!
|
143
142
|
status_text = final_response.status_text or StatusText.get(final_response.status)
|
144
143
|
|
144
|
+
history = []
|
145
|
+
current_request = first_response.request.redirected_from
|
146
|
+
while current_request:
|
147
|
+
current_response = current_request.response()
|
148
|
+
|
149
|
+
history.insert(0, Response(
|
150
|
+
url=current_request.url,
|
151
|
+
# using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
|
152
|
+
text='',
|
153
|
+
body=b'',
|
154
|
+
status=current_response.status if current_response else 301,
|
155
|
+
reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
|
156
|
+
encoding=current_response.headers.get('content-type', '') or 'utf-8',
|
157
|
+
cookies={},
|
158
|
+
headers=current_response.all_headers() if current_response else {},
|
159
|
+
request_headers=current_request.all_headers(),
|
160
|
+
**self.adaptor_arguments
|
161
|
+
))
|
162
|
+
current_request = current_request.redirected_from
|
163
|
+
|
145
164
|
response = Response(
|
146
165
|
url=page.url,
|
147
166
|
text=page.content(),
|
@@ -152,6 +171,7 @@ class CamoufoxEngine:
|
|
152
171
|
cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
|
153
172
|
headers=first_response.all_headers(),
|
154
173
|
request_headers=first_response.request.all_headers(),
|
174
|
+
history=history,
|
155
175
|
**self.adaptor_arguments
|
156
176
|
)
|
157
177
|
page.close()
|
@@ -176,7 +196,6 @@ class CamoufoxEngine:
|
|
176
196
|
async with AsyncCamoufox(
|
177
197
|
geoip=self.geoip,
|
178
198
|
proxy=self.proxy,
|
179
|
-
disable_coop=True,
|
180
199
|
enable_cache=True,
|
181
200
|
addons=self.addons,
|
182
201
|
exclude_addons=addons,
|
@@ -223,6 +242,26 @@ class CamoufoxEngine:
|
|
223
242
|
# PlayWright API sometimes give empty status text for some reason!
|
224
243
|
status_text = final_response.status_text or StatusText.get(final_response.status)
|
225
244
|
|
245
|
+
history = []
|
246
|
+
current_request = first_response.request.redirected_from
|
247
|
+
while current_request:
|
248
|
+
current_response = await current_request.response()
|
249
|
+
|
250
|
+
history.insert(0, Response(
|
251
|
+
url=current_request.url,
|
252
|
+
# using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
|
253
|
+
text='',
|
254
|
+
body=b'',
|
255
|
+
status=current_response.status if current_response else 301,
|
256
|
+
reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
|
257
|
+
encoding=current_response.headers.get('content-type', '') or 'utf-8',
|
258
|
+
cookies={},
|
259
|
+
headers=await current_response.all_headers() if current_response else {},
|
260
|
+
request_headers=await current_request.all_headers(),
|
261
|
+
**self.adaptor_arguments
|
262
|
+
))
|
263
|
+
current_request = current_request.redirected_from
|
264
|
+
|
226
265
|
response = Response(
|
227
266
|
url=page.url,
|
228
267
|
text=await page.content(),
|
@@ -233,6 +272,7 @@ class CamoufoxEngine:
|
|
233
272
|
cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
|
234
273
|
headers=await first_response.all_headers(),
|
235
274
|
request_headers=await first_response.request.all_headers(),
|
275
|
+
history=history,
|
236
276
|
**self.adaptor_arguments
|
237
277
|
)
|
238
278
|
await page.close()
|
scrapling/engines/pw.py
CHANGED
@@ -259,6 +259,26 @@ class PlaywrightEngine:
|
|
259
259
|
# PlayWright API sometimes give empty status text for some reason!
|
260
260
|
status_text = final_response.status_text or StatusText.get(final_response.status)
|
261
261
|
|
262
|
+
history = []
|
263
|
+
current_request = first_response.request.redirected_from
|
264
|
+
while current_request:
|
265
|
+
current_response = current_request.response()
|
266
|
+
|
267
|
+
history.insert(0, Response(
|
268
|
+
url=current_request.url,
|
269
|
+
# using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
|
270
|
+
text='',
|
271
|
+
body=b'',
|
272
|
+
status=current_response.status if current_response else 301,
|
273
|
+
reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
|
274
|
+
encoding=current_response.headers.get('content-type', '') or 'utf-8',
|
275
|
+
cookies={},
|
276
|
+
headers=current_response.all_headers() if current_response else {},
|
277
|
+
request_headers=current_request.all_headers(),
|
278
|
+
**self.adaptor_arguments
|
279
|
+
))
|
280
|
+
current_request = current_request.redirected_from
|
281
|
+
|
262
282
|
response = Response(
|
263
283
|
url=page.url,
|
264
284
|
text=page.content(),
|
@@ -269,6 +289,7 @@ class PlaywrightEngine:
|
|
269
289
|
cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
|
270
290
|
headers=first_response.all_headers(),
|
271
291
|
request_headers=first_response.request.all_headers(),
|
292
|
+
history=history,
|
272
293
|
**self.adaptor_arguments
|
273
294
|
)
|
274
295
|
page.close()
|
@@ -345,6 +366,26 @@ class PlaywrightEngine:
|
|
345
366
|
# PlayWright API sometimes give empty status text for some reason!
|
346
367
|
status_text = final_response.status_text or StatusText.get(final_response.status)
|
347
368
|
|
369
|
+
history = []
|
370
|
+
current_request = first_response.request.redirected_from
|
371
|
+
while current_request:
|
372
|
+
current_response = await current_request.response()
|
373
|
+
|
374
|
+
history.insert(0, Response(
|
375
|
+
url=current_request.url,
|
376
|
+
# using current_response.text() will trigger "Error: Response.text: Response body is unavailable for redirect responses"
|
377
|
+
text='',
|
378
|
+
body=b'',
|
379
|
+
status=current_response.status if current_response else 301,
|
380
|
+
reason=(current_response.status_text or StatusText.get(current_response.status)) if current_response else StatusText.get(301),
|
381
|
+
encoding=current_response.headers.get('content-type', '') or 'utf-8',
|
382
|
+
cookies={},
|
383
|
+
headers=await current_response.all_headers() if current_response else {},
|
384
|
+
request_headers=await current_request.all_headers(),
|
385
|
+
**self.adaptor_arguments
|
386
|
+
))
|
387
|
+
current_request = current_request.redirected_from
|
388
|
+
|
348
389
|
response = Response(
|
349
390
|
url=page.url,
|
350
391
|
text=await page.content(),
|
@@ -355,6 +396,7 @@ class PlaywrightEngine:
|
|
355
396
|
cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
|
356
397
|
headers=await first_response.all_headers(),
|
357
398
|
request_headers=await first_response.request.all_headers(),
|
399
|
+
history=history,
|
358
400
|
**self.adaptor_arguments
|
359
401
|
)
|
360
402
|
await page.close()
|
scrapling/engines/static.py
CHANGED
@@ -72,6 +72,7 @@ class StaticEngine:
|
|
72
72
|
headers=dict(response.headers),
|
73
73
|
request_headers=dict(response.request.headers),
|
74
74
|
method=response.request.method,
|
75
|
+
history=[self._prepare_response(redirection) for redirection in response.history],
|
75
76
|
**self.adaptor_arguments
|
76
77
|
)
|
77
78
|
|
@@ -85,13 +85,14 @@ class Response(Adaptor):
|
|
85
85
|
"""This class is returned by all engines as a way to unify response type between different libraries."""
|
86
86
|
|
87
87
|
def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict,
|
88
|
-
encoding: str = 'utf-8', method: str = 'GET', **adaptor_arguments: Dict):
|
88
|
+
encoding: str = 'utf-8', method: str = 'GET', history: List = None, **adaptor_arguments: Dict):
|
89
89
|
automatch_domain = adaptor_arguments.pop('automatch_domain', None)
|
90
90
|
self.status = status
|
91
91
|
self.reason = reason
|
92
92
|
self.cookies = cookies
|
93
93
|
self.headers = headers
|
94
94
|
self.request_headers = request_headers
|
95
|
+
self.history = history or []
|
95
96
|
encoding = ResponseEncoding.get_value(encoding, text)
|
96
97
|
super().__init__(text=text, body=body, url=automatch_domain or url, encoding=encoding, **adaptor_arguments)
|
97
98
|
# For back-ward compatibility
|
scrapling/parser.py
CHANGED
@@ -132,7 +132,7 @@ class Adaptor(SelectorsGeneration):
|
|
132
132
|
self.__tag = None
|
133
133
|
# No need to check if all response attributes exist or not because if `status` exist, then the rest exist (Save some CPU cycles for speed)
|
134
134
|
self.__response_data = {
|
135
|
-
key: getattr(self, key) for key in ('status', 'reason', 'cookies', 'headers', 'request_headers',)
|
135
|
+
key: getattr(self, key) for key in ('status', 'reason', 'cookies', 'history', 'headers', 'request_headers',)
|
136
136
|
} if hasattr(self, 'status') else {}
|
137
137
|
|
138
138
|
# Node functionalities, I wanted to move to separate Mixin class but it had slight impact on performance
|
@@ -763,25 +763,25 @@ class Adaptor(SelectorsGeneration):
|
|
763
763
|
return self.get_all_text(strip=True).json()
|
764
764
|
|
765
765
|
def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True,
|
766
|
-
clean_match: bool = False, case_sensitive: bool =
|
766
|
+
clean_match: bool = False, case_sensitive: bool = True) -> TextHandlers:
|
767
767
|
"""Apply the given regex to the current text and return a list of strings with the matches.
|
768
768
|
|
769
769
|
:param regex: Can be either a compiled regular expression or a string.
|
770
770
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
771
771
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
772
|
-
:param case_sensitive: if
|
772
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
773
773
|
"""
|
774
774
|
return self.text.re(regex, replace_entities, clean_match, case_sensitive)
|
775
775
|
|
776
776
|
def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
|
777
|
-
clean_match: bool = False, case_sensitive: bool =
|
777
|
+
clean_match: bool = False, case_sensitive: bool = True) -> TextHandler:
|
778
778
|
"""Apply the given regex to text and return the first match if found, otherwise return the default value.
|
779
779
|
|
780
780
|
:param regex: Can be either a compiled regular expression or a string.
|
781
781
|
:param default: The default value to be returned if there is no match
|
782
782
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
783
783
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
784
|
-
:param case_sensitive: if
|
784
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
785
785
|
"""
|
786
786
|
return self.text.re_first(regex, default, replace_entities, clean_match, case_sensitive)
|
787
787
|
|
@@ -1009,14 +1009,14 @@ class Adaptors(List[Adaptor]):
|
|
1009
1009
|
return self.__class__(flatten(results))
|
1010
1010
|
|
1011
1011
|
def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True,
|
1012
|
-
clean_match: bool = False, case_sensitive: bool =
|
1012
|
+
clean_match: bool = False, case_sensitive: bool = True) -> TextHandlers[TextHandler]:
|
1013
1013
|
"""Call the ``.re()`` method for each element in this list and return
|
1014
1014
|
their results flattened as List of TextHandler.
|
1015
1015
|
|
1016
1016
|
:param regex: Can be either a compiled regular expression or a string.
|
1017
1017
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
1018
1018
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
1019
|
-
:param case_sensitive: if
|
1019
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
1020
1020
|
"""
|
1021
1021
|
results = [
|
1022
1022
|
n.text.re(regex, replace_entities, clean_match, case_sensitive) for n in self
|
@@ -1024,7 +1024,7 @@ class Adaptors(List[Adaptor]):
|
|
1024
1024
|
return TextHandlers(flatten(results))
|
1025
1025
|
|
1026
1026
|
def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
|
1027
|
-
clean_match: bool = False, case_sensitive: bool =
|
1027
|
+
clean_match: bool = False, case_sensitive: bool = True) -> TextHandler:
|
1028
1028
|
"""Call the ``.re_first()`` method for each element in this list and return
|
1029
1029
|
the first result or the default value otherwise.
|
1030
1030
|
|
@@ -1032,7 +1032,7 @@ class Adaptors(List[Adaptor]):
|
|
1032
1032
|
:param default: The default value to be returned if there is no match
|
1033
1033
|
:param replace_entities: if enabled character entity references are replaced by their corresponding character
|
1034
1034
|
:param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
|
1035
|
-
:param case_sensitive: if
|
1035
|
+
:param case_sensitive: if disabled, function will set the regex to ignore letters case while compiling it
|
1036
1036
|
"""
|
1037
1037
|
for n in self:
|
1038
1038
|
for result in n.re(regex, replace_entities, clean_match, case_sensitive):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: scrapling
|
3
|
-
Version: 0.2.93
|
3
|
+
Version: 0.2.94
|
4
4
|
Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
|
5
5
|
Home-page: https://github.com/D4Vinci/Scrapling
|
6
6
|
Author: Karim Shoair
|
@@ -40,7 +40,7 @@ Requires-Dist: tldextract
|
|
40
40
|
Requires-Dist: httpx[brotli,socks,zstd]
|
41
41
|
Requires-Dist: playwright>=1.49.1
|
42
42
|
Requires-Dist: rebrowser-playwright>=1.49.1
|
43
|
-
Requires-Dist: camoufox[geoip]>=0.4.
|
43
|
+
Requires-Dist: camoufox[geoip]>=0.4.11
|
44
44
|
Dynamic: author
|
45
45
|
Dynamic: author-email
|
46
46
|
Dynamic: classifier
|
@@ -267,7 +267,7 @@ then use it right away without initializing like:
|
|
267
267
|
page = StealthyFetcher.fetch('https://example.com')
|
268
268
|
```
|
269
269
|
|
270
|
-
Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
|
270
|
+
Also, the `Response` object returned from all fetchers is the same as the `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, `history`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
|
271
271
|
> [!NOTE]
|
272
272
|
> The `auto_match` argument is enabled by default which is the one you should care about the most as you will see later.
|
273
273
|
### Fetcher
|
@@ -1,23 +1,23 @@
|
|
1
|
-
scrapling/__init__.py,sha256=
|
1
|
+
scrapling/__init__.py,sha256=pOwvxTBwxLovt0OJNZz2A5FkbfjQC0wKrDmONqoNsL0,500
|
2
2
|
scrapling/cli.py,sha256=njPdJKmbLFHeWjtSiGEm9ALBdSyfUp0IaJvxQL5C31Q,1125
|
3
3
|
scrapling/defaults.py,sha256=sdXeZjXEX7PmCtaa0weK0nRrAUzqZukNNqipZ_sltYE,469
|
4
4
|
scrapling/fetchers.py,sha256=qmiJ6S-bnPWvP48Z6rKxBnSuR-tdwHlJwlIsYxGxFM0,35405
|
5
|
-
scrapling/parser.py,sha256=
|
5
|
+
scrapling/parser.py,sha256=b_1eHxRwHRCidyvm3F6ST6qIYvVEVU6GhTTCI1LblVk,54330
|
6
6
|
scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
|
7
7
|
scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
scrapling/core/_types.py,sha256=dKVi_dUxdxNtTr7sj7ySkHXDfrsmjFTfpCQeO5tGuBY,670
|
9
|
-
scrapling/core/custom_types.py,sha256=
|
9
|
+
scrapling/core/custom_types.py,sha256=X5fNOS3E7BDkvoUxrRZpEoPlzbLMlibGhZVGbHb2E74,13393
|
10
10
|
scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
|
11
11
|
scrapling/core/storage_adaptors.py,sha256=l_ZYcdn1y69AcoPuRrPoaxqKysN62pMExrwJWYdu5MA,6220
|
12
12
|
scrapling/core/translator.py,sha256=hFSc3mxG5pYhbwRgingeFbD_E73U799vCsvVv0uFEXw,5237
|
13
13
|
scrapling/core/utils.py,sha256=03LzCDzmeK1TXPjIKVzHSUgSfhpe36XE8AwxlgxzJoU,3705
|
14
14
|
scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c,267
|
15
|
-
scrapling/engines/camo.py,sha256=
|
15
|
+
scrapling/engines/camo.py,sha256=SHMRnIrN6599upo5-G3fZQ10455xyB-bB_EsLMjBStA,16072
|
16
16
|
scrapling/engines/constants.py,sha256=Gb_nXFoBB4ujJkd05SKkenMe1UDiRYQA3dkmA3DunLg,3723
|
17
|
-
scrapling/engines/pw.py,sha256=
|
18
|
-
scrapling/engines/static.py,sha256=
|
17
|
+
scrapling/engines/pw.py,sha256=LvS1jvTf3s7mfdeQo7_OyQ5zpiOzvBu5g88hOLlQBCQ,20856
|
18
|
+
scrapling/engines/static.py,sha256=_bqVKcsTkm8ok6NIH6PDDaXtyQ6B2ZoGWccjZJKwvBo,10414
|
19
19
|
scrapling/engines/toolbelt/__init__.py,sha256=VQDdYm1zY9Apno6d8UrULk29vUjllZrQqD8mXL1E2Fc,402
|
20
|
-
scrapling/engines/toolbelt/custom.py,sha256=
|
20
|
+
scrapling/engines/toolbelt/custom.py,sha256=qgONLwpxUoEIAIQBF1RcakYu8cqAAmX8qdyaol5hfjA,12813
|
21
21
|
scrapling/engines/toolbelt/fingerprints.py,sha256=ajEHdXHr7W4hw9KcNS7XlyxNBZu37p1bRj18TiICLzU,2929
|
22
22
|
scrapling/engines/toolbelt/navigation.py,sha256=xEfZRJefuxOCGxQOSI2llS0du0Y2XmoIPdVGUSHOd7k,4567
|
23
23
|
scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
|
@@ -41,9 +41,9 @@ tests/fetchers/sync/test_playwright.py,sha256=MEyDRaMyxDIWupG7f_xz0f0jd9Cpbd5rXC
|
|
41
41
|
tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
42
|
tests/parser/test_automatch.py,sha256=SxsNdExE8zz8AcPRQFBUjZ3Q_1-tPOd9dzVvMSZpOYQ,4908
|
43
43
|
tests/parser/test_general.py,sha256=dyfOsc8lleoY4AxcfDUBUaD1i95xecfYuTUhKBsYjwo,12100
|
44
|
-
scrapling-0.2.
|
45
|
-
scrapling-0.2.
|
46
|
-
scrapling-0.2.
|
47
|
-
scrapling-0.2.
|
48
|
-
scrapling-0.2.
|
49
|
-
scrapling-0.2.
|
44
|
+
scrapling-0.2.94.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
|
45
|
+
scrapling-0.2.94.dist-info/METADATA,sha256=nF08IkBzVob418wgav0uHzbNdVXH1-FrTYZAxrTfg24,68878
|
46
|
+
scrapling-0.2.94.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
47
|
+
scrapling-0.2.94.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
|
48
|
+
scrapling-0.2.94.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
|
49
|
+
scrapling-0.2.94.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|