scrapling 0.2.97__py3-none-any.whl → 0.2.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +35 -6
- scrapling/core/storage_adaptors.py +3 -3
- scrapling/core/translator.py +3 -0
- scrapling/core/utils.py +1 -1
- scrapling/defaults.py +18 -9
- scrapling/engines/pw.py +2 -2
- scrapling/engines/static.py +1 -1
- scrapling/engines/toolbelt/custom.py +2 -2
- scrapling/engines/toolbelt/fingerprints.py +2 -2
- scrapling/engines/toolbelt/navigation.py +1 -1
- scrapling/parser.py +5 -11
- {scrapling-0.2.97.dist-info → scrapling-0.2.98.dist-info}/METADATA +7 -7
- {scrapling-0.2.97.dist-info → scrapling-0.2.98.dist-info}/RECORD +17 -17
- {scrapling-0.2.97.dist-info → scrapling-0.2.98.dist-info}/LICENSE +0 -0
- {scrapling-0.2.97.dist-info → scrapling-0.2.98.dist-info}/WHEEL +0 -0
- {scrapling-0.2.97.dist-info → scrapling-0.2.98.dist-info}/entry_points.txt +0 -0
- {scrapling-0.2.97.dist-info → scrapling-0.2.98.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -1,12 +1,41 @@
|
|
1
|
-
# Declare top-level shortcuts
|
2
|
-
from scrapling.core.custom_types import AttributesHandler, TextHandler
|
3
|
-
from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
|
4
|
-
PlayWrightFetcher, StealthyFetcher)
|
5
|
-
from scrapling.parser import Adaptor, Adaptors
|
6
1
|
|
7
2
|
__author__ = "Karim Shoair (karim.shoair@pm.me)"
|
8
|
-
__version__ = "0.2.97"
|
3
|
+
__version__ = "0.2.98"
|
9
4
|
__copyright__ = "Copyright (c) 2024 Karim Shoair"
|
10
5
|
|
11
6
|
|
7
|
+
# A lightweight approach to create lazy loader for each import for backward compatibility
|
8
|
+
# This will reduces initial memory footprint significantly (only loads what's used)
|
9
|
+
def __getattr__(name):
|
10
|
+
if name == 'Fetcher':
|
11
|
+
from scrapling.fetchers import Fetcher as cls
|
12
|
+
return cls
|
13
|
+
elif name == 'Adaptor':
|
14
|
+
from scrapling.parser import Adaptor as cls
|
15
|
+
return cls
|
16
|
+
elif name == 'Adaptors':
|
17
|
+
from scrapling.parser import Adaptors as cls
|
18
|
+
return cls
|
19
|
+
elif name == 'AttributesHandler':
|
20
|
+
from scrapling.core.custom_types import AttributesHandler as cls
|
21
|
+
return cls
|
22
|
+
elif name == 'TextHandler':
|
23
|
+
from scrapling.core.custom_types import TextHandler as cls
|
24
|
+
return cls
|
25
|
+
elif name == 'AsyncFetcher':
|
26
|
+
from scrapling.fetchers import AsyncFetcher as cls
|
27
|
+
return cls
|
28
|
+
elif name == 'StealthyFetcher':
|
29
|
+
from scrapling.fetchers import StealthyFetcher as cls
|
30
|
+
return cls
|
31
|
+
elif name == 'PlayWrightFetcher':
|
32
|
+
from scrapling.fetchers import PlayWrightFetcher as cls
|
33
|
+
return cls
|
34
|
+
elif name == 'CustomFetcher':
|
35
|
+
from scrapling.fetchers import CustomFetcher as cls
|
36
|
+
return cls
|
37
|
+
else:
|
38
|
+
raise AttributeError(f"module 'scrapling' has no attribute '{name}'")
|
39
|
+
|
40
|
+
|
12
41
|
__all__ = ['Adaptor', 'Fetcher', 'AsyncFetcher', 'StealthyFetcher', 'PlayWrightFetcher']
|
@@ -19,7 +19,7 @@ class StorageSystemMixin(ABC):
|
|
19
19
|
"""
|
20
20
|
self.url = url
|
21
21
|
|
22
|
-
@lru_cache(
|
22
|
+
@lru_cache(64, typed=True)
|
23
23
|
def _get_base_url(self, default_value: str = 'default') -> str:
|
24
24
|
if not self.url or type(self.url) is not str:
|
25
25
|
return default_value
|
@@ -51,7 +51,7 @@ class StorageSystemMixin(ABC):
|
|
51
51
|
raise NotImplementedError('Storage system must implement `save` method')
|
52
52
|
|
53
53
|
@staticmethod
|
54
|
-
@lru_cache(
|
54
|
+
@lru_cache(128, typed=True)
|
55
55
|
def _get_hash(identifier: str) -> str:
|
56
56
|
"""If you want to hash identifier in your storage system, use this safer"""
|
57
57
|
identifier = identifier.lower().strip()
|
@@ -63,7 +63,7 @@ class StorageSystemMixin(ABC):
|
|
63
63
|
return f"{hash_value}_{len(identifier)}" # Length to reduce collision chance
|
64
64
|
|
65
65
|
|
66
|
-
@lru_cache(
|
66
|
+
@lru_cache(1, typed=True)
|
67
67
|
class SQLiteStorageSystem(StorageSystemMixin):
|
68
68
|
"""The recommended system to use, it's race condition safe and thread safe.
|
69
69
|
Mainly built so the library can run in threaded frameworks like scrapy or threaded tools
|
scrapling/core/translator.py
CHANGED
@@ -142,3 +142,6 @@ class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
|
|
142
142
|
@lru_cache(maxsize=256)
|
143
143
|
def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
|
144
144
|
return super().css_to_xpath(css, prefix)
|
145
|
+
|
146
|
+
|
147
|
+
translator_instance = HTMLTranslator()
|
scrapling/core/utils.py
CHANGED
scrapling/defaults.py
CHANGED
@@ -1,10 +1,19 @@
|
|
1
|
-
from .fetchers import AsyncFetcher as _AsyncFetcher
|
2
|
-
from .fetchers import Fetcher as _Fetcher
|
3
|
-
from .fetchers import PlayWrightFetcher as _PlayWrightFetcher
|
4
|
-
from .fetchers import StealthyFetcher as _StealthyFetcher
|
5
|
-
|
6
1
|
# If you are going to use Fetchers with the default settings, import them from this file instead for a cleaner looking code
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
|
3
|
+
# A lightweight approach to create lazy loader for each import for backward compatibility
|
4
|
+
# This will reduces initial memory footprint significantly (only loads what's used)
|
5
|
+
def __getattr__(name):
|
6
|
+
if name == 'Fetcher':
|
7
|
+
from scrapling.fetchers import Fetcher as cls
|
8
|
+
return cls()
|
9
|
+
elif name == 'AsyncFetcher':
|
10
|
+
from scrapling.fetchers import AsyncFetcher as cls
|
11
|
+
return cls()
|
12
|
+
elif name == 'StealthyFetcher':
|
13
|
+
from scrapling.fetchers import StealthyFetcher as cls
|
14
|
+
return cls()
|
15
|
+
elif name == 'PlayWrightFetcher':
|
16
|
+
from scrapling.fetchers import PlayWrightFetcher as cls
|
17
|
+
return cls()
|
18
|
+
else:
|
19
|
+
raise AttributeError(f"module 'scrapling' has no attribute '{name}'")
|
scrapling/engines/pw.py
CHANGED
@@ -126,7 +126,7 @@ class PlaywrightEngine:
|
|
126
126
|
|
127
127
|
return cdp_url
|
128
128
|
|
129
|
-
@lru_cache(
|
129
|
+
@lru_cache(32, typed=True)
|
130
130
|
def __set_flags(self):
|
131
131
|
"""Returns the flags that will be used while launching the browser if stealth mode is enabled"""
|
132
132
|
flags = DEFAULT_STEALTH_FLAGS
|
@@ -169,7 +169,7 @@ class PlaywrightEngine:
|
|
169
169
|
|
170
170
|
return context_kwargs
|
171
171
|
|
172
|
-
@lru_cache(
|
172
|
+
@lru_cache(1)
|
173
173
|
def __stealth_scripts(self):
|
174
174
|
# Basic bypasses nothing fancy as I'm still working on it
|
175
175
|
# But with adding these bypasses to the above config, it bypasses many online tests like
|
scrapling/engines/static.py
CHANGED
@@ -7,7 +7,7 @@ from scrapling.core.utils import log, lru_cache
|
|
7
7
|
from .toolbelt import Response, generate_convincing_referer, generate_headers
|
8
8
|
|
9
9
|
|
10
|
-
@lru_cache(
|
10
|
+
@lru_cache(2, typed=True) # Singleton easily
|
11
11
|
class StaticEngine:
|
12
12
|
def __init__(
|
13
13
|
self, url: str, proxy: Optional[str] = None, stealthy_headers: bool = True, follow_redirects: bool = True,
|
@@ -16,7 +16,7 @@ class ResponseEncoding:
|
|
16
16
|
__ISO_8859_1_CONTENT_TYPES = {"text/plain", "text/html", "text/css", "text/javascript"}
|
17
17
|
|
18
18
|
@classmethod
|
19
|
-
@lru_cache(maxsize=
|
19
|
+
@lru_cache(maxsize=128)
|
20
20
|
def __parse_content_type(cls, header_value: str) -> Tuple[str, Dict[str, str]]:
|
21
21
|
"""Parse content type and parameters from a content-type header value.
|
22
22
|
|
@@ -38,7 +38,7 @@ class ResponseEncoding:
|
|
38
38
|
return content_type, params
|
39
39
|
|
40
40
|
@classmethod
|
41
|
-
@lru_cache(maxsize=
|
41
|
+
@lru_cache(maxsize=128)
|
42
42
|
def get_value(cls, content_type: Optional[str], text: Optional[str] = 'test') -> str:
|
43
43
|
"""Determine the appropriate character encoding from a content-type header.
|
44
44
|
|
@@ -12,7 +12,7 @@ from scrapling.core._types import Dict, Union
|
|
12
12
|
from scrapling.core.utils import lru_cache
|
13
13
|
|
14
14
|
|
15
|
-
@lru_cache(
|
15
|
+
@lru_cache(10, typed=True)
|
16
16
|
def generate_convincing_referer(url: str) -> str:
|
17
17
|
"""Takes the domain from the URL without the subdomain/suffix and make it look like you were searching google for this website
|
18
18
|
|
@@ -26,7 +26,7 @@ def generate_convincing_referer(url: str) -> str:
|
|
26
26
|
return f'https://www.google.com/search?q={website_name}'
|
27
27
|
|
28
28
|
|
29
|
-
@lru_cache(
|
29
|
+
@lru_cache(1, typed=True)
|
30
30
|
def get_os_name() -> Union[str, None]:
|
31
31
|
"""Get the current OS name in the same format needed for browserforge
|
32
32
|
|
@@ -110,7 +110,7 @@ def construct_cdp_url(cdp_url: str, query_params: Optional[Dict] = None) -> str:
|
|
110
110
|
raise ValueError(f"Invalid CDP URL: {str(e)}")
|
111
111
|
|
112
112
|
|
113
|
-
@lru_cache(
|
113
|
+
@lru_cache(10, typed=True)
|
114
114
|
def js_bypass_path(filename: str) -> str:
|
115
115
|
"""Takes the base filename of JS file inside the `bypasses` folder then return the full path of it
|
116
116
|
|
scrapling/parser.py
CHANGED
@@ -17,7 +17,7 @@ from scrapling.core.custom_types import (AttributesHandler, TextHandler,
|
|
17
17
|
from scrapling.core.mixins import SelectorsGeneration
|
18
18
|
from scrapling.core.storage_adaptors import (SQLiteStorageSystem,
|
19
19
|
StorageSystemMixin, _StorageTools)
|
20
|
-
from scrapling.core.translator import
|
20
|
+
from scrapling.core.translator import translator_instance
|
21
21
|
from scrapling.core.utils import (clean_spaces, flatten, html_forbidden,
|
22
22
|
is_jsonable, log)
|
23
23
|
|
@@ -26,7 +26,7 @@ class Adaptor(SelectorsGeneration):
|
|
26
26
|
__slots__ = (
|
27
27
|
'url', 'encoding', '__auto_match_enabled', '_root', '_storage',
|
28
28
|
'__keep_comments', '__huge_tree_enabled', '__attributes', '__text', '__tag',
|
29
|
-
'__keep_cdata'
|
29
|
+
'__keep_cdata'
|
30
30
|
)
|
31
31
|
|
32
32
|
def __init__(
|
@@ -72,20 +72,17 @@ class Adaptor(SelectorsGeneration):
|
|
72
72
|
raise ValueError("Adaptor class needs text, body, or root arguments to work")
|
73
73
|
|
74
74
|
self.__text = ''
|
75
|
-
self.__raw_body = ''
|
76
75
|
if root is None:
|
77
76
|
if text is None:
|
78
77
|
if not body or not isinstance(body, bytes):
|
79
78
|
raise TypeError(f"body argument must be valid and of type bytes, got {body.__class__}")
|
80
79
|
|
81
80
|
body = body.replace(b"\x00", b"").strip()
|
82
|
-
self.__raw_body = body.replace(b"\x00", b"").strip().decode()
|
83
81
|
else:
|
84
82
|
if not isinstance(text, str):
|
85
83
|
raise TypeError(f"text argument must be of type str, got {text.__class__}")
|
86
84
|
|
87
85
|
body = text.strip().replace("\x00", "").encode(encoding) or b"<html/>"
|
88
|
-
self.__raw_body = text.strip()
|
89
86
|
|
90
87
|
# https://lxml.de/api/lxml.etree.HTMLParser-class.html
|
91
88
|
parser = html.HTMLParser(
|
@@ -250,10 +247,7 @@ class Adaptor(SelectorsGeneration):
|
|
250
247
|
"""Return the inner html code of the element"""
|
251
248
|
return TextHandler(etree.tostring(self._root, encoding='unicode', method='html', with_tail=False))
|
252
249
|
|
253
|
-
|
254
|
-
def body(self) -> TextHandler:
|
255
|
-
"""Return raw HTML code of the element/page without any processing when possible or return `Adaptor.html_content`"""
|
256
|
-
return TextHandler(self.__raw_body) or self.html_content
|
250
|
+
body = html_content
|
257
251
|
|
258
252
|
def prettify(self) -> TextHandler:
|
259
253
|
"""Return a prettified version of the element's inner html-code"""
|
@@ -476,7 +470,7 @@ class Adaptor(SelectorsGeneration):
|
|
476
470
|
try:
|
477
471
|
if not self.__auto_match_enabled or ',' not in selector:
|
478
472
|
# No need to split selectors in this case, let's save some CPU cycles :)
|
479
|
-
xpath_selector =
|
473
|
+
xpath_selector = translator_instance.css_to_xpath(selector)
|
480
474
|
return self.xpath(xpath_selector, identifier or selector, auto_match, auto_save, percentage)
|
481
475
|
|
482
476
|
results = []
|
@@ -484,7 +478,7 @@ class Adaptor(SelectorsGeneration):
|
|
484
478
|
for single_selector in split_selectors(selector):
|
485
479
|
# I'm doing this only so the `save` function save data correctly for combined selectors
|
486
480
|
# Like using the ',' to combine two different selectors that point to different elements.
|
487
|
-
xpath_selector =
|
481
|
+
xpath_selector = translator_instance.css_to_xpath(single_selector.canonical())
|
488
482
|
results += self.xpath(
|
489
483
|
xpath_selector, identifier or single_selector.canonical(), auto_match, auto_save, percentage
|
490
484
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: scrapling
|
3
|
-
Version: 0.2.97
|
3
|
+
Version: 0.2.98
|
4
4
|
Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again! In an internet filled with complications,
|
5
5
|
Home-page: https://github.com/D4Vinci/Scrapling
|
6
6
|
Author: Karim Shoair
|
@@ -173,7 +173,7 @@ Deep SerpApi is a dedicated search engine designed for large language models (LL
|
|
173
173
|
## Getting Started
|
174
174
|
|
175
175
|
```python
|
176
|
-
from scrapling import Fetcher
|
176
|
+
from scrapling.fetchers import Fetcher
|
177
177
|
|
178
178
|
fetcher = Fetcher(auto_match=False)
|
179
179
|
|
@@ -255,7 +255,7 @@ Fetchers are interfaces built on top of other libraries with added features that
|
|
255
255
|
### Features
|
256
256
|
You might be slightly confused by now so let me clear things up. All fetcher-type classes are imported in the same way
|
257
257
|
```python
|
258
|
-
from scrapling import Fetcher, StealthyFetcher, PlayWrightFetcher
|
258
|
+
from scrapling.fetchers import Fetcher, StealthyFetcher, PlayWrightFetcher
|
259
259
|
```
|
260
260
|
All of them can take these initialization arguments: `auto_match`, `huge_tree`, `keep_comments`, `keep_cdata`, `storage`, and `storage_args`, which are the same ones you give to the `Adaptor` class.
|
261
261
|
|
@@ -287,7 +287,7 @@ You can route all traffic (HTTP and HTTPS) to a proxy for any of these methods i
|
|
287
287
|
```
|
288
288
|
For Async requests, you will just replace the import like below:
|
289
289
|
```python
|
290
|
-
>> from scrapling import AsyncFetcher
|
290
|
+
>> from scrapling.fetchers import AsyncFetcher
|
291
291
|
>> page = await AsyncFetcher().get('https://httpbin.org/get', stealthy_headers=True, follow_redirects=True)
|
292
292
|
>> page = await AsyncFetcher().post('https://httpbin.org/post', data={'key': 'value'}, proxy='http://username:password@localhost:8030')
|
293
293
|
>> page = await AsyncFetcher().put('https://httpbin.org/put', data={'key': 'value'})
|
@@ -541,7 +541,7 @@ When website owners implement structural changes like
|
|
541
541
|
The selector will no longer function and your code needs maintenance. That's where Scrapling's auto-matching feature comes into play.
|
542
542
|
|
543
543
|
```python
|
544
|
-
from scrapling import Adaptor
|
544
|
+
from scrapling.parser import Adaptor
|
545
545
|
# Before the change
|
546
546
|
page = Adaptor(page_source, url='example.com')
|
547
547
|
element = page.css('#p1' auto_save=True)
|
@@ -559,7 +559,7 @@ To solve this issue, I will use [The Web Archive](https://archive.org/)'s [Wayba
|
|
559
559
|
If I want to extract the Questions button from the old design I can use a selector like this `#hmenus > div:nth-child(1) > ul > li:nth-child(1) > a` This selector is too specific because it was generated by Google Chrome.
|
560
560
|
Now let's test the same selector in both versions
|
561
561
|
```python
|
562
|
-
>> from scrapling import Fetcher
|
562
|
+
>> from scrapling.fetchers import Fetcher
|
563
563
|
>> selector = '#hmenus > div:nth-child(1) > ul > li:nth-child(1) > a'
|
564
564
|
>> old_url = "https://web.archive.org/web/20100102003420/http://stackoverflow.com/"
|
565
565
|
>> new_url = "https://stackoverflow.com/"
|
@@ -620,7 +620,7 @@ Note: The filtering process always starts from the first filter it finds in the
|
|
620
620
|
Examples to clear any confusion :)
|
621
621
|
|
622
622
|
```python
|
623
|
-
>> from scrapling import Fetcher
|
623
|
+
>> from scrapling.fetchers import Fetcher
|
624
624
|
>> page = Fetcher().get('https://quotes.toscrape.com/')
|
625
625
|
# Find all elements with tag name `div`.
|
626
626
|
>> page.find_all('div')
|
@@ -1,25 +1,25 @@
|
|
1
|
-
scrapling/__init__.py,sha256=
|
1
|
+
scrapling/__init__.py,sha256=S-SWj9O2r0Tu8Z-mPxDJ-z3h5k-bBfhFOETaCY4A9dc,1510
|
2
2
|
scrapling/cli.py,sha256=7yTsMhVAqqS8Z27T5dFKrR9_X8vuFjBlwYgAF22W7T8,1292
|
3
|
-
scrapling/defaults.py,sha256=
|
3
|
+
scrapling/defaults.py,sha256=MAn2MMLBFvoe4i3u_qlp6YEvGUiCjNPPDux1cFCdpsU,866
|
4
4
|
scrapling/fetchers.py,sha256=xwVCjAg0VCXwhB2igSLQvb0D0bOPGfg5WNtxgE7m-W0,34987
|
5
|
-
scrapling/parser.py,sha256=
|
5
|
+
scrapling/parser.py,sha256=1xS1UjCm1GVnKcVAtup9rSE1xuYPxXOgJe-8LJE5gUk,53956
|
6
6
|
scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
|
7
7
|
scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
scrapling/core/_types.py,sha256=dKVi_dUxdxNtTr7sj7ySkHXDfrsmjFTfpCQeO5tGuBY,670
|
9
9
|
scrapling/core/custom_types.py,sha256=EWGx5t5scHEB1SMsitzc8duskq-5f-Qaj40IWkNTRzM,12947
|
10
10
|
scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
|
11
|
-
scrapling/core/storage_adaptors.py,sha256=
|
12
|
-
scrapling/core/translator.py,sha256=
|
13
|
-
scrapling/core/utils.py,sha256=
|
11
|
+
scrapling/core/storage_adaptors.py,sha256=gZbUpHtLOL7o_oZbES_o40r39zShxTeTM8YK6dXA5Zo,6214
|
12
|
+
scrapling/core/translator.py,sha256=3a2VX9KR-q-GzwT1OgGDv1UlzIkvBggkQXUdiMyL-4c,5277
|
13
|
+
scrapling/core/utils.py,sha256=KX88B3tV1-SgCAr69TUN3LfmsTDcLnEhYJiPuWd31yA,3704
|
14
14
|
scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c,267
|
15
15
|
scrapling/engines/camo.py,sha256=oYKA0l3EpOcQW2APRj5FEmslqtp9A8i_ZljqlKvIDeI,16129
|
16
16
|
scrapling/engines/constants.py,sha256=Gb_nXFoBB4ujJkd05SKkenMe1UDiRYQA3dkmA3DunLg,3723
|
17
|
-
scrapling/engines/pw.py,sha256=
|
18
|
-
scrapling/engines/static.py,sha256=
|
17
|
+
scrapling/engines/pw.py,sha256=cZraIBWd9ulEGEdhETIGmpevi62CN9JGcUU1OIDdxkA,21369
|
18
|
+
scrapling/engines/static.py,sha256=EjdaR0beqWfEKKavT7vlBnozoayQaVpqeVtaOuzd384,9306
|
19
19
|
scrapling/engines/toolbelt/__init__.py,sha256=VQDdYm1zY9Apno6d8UrULk29vUjllZrQqD8mXL1E2Fc,402
|
20
|
-
scrapling/engines/toolbelt/custom.py,sha256=
|
21
|
-
scrapling/engines/toolbelt/fingerprints.py,sha256=
|
22
|
-
scrapling/engines/toolbelt/navigation.py,sha256=
|
20
|
+
scrapling/engines/toolbelt/custom.py,sha256=_-baGB8oOOHogbaddtGsq_K_01ccOjOkGA6tOKk28hM,12811
|
21
|
+
scrapling/engines/toolbelt/fingerprints.py,sha256=Zzoqq3p6X_8D7eTxACz3z96cBZWWK61iKOGo2sZUtlg,2924
|
22
|
+
scrapling/engines/toolbelt/navigation.py,sha256=fMjDgicqy2MoZZll2h5EvrrxkL6yNrC09v8isTpwAt0,4565
|
23
23
|
scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
|
24
24
|
scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
|
25
25
|
scrapling/engines/toolbelt/bypasses/pdf_viewer.js,sha256=mKjjSuP1-BOGC_2WhRYHJo_LP7lTBi2KXmP_zsHO_tI,173
|
@@ -41,9 +41,9 @@ tests/fetchers/sync/test_playwright.py,sha256=MEyDRaMyxDIWupG7f_xz0f0jd9Cpbd5rXC
|
|
41
41
|
tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
42
|
tests/parser/test_automatch.py,sha256=SxsNdExE8zz8AcPRQFBUjZ3Q_1-tPOd9dzVvMSZpOYQ,4908
|
43
43
|
tests/parser/test_general.py,sha256=dyfOsc8lleoY4AxcfDUBUaD1i95xecfYuTUhKBsYjwo,12100
|
44
|
-
scrapling-0.2.97.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
|
45
|
-
scrapling-0.2.
|
46
|
-
scrapling-0.2.97.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
47
|
-
scrapling-0.2.97.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
|
48
|
-
scrapling-0.2.97.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
|
49
|
-
scrapling-0.2.97.dist-info/RECORD,,
|
44
|
+
scrapling-0.2.98.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
|
45
|
+
scrapling-0.2.98.dist-info/METADATA,sha256=Un_ROxrGIvk_8w-ECQbwKAcJzYyx3MTWS1DHt9FRqdI,69718
|
46
|
+
scrapling-0.2.98.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
47
|
+
scrapling-0.2.98.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
|
48
|
+
scrapling-0.2.98.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
|
49
|
+
scrapling-0.2.98.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|