scrapling 0.2.98__py3-none-any.whl → 0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +18 -31
- scrapling/cli.py +818 -20
- scrapling/core/_html_utils.py +348 -0
- scrapling/core/_types.py +34 -17
- scrapling/core/ai.py +611 -0
- scrapling/core/custom_types.py +183 -100
- scrapling/core/mixins.py +27 -19
- scrapling/core/shell.py +647 -0
- scrapling/core/{storage_adaptors.py → storage.py} +41 -33
- scrapling/core/translator.py +20 -26
- scrapling/core/utils.py +49 -54
- scrapling/engines/__init__.py +15 -6
- scrapling/engines/_browsers/__init__.py +2 -0
- scrapling/engines/_browsers/_camoufox.py +745 -0
- scrapling/engines/_browsers/_config_tools.py +130 -0
- scrapling/engines/_browsers/_controllers.py +630 -0
- scrapling/engines/_browsers/_page.py +93 -0
- scrapling/engines/_browsers/_validators.py +150 -0
- scrapling/engines/constants.py +101 -88
- scrapling/engines/static.py +667 -110
- scrapling/engines/toolbelt/__init__.py +20 -6
- scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +2 -1
- scrapling/engines/toolbelt/convertor.py +254 -0
- scrapling/engines/toolbelt/custom.py +205 -186
- scrapling/engines/toolbelt/fingerprints.py +32 -46
- scrapling/engines/toolbelt/navigation.py +68 -39
- scrapling/fetchers.py +255 -260
- scrapling/parser.py +781 -449
- scrapling-0.3.dist-info/METADATA +409 -0
- scrapling-0.3.dist-info/RECORD +41 -0
- {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info}/WHEEL +1 -1
- {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info}/top_level.txt +0 -1
- scrapling/defaults.py +0 -19
- scrapling/engines/camo.py +0 -299
- scrapling/engines/pw.py +0 -428
- scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -5
- scrapling-0.2.98.dist-info/METADATA +0 -867
- scrapling-0.2.98.dist-info/RECORD +0 -49
- tests/__init__.py +0 -1
- tests/fetchers/__init__.py +0 -1
- tests/fetchers/async/__init__.py +0 -0
- tests/fetchers/async/test_camoufox.py +0 -95
- tests/fetchers/async/test_httpx.py +0 -83
- tests/fetchers/async/test_playwright.py +0 -99
- tests/fetchers/sync/__init__.py +0 -0
- tests/fetchers/sync/test_camoufox.py +0 -68
- tests/fetchers/sync/test_httpx.py +0 -82
- tests/fetchers/sync/test_playwright.py +0 -87
- tests/fetchers/test_utils.py +0 -97
- tests/parser/__init__.py +0 -0
- tests/parser/test_automatch.py +0 -111
- tests/parser/test_general.py +0 -330
- {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info}/entry_points.txt +0 -0
- {scrapling-0.2.98.dist-info → scrapling-0.3.dist-info/licenses}/LICENSE +0 -0
scrapling/__init__.py
CHANGED
@@ -1,41 +1,28 @@
|
|
1
|
-
|
2
1
|
__author__ = "Karim Shoair (karim.shoair@pm.me)"
|
3
|
-
__version__ = "0.2.98"
|
2
|
+
__version__ = "0.3"
|
4
3
|
__copyright__ = "Copyright (c) 2024 Karim Shoair"
|
5
4
|
|
6
5
|
|
7
|
-
# A lightweight approach to create lazy loader for each import for backward compatibility
|
6
|
+
# A lightweight approach to create a lazy loader for each import for backward compatibility
|
8
7
|
# This will reduces initial memory footprint significantly (only loads what's used)
|
9
8
|
def __getattr__(name):
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
return cls
|
25
|
-
elif name == 'AsyncFetcher':
|
26
|
-
from scrapling.fetchers import AsyncFetcher as cls
|
27
|
-
return cls
|
28
|
-
elif name == 'StealthyFetcher':
|
29
|
-
from scrapling.fetchers import StealthyFetcher as cls
|
30
|
-
return cls
|
31
|
-
elif name == 'PlayWrightFetcher':
|
32
|
-
from scrapling.fetchers import PlayWrightFetcher as cls
|
33
|
-
return cls
|
34
|
-
elif name == 'CustomFetcher':
|
35
|
-
from scrapling.fetchers import CustomFetcher as cls
|
36
|
-
return cls
|
9
|
+
lazy_imports = {
|
10
|
+
"Fetcher": ("scrapling.fetchers", "Fetcher"),
|
11
|
+
"Selector": ("scrapling.parser", "Selector"),
|
12
|
+
"Selectors": ("scrapling.parser", "Selectors"),
|
13
|
+
"AttributesHandler": ("scrapling.core.custom_types", "AttributesHandler"),
|
14
|
+
"TextHandler": ("scrapling.core.custom_types", "TextHandler"),
|
15
|
+
"AsyncFetcher": ("scrapling.fetchers", "AsyncFetcher"),
|
16
|
+
"StealthyFetcher": ("scrapling.fetchers", "StealthyFetcher"),
|
17
|
+
"DynamicFetcher": ("scrapling.fetchers", "DynamicFetcher"),
|
18
|
+
}
|
19
|
+
|
20
|
+
if name in lazy_imports:
|
21
|
+
module_path, class_name = lazy_imports[name]
|
22
|
+
module = __import__(module_path, fromlist=[class_name])
|
23
|
+
return getattr(module, class_name)
|
37
24
|
else:
|
38
25
|
raise AttributeError(f"module 'scrapling' has no attribute '{name}'")
|
39
26
|
|
40
27
|
|
41
|
-
__all__ = [
|
28
|
+
__all__ = ["Selector", "Fetcher", "AsyncFetcher", "StealthyFetcher", "DynamicFetcher"]
|