scrapling-0.2.99-py3-none-any.whl → scrapling-0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. scrapling/__init__.py +18 -31
  2. scrapling/cli.py +818 -20
  3. scrapling/core/_html_utils.py +348 -0
  4. scrapling/core/_types.py +34 -17
  5. scrapling/core/ai.py +611 -0
  6. scrapling/core/custom_types.py +183 -100
  7. scrapling/core/mixins.py +27 -19
  8. scrapling/core/shell.py +647 -0
  9. scrapling/core/{storage_adaptors.py → storage.py} +41 -33
  10. scrapling/core/translator.py +20 -26
  11. scrapling/core/utils.py +49 -54
  12. scrapling/engines/__init__.py +15 -6
  13. scrapling/engines/_browsers/__init__.py +2 -0
  14. scrapling/engines/_browsers/_camoufox.py +745 -0
  15. scrapling/engines/_browsers/_config_tools.py +130 -0
  16. scrapling/engines/_browsers/_controllers.py +630 -0
  17. scrapling/engines/_browsers/_page.py +93 -0
  18. scrapling/engines/_browsers/_validators.py +150 -0
  19. scrapling/engines/constants.py +101 -88
  20. scrapling/engines/static.py +667 -110
  21. scrapling/engines/toolbelt/__init__.py +20 -6
  22. scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +2 -1
  23. scrapling/engines/toolbelt/convertor.py +254 -0
  24. scrapling/engines/toolbelt/custom.py +158 -175
  25. scrapling/engines/toolbelt/fingerprints.py +32 -46
  26. scrapling/engines/toolbelt/navigation.py +68 -39
  27. scrapling/fetchers.py +227 -333
  28. scrapling/parser.py +781 -449
  29. scrapling-0.3.dist-info/METADATA +409 -0
  30. scrapling-0.3.dist-info/RECORD +41 -0
  31. {scrapling-0.2.99.dist-info → scrapling-0.3.dist-info}/WHEEL +1 -1
  32. {scrapling-0.2.99.dist-info → scrapling-0.3.dist-info}/top_level.txt +0 -1
  33. scrapling/defaults.py +0 -25
  34. scrapling/engines/camo.py +0 -339
  35. scrapling/engines/pw.py +0 -465
  36. scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -5
  37. scrapling-0.2.99.dist-info/METADATA +0 -290
  38. scrapling-0.2.99.dist-info/RECORD +0 -49
  39. tests/__init__.py +0 -1
  40. tests/fetchers/__init__.py +0 -1
  41. tests/fetchers/async/__init__.py +0 -0
  42. tests/fetchers/async/test_camoufox.py +0 -97
  43. tests/fetchers/async/test_httpx.py +0 -85
  44. tests/fetchers/async/test_playwright.py +0 -101
  45. tests/fetchers/sync/__init__.py +0 -0
  46. tests/fetchers/sync/test_camoufox.py +0 -70
  47. tests/fetchers/sync/test_httpx.py +0 -84
  48. tests/fetchers/sync/test_playwright.py +0 -89
  49. tests/fetchers/test_utils.py +0 -97
  50. tests/parser/__init__.py +0 -0
  51. tests/parser/test_automatch.py +0 -111
  52. tests/parser/test_general.py +0 -330
  53. {scrapling-0.2.99.dist-info → scrapling-0.3.dist-info}/entry_points.txt +0 -0
  54. {scrapling-0.2.99.dist-info → scrapling-0.3.dist-info}/licenses/LICENSE +0 -0
scrapling/__init__.py CHANGED
@@ -1,41 +1,28 @@
-
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.99"
+__version__ = "0.3"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"


-# A lightweight approach to create lazy loader for each import for backward compatibility
+# A lightweight approach to create a lazy loader for each import for backward compatibility
 # This will reduces initial memory footprint significantly (only loads what's used)
 def __getattr__(name):
-    if name == 'Fetcher':
-        from scrapling.fetchers import Fetcher as cls
-        return cls
-    elif name == 'Adaptor':
-        from scrapling.parser import Adaptor as cls
-        return cls
-    elif name == 'Adaptors':
-        from scrapling.parser import Adaptors as cls
-        return cls
-    elif name == 'AttributesHandler':
-        from scrapling.core.custom_types import AttributesHandler as cls
-        return cls
-    elif name == 'TextHandler':
-        from scrapling.core.custom_types import TextHandler as cls
-        return cls
-    elif name == 'AsyncFetcher':
-        from scrapling.fetchers import AsyncFetcher as cls
-        return cls
-    elif name == 'StealthyFetcher':
-        from scrapling.fetchers import StealthyFetcher as cls
-        return cls
-    elif name == 'PlayWrightFetcher':
-        from scrapling.fetchers import PlayWrightFetcher as cls
-        return cls
-    elif name == 'CustomFetcher':
-        from scrapling.fetchers import CustomFetcher as cls
-        return cls
+    lazy_imports = {
+        "Fetcher": ("scrapling.fetchers", "Fetcher"),
+        "Selector": ("scrapling.parser", "Selector"),
+        "Selectors": ("scrapling.parser", "Selectors"),
+        "AttributesHandler": ("scrapling.core.custom_types", "AttributesHandler"),
+        "TextHandler": ("scrapling.core.custom_types", "TextHandler"),
+        "AsyncFetcher": ("scrapling.fetchers", "AsyncFetcher"),
+        "StealthyFetcher": ("scrapling.fetchers", "StealthyFetcher"),
+        "DynamicFetcher": ("scrapling.fetchers", "DynamicFetcher"),
+    }
+
+    if name in lazy_imports:
+        module_path, class_name = lazy_imports[name]
+        module = __import__(module_path, fromlist=[class_name])
+        return getattr(module, class_name)
     else:
         raise AttributeError(f"module 'scrapling' has no attribute '{name}'")


-__all__ = ['Adaptor', 'Fetcher', 'AsyncFetcher', 'StealthyFetcher', 'PlayWrightFetcher']
+__all__ = ["Selector", "Fetcher", "AsyncFetcher", "StealthyFetcher", "DynamicFetcher"]