scrapling-0.2.9-py3-none-any.whl → scrapling-0.2.92-py3-none-any.whl
- scrapling/__init__.py +1 -1
- scrapling/cli.py +37 -0
- scrapling/core/_types.py +2 -0
- scrapling/engines/camo.py +40 -16
- scrapling/engines/pw.py +43 -16
- scrapling/engines/toolbelt/custom.py +1 -5
- scrapling/fetchers.py +7 -7
- scrapling/parser.py +3 -3
- {scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/METADATA +8 -42
- {scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/RECORD +14 -12
- scrapling-0.2.92.dist-info/entry_points.txt +2 -0
- {scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/LICENSE +0 -0
- {scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/WHEEL +0 -0
- {scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/top_level.txt +0 -0
scrapling/__init__.py CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
 from scrapling.parser import Adaptor, Adaptors

 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.9"
+__version__ = "0.2.92"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"

scrapling/cli.py ADDED
@@ -0,0 +1,37 @@
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import click
+
+
+def get_package_dir():
+    return Path(os.path.dirname(__file__))
+
+
+def run_command(command, line):
+    print(f"Installing {line}...")
+    _ = subprocess.check_call(command, shell=True)
+    # I meant to not use try except here
+
+
+@click.command(help="Install all Scrapling's Fetchers dependencies")
+def install():
+    if not get_package_dir().joinpath(".scrapling_dependencies_installed").exists():
+        run_command([sys.executable, "-m", "playwright", "install", 'chromium'], 'Playwright browsers')
+        run_command([sys.executable, "-m", "playwright", "install-deps", 'chromium', 'firefox'], 'Playwright dependencies')
+        run_command([sys.executable, "-m", "camoufox", "fetch", '--browserforge'], 'Camoufox browser and databases')
+        # if no errors raised by above commands, then we add below file
+        get_package_dir().joinpath(".scrapling_dependencies_installed").touch()
+    else:
+        print('The dependencies are already installed')
+
+
+@click.group()
+def main():
+    pass
+
+
+# Adding commands
+main.add_command(install)
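The new `scrapling-0.2.92.dist-info/entry_points.txt` (49 bytes per the RECORD below) most likely registers `main` as the `scrapling` console script, which is what makes the `scrapling install` command from the README work. As a quick way to poke at the new group without installing the script, click's bundled test runner can invoke it directly; a minimal sketch, not part of the release:

```python
# Exercise the new CLI group programmatically with click's test runner.
# Invoking "install" for real would download browsers, so we only ask
# for the help text, which lists the registered `install` command.
from click.testing import CliRunner

from scrapling.cli import main

runner = CliRunner()
result = runner.invoke(main, ["--help"])
print(result.output)
```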
scrapling/core/_types.py CHANGED
@@ -5,6 +5,8 @@ Type definitions for type checking purposes.
 from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
                     List, Literal, Optional, Pattern, Tuple, Type, Union)

+SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]
+
 try:
     from typing import Protocol
 except ImportError:
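`SelectorWaitStates` gives every fetcher signature below one shared alias for the four wait states instead of repeating the `Literal` inline. A small sketch of what the alias buys at type-check time; `open_page` is a hypothetical function, not part of the package:

```python
# Hypothetical function showing how the shared Literal alias constrains
# call sites when checked with mypy/pyright.
from scrapling.core._types import SelectorWaitStates


def open_page(wait_selector_state: SelectorWaitStates = "attached") -> None:
    ...


open_page("visible")  # accepted: one of the four literal values
open_page("loaded")   # rejected by the type checker: not a valid literal
```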
scrapling/engines/camo.py CHANGED
@@ -3,7 +3,7 @@ from camoufox.async_api import AsyncCamoufox
 from camoufox.sync_api import Camoufox

 from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
-                                   Union)
+                                   SelectorWaitStates, Union)
 from scrapling.core.utils import log
 from scrapling.engines.toolbelt import (Response, StatusText,
                                         async_intercept_route,
@@ -18,7 +18,7 @@ class CamoufoxEngine:
             self, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
-            wait_selector_state:
+            wait_selector_state: Optional[SelectorWaitStates] = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
             proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
             geoip: Optional[bool] = False,
             adaptor_arguments: Dict = None,
@@ -84,6 +84,14 @@ class CamoufoxEngine:
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         addons = [] if self.disable_ads else [DefaultAddons.UBO]
+        # Store the final response
+        final_response = None
+
+        def handle_response(finished_response):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
+                final_response = finished_response
+
         with Camoufox(
             geoip=self.geoip,
             proxy=self.proxy,
@@ -100,13 +108,15 @@ class CamoufoxEngine:
             page = browser.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
             if self.disable_resources:
                 page.route("**/*", intercept_route)

             if self.extra_headers:
                 page.set_extra_http_headers(self.extra_headers)

-
+            first_response = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
@@ -123,21 +133,23 @@ class CamoufoxEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')

+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=
+                url=page.url,
                 text=page.content(),
                 body=page.content().encode('utf-8'),
-                status=
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=
-                request_headers=
+                headers=first_response.all_headers(),
+                request_headers=first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -151,6 +163,14 @@ class CamoufoxEngine:
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         addons = [] if self.disable_ads else [DefaultAddons.UBO]
+        # Store the final response
+        final_response = None
+
+        async def handle_response(finished_response):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
+                final_response = finished_response
+
         async with AsyncCamoufox(
             geoip=self.geoip,
             proxy=self.proxy,
@@ -167,13 +187,15 @@ class CamoufoxEngine:
             page = await browser.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
             if self.disable_resources:
                 await page.route("**/*", async_intercept_route)

             if self.extra_headers:
                 await page.set_extra_http_headers(self.extra_headers)

-
+            first_response = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             await page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
@@ -190,21 +212,23 @@ class CamoufoxEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')

+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=
+                url=page.url,
                 text=await page.content(),
                 body=(await page.content()).encode('utf-8'),
-                status=
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await
-                request_headers=await
+                headers=await first_response.all_headers(),
+                request_headers=await first_response.request.all_headers(),
                 **self.adaptor_arguments
            )
            await page.close()
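The pattern added to both fetch variants is the interesting part of this release: subscribe to every response, keep only main-frame document responses, and fall back to the `goto()` return value, so a redirect chain reports the status of its final hop rather than the first response. A condensed, standalone sketch of the same idea using plain Playwright instead of Camoufox (the URL is a placeholder):

```python
# Standalone sketch of the final-response capture pattern used above.
from playwright.sync_api import sync_playwright

final_response = None


def handle_response(finished_response):
    global final_response
    # Only the page document itself (including the last hop of a redirect
    # chain) is a navigation request with resource_type "document";
    # images, scripts, XHR, etc. are filtered out here.
    if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
        final_response = finished_response


with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.on("response", handle_response)
    first_response = page.goto("https://example.com")
    page.wait_for_load_state("domcontentloaded")
    final_response = final_response or first_response  # same fallback as above
    print(final_response.status, final_response.url)
    browser.close()
```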
scrapling/engines/pw.py CHANGED
@@ -1,6 +1,7 @@
 import json

-from scrapling.core._types import Callable, Dict, Optional,
+from scrapling.core._types import (Callable, Dict, Optional,
+                                   SelectorWaitStates, Union)
 from scrapling.core.utils import log, lru_cache
 from scrapling.engines.constants import (DEFAULT_STEALTH_FLAGS,
                                          NSTBROWSER_DEFAULT_QUERY)
@@ -23,7 +24,7 @@ class PlaywrightEngine:
                  page_action: Callable = None,
                  wait_selector: Optional[str] = None,
                  locale: Optional[str] = 'en-US',
-                 wait_selector_state:
+                 wait_selector_state: SelectorWaitStates = 'attached',
                  stealth: Optional[bool] = False,
                  real_chrome: Optional[bool] = False,
                  hide_canvas: Optional[bool] = False,
@@ -193,12 +194,21 @@ class PlaywrightEngine:
         :param url: Target url.
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
+        from playwright.sync_api import Response as PlaywrightResponse
         if not self.stealth or self.real_chrome:
             # Because rebrowser_playwright doesn't play well with real browsers
             from playwright.sync_api import sync_playwright
         else:
             from rebrowser_playwright.sync_api import sync_playwright

+        # Store the final response
+        final_response = None
+
+        def handle_response(finished_response: PlaywrightResponse):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
+                final_response = finished_response
+
         with sync_playwright() as p:
             # Creating the browser
             if self.cdp_url:
@@ -212,6 +222,8 @@ class PlaywrightEngine:
             page = context.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)

             if self.extra_headers:
                 page.set_extra_http_headers(self.extra_headers)
@@ -223,7 +235,7 @@ class PlaywrightEngine:
             for script in self.__stealth_scripts():
                 page.add_init_script(path=script)

-
+            first_response = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
@@ -240,21 +252,23 @@ class PlaywrightEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')

+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=
+                url=page.url,
                 text=page.content(),
                 body=page.content().encode('utf-8'),
-                status=
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=
-                request_headers=
+                headers=first_response.all_headers(),
+                request_headers=first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -266,12 +280,21 @@ class PlaywrightEngine:
         :param url: Target url.
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
+        from playwright.async_api import Response as PlaywrightResponse
         if not self.stealth or self.real_chrome:
             # Because rebrowser_playwright doesn't play well with real browsers
             from playwright.async_api import async_playwright
         else:
             from rebrowser_playwright.async_api import async_playwright

+        # Store the final response
+        final_response = None
+
+        async def handle_response(finished_response: PlaywrightResponse):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
+                final_response = finished_response
+
         async with async_playwright() as p:
             # Creating the browser
             if self.cdp_url:
@@ -285,6 +308,8 @@ class PlaywrightEngine:
             page = await context.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)

             if self.extra_headers:
                 await page.set_extra_http_headers(self.extra_headers)
@@ -296,7 +321,7 @@ class PlaywrightEngine:
             for script in self.__stealth_scripts():
                 await page.add_init_script(path=script)

-
+            first_response = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             await page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
@@ -313,21 +338,23 @@ class PlaywrightEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')

+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)

             response = Response(
-                url=
+                url=page.url,
                 text=await page.content(),
                 body=(await page.content()).encode('utf-8'),
-                status=
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await
-                request_headers=await
+                headers=await first_response.all_headers(),
+                request_headers=await first_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             await page.close()
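The four values in `SelectorWaitStates` are not arbitrary: they mirror the `state` argument of Playwright's own `wait_for_selector`, which is where `wait_selector_state` is ultimately forwarded. A minimal sketch straight against Playwright (URL and selector are placeholders):

```python
# The `state` values Playwright accepts are exactly
# "attached", "detached", "hidden", and "visible".
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.goto("https://example.com")
    page.wait_for_selector("h1", state="visible")  # blocks until the element is rendered
    browser.close()
```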
scrapling/engines/toolbelt/custom.py CHANGED
@@ -84,8 +84,6 @@ class ResponseEncoding:
 class Response(Adaptor):
     """This class is returned by all engines as a way to unify response type between different libraries."""

-    _is_response_result_logged = False  # Class-level flag, initialized to False
-
     def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict,
                  encoding: str = 'utf-8', method: str = 'GET', **adaptor_arguments: Dict):
         automatch_domain = adaptor_arguments.pop('automatch_domain', None)
@@ -99,9 +97,7 @@ class Response(Adaptor):
         # For back-ward compatibility
         self.adaptor = self
         # For easier debugging while working from a Python shell
-
-        log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')
-        Response._is_response_result_logged = True
+        log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')

         # def __repr__(self):
         #     return f'<{self.__class__.__name__} [{self.status} {self.reason}]>'
scrapling/fetchers.py CHANGED
@@ -1,5 +1,5 @@
 from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
-                                   Union)
+                                   SelectorWaitStates, Union)
 from scrapling.engines import (CamoufoxEngine, PlaywrightEngine, StaticEngine,
                                check_if_engine_usable)
 from scrapling.engines.toolbelt import BaseFetcher, Response
@@ -176,8 +176,8 @@ class StealthyFetcher(BaseFetcher):
             self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
-            wait_selector_state:
-            os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -234,8 +234,8 @@ class StealthyFetcher(BaseFetcher):
             self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
-            wait_selector_state:
-            os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -308,7 +308,7 @@ class PlayWrightFetcher(BaseFetcher):
     def fetch(
             self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
             useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
-            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state:
+            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: SelectorWaitStates = 'attached',
             hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
             proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
             stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
@@ -368,7 +368,7 @@ class PlayWrightFetcher(BaseFetcher):
     async def async_fetch(
             self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
             useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
-            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state:
+            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: SelectorWaitStates = 'attached',
             hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
             proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
             stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
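A hedged usage sketch of the updated `StealthyFetcher.fetch` signature; the URL is a placeholder, and depending on your setup you may prefer the ready-made instance exported from `scrapling.defaults` over instantiating the class yourself:

```python
from scrapling.fetchers import StealthyFetcher

page = StealthyFetcher().fetch(
    "https://example.com",
    wait_selector="h1",
    wait_selector_state="visible",  # now checked against SelectorWaitStates
    google_search=True,             # sends a convincing Google referer
)
print(page.status, page.reason)
```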
scrapling/parser.py CHANGED
@@ -155,7 +155,7 @@ class Adaptor(SelectorsGeneration):
         else:
             if issubclass(type(element), html.HtmlMixin):

-                return
+                return Adaptor(
                     root=element,
                     text='', body=b'',  # Since root argument is provided, both `text` and `body` will be ignored so this is just a filler
                     url=self.url, encoding=self.encoding, auto_match=self.__auto_match_enabled,
@@ -474,7 +474,7 @@ class Adaptor(SelectorsGeneration):

     def css(self, selector: str, identifier: str = '',
             auto_match: bool = False, auto_save: bool = False, percentage: int = 0
-            ) -> Union['Adaptors[Adaptor]', List]:
+            ) -> Union['Adaptors[Adaptor]', List, 'TextHandlers[TextHandler]']:
         """Search current tree with CSS3 selectors

         **Important:
@@ -517,7 +517,7 @@ class Adaptor(SelectorsGeneration):

     def xpath(self, selector: str, identifier: str = '',
               auto_match: bool = False, auto_save: bool = False, percentage: int = 0, **kwargs: Any
-              ) -> Union['Adaptors[Adaptor]', List]:
+              ) -> Union['Adaptors[Adaptor]', List, 'TextHandlers[TextHandler]']:
         """Search current tree with XPath selectors

         **Important:
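The widened return annotations reflect that selecting text nodes (for example with the `::text` pseudo-element) yields `TextHandlers` rather than `Adaptors`. An illustrative sketch with a toy document:

```python
from scrapling import Adaptor

page = Adaptor("<html><body><h1>Hello</h1></body></html>")
elements = page.css("h1")      # Adaptors[Adaptor]: element results
texts = page.css("h1::text")   # TextHandlers[TextHandler]: text results
```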
{scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.9
+Version: 0.2.92
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -21,7 +21,6 @@ Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -35,10 +34,11 @@ License-File: LICENSE
 Requires-Dist: requests>=2.3
 Requires-Dist: lxml>=4.5
 Requires-Dist: cssselect>=1.2
+Requires-Dist: click
 Requires-Dist: w3lib
 Requires-Dist: orjson>=3
 Requires-Dist: tldextract
-Requires-Dist: httpx[brotli,zstd]
+Requires-Dist: httpx[brotli,socks,zstd]
 Requires-Dist: playwright>=1.49.1
 Requires-Dist: rebrowser-playwright>=1.49.1
 Requires-Dist: camoufox[geoip]>=0.4.9
@@ -212,52 +212,18 @@ Scrapling can find elements with more methods and it returns full element `Adapt
 > All benchmarks' results are an average of 100 runs. See our [benchmarks.py](https://github.com/D4Vinci/Scrapling/blob/main/benchmarks.py) for methodology and to run your comparisons.

 ## Installation
-Scrapling is a breeze to get started with
+Scrapling is a breeze to get started with; Starting from version 0.2.9, we require at least Python 3.9 to work.
 ```bash
 pip3 install scrapling
 ```
-
-<details><summary>Windows OS</summary>
-
-```bash
-camoufox fetch --browserforge
-```
-</details>
-<details><summary>MacOS</summary>
-
-```bash
-python3 -m camoufox fetch --browserforge
-```
-</details>
-<details><summary>Linux</summary>
-
+Then run this command to install browsers' dependencies needed to use Fetcher classes
 ```bash
-
-```
-On a fresh installation of Linux, you may also need the following Firefox dependencies:
-- Debian-based distros
-```bash
-sudo apt install -y libgtk-3-0 libx11-xcb1 libasound2
-```
-- Arch-based distros
-```bash
-sudo pacman -S gtk3 libx11 libxcb cairo libasound alsa-lib
-```
-</details>
-
-<small> See the official <a href="https://camoufox.com/python/installation/#download-the-browser">Camoufox documentation</a> for more info on installation</small>
-
-- If you are going to use the `PlayWrightFetcher` options, then install Playwright's Chromium browser with:
-```commandline
-playwright install chromium
-```
-- If you are going to use normal requests only with the `Fetcher` class then update the fingerprints files with:
-```commandline
-python -m browserforge update
+scrapling install
 ```
+If you have any installation issues, please open an issue.

 ## Fetching Websites
-Fetchers are
+Fetchers are interfaces built on top of other libraries with added features that do requests or fetch pages for you in a single request fashion and then return an `Adaptor` object. This feature was introduced because the only option we had before was to fetch the page as you wanted it, then pass it manually to the `Adaptor` class to create an `Adaptor` instance and start playing around with the page.

 ### Features
 You might be slightly confused by now so let me clear things up. All fetcher-type classes are imported in the same way
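The `httpx` extra change is small but user-visible: the added `socks` extra pulls in SOCKS proxy support for httpx-backed requests. A minimal httpx-level illustration (the proxy address is a placeholder):

```python
import httpx

# Requires httpx[socks]; `proxy=` is the httpx >= 0.26 spelling,
# older versions used `proxies=` instead.
client = httpx.Client(proxy="socks5://127.0.0.1:9050")
response = client.get("https://example.com")
print(response.status_code)
```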
{scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/RECORD CHANGED
@@ -1,22 +1,23 @@
-scrapling/__init__.py,sha256=
+scrapling/__init__.py,sha256=0iEOX168f4gLFpReEUemMOhTske8AS2o0UQHJWXn-4o,500
+scrapling/cli.py,sha256=njPdJKmbLFHeWjtSiGEm9ALBdSyfUp0IaJvxQL5C31Q,1125
 scrapling/defaults.py,sha256=tJAOMB-PMd3aLZz3j_yr6haBxxaklAvWdS_hP-GFFdU,331
-scrapling/fetchers.py,sha256=
-scrapling/parser.py,sha256=
+scrapling/fetchers.py,sha256=K3MKBqKDOXItJNwxFY2fe1C21Vz6QSd91fFtN98Mpg4,35402
+scrapling/parser.py,sha256=sT1gh5pnbjpUzFt8K9DGD6x60zKQcAtzmyf8DgiNDCI,55266
 scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
 scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-scrapling/core/_types.py,sha256=
+scrapling/core/_types.py,sha256=OcsP1WeQEOlEVo9OzTrLQfgZZfXuJ0civVs31SynwGA,641
 scrapling/core/custom_types.py,sha256=ZRzpoT6qQ4vU_ejhLXa7WYuYLGl5HwAjLPe01xdhuvM,10808
 scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
 scrapling/core/storage_adaptors.py,sha256=l_ZYcdn1y69AcoPuRrPoaxqKysN62pMExrwJWYdu5MA,6220
 scrapling/core/translator.py,sha256=ojDmNi5pFZE6Ke-AiSsTilXiPRdR8yhX3o-uVGMkap8,5236
 scrapling/core/utils.py,sha256=03LzCDzmeK1TXPjIKVzHSUgSfhpe36XE8AwxlgxzJoU,3705
 scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c,267
-scrapling/engines/camo.py,sha256=
+scrapling/engines/camo.py,sha256=wJRfaIU0w_hDSlrP2AdpjBU6NNEKw0wSnVbqUoxt1Gk,13682
 scrapling/engines/constants.py,sha256=Gb_nXFoBB4ujJkd05SKkenMe1UDiRYQA3dkmA3DunLg,3723
-scrapling/engines/pw.py,sha256=
+scrapling/engines/pw.py,sha256=MCYE5rDx55D2VOIeUNLl44ROXnyFRfku_u2FOcXjqEQ,18534
 scrapling/engines/static.py,sha256=7SVEfeigCPfwC1ukx0zIFFe96Bo5fox6qOq2IWrP6P8,10319
 scrapling/engines/toolbelt/__init__.py,sha256=VQDdYm1zY9Apno6d8UrULk29vUjllZrQqD8mXL1E2Fc,402
-scrapling/engines/toolbelt/custom.py,sha256=
+scrapling/engines/toolbelt/custom.py,sha256=d3qyeCg_qHm1RRE7yv5hyU9b17Y7YDPGBOVhEH1CAT0,12754
 scrapling/engines/toolbelt/fingerprints.py,sha256=ajEHdXHr7W4hw9KcNS7XlyxNBZu37p1bRj18TiICLzU,2929
 scrapling/engines/toolbelt/navigation.py,sha256=xEfZRJefuxOCGxQOSI2llS0du0Y2XmoIPdVGUSHOd7k,4567
 scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
@@ -40,8 +41,9 @@ tests/fetchers/sync/test_playwright.py,sha256=5eZdPwk3JGeaO7GuExv_QsByLyWDE9joxn
 tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/parser/test_automatch.py,sha256=SxsNdExE8zz8AcPRQFBUjZ3Q_1-tPOd9dzVvMSZpOYQ,4908
 tests/parser/test_general.py,sha256=dyfOsc8lleoY4AxcfDUBUaD1i95xecfYuTUhKBsYjwo,12100
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
+scrapling-0.2.92.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.2.92.dist-info/METADATA,sha256=2I-HK-xEkVFFyQBio8NAKR0eQEBB-dLHFuvb5eluCEQ,67415
+scrapling-0.2.92.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+scrapling-0.2.92.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
+scrapling-0.2.92.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
+scrapling-0.2.92.dist-info/RECORD,,
{scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/LICENSE: file without changes
{scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/WHEEL: file without changes
{scrapling-0.2.9.dist-info → scrapling-0.2.92.dist-info}/top_level.txt: file without changes