PyPI - scrapling - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl - Mend

scrapling 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (18)

scrapling/__init__.py +1 -1
scrapling/cli.py +4 -4
scrapling/core/custom_types.py +2 -2
scrapling/core/shell.py +21 -6
scrapling/engines/_browsers/_base.py +5 -31
scrapling/engines/_browsers/_camoufox.py +74 -44
scrapling/engines/_browsers/_controllers.py +41 -50
scrapling/engines/_browsers/_page.py +1 -42
scrapling/engines/_browsers/_validators.py +130 -65
scrapling/engines/static.py +2 -4
scrapling/engines/toolbelt/navigation.py +1 -1
scrapling/parser.py +16 -12
{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/METADATA +18 -17
{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/RECORD +18 -18
{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/WHEEL +0 -0
{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/entry_points.txt +0 -0
{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/licenses/LICENSE +0 -0
{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/top_level.txt +0 -0

scrapling/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.3.3"
+__version__ = "0.3.5"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"

scrapling/cli.py CHANGED Viewed

@@ -32,8 +32,8 @@ def __ParseJSONData(json_string: Optional[str] = None) -> Optional[Dict[str, Any
     try:
         return json_loads(json_string)
-    except JSONDecodeError as e:  # pragma: no cover
-        raise ValueError(f"Invalid JSON data '{json_string}': {e}")
+    except JSONDecodeError as err:  # pragma: no cover
+        raise ValueError(f"Invalid JSON data '{json_string}': {err}")
 def __Request_and_Save(
@@ -65,8 +65,8 @@ def __ParseExtractArguments(
         for key, value in _CookieParser(cookies):
             try:
                 parsed_cookies[key] = value
-            except Exception as e:
-                raise ValueError(f"Could not parse cookies '{cookies}': {e}")
+            except Exception as err:
+                raise ValueError(f"Could not parse cookies '{cookies}': {err}")
     parsed_json = __ParseJSONData(json)
     parsed_params = {}

scrapling/core/custom_types.py CHANGED Viewed

@@ -145,7 +145,7 @@ class TextHandler(str):
         clean_match: bool = False,
         case_sensitive: bool = True,
         check_match: Literal[False] = False,
-    ) -> "TextHandlers[TextHandler]": ...
+    ) -> "TextHandlers": ...
     def re(
         self,
@@ -241,7 +241,7 @@ class TextHandlers(List[TextHandler]):
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
-    ) -> "TextHandlers[TextHandler]":
+    ) -> "TextHandlers":
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as TextHandlers.

scrapling/core/shell.py CHANGED Viewed

@@ -201,7 +201,7 @@ class CurlParser:
                 data_payload = parsed_args.data_binary  # Fallback to string
         elif parsed_args.data_raw is not None:
-            data_payload = parsed_args.data_raw
+            data_payload = parsed_args.data_raw.lstrip("$")
         elif parsed_args.data is not None:
             data_payload = parsed_args.data
@@ -318,7 +318,7 @@ def show_page_in_browser(page: Selector):  # pragma: no cover
     try:
         fd, fname = make_temp_file(prefix="scrapling_view_", suffix=".html")
         with open(fd, "w", encoding=page.encoding) as f:
-            f.write(page.body)
+            f.write(page.html_content)
         open_in_browser(f"file://{fname}")
     except IOError as e:
@@ -335,15 +335,25 @@ class CustomShell:
         from scrapling.fetchers import (
             Fetcher as __Fetcher,
             AsyncFetcher as __AsyncFetcher,
+            FetcherSession as __FetcherSession,
             DynamicFetcher as __DynamicFetcher,
+            DynamicSession as __DynamicSession,
+            AsyncDynamicSession as __AsyncDynamicSession,
             StealthyFetcher as __StealthyFetcher,
+            StealthySession as __StealthySession,
+            AsyncStealthySession as __AsyncStealthySession,
         )
         self.__InteractiveShellEmbed = __InteractiveShellEmbed
         self.__Fetcher = __Fetcher
         self.__AsyncFetcher = __AsyncFetcher
+        self.__FetcherSession = __FetcherSession
         self.__DynamicFetcher = __DynamicFetcher
+        self.__DynamicSession = __DynamicSession
+        self.__AsyncDynamicSession = __AsyncDynamicSession
         self.__StealthyFetcher = __StealthyFetcher
+        self.__StealthySession = __StealthySession
+        self.__AsyncStealthySession = __AsyncStealthySession
         self.code = code
         self.page = None
         self.pages = Selectors([])
@@ -379,9 +389,9 @@ class CustomShell:
         """Create a custom banner for the shell"""
         return f"""
 -> Available Scrapling objects:
-   - Fetcher/AsyncFetcher
-   - DynamicFetcher
-   - StealthyFetcher
+   - Fetcher/AsyncFetcher/FetcherSession
+   - DynamicFetcher/DynamicSession/AsyncDynamicSession
+   - StealthyFetcher/StealthySession/AsyncStealthySession
    - Selector
 -> Useful shortcuts:
@@ -449,6 +459,11 @@ Type 'exit' or press Ctrl+D to exit.
             "delete": delete,
             "Fetcher": self.__Fetcher,
             "AsyncFetcher": self.__AsyncFetcher,
+            "FetcherSession": self.__FetcherSession,
+            "DynamicSession": self.__DynamicSession,
+            "AsyncDynamicSession": self.__AsyncDynamicSession,
+            "StealthySession": self.__StealthySession,
+            "AsyncStealthySession": self.__AsyncStealthySession,
             "fetch": dynamic_fetch,
             "DynamicFetcher": self.__DynamicFetcher,
             "stealthy_fetch": stealthy_fetch,
@@ -530,7 +545,7 @@ class Convertor:
             for page in pages:
                 match extraction_type:
                     case "markdown":
-                        yield cls._convert_to_markdown(page.body)
+                        yield cls._convert_to_markdown(page.html_content)
                     case "html":
                         yield page.body
                     case "text":

scrapling/engines/_browsers/_base.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from time import time, sleep
+from time import time
 from asyncio import sleep as asyncio_sleep, Lock
 from camoufox import DefaultAddons
@@ -31,7 +31,7 @@ class SyncSession:
     def __init__(self, max_pages: int = 1):
         self.max_pages = max_pages
         self.page_pool = PagePool(max_pages)
-        self.__max_wait_for_page = 60
+        self._max_wait_for_page = 60
         self.playwright: Optional[Playwright] = None
         self.context: Optional[BrowserContext] = None
         self._closed = False
@@ -44,23 +44,7 @@ class SyncSession:
     ) -> PageInfo:  # pragma: no cover
         """Get a new page to use"""
-        # Close all finished pages to ensure clean state
-        self.page_pool.close_all_finished_pages()
-        # If we're at max capacity after cleanup, wait for busy pages to finish
-        if self.page_pool.pages_count >= self.max_pages:
-            start_time = time()
-            while time() - start_time < self.__max_wait_for_page:
-                # Wait for any pages to finish, then clean them up
-                sleep(0.05)
-                self.page_pool.close_all_finished_pages()
-                if self.page_pool.pages_count < self.max_pages:
-                    break
-            else:
-                raise TimeoutError(
-                    f"No pages finished to clear place in the pool within the {self.__max_wait_for_page}s timeout period"
-                )
+        # No need to check if a page is available or not in sync code because the code blocked before reaching here till the page closed, ofc.
         page = self.context.new_page()
         page.set_default_navigation_timeout(timeout)
         page.set_default_timeout(timeout)
@@ -76,11 +60,6 @@ class SyncSession:
         return self.page_pool.add_page(page)
-    @staticmethod
-    def _get_with_precedence(request_value: Any, session_value: Any, sentinel_value: object) -> Any:
-        """Get value with request-level priority over session-level"""
-        return request_value if request_value is not sentinel_value else session_value
     def get_pool_stats(self) -> Dict[str, int]:
         """Get statistics about the current page pool"""
         return {
@@ -105,21 +84,16 @@ class AsyncSession(SyncSession):
     ) -> PageInfo:  # pragma: no cover
         """Get a new page to use"""
         async with self._lock:
-            # Close all finished pages to ensure clean state
-            await self.page_pool.aclose_all_finished_pages()
             # If we're at max capacity after cleanup, wait for busy pages to finish
             if self.page_pool.pages_count >= self.max_pages:
                 start_time = time()
-                while time() - start_time < self.__max_wait_for_page:
-                    # Wait for any pages to finish, then clean them up
+                while time() - start_time < self._max_wait_for_page:
                     await asyncio_sleep(0.05)
-                    await self.page_pool.aclose_all_finished_pages()
                     if self.page_pool.pages_count < self.max_pages:
                         break
                 else:
                     raise TimeoutError(
-                        f"No pages finished to clear place in the pool within the {self.__max_wait_for_page}s timeout period"
+                        f"No pages finished to clear place in the pool within the {self._max_wait_for_page}s timeout period"
                     )
             page = await self.context.new_page()

scrapling/engines/_browsers/_camoufox.py CHANGED Viewed

@@ -14,8 +14,9 @@ from playwright.async_api import (
     Locator as AsyncLocator,
     Page as async_Page,
 )
+from playwright._impl._errors import Error as PlaywrightError
-from ._validators import validate, CamoufoxConfig
+from ._validators import validate_fetch as _validate
 from ._base import SyncSession, AsyncSession, StealthySessionMixin
 from scrapling.core.utils import log
 from scrapling.core._types import (
@@ -201,20 +202,34 @@ class StealthySession(StealthySessionMixin, SyncSession):
         self._closed = True
+    @staticmethod
+    def _get_page_content(page: Page) -> str | None:
+        """
+        A workaround for Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
+        :param page: The page to extract content from.
+        :return:
+        """
+        while True:
+            try:
+                return page.content() or ""
+            except PlaywrightError:
+                page.wait_for_timeout(1000)
+                continue
     def _solve_cloudflare(self, page: Page) -> None:  # pragma: no cover
         """Solve the cloudflare challenge displayed on the playwright page passed
         :param page: The targeted page
         :return:
         """
-        challenge_type = self._detect_cloudflare(page.content())
+        challenge_type = self._detect_cloudflare(self._get_page_content(page))
         if not challenge_type:
             log.error("No Cloudflare challenge found.")
             return
         else:
             log.info(f'The turnstile version discovered is "{challenge_type}"')
             if challenge_type == "non-interactive":
-                while "<title>Just a moment...</title>" in (page.content()):
+                while "<title>Just a moment...</title>" in (self._get_page_content(page)):
                     log.info("Waiting for Cloudflare wait page to disappear.")
                     page.wait_for_timeout(1000)
                     page.wait_for_load_state()
@@ -222,7 +237,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
                 return
             else:
-                while "Verifying you are human." in page.content():
+                while "Verifying you are human." in self._get_page_content(page):
                     # Waiting for the verify spinner to disappear, checking every 1s if it disappeared
                     page.wait_for_timeout(500)
@@ -282,23 +297,22 @@ class StealthySession(StealthySessionMixin, SyncSession):
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        # Validate all resolved parameters
-        params = validate(
-            dict(
-                google_search=self._get_with_precedence(google_search, self.google_search, _UNSET),
-                timeout=self._get_with_precedence(timeout, self.timeout, _UNSET),
-                wait=self._get_with_precedence(wait, self.wait, _UNSET),
-                page_action=self._get_with_precedence(page_action, self.page_action, _UNSET),
-                extra_headers=self._get_with_precedence(extra_headers, self.extra_headers, _UNSET),
-                disable_resources=self._get_with_precedence(disable_resources, self.disable_resources, _UNSET),
-                wait_selector=self._get_with_precedence(wait_selector, self.wait_selector, _UNSET),
-                wait_selector_state=self._get_with_precedence(wait_selector_state, self.wait_selector_state, _UNSET),
-                network_idle=self._get_with_precedence(network_idle, self.network_idle, _UNSET),
-                load_dom=self._get_with_precedence(load_dom, self.load_dom, _UNSET),
-                solve_cloudflare=self._get_with_precedence(solve_cloudflare, self.solve_cloudflare, _UNSET),
-                selector_config=self._get_with_precedence(selector_config, self.selector_config, _UNSET),
-            ),
-            CamoufoxConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )
         if self._closed:  # pragma: no cover
@@ -366,8 +380,9 @@ class StealthySession(StealthySessionMixin, SyncSession):
                 page_info.page, first_response, final_response, params.selector_config
             )
-            # Mark the page as finished for next use
-            page_info.mark_finished()
+            # Close the page, to free up resources
+            page_info.page.close()
+            self.page_pool.pages.remove(page_info)
             return response
@@ -506,20 +521,34 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
         self._closed = True
+    @staticmethod
+    async def _get_page_content(page: async_Page) -> str | None:
+        """
+        A workaround for Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
+        :param page: The page to extract content from.
+        :return:
+        """
+        while True:
+            try:
+                return (await page.content()) or ""
+            except PlaywrightError:
+                await page.wait_for_timeout(1000)
+                continue
     async def _solve_cloudflare(self, page: async_Page):
         """Solve the cloudflare challenge displayed on the playwright page passed. The async version
         :param page: The async targeted page
         :return:
         """
-        challenge_type = self._detect_cloudflare(await page.content())
+        challenge_type = self._detect_cloudflare(await self._get_page_content(page))
         if not challenge_type:
             log.error("No Cloudflare challenge found.")
             return
         else:
             log.info(f'The turnstile version discovered is "{challenge_type}"')
             if challenge_type == "non-interactive":  # pragma: no cover
-                while "<title>Just a moment...</title>" in (await page.content()):
+                while "<title>Just a moment...</title>" in (await self._get_page_content(page)):
                     log.info("Waiting for Cloudflare wait page to disappear.")
                     await page.wait_for_timeout(1000)
                     await page.wait_for_load_state()
@@ -527,7 +556,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
                 return
             else:
-                while "Verifying you are human." in (await page.content()):
+                while "Verifying you are human." in (await self._get_page_content(page)):
                     # Waiting for the verify spinner to disappear, checking every 1s if it disappeared
                     await page.wait_for_timeout(500)
@@ -587,22 +616,22 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        params = validate(
-            dict(
-                google_search=self._get_with_precedence(google_search, self.google_search, _UNSET),
-                timeout=self._get_with_precedence(timeout, self.timeout, _UNSET),
-                wait=self._get_with_precedence(wait, self.wait, _UNSET),
-                page_action=self._get_with_precedence(page_action, self.page_action, _UNSET),
-                extra_headers=self._get_with_precedence(extra_headers, self.extra_headers, _UNSET),
-                disable_resources=self._get_with_precedence(disable_resources, self.disable_resources, _UNSET),
-                wait_selector=self._get_with_precedence(wait_selector, self.wait_selector, _UNSET),
-                wait_selector_state=self._get_with_precedence(wait_selector_state, self.wait_selector_state, _UNSET),
-                network_idle=self._get_with_precedence(network_idle, self.network_idle, _UNSET),
-                load_dom=self._get_with_precedence(load_dom, self.load_dom, _UNSET),
-                solve_cloudflare=self._get_with_precedence(solve_cloudflare, self.solve_cloudflare, _UNSET),
-                selector_config=self._get_with_precedence(selector_config, self.selector_config, _UNSET),
-            ),
-            CamoufoxConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )
         if self._closed:  # pragma: no cover
@@ -672,8 +701,9 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
                 page_info.page, first_response, final_response, params.selector_config
             )
-            # Mark the page as finished for next use
-            page_info.mark_finished()
+            # Close the page, to free up resources
+            await page_info.page.close()
+            self.page_pool.pages.remove(page_info)
             return response

scrapling/engines/_browsers/_controllers.py CHANGED Viewed

@@ -11,14 +11,12 @@ from playwright.async_api import (
     Playwright as AsyncPlaywright,
     Locator as AsyncLocator,
 )
-from rebrowser_playwright.sync_api import sync_playwright as sync_rebrowser_playwright
-from rebrowser_playwright.async_api import (
-    async_playwright as async_rebrowser_playwright,
-)
+from patchright.sync_api import sync_playwright as sync_patchright
+from patchright.async_api import async_playwright as async_patchright
 from scrapling.core.utils import log
 from ._base import SyncSession, AsyncSession, DynamicSessionMixin
-from ._validators import validate, PlaywrightConfig
+from ._validators import validate_fetch as _validate
 from scrapling.core._types import (
     Dict,
     List,
@@ -154,10 +152,7 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
     def __create__(self):
         """Create a browser for this instance and context."""
-        sync_context = sync_rebrowser_playwright
-        if not self.stealth or self.real_chrome:
-            # Because rebrowser_playwright doesn't play well with real browsers
-            sync_context = sync_playwright
+        sync_context = sync_patchright if self.stealth else sync_playwright
         self.playwright: Playwright = sync_context().start()
@@ -229,22 +224,21 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        # Validate all resolved parameters
-        params = validate(
-            dict(
-                google_search=self._get_with_precedence(google_search, self.google_search, _UNSET),
-                timeout=self._get_with_precedence(timeout, self.timeout, _UNSET),
-                wait=self._get_with_precedence(wait, self.wait, _UNSET),
-                page_action=self._get_with_precedence(page_action, self.page_action, _UNSET),
-                extra_headers=self._get_with_precedence(extra_headers, self.extra_headers, _UNSET),
-                disable_resources=self._get_with_precedence(disable_resources, self.disable_resources, _UNSET),
-                wait_selector=self._get_with_precedence(wait_selector, self.wait_selector, _UNSET),
-                wait_selector_state=self._get_with_precedence(wait_selector_state, self.wait_selector_state, _UNSET),
-                network_idle=self._get_with_precedence(network_idle, self.network_idle, _UNSET),
-                load_dom=self._get_with_precedence(load_dom, self.load_dom, _UNSET),
-                selector_config=self._get_with_precedence(selector_config, self.selector_config, _UNSET),
-            ),
-            PlaywrightConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )
         if self._closed:  # pragma: no cover
@@ -305,8 +299,9 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
                 page_info.page, first_response, final_response, params.selector_config
             )
-            # Mark the page as finished for next use
-            page_info.mark_finished()
+            # Close the page, to free up resources
+            page_info.page.close()
+            self.page_pool.pages.remove(page_info)
             return response
@@ -402,10 +397,7 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
     async def __create__(self):
         """Create a browser for this instance and context."""
-        async_context = async_rebrowser_playwright
-        if not self.stealth or self.real_chrome:
-            # Because rebrowser_playwright doesn't play well with real browsers
-            async_context = async_playwright
+        async_context = async_patchright if self.stealth else async_playwright
         self.playwright: AsyncPlaywright = await async_context().start()
@@ -478,22 +470,21 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        # Validate all resolved parameters
-        params = validate(
-            dict(
-                google_search=self._get_with_precedence(google_search, self.google_search, _UNSET),
-                timeout=self._get_with_precedence(timeout, self.timeout, _UNSET),
-                wait=self._get_with_precedence(wait, self.wait, _UNSET),
-                page_action=self._get_with_precedence(page_action, self.page_action, _UNSET),
-                extra_headers=self._get_with_precedence(extra_headers, self.extra_headers, _UNSET),
-                disable_resources=self._get_with_precedence(disable_resources, self.disable_resources, _UNSET),
-                wait_selector=self._get_with_precedence(wait_selector, self.wait_selector, _UNSET),
-                wait_selector_state=self._get_with_precedence(wait_selector_state, self.wait_selector_state, _UNSET),
-                network_idle=self._get_with_precedence(network_idle, self.network_idle, _UNSET),
-                load_dom=self._get_with_precedence(load_dom, self.load_dom, _UNSET),
-                selector_config=self._get_with_precedence(selector_config, self.selector_config, _UNSET),
-            ),
-            PlaywrightConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )
         if self._closed:  # pragma: no cover
@@ -554,9 +545,9 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
                 page_info.page, first_response, final_response, params.selector_config
             )
-            # Mark the page as finished for next use
-            page_info.mark_finished()
+            # Close the page, to free up resources
+            await page_info.page.close()
+            self.page_pool.pages.remove(page_info)
             return response
         except Exception as e:  # pragma: no cover

scrapling/engines/_browsers/_page.py CHANGED Viewed

@@ -6,7 +6,7 @@ from playwright.async_api import Page as AsyncPage
 from scrapling.core._types import Optional, List, Literal
-PageState = Literal["finished", "ready", "busy", "error"]  # States that a page can be in
+PageState = Literal["ready", "busy", "error"]  # States that a page can be in
 @dataclass
@@ -23,11 +23,6 @@ class PageInfo:
         self.state = "busy"
         self.url = url
-    def mark_finished(self):
-        """Mark the page as finished for new requests"""
-        self.state = "finished"
-        self.url = ""
     def mark_error(self):
         """Mark the page as having an error"""
         self.state = "error"
@@ -67,12 +62,6 @@ class PagePool:
         """Get the total number of pages"""
         return len(self.pages)
-    @property
-    def finished_count(self) -> int:
-        """Get the number of finished pages"""
-        with self._lock:
-            return sum(1 for p in self.pages if p.state == "finished")
     @property
     def busy_count(self) -> int:
         """Get the number of busy pages"""
@@ -83,33 +72,3 @@ class PagePool:
         """Remove pages in error state"""
         with self._lock:
             self.pages = [p for p in self.pages if p.state != "error"]
-    def close_all_finished_pages(self):
-        """Close all pages in finished state and remove them from the pool"""
-        with self._lock:
-            pages_to_remove = []
-            for page_info in self.pages:
-                if page_info.state == "finished":
-                    try:
-                        page_info.page.close()
-                    except Exception:
-                        pass
-                    pages_to_remove.append(page_info)
-            for page_info in pages_to_remove:
-                self.pages.remove(page_info)
-    async def aclose_all_finished_pages(self):
-        """Async version: Close all pages in finished state and remove them from the pool"""
-        with self._lock:
-            pages_to_remove = []
-            for page_info in self.pages:
-                if page_info.state == "finished":
-                    try:
-                        await page_info.page.close()
-                    except Exception:
-                        pass
-                    pages_to_remove.append(page_info)
-            for page_info in pages_to_remove:
-                self.pages.remove(page_info)

scrapling/engines/_browsers/_validators.py CHANGED Viewed

@@ -1,21 +1,69 @@
-from msgspec import Struct, convert, ValidationError
-from urllib.parse import urlparse
 from pathlib import Path
+from typing import Annotated
+from dataclasses import dataclass
+from urllib.parse import urlparse
+from msgspec import Struct, Meta, convert, ValidationError
 from scrapling.core._types import (
-    Optional,
     Dict,
-    Callable,
     List,
+    Tuple,
+    Optional,
+    Callable,
     SelectorWaitStates,
 )
 from scrapling.engines.toolbelt.navigation import construct_proxy_dict
+# Custom validators for msgspec
+def _validate_file_path(value: str):
+    """Fast file path validation"""
+    path = Path(value)
+    if not path.exists():
+        raise ValueError(f"Init script path not found: {value}")
+    if not path.is_file():
+        raise ValueError(f"Init script is not a file: {value}")
+    if not path.is_absolute():
+        raise ValueError(f"Init script is not a absolute path: {value}")
+def _validate_addon_path(value: str):
+    """Fast addon path validation"""
+    path = Path(value)
+    if not path.exists():
+        raise FileNotFoundError(f"Addon path not found: {value}")
+    if not path.is_dir():
+        raise ValueError(f"Addon path must be a directory of the extracted addon: {value}")
+def _validate_cdp_url(cdp_url: str):
+    """Fast CDP URL validation"""
+    try:
+        # Check the scheme
+        if not cdp_url.startswith(("ws://", "wss://")):
+            raise ValueError("CDP URL must use 'ws://' or 'wss://' scheme")
+        # Validate hostname and port
+        if not urlparse(cdp_url).netloc:
+            raise ValueError("Invalid hostname for the CDP URL")
+    except AttributeError as e:
+        raise ValueError(f"Malformed CDP URL: {cdp_url}: {str(e)}")
+    except Exception as e:
+        raise ValueError(f"Invalid CDP URL '{cdp_url}': {str(e)}")
+# Type aliases for cleaner annotations
+PagesCount = Annotated[int, Meta(ge=1, le=50)]
+Seconds = Annotated[int, float, Meta(ge=0)]
 class PlaywrightConfig(Struct, kw_only=True, frozen=False):
     """Configuration struct for validation"""
-    max_pages: int = 1
+    max_pages: PagesCount = 1
     cdp_url: Optional[str] = None
     headless: bool = True
     google_search: bool = True
@@ -23,13 +71,13 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
     disable_webgl: bool = False
     real_chrome: bool = False
     stealth: bool = False
-    wait: int | float = 0
+    wait: Seconds = 0
     page_action: Optional[Callable] = None
     proxy: Optional[str | Dict[str, str]] = None  # The default value for proxy in Playwright's source is `None`
     locale: str = "en-US"
     extra_headers: Optional[Dict[str, str]] = None
     useragent: Optional[str] = None
-    timeout: int | float = 30000
+    timeout: Seconds = 30000
     init_script: Optional[str] = None
     disable_resources: bool = False
     wait_selector: Optional[str] = None
@@ -41,52 +89,26 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
     def __post_init__(self):
         """Custom validation after msgspec validation"""
-        if self.max_pages < 1 or self.max_pages > 50:
-            raise ValueError("max_pages must be between 1 and 50")
-        if self.timeout < 0:
-            raise ValueError("timeout must be >= 0")
         if self.page_action and not callable(self.page_action):
             raise TypeError(f"page_action must be callable, got {type(self.page_action).__name__}")
         if self.proxy:
             self.proxy = construct_proxy_dict(self.proxy, as_tuple=True)
         if self.cdp_url:
-            self.__validate_cdp(self.cdp_url)
+            _validate_cdp_url(self.cdp_url)
         if not self.cookies:
             self.cookies = []
         if not self.selector_config:
             self.selector_config = {}
         if self.init_script is not None:
-            script_path = Path(self.init_script)
-            if not script_path.exists():
-                raise ValueError("Init script path not found")
-            elif not script_path.is_file():
-                raise ValueError("Init script is not a file")
-            elif not script_path.is_absolute():
-                raise ValueError("Init script is not a absolute path")
-    @staticmethod
-    def __validate_cdp(cdp_url):
-        try:
-            # Check the scheme
-            if not cdp_url.startswith(("ws://", "wss://")):
-                raise ValueError("CDP URL must use 'ws://' or 'wss://' scheme")
-            # Validate hostname and port
-            if not urlparse(cdp_url).netloc:
-                raise ValueError("Invalid hostname for the CDP URL")
-        except AttributeError as e:
-            raise ValueError(f"Malformed CDP URL: {cdp_url}: {str(e)}")
-        except Exception as e:
-            raise ValueError(f"Invalid CDP URL '{cdp_url}': {str(e)}")
+            _validate_file_path(self.init_script)
 class CamoufoxConfig(Struct, kw_only=True, frozen=False):
     """Configuration struct for validation"""
-    max_pages: int = 1
+    max_pages: PagesCount = 1
     headless: bool = True  # noqa: F821
     block_images: bool = False
     disable_resources: bool = False
@@ -96,8 +118,8 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
     load_dom: bool = True
     humanize: bool | float = True
     solve_cloudflare: bool = False
-    wait: int | float = 0
-    timeout: int | float = 30000
+    wait: Seconds = 0
+    timeout: Seconds = 30000
     init_script: Optional[str] = None
     page_action: Optional[Callable] = None
     wait_selector: Optional[str] = None
@@ -115,38 +137,23 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
     def __post_init__(self):
         """Custom validation after msgspec validation"""
-        if self.max_pages < 1 or self.max_pages > 50:
-            raise ValueError("max_pages must be between 1 and 50")
-        if self.timeout < 0:
-            raise ValueError("timeout must be >= 0")
         if self.page_action and not callable(self.page_action):
             raise TypeError(f"page_action must be callable, got {type(self.page_action).__name__}")
         if self.proxy:
             self.proxy = construct_proxy_dict(self.proxy, as_tuple=True)
-        if not self.addons:
-            self.addons = []
-        else:
+        if self.addons and isinstance(self.addons, list):
             for addon in self.addons:
-                addon_path = Path(addon)
-                if not addon_path.exists():
-                    raise FileNotFoundError(f"Addon's path not found: {addon}")
-                elif not addon_path.is_dir():
-                    raise ValueError(
-                        f"Addon's path is not a folder, you need to pass a folder of the extracted addon: {addon}"
-                    )
+                _validate_addon_path(addon)
+        else:
+            self.addons = []
         if self.init_script is not None:
-            script_path = Path(self.init_script)
-            if not script_path.exists():
-                raise ValueError("Init script path not found")
-            elif not script_path.is_file():
-                raise ValueError("Init script is not a file")
-            elif not script_path.is_absolute():
-                raise ValueError("Init script is not a absolute path")
+            _validate_file_path(self.init_script)
         if not self.cookies:
             self.cookies = []
+        # Cloudflare timeout adjustment
         if self.solve_cloudflare and self.timeout < 60_000:
             self.timeout = 60_000
         if not self.selector_config:
@@ -155,10 +162,68 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
             self.additional_args = {}
-def validate(params, model):
+# Code parts to validate `fetch` in the least possible numbers of lines overall
+class FetchConfig(Struct, kw_only=True):
+    """Configuration struct for `fetch` calls validation"""
+    google_search: bool = True
+    timeout: Seconds = 30000
+    wait: Seconds = 0
+    page_action: Optional[Callable] = None
+    extra_headers: Optional[Dict[str, str]] = None
+    disable_resources: bool = False
+    wait_selector: Optional[str] = None
+    wait_selector_state: SelectorWaitStates = "attached"
+    network_idle: bool = False
+    load_dom: bool = True
+    solve_cloudflare: bool = False
+    selector_config: Optional[Dict] = {}
+    def to_dict(self):
+        return {f: getattr(self, f) for f in self.__struct_fields__}
+@dataclass
+class _fetch_params:
+    """A dataclass of all parameters used by `fetch` calls"""
+    google_search: bool
+    timeout: Seconds
+    wait: Seconds
+    page_action: Optional[Callable]
+    extra_headers: Optional[Dict[str, str]]
+    disable_resources: bool
+    wait_selector: Optional[str]
+    wait_selector_state: SelectorWaitStates
+    network_idle: bool
+    load_dom: bool
+    solve_cloudflare: bool
+    selector_config: Optional[Dict]
+def validate_fetch(params: List[Tuple], sentinel=None) -> _fetch_params:
+    result = {}
+    overrides = {}
+    for arg, request_value, session_value in params:
+        if request_value is not sentinel:
+            overrides[arg] = request_value
+        else:
+            result[arg] = session_value
+    if overrides:
+        overrides = validate(overrides, FetchConfig).to_dict()
+        overrides.update(result)
+        return _fetch_params(**overrides)
+    if not result.get("solve_cloudflare"):
+        result["solve_cloudflare"] = False
+    return _fetch_params(**result)
+def validate(params: Dict, model) -> PlaywrightConfig | CamoufoxConfig | FetchConfig:
     try:
-        config = convert(params, model)
+        return convert(params, model)
     except ValidationError as e:
-        raise TypeError(f"Invalid argument type: {e}")
-    return config
+        raise TypeError(f"Invalid argument type: {e}") from e

scrapling/engines/static.py CHANGED Viewed

@@ -94,8 +94,8 @@ class FetcherSession:
         self.default_http3 = http3
         self.selector_config = selector_config or {}
-        self._curl_session: Optional[CurlSession] = None
-        self._async_curl_session: Optional[AsyncCurlSession] = None
+        self._curl_session: Optional[CurlSession] | bool = None
+        self._async_curl_session: Optional[AsyncCurlSession] | bool = None
     def _merge_request_args(self, **kwargs) -> Dict[str, Any]:
         """Merge request-specific arguments with default session arguments."""
@@ -239,7 +239,6 @@ class FetcherSession:
         Perform an HTTP request using the configured session.
         :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
-        :param url: Target URL for the request.
         :param request_args: Arguments to be passed to the session's `request()` method.
         :param max_retries: Maximum number of retries for the request.
         :param retry_delay: Number of seconds to wait between retries.
@@ -280,7 +279,6 @@ class FetcherSession:
         Perform an HTTP request using the configured session.
         :param method: HTTP method to be used, supported methods are ["GET", "POST", "PUT", "DELETE"]
-        :param url: Target URL for the request.
         :param request_args: Arguments to be passed to the session's `request()` method.
         :param max_retries: Maximum number of retries for the request.
         :param retry_delay: Number of seconds to wait between retries.

scrapling/engines/toolbelt/navigation.py CHANGED Viewed

@@ -4,7 +4,7 @@ Functions related to files and URLs
 from pathlib import Path
 from functools import lru_cache
-from urllib.parse import urlencode, urlparse
+from urllib.parse import urlparse
 from playwright.async_api import Route as async_Route
 from msgspec import Struct, structs, convert, ValidationError

scrapling/parser.py CHANGED Viewed

@@ -239,7 +239,7 @@ class Selector(SelectorsGeneration):
         )
     def __handle_element(
-        self, element: HtmlElement | _ElementUnicodeResult
+        self, element: Optional[HtmlElement | _ElementUnicodeResult]
     ) -> Optional[Union[TextHandler, "Selector"]]:
         """Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
         if element is None:
@@ -339,24 +339,28 @@ class Selector(SelectorsGeneration):
     @property
     def html_content(self) -> TextHandler:
         """Return the inner HTML code of the element"""
-        return TextHandler(tostring(self._root, encoding=self.encoding, method="html", with_tail=False))
+        content = tostring(self._root, encoding=self.encoding, method="html", with_tail=False)
+        if isinstance(content, bytes):
+            content = content.decode("utf-8")
+        return TextHandler(content)
     @property
-    def body(self):
+    def body(self) -> str | bytes:
         """Return the raw body of the current `Selector` without any processing. Useful for binary and non-HTML requests."""
         return self._raw_body
     def prettify(self) -> TextHandler:
         """Return a prettified version of the element's inner html-code"""
-        return TextHandler(
-            tostring(
-                self._root,
-                encoding=self.encoding,
-                pretty_print=True,
-                method="html",
-                with_tail=False,
-            )
+        content = tostring(
+            self._root,
+            encoding=self.encoding,
+            pretty_print=True,
+            method="html",
+            with_tail=False,
         )
+        if isinstance(content, bytes):
+            content = content.decode("utf-8")
+        return TextHandler(content)
     def has_class(self, class_name: str) -> bool:
         """Check if the element has a specific class
@@ -1255,7 +1259,7 @@ class Selectors(List[Selector]):
         :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
         :param case_sensitive: if disabled, the function will set the regex to ignore the letters case while compiling it
         """
-        results = [n.text.re(regex, replace_entities, clean_match, case_sensitive) for n in self]
+        results = [n.re(regex, replace_entities, clean_match, case_sensitive) for n in self]
         return TextHandlers(flatten(results))
     def re_first(

{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scrapling
-Version: 0.3.3
+Version: 0.3.5
 Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -69,15 +69,15 @@ Requires-Dist: cssselect>=1.3.0
 Requires-Dist: orjson>=3.11.3
 Requires-Dist: tldextract>=5.3.0
 Provides-Extra: fetchers
-Requires-Dist: click>=8.2.1; extra == "fetchers"
+Requires-Dist: click>=8.3.0; extra == "fetchers"
 Requires-Dist: curl_cffi>=0.13.0; extra == "fetchers"
-Requires-Dist: playwright>=1.52.0; extra == "fetchers"
-Requires-Dist: rebrowser-playwright>=1.52.0; extra == "fetchers"
+Requires-Dist: playwright>=1.55.0; extra == "fetchers"
+Requires-Dist: patchright>=1.55.2; extra == "fetchers"
 Requires-Dist: camoufox>=0.4.11; extra == "fetchers"
 Requires-Dist: geoip2>=5.1.0; extra == "fetchers"
 Requires-Dist: msgspec>=0.19.0; extra == "fetchers"
 Provides-Extra: ai
-Requires-Dist: mcp>=1.14.0; extra == "ai"
+Requires-Dist: mcp>=1.14.1; extra == "ai"
 Requires-Dist: markdownify>=1.2.0; extra == "ai"
 Requires-Dist: scrapling[fetchers]; extra == "ai"
 Provides-Extra: shell
@@ -114,14 +114,6 @@ Dynamic: license-file
 </p>
 <p align="center">
-    <a href="https://scrapling.readthedocs.io/en/latest/#installation">
-        Installation
-    </a>
-    ·
-    <a href="https://scrapling.readthedocs.io/en/latest/overview/">
-        Overview
-    </a>
-    ·
     <a href="https://scrapling.readthedocs.io/en/latest/parsing/selection/">
         Selection methods
     </a>
@@ -130,6 +122,14 @@ Dynamic: license-file
         Choosing a fetcher
     </a>
     ·
+    <a href="https://scrapling.readthedocs.io/en/latest/cli/overview/">
+        CLI
+    </a>
+    ·
+    <a href="https://scrapling.readthedocs.io/en/latest/ai/mcp-server/">
+        MCP mode
+    </a>
+    ·
     <a href="https://scrapling.readthedocs.io/en/latest/tutorials/migrating_from_beautifulsoup/">
         Migrating from Beautifulsoup
     </a>
@@ -157,11 +157,13 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 <!-- sponsors -->
+<a href="https://www.thordata.com/?ls=github&lk=D4Vinci" target="_blank" title="A global network of over 60M+ residential proxies with 99.7% availability, ensuring stable and reliable web data scraping to support AI, BI, and workflows."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/thordata.jpg"></a>
 <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png"></a>
+<a href="https://visit.decodo.com/Dy6W0b" target="_blank" title="Try the Most Efficient Residential Proxies for Free"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/decodo.png"></a>
 <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
 <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
-<a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
 <a href="https://www.nstproxy.com/?type=flow&utm_source=scrapling" target="_blank" title="One Proxy Service, Infinite Solutions at Unbeatable Prices!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/NSTproxy.png"></a>
+<a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
 <!-- /sponsors -->
@@ -410,10 +412,9 @@ This project includes code adapted from:
 ## Thanks and References
 - [Daijro](https://github.com/daijro)'s brilliant work on [BrowserForge](https://github.com/daijro/browserforge) and [Camoufox](https://github.com/daijro/camoufox)
-- [Vinyzu](https://github.com/Vinyzu)'s work on [Botright](https://github.com/Vinyzu/Botright)
+- [Vinyzu](https://github.com/Vinyzu)'s brilliant work on [Botright](https://github.com/Vinyzu/Botright) and [PatchRight](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright)
 - [brotector](https://github.com/kaliiiiiiiiii/brotector) for browser detection bypass techniques
-- [fakebrowser](https://github.com/kkoooqq/fakebrowser) for fingerprinting research
-- [rebrowser-patches](https://github.com/rebrowser/rebrowser-patches) for stealth improvements
+- [fakebrowser](https://github.com/kkoooqq/fakebrowser) and [BotBrowser](https://github.com/botswin/BotBrowser) for fingerprinting research
 ---
 <div align="center"><small>Designed & crafted with ❤️ by Karim Shoair.</small></div><br>

{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
-scrapling/__init__.py,sha256=c1t8r6IGEXC-PhNeFxFtoqNsiSv7B_9f_XBn52EWESg,1236
-scrapling/cli.py,sha256=ooObP0VoYGxnskEJB6xFp23NREI_XDPJpsMSr9Sv8nk,26355
+scrapling/__init__.py,sha256=3-wjeMR5IQVhHoPcl5KYMo3cgA00q1mWn38q02xTWck,1236
+scrapling/cli.py,sha256=tGQ3q4pHJZf1XJ8UIqPdT2JR9bjOhlXydmY1cNLkbZc,26363
 scrapling/fetchers.py,sha256=aYQUxp-0i-OBucdpdG6zjWCafTCgpXJdnJ0GIrm5GfA,26523
-scrapling/parser.py,sha256=aJRqfuOxBHrM_Co9XHeuL6qYHgQTyi7zD1DoCA3mROY,57321
+scrapling/parser.py,sha256=Fh15nediLLSfYQOb_vr76YFUA_fNJFU7klYCkp_XXts,57517
 scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
 scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 scrapling/core/_html_utils.py,sha256=ki47v54SsTL5-khi1jcLkJqAHqEq19cuex-dqzXdbEI,20328
 scrapling/core/_types.py,sha256=iXhi8LFkU4wjkGOjITdY1IDBEbn5rOxsl7xwEKT1L3I,895
 scrapling/core/ai.py,sha256=v3wjtXJgBRUtImE6Q_Bf_FruOArJyraQk4kqsqhlU8k,35474
-scrapling/core/custom_types.py,sha256=3kLrNDVm1vP3IziyyNjFVVQO_2bacwvm3hiK7h3gWjU,13634
+scrapling/core/custom_types.py,sha256=GlQZiVIMCyv8vOdDUlASPn85r_4nw0P9ggID9q1VkRA,13608
 scrapling/core/mixins.py,sha256=2iUVcN2XSAKGEvNmAM2Rr9axpZoxu0M2gIFEaFTO_Dg,3206
-scrapling/core/shell.py,sha256=Um_CukPuX8K1VgEZsu_cXdsnoJTC_lnv4co2DBZXICU,21956
+scrapling/core/shell.py,sha256=dCD8c_k1skXrKSIc_Qe_KgsiMOAS_1eCzgWjvSO74-I,22893
 scrapling/core/storage.py,sha256=8lWMPut6lPpvn9iOkgy9ao11_g8FNkXq67wHKtU4uuM,6290
 scrapling/core/translator.py,sha256=HLJngeRRw2M0eNe_f8AfQD64a49OECIEm5Df_WELVG4,5135
 scrapling/core/utils/__init__.py,sha256=7B14TcrDVwSaH6BQrMnzb1NtFa4Om237dJcF9oe-lM0,204
@@ -17,28 +17,28 @@ scrapling/core/utils/_shell.py,sha256=zes71MmFTs7V9f0JFstaWcjQhKNZN6xvspu29YVQtR
 scrapling/core/utils/_utils.py,sha256=ATy-wwz00U-alOGH-NGK-VoPNr1qYmUwEoWuqAHjDkg,3143
 scrapling/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 scrapling/engines/constants.py,sha256=DP7yVRK1w1W4B1eXGeeKKQNDSo163FFXdPaxTy4adqw,4088
-scrapling/engines/static.py,sha256=779pVJvCCjFSCKGN7a_9rAH55oO2SyO88cnVrE2DQy8,33204
+scrapling/engines/static.py,sha256=Tuwl6cEDP5_OQKPFRmemz7ozdeZnWm0vir4J7JYdQCs,33122
 scrapling/engines/_browsers/__init__.py,sha256=lu5RgcV4zYacRaKm28ph5TzjqAovTaQNNfXSgQGwDOU,123
-scrapling/engines/_browsers/_base.py,sha256=vX75atq2QpsiCCpT4P2W4HQqs3P2RZWYHvDevh271kQ,12238
-scrapling/engines/_browsers/_camoufox.py,sha256=RHQMlSrn4PKg0p53b5r7aGzWxacTjUZWlPOdRPDOvls,34890
+scrapling/engines/_browsers/_base.py,sha256=29rPeXyrRnFIPLLMbvq3CUxGw4sMEJ3nKki9CC1iH2g,11049
+scrapling/engines/_browsers/_camoufox.py,sha256=BvxsTLcDpTMVoqsHIy7Smwls1zo6fpCtGMDW4v5Kim8,35356
 scrapling/engines/_browsers/_config_tools.py,sha256=mEPA5SGrWq0dl15cDOT6sOsm5NHMD0vI0fuPttGpw-U,4610
-scrapling/engines/_browsers/_controllers.py,sha256=GlYGt_LBTDjrWpD1zgKyyy9mynlEPR9MfnXJFUSDw2s,28246
-scrapling/engines/_browsers/_page.py,sha256=ixwI5d-AIzfUGekRSCbPLJAckf673B7QCyaWO-xJa84,3688
-scrapling/engines/_browsers/_validators.py,sha256=knkGvgpGeqtOWx4Us3pln1o4mJXfG4M-SWII080I9AE,6117
+scrapling/engines/_browsers/_controllers.py,sha256=YuiO8uw8pyv8hQLBvZCJcTGrNbKZSsYzkPKK9X6bq6U,27232
+scrapling/engines/_browsers/_page.py,sha256=1z-P6c97cTkULE-FVrsMY589e6eL_20Ae8pUe6vjggE,2206
+scrapling/engines/_browsers/_validators.py,sha256=jvJjXURN79aeR-ZFc_k5zf_3ClP18gM1qZA7dMXd_YI,7491
 scrapling/engines/toolbelt/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 scrapling/engines/toolbelt/convertor.py,sha256=e_rMcW8ScdfxKO-V5Mk61blVzwuDgd82CpRds0Z2tMQ,13102
 scrapling/engines/toolbelt/custom.py,sha256=uhMXa_LNcvvG3wZXBRKHXvqLqShMR9SHwc3bBv4UaQs,7664
 scrapling/engines/toolbelt/fingerprints.py,sha256=hCxKUTwo8sy7iN9wk8OA5vGo9XOn6E365zvC1C6zWDE,2212
-scrapling/engines/toolbelt/navigation.py,sha256=CWvM0KDuLPzvM6T8_yBq05nmB4scXshoEKVbhR4zEBk,3561
+scrapling/engines/toolbelt/navigation.py,sha256=Ej23I1n9AjCwxva_yRXUQeefmYJgi7lgb2Wr_b8RNFs,3550
 scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
 scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
 scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js,sha256=clzuf7KYcvDWYaKKxT_bkAoCT2fGsOcUw47948CHjAc,267
 scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUvVWYUkPUbblkfMfpiLvBY7w,599
 scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
 scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
-scrapling-0.3.3.dist-info/licenses/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
-scrapling-0.3.3.dist-info/METADATA,sha256=QXhVgzdtzq9U5kEpv8kWSkGD64EQBoZfmR5QRkfTV1I,21948
-scrapling-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-scrapling-0.3.3.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
-scrapling-0.3.3.dist-info/top_level.txt,sha256=Ud-yF-PC2U5HQ3nc5QwT7HSPdIpF1RuwQ_mYgBzHHIM,10
-scrapling-0.3.3.dist-info/RECORD,,
+scrapling-0.3.5.dist-info/licenses/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.3.5.dist-info/METADATA,sha256=a-ZKBr0yH6jKb88l5BpbwMhWEbP-mQG3_NoI4Rogv9M,22513
+scrapling-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+scrapling-0.3.5.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
+scrapling-0.3.5.dist-info/top_level.txt,sha256=Ud-yF-PC2U5HQ3nc5QwT7HSPdIpF1RuwQ_mYgBzHHIM,10
+scrapling-0.3.5.dist-info/RECORD,,

{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{scrapling-0.3.3.dist-info → scrapling-0.3.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

scrapling 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

scrapling 0.3.3py3-none-any.whl → 0.3.5py3-none-any.whl