scrapling 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +1 -1
- scrapling/core/_types.py +3 -0
- scrapling/core/ai.py +2 -1
- scrapling/core/custom_types.py +20 -27
- scrapling/core/mixins.py +15 -9
- scrapling/core/shell.py +4 -3
- scrapling/core/storage.py +5 -5
- scrapling/core/translator.py +13 -8
- scrapling/engines/_browsers/_base.py +175 -21
- scrapling/engines/_browsers/_camoufox.py +95 -171
- scrapling/engines/_browsers/_config_tools.py +9 -3
- scrapling/engines/_browsers/_controllers.py +51 -101
- scrapling/engines/_browsers/_validators.py +95 -63
- scrapling/engines/static.py +678 -668
- scrapling/engines/toolbelt/convertor.py +48 -15
- scrapling/engines/toolbelt/custom.py +6 -21
- scrapling/engines/toolbelt/fingerprints.py +14 -9
- scrapling/engines/toolbelt/navigation.py +11 -3
- scrapling/fetchers/__init__.py +11 -1
- scrapling/fetchers/chrome.py +15 -4
- scrapling/fetchers/firefox.py +0 -4
- scrapling/parser.py +105 -80
- {scrapling-0.3.6.dist-info → scrapling-0.3.8.dist-info}/METADATA +7 -6
- scrapling-0.3.8.dist-info/RECORD +47 -0
- scrapling-0.3.6.dist-info/RECORD +0 -47
- {scrapling-0.3.6.dist-info → scrapling-0.3.8.dist-info}/WHEEL +0 -0
- {scrapling-0.3.6.dist-info → scrapling-0.3.8.dist-info}/entry_points.txt +0 -0
- {scrapling-0.3.6.dist-info → scrapling-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {scrapling-0.3.6.dist-info → scrapling-0.3.8.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
scrapling/core/_types.py
CHANGED
```diff
@@ -12,9 +12,11 @@ from typing import (
     Generator,
     Iterable,
     List,
+    Set,
     Literal,
     Optional,
     Pattern,
+    Sequence,
     Tuple,
     TypeVar,
     Union,
@@ -22,6 +24,7 @@ from typing import (
     Mapping,
     Awaitable,
     Protocol,
+    Coroutine,
     SupportsIndex,
 )
 
```
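The `_types` module is scrapling's single re-export point for typing names, so the new `Set`, `Sequence`, and `Coroutine` entries become importable package-wide. A minimal sketch of a downstream consumer (the function is hypothetical, not from the package):

```python
# Hypothetical consumer of the re-exported names; only the import path is real.
from scrapling.core._types import Coroutine, Sequence, Set

def schedule(pending: Sequence[Coroutine], seen: Set[str]) -> int:
    # Illustrative only: count coroutines not yet marked as seen.
    return sum(1 for task in pending if repr(task) not in seen)
```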
scrapling/core/ai.py
CHANGED
```diff
@@ -20,6 +20,7 @@ from scrapling.core._types import (
     Mapping,
     Dict,
     List,
+    Any,
     SelectorWaitStates,
     Generator,
 )
@@ -171,7 +172,7 @@ class ScraplingMCPServer:
         :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
         """
         async with FetcherSession() as session:
-            tasks = [
+            tasks: List[Any] = [
                 session.get(
                     url,
                     auth=auth,
```
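The annotation fix matters because a list of un-awaited coroutine objects has no useful inferred element type; pinning it to `List[Any]` keeps type checkers quiet around the later gather. A self-contained sketch of the same pattern, with a stand-in for `session.get` (the `fetch_all` helper is illustrative, not scrapling API):

```python
import asyncio
from typing import Any, List

async def fetch_all(urls: List[str]) -> List[str]:
    async def get(url: str) -> str:  # stand-in for session.get(url, ...)
        return url

    tasks: List[Any] = [get(url) for url in urls]  # same annotation as in the hunk above
    return await asyncio.gather(*tasks)

print(asyncio.run(fetch_all(["https://a.example", "https://b.example"])))
```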
scrapling/core/custom_types.py
CHANGED
```diff
@@ -5,6 +5,7 @@ from re import compile as re_compile, UNICODE, IGNORECASE
 from orjson import dumps, loads
 
 from scrapling.core._types import (
+    Any,
     cast,
     Dict,
     List,
@@ -14,7 +15,6 @@ from scrapling.core._types import (
     Literal,
     Pattern,
     Iterable,
-    Optional,
     Generator,
     SupportsIndex,
 )
@@ -33,23 +33,20 @@ class TextHandler(str):
 
     def __getitem__(self, key: SupportsIndex | slice) -> "TextHandler":  # pragma: no cover
         lst = super().__getitem__(key)
-        return
+        return TextHandler(lst)
 
-    def split(
-
-
-
-            [TextHandler(s) for s in super().split(sep, maxsplit)],
-        )
-    )
+    def split(
+        self, sep: str | None = None, maxsplit: SupportsIndex = -1
+    ) -> Union[List, "TextHandlers"]:  # pragma: no cover
+        return TextHandlers([TextHandler(s) for s in super().split(sep, maxsplit)])
 
-    def strip(self, chars: str = None) -> Union[str, "TextHandler"]:  # pragma: no cover
+    def strip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().strip(chars))
 
-    def lstrip(self, chars: str = None) -> Union[str, "TextHandler"]:  # pragma: no cover
+    def lstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().lstrip(chars))
 
-    def rstrip(self, chars: str = None) -> Union[str, "TextHandler"]:  # pragma: no cover
+    def rstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().rstrip(chars))
 
     def capitalize(self) -> Union[str, "TextHandler"]:  # pragma: no cover
@@ -64,7 +61,7 @@ class TextHandler(str):
     def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().expandtabs(tabsize))
 
-    def format(self, *args:
+    def format(self, *args: object, **kwargs: str) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().format(*args, **kwargs))
 
     def format_map(self, mapping) -> Union[str, "TextHandler"]:  # pragma: no cover
@@ -131,10 +128,11 @@ class TextHandler(str):
     def re(
         self,
         regex: str | Pattern,
-        check_match: Literal[True],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
+        *,
+        check_match: Literal[True],
     ) -> bool: ...
 
     @overload
@@ -179,19 +177,14 @@ class TextHandler(str):
         results = flatten(results)
 
         if not replace_entities:
-            return TextHandlers(
+            return TextHandlers([TextHandler(string) for string in results])
 
-        return TextHandlers(
-            cast(
-                List[_TextHandlerType],
-                [TextHandler(_replace_entities(s)) for s in results],
-            )
-        )
+        return TextHandlers([TextHandler(_replace_entities(s)) for s in results])
 
     def re_first(
         self,
         regex: str | Pattern,
-        default=None,
+        default: Any = None,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
@@ -232,8 +225,8 @@ class TextHandlers(List[TextHandler]):
     def __getitem__(self, pos: SupportsIndex | slice) -> Union[TextHandler, "TextHandlers"]:
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
-            return TextHandlers(cast(List[
-        return cast(
+            return TextHandlers(cast(List[TextHandler], lst))
+        return TextHandler(cast(TextHandler, lst))
 
     def re(
         self,
@@ -256,7 +249,7 @@ class TextHandlers(List[TextHandler]):
     def re_first(
         self,
         regex: str | Pattern,
-        default=None,
+        default: Any = None,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
@@ -309,9 +302,9 @@ class AttributesHandler(Mapping[str, _TextHandlerType]):
         )
 
         # Fastest read-only mapping type
-        self._data = MappingProxyType(mapping)
+        self._data: Mapping[str, Any] = MappingProxyType(mapping)
 
-    def get(self, key: str, default:
+    def get(self, key: str, default: Any = None) -> _TextHandlerType:
         """Acts like the standard dictionary `.get()` method"""
         return self._data.get(key, default)
 
```
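The common thread in these hunks: `TextHandler` subclasses `str`, and every method that would otherwise return a plain `str` re-wraps its result so chained calls keep the rich type; the `split`/`__getitem__` fixes restore exactly that. A self-contained sketch of the technique (the class name is illustrative):

```python
class RichStr(str):
    """Minimal str subclass that survives strip()/split(), like TextHandler."""

    def strip(self, chars: str | None = None) -> "RichStr":
        return RichStr(super().strip(chars))

    def split(self, sep: str | None = None, maxsplit: int = -1) -> list["RichStr"]:
        return [RichStr(s) for s in super().split(sep, maxsplit)]


parts = RichStr("  a,b  ").strip().split(",")
print([type(p).__name__ for p in parts])  # ['RichStr', 'RichStr']
```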
scrapling/core/mixins.py
CHANGED
```diff
@@ -1,3 +1,9 @@
+from scrapling.core._types import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from scrapling.parser import Selector
+
+
 class SelectorsGeneration:
     """
     Functions for generating selectors
@@ -5,7 +11,7 @@ class SelectorsGeneration:
     Inspiration: https://searchfox.org/mozilla-central/source/devtools/shared/inspector/css-logic.js#591
     """
 
-    def
+    def _general_selection(self: "Selector", selection: str = "css", full_path: bool = False) -> str:  # type: ignore[name-defined]
         """Generate a selector for the current element.
         :return: A string of the generated selector.
         """
@@ -47,29 +53,29 @@ class SelectorsGeneration:
         return " > ".join(reversed(selectorPath)) if css else "//" + "/".join(reversed(selectorPath))
 
     @property
-    def generate_css_selector(self) -> str:
+    def generate_css_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate a CSS selector for the current element
         :return: A string of the generated selector.
         """
-        return self.
+        return self._general_selection()
 
     @property
-    def generate_full_css_selector(self) -> str:
+    def generate_full_css_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate a complete CSS selector for the current element
         :return: A string of the generated selector.
         """
-        return self.
+        return self._general_selection(full_path=True)
 
     @property
-    def generate_xpath_selector(self) -> str:
+    def generate_xpath_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate an XPath selector for the current element
         :return: A string of the generated selector.
         """
-        return self.
+        return self._general_selection("xpath")
 
     @property
-    def generate_full_xpath_selector(self) -> str:
+    def generate_full_xpath_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate a complete XPath selector for the current element
         :return: A string of the generated selector.
         """
-        return self.
+        return self._general_selection("xpath", full_path=True)
```
scrapling/core/shell.py
CHANGED
```diff
@@ -31,6 +31,7 @@ from scrapling.core._types import (
     Optional,
     Dict,
     Any,
+    cast,
     extraction_types,
     Generator,
 )
@@ -540,15 +541,15 @@ class Convertor:
             raise ValueError(f"Unknown extraction type: {extraction_type}")
         else:
             if main_content_only:
-                page = page.css_first("body") or page
+                page = cast(Selector, page.css_first("body")) or page
 
-            pages = [page] if not css_selector else page.css(css_selector)
+            pages = [page] if not css_selector else cast(Selectors, page.css(css_selector))
             for page in pages:
                 match extraction_type:
                     case "markdown":
                         yield cls._convert_to_markdown(page.html_content)
                     case "html":
-                        yield page.
+                        yield page.html_content
                     case "text":
                         txt_content = page.get_all_text(strip=True)
                         for s in (
```
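Worth noting for readers unfamiliar with `typing.cast`: it only re-labels a value for the type checker and performs no runtime check or conversion, which is why wrapping the `css_first`/`css` results here is free:

```python
from typing import cast

value: object = "hello"
text = cast(str, value)  # purely static; nothing happens at runtime
assert text is value
```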
scrapling/core/storage.py
CHANGED
```diff
@@ -56,13 +56,13 @@ class StorageSystemMixin(ABC):  # pragma: no cover
     @lru_cache(128, typed=True)
     def _get_hash(identifier: str) -> str:
         """If you want to hash identifier in your storage system, use this safer"""
-
-        if isinstance(
+        _identifier = identifier.lower().strip()
+        if isinstance(_identifier, str):
             # Hash functions have to take bytes
-
+            _identifier = _identifier.encode("utf-8")
 
-        hash_value = sha256(
-        return f"{hash_value}_{len(
+        hash_value = sha256(_identifier).hexdigest()
+        return f"{hash_value}_{len(_identifier)}"  # Length to reduce collision chance
 
 
     @lru_cache(1, typed=True)
```
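The rebuilt `_get_hash` normalizes, encodes, hashes, and appends the byte length. An equivalent standalone sketch (the `isinstance` guard above is always true once `.lower().strip()` has returned a `str`, so it is folded away here):

```python
from hashlib import sha256

def get_hash(identifier: str) -> str:
    normalized = identifier.lower().strip().encode("utf-8")
    # The appended length reduces the chance of two identifiers colliding.
    return f"{sha256(normalized).hexdigest()}_{len(normalized)}"

print(get_hash("Example.com "))  # 64 hex chars followed by "_11"
```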
scrapling/core/translator.py
CHANGED
```diff
@@ -10,24 +10,23 @@ So you don't have to learn a new selectors/api method like what bs4 done with so
 
 from functools import lru_cache
 
-from cssselect.xpath import ExpressionError
-from cssselect.xpath import XPathExpr as OriginalXPathExpr
 from cssselect import HTMLTranslator as OriginalHTMLTranslator
+from cssselect.xpath import ExpressionError, XPathExpr as OriginalXPathExpr
 from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement
 
-from scrapling.core._types import Any,
+from scrapling.core._types import Any, Protocol, Self
 
 
 class XPathExpr(OriginalXPathExpr):
     textnode: bool = False
-    attribute:
+    attribute: str | None = None
 
     @classmethod
     def from_xpath(
         cls,
         xpath: OriginalXPathExpr,
         textnode: bool = False,
-        attribute:
+        attribute: str | None = None,
     ) -> Self:
         x = cls(path=xpath.path, element=xpath.element, condition=xpath.condition)
         x.textnode = textnode
@@ -71,10 +70,10 @@ class XPathExpr(OriginalXPathExpr):
 
 # e.g. cssselect.GenericTranslator, cssselect.HTMLTranslator
 class TranslatorProtocol(Protocol):
-    def xpath_element(self, selector: Element) -> OriginalXPathExpr:  # pragma: no cover
+    def xpath_element(self, selector: Element) -> OriginalXPathExpr:  # pyright: ignore  # pragma: no cover
         pass
 
-    def css_to_xpath(self, css: str, prefix: str = ...) -> str:  # pragma: no cover
+    def css_to_xpath(self, css: str, prefix: str = ...) -> str:  # pyright: ignore  # pragma: no cover
         pass
 
 
@@ -121,9 +120,15 @@ class TranslatorMixin:
 
 
 class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
-    @lru_cache(maxsize=256)
     def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
         return super().css_to_xpath(css, prefix)
 
 
 translator = HTMLTranslator()
+# Using a function instead of the translator directly to avoid Pyright override error
+
+
+@lru_cache(maxsize=256)
+def css_to_xpath(query: str) -> str:
+    """Return translated XPath version of a given CSS query"""
+    return translator.css_to_xpath(query)
```
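The `@lru_cache` moved from the overriding method to a module-level function, which sidesteps the Pyright override complaint the new comment mentions (and avoids caching on `self`). Expected usage, with an output shape that is illustrative rather than exact:

```python
from scrapling.core.translator import css_to_xpath

print(css_to_xpath("div.post > a"))
# Something like:
# descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' post ')]/a
```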
scrapling/engines/_browsers/_base.py
CHANGED

```diff
@@ -2,19 +2,27 @@ from time import time
 from asyncio import sleep as asyncio_sleep, Lock
 
 from camoufox import DefaultAddons
-from playwright.sync_api import
+from playwright.sync_api import (
+    Page,
+    Frame,
+    BrowserContext,
+    Playwright,
+    Response as SyncPlaywrightResponse,
+)
 from playwright.async_api import (
-
+    Page as AsyncPage,
+    Frame as AsyncFrame,
     Playwright as AsyncPlaywright,
+    Response as AsyncPlaywrightResponse,
+    BrowserContext as AsyncBrowserContext,
 )
-from
-
-
-)
+from playwright._impl._errors import Error as PlaywrightError
+from camoufox.pkgman import installed_verstr as camoufox_version
+from camoufox.utils import launch_options as generate_launch_options
 
 from ._page import PageInfo, PagePool
 from scrapling.parser import Selector
-from scrapling.core._types import Dict, Optional
+from scrapling.core._types import Any, cast, Dict, List, Optional, Callable, TYPE_CHECKING
 from scrapling.engines.toolbelt.fingerprints import get_os_name
 from ._validators import validate, PlaywrightConfig, CamoufoxConfig
 from ._config_tools import _compiled_stealth_scripts, _launch_kwargs, _context_kwargs
@@ -28,10 +36,35 @@ class SyncSession:
         self.max_pages = max_pages
         self.page_pool = PagePool(max_pages)
         self._max_wait_for_page = 60
-        self.playwright:
-        self.context:
+        self.playwright: Playwright | Any = None
+        self.context: BrowserContext | Any = None
         self._closed = False
 
+    def __create__(self):
+        pass
+
+    def close(self):  # pragma: no cover
+        """Close all resources"""
+        if self._closed:
+            return
+
+        if self.context:
+            self.context.close()
+            self.context = None
+
+        if self.playwright:
+            self.playwright.stop()
+            self.playwright = None  # pyright: ignore
+
+        self._closed = True
+
+    def __enter__(self):
+        self.__create__()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
     def _get_page(
         self,
         timeout: int | float,
@@ -41,6 +74,7 @@ class SyncSession:
         """Get a new page to use"""
 
         # No need to check if a page is available or not in sync code because the code blocked before reaching here till the page closed, ofc.
+        assert self.context is not None, "Browser context not initialized"
         page = self.context.new_page()
         page.set_default_navigation_timeout(timeout)
         page.set_default_timeout(timeout)
@@ -54,7 +88,9 @@ class SyncSession:
         for script in _compiled_stealth_scripts():
            page.add_init_script(script=script)
 
-
+        page_info = self.page_pool.add_page(page)
+        page_info.mark_busy()
+        return page_info
 
     def get_pool_stats(self) -> Dict[str, int]:
         """Get statistics about the current page pool"""
@@ -64,14 +100,76 @@ class SyncSession:
             "max_pages": self.max_pages,
         }
 
+    @staticmethod
+    def _wait_for_networkidle(page: Page | Frame, timeout: Optional[int] = None):
+        """Wait for the page to become idle (no network activity) even if there are never-ending requests."""
+        try:
+            page.wait_for_load_state("networkidle", timeout=timeout)
+        except PlaywrightError:
+            pass
+
+    def _wait_for_page_stability(self, page: Page | Frame, load_dom: bool, network_idle: bool):
+        page.wait_for_load_state(state="load")
+        if load_dom:
+            page.wait_for_load_state(state="domcontentloaded")
+        if network_idle:
+            self._wait_for_networkidle(page)
 
-
+    @staticmethod
+    def _create_response_handler(page_info: PageInfo, response_container: List) -> Callable:
+        """Create a response handler that captures the final navigation response.
+
+        :param page_info: The PageInfo object containing the page
+        :param response_container: A list to store the final response (mutable container)
+        :return: A callback function for page.on("response", ...)
+        """
+
+        def handle_response(finished_response: SyncPlaywrightResponse):
+            if (
+                finished_response.request.resource_type == "document"
+                and finished_response.request.is_navigation_request()
+                and finished_response.request.frame == page_info.page.main_frame
+            ):
+                response_container[0] = finished_response
+
+        return handle_response
+
+
+class AsyncSession:
     def __init__(self, max_pages: int = 1):
-
-        self.
-        self.
+        self.max_pages = max_pages
+        self.page_pool = PagePool(max_pages)
+        self._max_wait_for_page = 60
+        self.playwright: AsyncPlaywright | Any = None
+        self.context: AsyncBrowserContext | Any = None
+        self._closed = False
         self._lock = Lock()
 
+    async def __create__(self):
+        pass
+
+    async def close(self):
+        """Close all resources"""
+        if self._closed:  # pragma: no cover
+            return
+
+        if self.context:
+            await self.context.close()
+            self.context = None  # pyright: ignore
+
+        if self.playwright:
+            await self.playwright.stop()
+            self.playwright = None  # pyright: ignore
+
+        self._closed = True
+
+    async def __aenter__(self):
+        await self.__create__()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close()
+
     async def _get_page(
         self,
         timeout: int | float,
@@ -79,6 +177,9 @@ class AsyncSession(SyncSession):
         disable_resources: bool,
     ) -> PageInfo:  # pragma: no cover
         """Get a new page to use"""
+        if TYPE_CHECKING:
+            assert self.context is not None, "Browser context not initialized"
+
         async with self._lock:
             # If we're at max capacity after cleanup, wait for busy pages to finish
             if self.page_pool.pages_count >= self.max_pages:
@@ -107,6 +208,48 @@ class AsyncSession(SyncSession):
 
         return self.page_pool.add_page(page)
 
+    def get_pool_stats(self) -> Dict[str, int]:
+        """Get statistics about the current page pool"""
+        return {
+            "total_pages": self.page_pool.pages_count,
+            "busy_pages": self.page_pool.busy_count,
+            "max_pages": self.max_pages,
+        }
+
+    @staticmethod
+    async def _wait_for_networkidle(page: AsyncPage | AsyncFrame, timeout: Optional[int] = None):
+        """Wait for the page to become idle (no network activity) even if there are never-ending requests."""
+        try:
+            await page.wait_for_load_state("networkidle", timeout=timeout)
+        except PlaywrightError:
+            pass
+
+    async def _wait_for_page_stability(self, page: AsyncPage | AsyncFrame, load_dom: bool, network_idle: bool):
+        await page.wait_for_load_state(state="load")
+        if load_dom:
+            await page.wait_for_load_state(state="domcontentloaded")
+        if network_idle:
+            await self._wait_for_networkidle(page)
+
+    @staticmethod
+    def _create_response_handler(page_info: PageInfo, response_container: List) -> Callable:
+        """Create an async response handler that captures the final navigation response.
+
+        :param page_info: The PageInfo object containing the page
+        :param response_container: A list to store the final response (mutable container)
+        :return: A callback function for page.on("response", ...)
+        """
+
+        async def handle_response(finished_response: AsyncPlaywrightResponse):
+            if (
+                finished_response.request.resource_type == "document"
+                and finished_response.request.is_navigation_request()
+                and finished_response.request.frame == page_info.page.main_frame
+            ):
+                response_container[0] = finished_response
+
+        return handle_response
+
 
 class DynamicSessionMixin:
     def __validate__(self, **params):
@@ -133,12 +276,18 @@ class DynamicSessionMixin:
         self.wait_selector = config.wait_selector
         self.init_script = config.init_script
         self.wait_selector_state = config.wait_selector_state
+        self.extra_flags = config.extra_flags
         self.selector_config = config.selector_config
+        self.additional_args = config.additional_args
         self.page_action = config.page_action
-        self.
+        self.user_data_dir = config.user_data_dir
+        self._headers_keys = {header.lower() for header in self.extra_headers.keys()} if self.extra_headers else set()
         self.__initiate_browser_options__()
 
     def __initiate_browser_options__(self):
+        if TYPE_CHECKING:
+            assert isinstance(self.proxy, tuple)
+
         if not self.cdp_url:
             # `launch_options` is used with persistent context
             self.launch_options = dict(
@@ -152,10 +301,13 @@ class DynamicSessionMixin:
                     self.stealth,
                     self.hide_canvas,
                     self.disable_webgl,
+                    tuple(self.extra_flags) if self.extra_flags else tuple(),
                 )
             )
             self.launch_options["extra_http_headers"] = dict(self.launch_options["extra_http_headers"])
             self.launch_options["proxy"] = dict(self.launch_options["proxy"]) or None
+            self.launch_options["user_data_dir"] = self.user_data_dir
+            self.launch_options.update(cast(Dict, self.additional_args))
             self.context_options = dict()
         else:
             # while `context_options` is left to be used when cdp mode is enabled
@@ -171,11 +323,12 @@ class DynamicSessionMixin:
             )
             self.context_options["extra_http_headers"] = dict(self.context_options["extra_http_headers"])
             self.context_options["proxy"] = dict(self.context_options["proxy"]) or None
+            self.context_options.update(cast(Dict, self.additional_args))
 
 
 class StealthySessionMixin:
     def __validate__(self, **params):
-        config = validate(params, model=CamoufoxConfig)
+        config: CamoufoxConfig = validate(params, model=CamoufoxConfig)
 
         self.max_pages = config.max_pages
         self.headless = config.headless
@@ -204,15 +357,16 @@ class StealthySessionMixin:
         self.selector_config = config.selector_config
         self.additional_args = config.additional_args
         self.page_action = config.page_action
-        self.
+        self.user_data_dir = config.user_data_dir
+        self._headers_keys = {header.lower() for header in self.extra_headers.keys()} if self.extra_headers else set()
         self.__initiate_browser_options__()
 
     def __initiate_browser_options__(self):
         """Initiate browser options."""
-        self.launch_options = generate_launch_options(
+        self.launch_options: Dict[str, Any] = generate_launch_options(
             **{
                 "geoip": self.geoip,
-                "proxy": dict(self.proxy) if self.proxy else self.proxy,
+                "proxy": dict(self.proxy) if self.proxy and isinstance(self.proxy, tuple) else self.proxy,
                 "addons": self.addons,
                 "exclude_addons": [] if self.disable_ads else [DefaultAddons.UBO],
                 "headless": self.headless,
@@ -222,7 +376,7 @@ class StealthySessionMixin:
                 "block_webrtc": self.block_webrtc,
                 "block_images": self.block_images,  # Careful! it makes some websites don't finish loading at all like stackoverflow even in headful mode.
                 "os": None if self.os_randomize else get_os_name(),
-                "user_data_dir":
+                "user_data_dir": self.user_data_dir,
                 "ff_version": __ff_version_str__,
                 "firefox_user_prefs": {
                     # This is what enabling `enable_cache` does internally, so we do it from here instead
@@ -232,7 +386,7 @@ class StealthySessionMixin:
                     "browser.cache.disk_cache_ssl": True,
                     "browser.cache.disk.smart_size.enabled": True,
                 },
-                **self.additional_args,
+                **cast(Dict, self.additional_args),
             }
         )
 
```
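The structural headline of this file: both session classes now own an explicit lifecycle (`__create__`, `close`, and context-manager hooks), and `AsyncSession` no longer inherits from `SyncSession`, duplicating the pool and stability helpers with awaited equivalents instead. A hedged sketch of the resulting usage, assuming `PagePool` starts empty and that concrete subclasses override `__create__` to actually launch a browser:

```python
from scrapling.engines._browsers._base import SyncSession

# The base __create__ is a no-op, so this opens no real browser; concrete
# subclasses are expected to fill it in.
with SyncSession(max_pages=2) as session:
    print(session.get_pool_stats())
    # {'total_pages': 0, 'busy_pages': 0, 'max_pages': 2}
```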