PyPI - scrapling - Versions diffs - 0.3.6__tar.gz → 0.3.7__tar.gz - Mend

scrapling 0.3.6 (tar.gz) → 0.3.7 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

{scrapling-0.3.6/scrapling.egg-info → scrapling-0.3.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scrapling
-Version: 0.3.6
+Version: 0.3.7
 Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -77,7 +77,7 @@ Requires-Dist: camoufox>=0.4.11; extra == "fetchers"
 Requires-Dist: geoip2>=5.1.0; extra == "fetchers"
 Requires-Dist: msgspec>=0.19.0; extra == "fetchers"
 Provides-Extra: ai
-Requires-Dist: mcp>=1.15.0; extra == "ai"
+Requires-Dist: mcp>=1.16.0; extra == "ai"
 Requires-Dist: markdownify>=1.2.0; extra == "ai"
 Requires-Dist: scrapling[fetchers]; extra == "ai"
 Provides-Extra: shell
@@ -162,7 +162,6 @@ Built for the modern Web, Scrapling features its own rapid parsing engine and fe
 <a href="https://visit.decodo.com/Dy6W0b" target="_blank" title="Try the Most Efficient Residential Proxies for Free"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/decodo.png"></a>
 <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
 <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
-<a href="https://www.nstproxy.com/?type=flow&utm_source=scrapling" target="_blank" title="One Proxy Service, Infinite Solutions at Unbeatable Prices!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/NSTproxy.png"></a>
 <a href="https://www.rapidproxy.io/?ref=d4v" target="_blank" title="Affordable Access to the Proxy World – bypass CAPTCHAs blocks, and avoid additional costs."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/rapidproxy.jpg"></a>
 <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
@@ -389,7 +388,7 @@ Starting with v0.3.2, this installation only includes the parser engine and its
 ### Docker
 You can also install a Docker image with all extras and browsers with the following command:
 ```bash
-docker pull scrapling
+docker pull pyd4vinci/scrapling
 ```
 This image is automatically built and pushed to Docker Hub through GitHub actions right here.

{scrapling-0.3.6 → scrapling-0.3.7}/README.md RENAMED Viewed

@@ -72,7 +72,6 @@ Built for the modern Web, Scrapling features its own rapid parsing engine and fe
 <a href="https://visit.decodo.com/Dy6W0b" target="_blank" title="Try the Most Efficient Residential Proxies for Free"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/decodo.png"></a>
 <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
 <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
-<a href="https://www.nstproxy.com/?type=flow&utm_source=scrapling" target="_blank" title="One Proxy Service, Infinite Solutions at Unbeatable Prices!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/NSTproxy.png"></a>
 <a href="https://www.rapidproxy.io/?ref=d4v" target="_blank" title="Affordable Access to the Proxy World – bypass CAPTCHAs blocks, and avoid additional costs."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/rapidproxy.jpg"></a>
 <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
@@ -299,7 +298,7 @@ Starting with v0.3.2, this installation only includes the parser engine and its
 ### Docker
 You can also install a Docker image with all extras and browsers with the following command:
 ```bash
-docker pull scrapling
+docker pull pyd4vinci/scrapling
 ```
 This image is automatically built and pushed to Docker Hub through GitHub actions right here.

{scrapling-0.3.6 → scrapling-0.3.7}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "scrapling"
 # Static version instead of dynamic version so we can get better layer caching while building docker, check the docker file to understand
-version = "0.3.6"
+version = "0.3.7"
 description = "Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!"
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {file = "LICENSE"}
@@ -74,7 +74,7 @@ fetchers = [
     "msgspec>=0.19.0",
 ]
 ai = [
-    "mcp>=1.15.0",
+    "mcp>=1.16.0",
     "markdownify>=1.2.0",
     "scrapling[fetchers]",
 ]

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.3.6"
+__version__ = "0.3.7"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 from typing import Any, TYPE_CHECKING

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/core/_types.py RENAMED Viewed

@@ -12,9 +12,11 @@ from typing import (
     Generator,
     Iterable,
     List,
+    Set,
     Literal,
     Optional,
     Pattern,
+    Sequence,
     Tuple,
     TypeVar,
     Union,
@@ -22,6 +24,7 @@ from typing import (
     Mapping,
     Awaitable,
     Protocol,
+    Coroutine,
     SupportsIndex,
 )

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/core/ai.py RENAMED Viewed

@@ -20,6 +20,7 @@ from scrapling.core._types import (
     Mapping,
     Dict,
     List,
+    Any,
     SelectorWaitStates,
     Generator,
 )
@@ -171,7 +172,7 @@ class ScraplingMCPServer:
         :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets the referer header as if this request came from a Google search of URL's domain.
         """
         async with FetcherSession() as session:
-            tasks = [
+            tasks: List[Any] = [
                 session.get(
                     url,
                     auth=auth,

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/core/custom_types.py RENAMED Viewed

@@ -5,6 +5,7 @@ from re import compile as re_compile, UNICODE, IGNORECASE
 from orjson import dumps, loads
 from scrapling.core._types import (
+    Any,
     cast,
     Dict,
     List,
@@ -14,7 +15,6 @@ from scrapling.core._types import (
     Literal,
     Pattern,
     Iterable,
-    Optional,
     Generator,
     SupportsIndex,
 )
@@ -33,23 +33,20 @@ class TextHandler(str):
     def __getitem__(self, key: SupportsIndex | slice) -> "TextHandler":  # pragma: no cover
         lst = super().__getitem__(key)
-        return cast(_TextHandlerType, TextHandler(lst))
+        return TextHandler(lst)
-    def split(self, sep: str = None, maxsplit: SupportsIndex = -1) -> "TextHandlers":  # pragma: no cover
-        return TextHandlers(
-            cast(
-                List[_TextHandlerType],
-                [TextHandler(s) for s in super().split(sep, maxsplit)],
-            )
-        )
+    def split(
+        self, sep: str | None = None, maxsplit: SupportsIndex = -1
+    ) -> Union[List, "TextHandlers"]:  # pragma: no cover
+        return TextHandlers([TextHandler(s) for s in super().split(sep, maxsplit)])
-    def strip(self, chars: str = None) -> Union[str, "TextHandler"]:  # pragma: no cover
+    def strip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().strip(chars))
-    def lstrip(self, chars: str = None) -> Union[str, "TextHandler"]:  # pragma: no cover
+    def lstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().lstrip(chars))
-    def rstrip(self, chars: str = None) -> Union[str, "TextHandler"]:  # pragma: no cover
+    def rstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().rstrip(chars))
     def capitalize(self) -> Union[str, "TextHandler"]:  # pragma: no cover
@@ -64,7 +61,7 @@ class TextHandler(str):
     def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().expandtabs(tabsize))
-    def format(self, *args: str, **kwargs: str) -> Union[str, "TextHandler"]:  # pragma: no cover
+    def format(self, *args: object, **kwargs: str) -> Union[str, "TextHandler"]:  # pragma: no cover
         return TextHandler(super().format(*args, **kwargs))
     def format_map(self, mapping) -> Union[str, "TextHandler"]:  # pragma: no cover
@@ -131,10 +128,11 @@ class TextHandler(str):
     def re(
         self,
         regex: str | Pattern,
-        check_match: Literal[True],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
+        *,
+        check_match: Literal[True],
     ) -> bool: ...
     @overload
@@ -179,19 +177,14 @@ class TextHandler(str):
             results = flatten(results)
         if not replace_entities:
-            return TextHandlers(cast(List[_TextHandlerType], [TextHandler(string) for string in results]))
+            return TextHandlers([TextHandler(string) for string in results])
-        return TextHandlers(
-            cast(
-                List[_TextHandlerType],
-                [TextHandler(_replace_entities(s)) for s in results],
-            )
-        )
+        return TextHandlers([TextHandler(_replace_entities(s)) for s in results])
     def re_first(
         self,
         regex: str | Pattern,
-        default=None,
+        default: Any = None,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
@@ -232,8 +225,8 @@ class TextHandlers(List[TextHandler]):
     def __getitem__(self, pos: SupportsIndex | slice) -> Union[TextHandler, "TextHandlers"]:
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
-            return TextHandlers(cast(List[_TextHandlerType], lst))
-        return cast(_TextHandlerType, TextHandler(lst))
+            return TextHandlers(cast(List[TextHandler], lst))
+        return TextHandler(cast(TextHandler, lst))
     def re(
         self,
@@ -256,7 +249,7 @@ class TextHandlers(List[TextHandler]):
     def re_first(
         self,
         regex: str | Pattern,
-        default=None,
+        default: Any = None,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
@@ -309,9 +302,9 @@ class AttributesHandler(Mapping[str, _TextHandlerType]):
             )
         # Fastest read-only mapping type
-        self._data = MappingProxyType(mapping)
+        self._data: Mapping[str, Any] = MappingProxyType(mapping)
-    def get(self, key: str, default: Optional[str] = None) -> Optional[_TextHandlerType]:
+    def get(self, key: str, default: Any = None) -> _TextHandlerType:
         """Acts like the standard dictionary `.get()` method"""
         return self._data.get(key, default)

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/core/mixins.py RENAMED Viewed

@@ -1,3 +1,9 @@
+from scrapling.core._types import TYPE_CHECKING
+if TYPE_CHECKING:
+    from scrapling.parser import Selector
 class SelectorsGeneration:
     """
     Functions for generating selectors
@@ -5,7 +11,7 @@ class SelectorsGeneration:
     Inspiration: https://searchfox.org/mozilla-central/source/devtools/shared/inspector/css-logic.js#591
     """
-    def __general_selection(self, selection: str = "css", full_path: bool = False) -> str:
+    def _general_selection(self: "Selector", selection: str = "css", full_path: bool = False) -> str:  # type: ignore[name-defined]
         """Generate a selector for the current element.
         :return: A string of the generated selector.
         """
@@ -47,29 +53,29 @@ class SelectorsGeneration:
         return " > ".join(reversed(selectorPath)) if css else "//" + "/".join(reversed(selectorPath))
     @property
-    def generate_css_selector(self) -> str:
+    def generate_css_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate a CSS selector for the current element
         :return: A string of the generated selector.
         """
-        return self.__general_selection()
+        return self._general_selection()
     @property
-    def generate_full_css_selector(self) -> str:
+    def generate_full_css_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate a complete CSS selector for the current element
         :return: A string of the generated selector.
         """
-        return self.__general_selection(full_path=True)
+        return self._general_selection(full_path=True)
     @property
-    def generate_xpath_selector(self) -> str:
+    def generate_xpath_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate an XPath selector for the current element
         :return: A string of the generated selector.
         """
-        return self.__general_selection("xpath")
+        return self._general_selection("xpath")
     @property
-    def generate_full_xpath_selector(self) -> str:
+    def generate_full_xpath_selector(self: "Selector") -> str:  # type: ignore[name-defined]
         """Generate a complete XPath selector for the current element
         :return: A string of the generated selector.
         """
-        return self.__general_selection("xpath", full_path=True)
+        return self._general_selection("xpath", full_path=True)

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/core/shell.py RENAMED Viewed

@@ -31,6 +31,7 @@ from scrapling.core._types import (
     Optional,
     Dict,
     Any,
+    cast,
     extraction_types,
     Generator,
 )
@@ -540,15 +541,15 @@ class Convertor:
             raise ValueError(f"Unknown extraction type: {extraction_type}")
         else:
             if main_content_only:
-                page = page.css_first("body") or page
+                page = cast(Selector, page.css_first("body")) or page
-            pages = [page] if not css_selector else page.css(css_selector)
+            pages = [page] if not css_selector else cast(Selectors, page.css(css_selector))
             for page in pages:
                 match extraction_type:
                     case "markdown":
                         yield cls._convert_to_markdown(page.html_content)
                     case "html":
-                        yield page.body
+                        yield page.html_content
                     case "text":
                         txt_content = page.get_all_text(strip=True)
                         for s in (

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/core/storage.py RENAMED Viewed

@@ -56,13 +56,13 @@ class StorageSystemMixin(ABC):  # pragma: no cover
     @lru_cache(128, typed=True)
     def _get_hash(identifier: str) -> str:
         """If you want to hash identifier in your storage system, use this safer"""
-        identifier = identifier.lower().strip()
-        if isinstance(identifier, str):
+        _identifier = identifier.lower().strip()
+        if isinstance(_identifier, str):
             # Hash functions have to take bytes
-            identifier = identifier.encode("utf-8")
+            _identifier = _identifier.encode("utf-8")
-        hash_value = sha256(identifier).hexdigest()
-        return f"{hash_value}_{len(identifier)}"  # Length to reduce collision chance
+        hash_value = sha256(_identifier).hexdigest()
+        return f"{hash_value}_{len(_identifier)}"  # Length to reduce collision chance
 @lru_cache(1, typed=True)

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/core/translator.py RENAMED Viewed

@@ -10,24 +10,23 @@ So you don't have to learn a new selectors/api method like what bs4 done with so
 from functools import lru_cache
-from cssselect.xpath import ExpressionError
-from cssselect.xpath import XPathExpr as OriginalXPathExpr
 from cssselect import HTMLTranslator as OriginalHTMLTranslator
+from cssselect.xpath import ExpressionError, XPathExpr as OriginalXPathExpr
 from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement
-from scrapling.core._types import Any, Optional, Protocol, Self
+from scrapling.core._types import Any, Protocol, Self
 class XPathExpr(OriginalXPathExpr):
     textnode: bool = False
-    attribute: Optional[str] = None
+    attribute: str | None = None
     @classmethod
     def from_xpath(
         cls,
         xpath: OriginalXPathExpr,
         textnode: bool = False,
-        attribute: Optional[str] = None,
+        attribute: str | None = None,
     ) -> Self:
         x = cls(path=xpath.path, element=xpath.element, condition=xpath.condition)
         x.textnode = textnode
@@ -71,10 +70,10 @@ class XPathExpr(OriginalXPathExpr):
 # e.g. cssselect.GenericTranslator, cssselect.HTMLTranslator
 class TranslatorProtocol(Protocol):
-    def xpath_element(self, selector: Element) -> OriginalXPathExpr:  # pragma: no cover
+    def xpath_element(self, selector: Element) -> OriginalXPathExpr:  # pyright: ignore # pragma: no cover
         pass
-    def css_to_xpath(self, css: str, prefix: str = ...) -> str:  # pragma: no cover
+    def css_to_xpath(self, css: str, prefix: str = ...) -> str:  # pyright: ignore # pragma: no cover
         pass
@@ -121,9 +120,15 @@ class TranslatorMixin:
 class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
-    @lru_cache(maxsize=256)
     def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
         return super().css_to_xpath(css, prefix)
 translator = HTMLTranslator()
+# Using a function instead of the translator directly to avoid Pyright override error
+@lru_cache(maxsize=256)
+def css_to_xpath(query: str) -> str:
+    """Return translated XPath version of a given CSS query"""
+    return translator.css_to_xpath(query)

{scrapling-0.3.6 → scrapling-0.3.7}/scrapling/engines/_browsers/_base.py RENAMED Viewed

@@ -7,14 +7,12 @@ from playwright.async_api import (
     BrowserContext as AsyncBrowserContext,
     Playwright as AsyncPlaywright,
 )
-from camoufox.utils import (
-    launch_options as generate_launch_options,
-    installed_verstr as camoufox_version,
-)
+from camoufox.pkgman import installed_verstr as camoufox_version
+from camoufox.utils import launch_options as generate_launch_options
 from ._page import PageInfo, PagePool
 from scrapling.parser import Selector
-from scrapling.core._types import Dict, Optional
+from scrapling.core._types import Any, cast, Dict, Optional, TYPE_CHECKING
 from scrapling.engines.toolbelt.fingerprints import get_os_name
 from ._validators import validate, PlaywrightConfig, CamoufoxConfig
 from ._config_tools import _compiled_stealth_scripts, _launch_kwargs, _context_kwargs
@@ -41,6 +39,7 @@ class SyncSession:
         """Get a new page to use"""
         # No need to check if a page is available or not in sync code because the code blocked before reaching here till the page closed, ofc.
+        assert self.context is not None, "Browser context not initialized"
         page = self.context.new_page()
         page.set_default_navigation_timeout(timeout)
         page.set_default_timeout(timeout)
@@ -65,11 +64,14 @@ class SyncSession:
         }
-class AsyncSession(SyncSession):
+class AsyncSession:
     def __init__(self, max_pages: int = 1):
-        super().__init__(max_pages)
+        self.max_pages = max_pages
+        self.page_pool = PagePool(max_pages)
+        self._max_wait_for_page = 60
         self.playwright: Optional[AsyncPlaywright] = None
         self.context: Optional[AsyncBrowserContext] = None
+        self._closed = False
         self._lock = Lock()
     async def _get_page(
@@ -79,6 +81,9 @@ class AsyncSession(SyncSession):
         disable_resources: bool,
     ) -> PageInfo:  # pragma: no cover
         """Get a new page to use"""
+        if TYPE_CHECKING:
+            assert self.context is not None, "Browser context not initialized"
         async with self._lock:
             # If we're at max capacity after cleanup, wait for busy pages to finish
             if self.page_pool.pages_count >= self.max_pages:
@@ -92,6 +97,7 @@ class AsyncSession(SyncSession):
                         f"No pages finished to clear place in the pool within the {self._max_wait_for_page}s timeout period"
                     )
+            assert self.context is not None, "Browser context not initialized"
             page = await self.context.new_page()
             page.set_default_navigation_timeout(timeout)
             page.set_default_timeout(timeout)
@@ -107,6 +113,14 @@ class AsyncSession(SyncSession):
             return self.page_pool.add_page(page)
+    def get_pool_stats(self) -> Dict[str, int]:
+        """Get statistics about the current page pool"""
+        return {
+            "total_pages": self.page_pool.pages_count,
+            "busy_pages": self.page_pool.busy_count,
+            "max_pages": self.max_pages,
+        }
 class DynamicSessionMixin:
     def __validate__(self, **params):
@@ -134,11 +148,16 @@ class DynamicSessionMixin:
         self.init_script = config.init_script
         self.wait_selector_state = config.wait_selector_state
         self.selector_config = config.selector_config
+        self.additional_args = config.additional_args
         self.page_action = config.page_action
-        self._headers_keys = set(map(str.lower, self.extra_headers.keys())) if self.extra_headers else set()
+        self.user_data_dir = config.user_data_dir
+        self._headers_keys = {header.lower() for header in self.extra_headers.keys()} if self.extra_headers else set()
         self.__initiate_browser_options__()
     def __initiate_browser_options__(self):
+        if TYPE_CHECKING:
+            assert isinstance(self.proxy, tuple)
         if not self.cdp_url:
             # `launch_options` is used with persistent context
             self.launch_options = dict(
@@ -156,6 +175,8 @@ class DynamicSessionMixin:
             )
             self.launch_options["extra_http_headers"] = dict(self.launch_options["extra_http_headers"])
             self.launch_options["proxy"] = dict(self.launch_options["proxy"]) or None
+            self.launch_options["user_data_dir"] = self.user_data_dir
+            self.launch_options.update(cast(Dict, self.additional_args))
             self.context_options = dict()
         else:
             # while `context_options` is left to be used when cdp mode is enabled
@@ -171,11 +192,12 @@ class DynamicSessionMixin:
             )
             self.context_options["extra_http_headers"] = dict(self.context_options["extra_http_headers"])
             self.context_options["proxy"] = dict(self.context_options["proxy"]) or None
+            self.context_options.update(cast(Dict, self.additional_args))
 class StealthySessionMixin:
     def __validate__(self, **params):
-        config = validate(params, model=CamoufoxConfig)
+        config: CamoufoxConfig = validate(params, model=CamoufoxConfig)
         self.max_pages = config.max_pages
         self.headless = config.headless
@@ -204,15 +226,16 @@ class StealthySessionMixin:
         self.selector_config = config.selector_config
         self.additional_args = config.additional_args
         self.page_action = config.page_action
-        self._headers_keys = set(map(str.lower, self.extra_headers.keys())) if self.extra_headers else set()
+        self.user_data_dir = config.user_data_dir
+        self._headers_keys = {header.lower() for header in self.extra_headers.keys()} if self.extra_headers else set()
         self.__initiate_browser_options__()
     def __initiate_browser_options__(self):
         """Initiate browser options."""
-        self.launch_options = generate_launch_options(
+        self.launch_options: Dict[str, Any] = generate_launch_options(
             **{
                 "geoip": self.geoip,
-                "proxy": dict(self.proxy) if self.proxy else self.proxy,
+                "proxy": dict(self.proxy) if self.proxy and isinstance(self.proxy, tuple) else self.proxy,
                 "addons": self.addons,
                 "exclude_addons": [] if self.disable_ads else [DefaultAddons.UBO],
                 "headless": self.headless,
@@ -222,7 +245,7 @@ class StealthySessionMixin:
                 "block_webrtc": self.block_webrtc,
                 "block_images": self.block_images,  # Careful! it makes some websites don't finish loading at all like stackoverflow even in headful mode.
                 "os": None if self.os_randomize else get_os_name(),
-                "user_data_dir": "",
+                "user_data_dir": self.user_data_dir,
                 "ff_version": __ff_version_str__,
                 "firefox_user_prefs": {
                     # This is what enabling `enable_cache` does internally, so we do it from here instead
@@ -232,7 +255,7 @@ class StealthySessionMixin:
                     "browser.cache.disk_cache_ssl": True,
                     "browser.cache.disk.smart_size.enabled": True,
                 },
-                **self.additional_args,
+                **cast(Dict, self.additional_args),
             }
         )

scrapling 0.3.6__tar.gz → 0.3.7__tar.gz

scrapling 0.3.6 (tar.gz) → 0.3.7 (tar.gz)