scrapling 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +29 -19
- scrapling/cli.py +21 -4
- scrapling/core/_types.py +3 -2
- scrapling/core/ai.py +24 -15
- scrapling/core/custom_types.py +20 -27
- scrapling/core/mixins.py +15 -9
- scrapling/core/shell.py +6 -4
- scrapling/core/storage.py +7 -6
- scrapling/core/translator.py +13 -8
- scrapling/core/utils/__init__.py +0 -1
- scrapling/engines/_browsers/__init__.py +0 -2
- scrapling/engines/_browsers/_base.py +45 -21
- scrapling/engines/_browsers/_camoufox.py +98 -43
- scrapling/engines/_browsers/_config_tools.py +1 -1
- scrapling/engines/_browsers/_controllers.py +34 -13
- scrapling/engines/_browsers/_validators.py +31 -10
- scrapling/engines/constants.py +0 -15
- scrapling/engines/static.py +749 -336
- scrapling/engines/toolbelt/convertor.py +13 -15
- scrapling/engines/toolbelt/custom.py +6 -9
- scrapling/engines/toolbelt/fingerprints.py +17 -10
- scrapling/engines/toolbelt/navigation.py +11 -3
- scrapling/fetchers/__init__.py +46 -0
- scrapling/fetchers/chrome.py +210 -0
- scrapling/fetchers/firefox.py +212 -0
- scrapling/fetchers/requests.py +28 -0
- scrapling/parser.py +109 -84
- {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/METADATA +17 -16
- scrapling-0.3.7.dist-info/RECORD +47 -0
- scrapling/fetchers.py +0 -444
- scrapling-0.3.5.dist-info/RECORD +0 -44
- {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/WHEEL +0 -0
- {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/entry_points.txt +0 -0
- {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/top_level.txt +0 -0
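The headline structural change in this release is the removal of the monolithic scrapling/fetchers.py (444 lines) in favor of a scrapling/fetchers/ package (__init__.py, chrome.py, firefox.py, requests.py). The README excerpt diffed below keeps the same public import path, so a sketch like the following should be unaffected by the split; the URL is a hypothetical placeholder and the calls are hedged against the imports shown in the diffed README:

```python
# Minimal sketch, assuming the public import path survives the
# fetchers.py -> fetchers/ package split (the diffed README below
# uses these same imports).
from scrapling.fetchers import Fetcher, StealthyFetcher

page = Fetcher.get("https://example.com")            # plain HTTP fetch
page = StealthyFetcher.fetch("https://example.com")  # stealth fetch (modified Firefox)
print(page.css_first("title::text"))
```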
scrapling/parser.py
CHANGED
@@ -1,8 +1,8 @@
-import re
 from pathlib import Path
 from inspect import signature
 from urllib.parse import urljoin
 from difflib import SequenceMatcher
+from re import Pattern as re_Pattern
 
 from lxml.html import HtmlElement, HtmlMixin, HTMLParser
 from cssselect import SelectorError, SelectorSyntaxError, parse as split_selectors
@@ -17,17 +17,21 @@ from lxml.etree import (
 
 from scrapling.core._types import (
     Any,
+    Set,
     Dict,
+    cast,
     List,
     Tuple,
     Union,
     Pattern,
     Callable,
+    Literal,
     Optional,
     Iterable,
     overload,
     Generator,
     SupportsIndex,
+    TYPE_CHECKING,
 )
 from scrapling.core.custom_types import AttributesHandler, TextHandler, TextHandlers
 from scrapling.core.mixins import SelectorsGeneration
@@ -36,7 +40,7 @@ from scrapling.core.storage import (
     StorageSystemMixin,
     _StorageTools,
 )
-from scrapling.core.translator import
+from scrapling.core.translator import css_to_xpath as _css_to_xpath
 from scrapling.core.utils import clean_spaces, flatten, html_forbidden, log
 
 __DEFAULT_DB_FILE__ = str(Path(__file__).parent / "elements_storage.db")
@@ -70,20 +74,23 @@ class Selector(SelectorsGeneration):
         "_raw_body",
     )
 
+    if TYPE_CHECKING:
+        _storage: StorageSystemMixin
+
     def __init__(
         self,
         content: Optional[str | bytes] = None,
-        url:
+        url: str = "",
         encoding: str = "utf-8",
         huge_tree: bool = True,
         root: Optional[HtmlElement] = None,
         keep_comments: Optional[bool] = False,
         keep_cdata: Optional[bool] = False,
         adaptive: Optional[bool] = False,
-        _storage:
+        _storage: Optional[StorageSystemMixin] = None,
         storage: Any = SQLiteStorageSystem,
         storage_args: Optional[Dict] = None,
-        **
+        **_,
     ):
         """The main class that works as a wrapper for the HTML input data. Using this class, you can search for elements
         with expressions in CSS, XPath, or with simply text. Check the docs for more info.
@@ -131,7 +138,7 @@ class Selector(SelectorsGeneration):
                 default_doctype=True,
                 strip_cdata=(not keep_cdata),
             )
-            self._root = fromstring(body, parser=parser, base_url=url)
+            self._root = cast(HtmlElement, fromstring(body, parser=parser, base_url=url or None))
             self._raw_body = content
 
         else:
@@ -141,7 +148,7 @@ class Selector(SelectorsGeneration):
                 f"Root have to be a valid element of `html` module types to work, not of type {type(root)}"
             )
 
-            self._root = root
+            self._root = cast(HtmlElement, root)
             self._raw_body = ""
 
         self.__adaptive_enabled = adaptive
@@ -238,6 +245,9 @@ class Selector(SelectorsGeneration):
             **self.__response_data,
         )
 
+    def __elements_convertor(self, elements: List[HtmlElement]) -> "Selectors":
+        return Selectors(map(self.__element_convertor, elements))
+
     def __handle_element(
         self, element: Optional[HtmlElement | _ElementUnicodeResult]
     ) -> Optional[Union[TextHandler, "Selector"]]:
@@ -262,7 +272,7 @@ class Selector(SelectorsGeneration):
         if self._is_text_node(result[0]):
             return TextHandlers(map(TextHandler, result))
 
-        return
+        return self.__elements_convertor(result)
 
     def __getstate__(self) -> Any:
         # lxml don't like it :)
@@ -323,7 +333,7 @@ class Selector(SelectorsGeneration):
             if not valid_values or processed_text.strip():
                 _all_strings.append(processed_text)
 
-        return TextHandler(separator).join(_all_strings)
+        return cast(TextHandler, TextHandler(separator).join(_all_strings))
 
     def urljoin(self, relative_url: str) -> str:
         """Join this Selector's url with a relative url to form an absolute full URL."""
@@ -341,7 +351,7 @@ class Selector(SelectorsGeneration):
         """Return the inner HTML code of the element"""
         content = tostring(self._root, encoding=self.encoding, method="html", with_tail=False)
         if isinstance(content, bytes):
-            content = content.decode(
+            content = content.strip().decode(self.encoding)
         return TextHandler(content)
 
     @property
@@ -359,7 +369,7 @@ class Selector(SelectorsGeneration):
             with_tail=False,
         )
         if isinstance(content, bytes):
-            content = content.decode(
+            content = content.strip().decode(self.encoding)
         return TextHandler(content)
 
     def has_class(self, class_name: str) -> bool:
@@ -372,13 +382,14 @@ class Selector(SelectorsGeneration):
     @property
     def parent(self) -> Optional["Selector"]:
         """Return the direct parent of the element or ``None`` otherwise"""
-
+        _parent = self._root.getparent()
+        return self.__element_convertor(_parent) if _parent is not None else None
 
     @property
     def below_elements(self) -> "Selectors":
         """Return all elements under the current element in the DOM tree"""
         below = _find_all_elements(self._root)
-        return self.
+        return self.__elements_convertor(below) if below is not None else Selectors()
 
     @property
     def children(self) -> "Selectors":
@@ -425,7 +436,7 @@ class Selector(SelectorsGeneration):
             # Ignore HTML comments and unwanted types
             next_element = next_element.getnext()
 
-        return self.
+        return self.__element_convertor(next_element) if next_element is not None else None
 
     @property
     def previous(self) -> Optional["Selector"]:
@@ -435,10 +446,10 @@ class Selector(SelectorsGeneration):
             # Ignore HTML comments and unwanted types
             prev_element = prev_element.getprevious()
 
-        return self.
+        return self.__element_convertor(prev_element) if prev_element is not None else None
 
     # For easy copy-paste from Scrapy/parsel code when needed :)
-    def get(self, default=None):
+    def get(self, default=None):  # pyright: ignore
         return self
 
     def get_all(self):
@@ -468,6 +479,16 @@ class Selector(SelectorsGeneration):
         return data + ">"
 
     # From here we start with the selecting functions
+    @overload
+    def relocate(
+        self, element: Union[Dict, HtmlElement, "Selector"], percentage: int, selector_type: Literal[True]
+    ) -> "Selectors": ...
+
+    @overload
+    def relocate(
+        self, element: Union[Dict, HtmlElement, "Selector"], percentage: int, selector_type: Literal[False] = False
+    ) -> List[HtmlElement]: ...
+
     def relocate(
         self,
         element: Union[Dict, HtmlElement, "Selector"],
@@ -506,11 +527,11 @@ class Selector(SelectorsGeneration):
             log.debug(f"Highest probability was {highest_probability}%")
             log.debug("Top 5 best matching elements are: ")
             for percent in tuple(sorted(score_table.keys(), reverse=True))[:5]:
-                log.debug(f"{percent} -> {self.
+                log.debug(f"{percent} -> {self.__elements_convertor(score_table[percent])}")
 
             if not selector_type:
                 return score_table[highest_probability]
-            return self.
+            return self.__elements_convertor(score_table[highest_probability])
         return []
 
     def css_first(
@@ -593,7 +614,7 @@ class Selector(SelectorsGeneration):
         auto_save: bool = False,
         percentage: int = 0,
         **kwargs: Any,
-    ) -> Union["Selectors", List, "TextHandlers"]:
+    ) -> Union["Selectors", List[Any], "TextHandlers"]:
         """Search the current tree with CSS3 selectors
 
         **Important:
@@ -614,7 +635,7 @@ class Selector(SelectorsGeneration):
         try:
             if not self.__adaptive_enabled or "," not in selector:
                 # No need to split selectors in this case, let's save some CPU cycles :)
-                xpath_selector =
+                xpath_selector = _css_to_xpath(selector)
                 return self.xpath(
                     xpath_selector,
                     identifier or selector,
@@ -628,7 +649,7 @@ class Selector(SelectorsGeneration):
                 for single_selector in split_selectors(selector):
                     # I'm doing this only so the `save` function saves data correctly for combined selectors
                    # Like using the ',' to combine two different selectors that point to different elements.
-                    xpath_selector =
+                    xpath_selector = _css_to_xpath(single_selector.canonical())
                    results += self.xpath(
                        xpath_selector,
                        identifier or single_selector.canonical(),
@@ -731,7 +752,8 @@ class Selector(SelectorsGeneration):
             raise TypeError("You have to pass something to search with, like tag name(s), tag attributes, or both.")
 
         attributes = dict()
-        tags
+        tags: Set[str] = set()
+        patterns: Set[Pattern] = set()
         results, functions, selectors = Selectors(), [], []
 
         # Brace yourself for a wonderful journey!
@@ -740,6 +762,7 @@ class Selector(SelectorsGeneration):
                 tags.add(arg)
 
             elif type(arg) in (list, tuple, set):
+                arg = cast(Iterable, arg)  # Type narrowing for type checkers like pyright
                 if not all(map(lambda x: isinstance(x, str), arg)):
                     raise TypeError("Nested Iterables are not accepted, only iterables of tag names are accepted")
                 tags.update(set(arg))
@@ -751,7 +774,7 @@ class Selector(SelectorsGeneration):
                 )
                 attributes.update(arg)
 
-            elif isinstance(arg,
+            elif isinstance(arg, re_Pattern):
                 patterns.add(arg)
 
             elif callable(arg):
@@ -774,7 +797,7 @@ class Selector(SelectorsGeneration):
             attributes[attribute_name] = value
 
         # It's easier and faster to build a selector than traversing the tree
-        tags = tags or
+        tags = tags or set("*")
         for tag in tags:
             selector = tag
             for key, value in attributes.items():
@@ -785,7 +808,7 @@ class Selector(SelectorsGeneration):
             selectors.append(selector)
 
         if selectors:
-            results = self.css(", ".join(selectors))
+            results = cast(Selectors, self.css(", ".join(selectors)))
             if results:
                 # From the results, get the ones that fulfill passed regex patterns
                 for pattern in patterns:
@@ -828,20 +851,20 @@ class Selector(SelectorsGeneration):
         :return: A percentage score of how similar is the candidate to the original element
         """
         score, checks = 0, 0
-
+        data = _StorageTools.element_to_dict(candidate)
 
         # Possible TODO:
         # Study the idea of giving weight to each test below so some are more important than others
         # Current results: With weights some websites had better score while it was worse for others
-        score += 1 if original["tag"] ==
+        score += 1 if original["tag"] == data["tag"] else 0  # * 0.3  # 30%
         checks += 1
 
         if original["text"]:
-            score += SequenceMatcher(None, original["text"],
+            score += SequenceMatcher(None, original["text"], data.get("text") or "").ratio()  # * 0.3  # 30%
             checks += 1
 
         # if both don't have attributes, it still counts for something!
-        score += self.__calculate_dict_diff(original["attributes"],
+        score += self.__calculate_dict_diff(original["attributes"], data["attributes"])  # * 0.3  # 30%
         checks += 1
 
         # Separate similarity test for class, id, href,... this will help in full structural changes
@@ -855,23 +878,23 @@ class Selector(SelectorsGeneration):
                 score += SequenceMatcher(
                     None,
                     original["attributes"][attrib],
-
+                    data["attributes"].get(attrib) or "",
                 ).ratio()  # * 0.3  # 30%
                 checks += 1
 
-        score += SequenceMatcher(None, original["path"],
+        score += SequenceMatcher(None, original["path"], data["path"]).ratio()  # * 0.1  # 10%
         checks += 1
 
         if original.get("parent_name"):
             # Then we start comparing parents' data
-            if
+            if data.get("parent_name"):
                 score += SequenceMatcher(
-                    None, original["parent_name"],
+                    None, original["parent_name"], data.get("parent_name") or ""
                 ).ratio()  # * 0.2  # 20%
                 checks += 1
 
                 score += self.__calculate_dict_diff(
-                    original["parent_attribs"],
+                    original["parent_attribs"], data.get("parent_attribs") or {}
                 )  # * 0.2  # 20%
                 checks += 1
 
@@ -879,7 +902,7 @@ class Selector(SelectorsGeneration):
                 score += SequenceMatcher(
                     None,
                     original["parent_text"],
-
+                    data.get("parent_text") or "",
                 ).ratio()  # * 0.1  # 10%
                 checks += 1
             # else:
@@ -887,9 +910,7 @@ class Selector(SelectorsGeneration):
         #     score -= 0.1
 
         if original.get("siblings"):
-            score += SequenceMatcher(
-                None, original["siblings"], candidate.get("siblings") or []
-            ).ratio()  # * 0.1  # 10%
+            score += SequenceMatcher(None, original["siblings"], data.get("siblings") or []).ratio()  # * 0.1  # 10%
             checks += 1
 
         # How % sure? let's see
@@ -902,7 +923,7 @@ class Selector(SelectorsGeneration):
         score += SequenceMatcher(None, tuple(dict1.values()), tuple(dict2.values())).ratio() * 0.5
         return score
 
-    def save(self, element:
+    def save(self, element: HtmlElement, identifier: str) -> None:
         """Saves the element's unique properties to the storage for retrieval and relocation later
 
         :param element: The element itself that we want to save to storage, it can be a ` Selector ` or pure ` HtmlElement `
@@ -910,15 +931,16 @@ class Selector(SelectorsGeneration):
         the docs for more info.
         """
         if self.__adaptive_enabled:
-
-
+            target = element
+            if isinstance(target, self.__class__):
+                target: HtmlElement = target._root
 
-            if self._is_text_node(
-
+            if self._is_text_node(target):
+                target: HtmlElement = target.getparent()
 
-            self._storage.save(
+            self._storage.save(target, identifier)
         else:
-
+            raise RuntimeError(
                 "Can't use `adaptive` features while it's disabled globally, you have to start a new class instance."
             )
 
@@ -932,10 +954,9 @@ class Selector(SelectorsGeneration):
         if self.__adaptive_enabled:
             return self._storage.retrieve(identifier)
 
-
+        raise RuntimeError(
             "Can't use `adaptive` features while it's disabled globally, you have to start a new class instance."
         )
-        return None
 
     # Operations on text functions
     def json(self) -> Dict:
@@ -1104,28 +1125,30 @@ class Selector(SelectorsGeneration):
         if not case_sensitive:
             text = text.lower()
 
-
-
-
-
-        node_text =
-
-
-
-
-
-
+        possible_targets = _find_all_elements_with_spaces(self._root)
+        if possible_targets:
+            for node in self.__elements_convertor(possible_targets):
+                """Check if element matches given text otherwise, traverse the children tree and iterate"""
+                node_text = node.text
+                if clean_match:
+                    node_text = node_text.clean()
+
+                if not case_sensitive:
+                    node_text = node_text.lower()
+
+                if partial:
+                    if text in node_text:
+                        results.append(node)
+                elif text == node_text:
                     results.append(node)
-                elif text == node_text:
-                    results.append(node)
 
-
-
-
+                if first_match and results:
+                    # we got an element so we should stop
+                    break
 
-
-
-
+        if first_match:
+            if results:
+                return results[0]
         return results
 
     def find_by_regex(
@@ -1143,23 +1166,25 @@ class Selector(SelectorsGeneration):
         """
         results = Selectors()
 
-
-
-
-
-
-
-
-
-
-
+        possible_targets = _find_all_elements_with_spaces(self._root)
+        if possible_targets:
+            for node in self.__elements_convertor(possible_targets):
+                """Check if element matches given regex otherwise, traverse the children tree and iterate"""
+                node_text = node.text
+                if node_text.re(
+                    query,
+                    check_match=True,
+                    clean_match=clean_match,
+                    case_sensitive=case_sensitive,
+                ):
+                    results.append(node)
 
-
-
-
+                if first_match and results:
+                    # we got an element so we should stop
+                    break
 
-
-
+        if results and first_match:
+            return results[0]
         return results
 
 
@@ -1181,9 +1206,9 @@ class Selectors(List[Selector]):
     def __getitem__(self, pos: SupportsIndex | slice) -> Union[Selector, "Selectors"]:
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
-            return self.__class__(lst)
+            return self.__class__(cast(List[Selector], lst))
         else:
-            return lst
+            return cast(Selector, lst)
 
     def xpath(
         self,
@@ -1265,7 +1290,7 @@ class Selectors(List[Selector]):
     def re_first(
         self,
         regex: str | Pattern,
-        default=None,
+        default: Any = None,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
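Most of the parser.py churn above tightens typing (`cast` calls, `Literal` overloads for `relocate`) and routes element wrapping through the new `__elements_convertor` helper, while `save`/`retrieve` now raise `RuntimeError` instead of silently returning when adaptive mode is off. A rough, hedged sketch of the adaptive workflow those methods serve; the HTML and selector are hypothetical, and the keyword names (`adaptive`, `auto_save`, `percentage`) are taken from the diffed signatures:

```python
# Hedged sketch of the adaptive save/relocate flow these methods implement;
# content and selectors are hypothetical, keyword names come from the diff.
from scrapling.parser import Selector

page = Selector(content="<div id='price'>10</div>", adaptive=True)
page.css("#price", auto_save=True)  # persists the element's fingerprint to storage

# After a markup change, the saved fingerprint lets the parser relocate the element:
changed = Selector(content="<span class='price'>12</span>", adaptive=True)
matches = changed.css("#price", adaptive=True, percentage=70)
```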
{scrapling-0.3.5.dist-info → scrapling-0.3.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scrapling
-Version: 0.3.5
+Version: 0.3.7
 Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -64,7 +64,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: lxml>=6.0.
+Requires-Dist: lxml>=6.0.2
 Requires-Dist: cssselect>=1.3.0
 Requires-Dist: orjson>=3.11.3
 Requires-Dist: tldextract>=5.3.0
@@ -77,7 +77,7 @@ Requires-Dist: camoufox>=0.4.11; extra == "fetchers"
 Requires-Dist: geoip2>=5.1.0; extra == "fetchers"
 Requires-Dist: msgspec>=0.19.0; extra == "fetchers"
 Provides-Extra: ai
-Requires-Dist: mcp>=1.
+Requires-Dist: mcp>=1.16.0; extra == "ai"
 Requires-Dist: markdownify>=1.2.0; extra == "ai"
 Requires-Dist: scrapling[fetchers]; extra == "ai"
 Provides-Extra: shell
@@ -139,7 +139,7 @@ Dynamic: license-file
 
 Scrapling isn't just another Web Scraping library. It's the first **adaptive** scraping library that learns from website changes and evolves with them. While other libraries break when websites update their structure, Scrapling automatically relocates your elements and keeps your scrapers running.
 
-Built for the modern Web, Scrapling
+Built for the modern Web, Scrapling features its own rapid parsing engine and fetchers to handle all Web Scraping challenges you face or will face. Built by Web Scrapers for Web Scrapers and regular users, there's something for everyone.
 
 ```python
 >> from scrapling.fetchers import Fetcher, AsyncFetcher, StealthyFetcher, DynamicFetcher
@@ -162,7 +162,7 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 <a href="https://visit.decodo.com/Dy6W0b" target="_blank" title="Try the Most Efficient Residential Proxies for Free"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/decodo.png"></a>
 <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
 <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
-<a href="https://www.
+<a href="https://www.rapidproxy.io/?ref=d4v" target="_blank" title="Affordable Access to the Proxy World – bypass CAPTCHAs blocks, and avoid additional costs."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/rapidproxy.jpg"></a>
 <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
 
 <!-- /sponsors -->
@@ -176,7 +176,7 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 ### Advanced Websites Fetching with Session Support
 - **HTTP Requests**: Fast and stealthy HTTP requests with the `Fetcher` class. Can impersonate browsers' TLS fingerprint, headers, and use HTTP3.
 - **Dynamic Loading**: Fetch dynamic websites with full browser automation through the `DynamicFetcher` class supporting Playwright's Chromium, real Chrome, and custom stealth mode.
-- **Anti-bot Bypass**: Advanced stealth capabilities with `StealthyFetcher` using a modified version of Firefox and fingerprint spoofing. Can bypass all
+- **Anti-bot Bypass**: Advanced stealth capabilities with `StealthyFetcher` using a modified version of Firefox and fingerprint spoofing. Can bypass all types of Cloudflare's Turnstile and Interstitial with automation easily.
 - **Session Management**: Persistent session support with `FetcherSession`, `StealthySession`, and `DynamicSession` classes for cookie and state management across requests.
 - **Async Support**: Complete async support across all fetchers and dedicated async session classes.
 
@@ -200,13 +200,7 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
 - 📝 **Auto Selector Generation**: Generate robust CSS/XPath selectors for any element.
 - 🔌 **Familiar API**: Similar to Scrapy/BeautifulSoup with the same pseudo-elements used in Scrapy/Parsel.
 - 📘 **Complete Type Coverage**: Full type hints for excellent IDE support and code completion.
-
-### New Session Architecture
-Scrapling 0.3 introduces a completely revamped session system:
-- **Persistent Sessions**: Maintain cookies, headers, and authentication across multiple requests
-- **Automatic Session Management**: Smart session lifecycle handling with proper cleanup
-- **Session Inheritance**: All fetchers support both one-off requests and persistent session usage
-- **Concurrent Session Support**: Run multiple isolated sessions simultaneously
+- 🔋 **Ready Docker image**: With each release, a Docker image containing all browsers is automatically built and pushed.
 
 ## Getting Started
 
@@ -324,11 +318,11 @@ scrapling extract stealthy-fetch 'https://nopecha.com/demo/cloudflare' captchas.
 ```
 
 > [!NOTE]
-> There are many additional features, but we want to keep this page
+> There are many additional features, but we want to keep this page concise, such as the MCP server and the interactive Web Scraping Shell. Check out the full documentation [here](https://scrapling.readthedocs.io/en/latest/)
 
 ## Performance Benchmarks
 
-Scrapling isn't just powerful—it's also blazing fast, and the updates since version 0.3
+Scrapling isn't just powerful—it's also blazing fast, and the updates since version 0.3 have delivered exceptional performance improvements across all operations.
 
 ### Text Extraction Speed Test (5000 nested elements)
 
@@ -391,6 +385,13 @@ Starting with v0.3.2, this installation only includes the parser engine and its
 ```
 Don't forget that you need to install the browser dependencies with `scrapling install` after any of these extras (if you didn't already)
 
+### Docker
+You can also install a Docker image with all extras and browsers with the following command:
+```bash
+docker pull pyd4vinci/scrapling
+```
+This image is automatically built and pushed to Docker Hub through GitHub actions right here.
+
 ## Contributing
 
 We welcome contributions! Please read our [contributing guidelines](https://github.com/D4Vinci/Scrapling/blob/main/CONTRIBUTING.md) before getting started.
@@ -398,7 +399,7 @@ We welcome contributions! Please read our [contributing guidelines](https://gith
 ## Disclaimer
 
 > [!CAUTION]
-> This library is provided for educational and research purposes only. By using this library, you agree to comply with local and international data scraping and privacy laws. The authors and contributors are not responsible for any misuse of this software. Always respect
+> This library is provided for educational and research purposes only. By using this library, you agree to comply with local and international data scraping and privacy laws. The authors and contributors are not responsible for any misuse of this software. Always respect the terms of service of websites and robots.txt files.
 
 ## License
 
scrapling-0.3.7.dist-info/RECORD
ADDED
@@ -0,0 +1,47 @@
+scrapling/__init__.py,sha256=ckdQrdwM2SRKBMcORUsCUgU6JWoUwGtrbC3U0OH5RN4,1522
+scrapling/cli.py,sha256=gbhfy2GCz_VqcWhBaNMK4wevayxNtLb72SQIUR9Ebik,26916
+scrapling/parser.py,sha256=bQ7_c3rHjnjJsWI-qqkvEVkVx4-NM-1SWYpQrcwbflQ,58837
+scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
+scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scrapling/core/_html_utils.py,sha256=ki47v54SsTL5-khi1jcLkJqAHqEq19cuex-dqzXdbEI,20328
+scrapling/core/_types.py,sha256=SwJpeZ6TSi20y0FWHo-BmfGlkHpbot6qaql01PskgHw,897
+scrapling/core/ai.py,sha256=xE0RXQxZzH62fCdFiNxcSWbdeuUZK3TlXd4hPkdOO80,36295
+scrapling/core/custom_types.py,sha256=JlaOKvtI28ZkJ5ylaXIKfqqhlOOhIsZDNBhTbLfyWPo,13423
+scrapling/core/mixins.py,sha256=Npw36VPmsHMrEZ5VXgBbLL1OyYcFqMUWkUB5oWATqtw,3522
+scrapling/core/shell.py,sha256=kx7_6zGRXAd9NulL0cyX4YVQMGf4Ij1MYUtceSSE9xk,22983
+scrapling/core/storage.py,sha256=eEAwl88bmAexXwnow86alV7TaGNf1an5_J7e1Mas7PU,6309
+scrapling/core/translator.py,sha256=5Wk1rn3mSXO-1ACYnrORjO7n9aP2f5-OAzT8MeNjv-M,5354
+scrapling/core/utils/__init__.py,sha256=zE2I4Zm355kdGjZBAAghFdFYQ-yRGvZbNqQuDP93-Ok,155
+scrapling/core/utils/_shell.py,sha256=zes71MmFTs7V9f0JFstaWcjQhKNZN6xvspu29YVQtRc,1707
+scrapling/core/utils/_utils.py,sha256=ATy-wwz00U-alOGH-NGK-VoPNr1qYmUwEoWuqAHjDkg,3143
+scrapling/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scrapling/engines/constants.py,sha256=aOIOFCjbtgxH3hehlPU_3EwlnjpdUHRFK342nDQy-Vc,3596
+scrapling/engines/static.py,sha256=3m86QAC1bnK9MD5Cjcs5u2Bu8zb51dzQBLK4Si1K5K8,50062
+scrapling/engines/_browsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+scrapling/engines/_browsers/_base.py,sha256=hSmzQ_UBI8WcgVZyVw2qppNOdR47xwjxypOULfBhkhQ,12546
+scrapling/engines/_browsers/_camoufox.py,sha256=6YikWmY_z38xl9pYW2LgVRRdCwXdCrgFiyyYhBSVQug,38593
+scrapling/engines/_browsers/_config_tools.py,sha256=Vbl-0G3E7_QsA6tZ6FrkUqUy33h--a2O8LveSvVF2y8,4617
+scrapling/engines/_browsers/_controllers.py,sha256=WppP9Tkl4KNCszVTjy3BQ12gyMCPfiv7mUdbjA_l0JY,28705
+scrapling/engines/_browsers/_page.py,sha256=1z-P6c97cTkULE-FVrsMY589e6eL_20Ae8pUe6vjggE,2206
+scrapling/engines/_browsers/_validators.py,sha256=Bpk6P5urruUKDrdrXSnkiBHQWJ-F0JpXvepzlXj6Gfk,8033
+scrapling/engines/toolbelt/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+scrapling/engines/toolbelt/convertor.py,sha256=tO-floNdsM6PmxaoRPJm9SK3rT7lyCQqwlkoeEe4t_0,13193
+scrapling/engines/toolbelt/custom.py,sha256=GWqKrciMfry_8Vc_0LlaTGNUX8XxVlPquQ9obohIPtY,7447
+scrapling/engines/toolbelt/fingerprints.py,sha256=3DaxNNLConjf_zDi97YswQ8cWgdA6Bq2mdR_l57Ul5E,2521
+scrapling/engines/toolbelt/navigation.py,sha256=VHQ5sMVI-5UtcSpK-_Pin0e16fRLRzW8lYu-MObCxkY,3858
+scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
+scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
+scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js,sha256=clzuf7KYcvDWYaKKxT_bkAoCT2fGsOcUw47948CHjAc,267
+scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUvVWYUkPUbblkfMfpiLvBY7w,599
+scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
+scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
+scrapling/fetchers/__init__.py,sha256=V2PSNzVPqtW7bdRrLygsaxHXqbu_7kdyI3byYr5AFbU,1687
+scrapling/fetchers/chrome.py,sha256=Ky8bxKkvcbT1gmgazdxrUmJ8qHQDa_dhXswi-wtVzNg,12728
+scrapling/fetchers/firefox.py,sha256=Ix_RVatrDOnC3qR_IzkzkD_PbKv66Jd5C5P58YaOUF4,13190
+scrapling/fetchers/requests.py,sha256=Y-ZXhm2Ui1Ugc5lvMgBDIBAmaoh3upjPlbJswdCnyok,978
+scrapling-0.3.7.dist-info/licenses/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.3.7.dist-info/METADATA,sha256=pcX6f6EBl28AL3O_n8bHO8I5_fyXIH2CvT2sgvvjUe8,22465
+scrapling-0.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+scrapling-0.3.7.dist-info/entry_points.txt,sha256=DHyt2Blxy0P5OE2HRcP95Wz9_xo2ERCDcNqrJjYS3o8,49
+scrapling-0.3.7.dist-info/top_level.txt,sha256=Ud-yF-PC2U5HQ3nc5QwT7HSPdIpF1RuwQ_mYgBzHHIM,10
+scrapling-0.3.7.dist-info/RECORD,,
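Each RECORD row above follows the standard wheel format `path,sha256=<digest>,<size-in-bytes>`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with trailing `=` padding stripped (per PEP 376/427). A small sketch of how such a digest can be recomputed for verification; the file path is a hypothetical local one:

```python
import base64
import hashlib
from pathlib import Path

def record_digest(path: str) -> str:
    """Return a RECORD-style hash entry: urlsafe base64 SHA-256, padding stripped."""
    raw = hashlib.sha256(Path(path).read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# e.g., for an installed copy of the parser module (hypothetical path):
print(record_digest("site-packages/scrapling/parser.py"))
```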