scrapling 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +1 -1
- scrapling/defaults.py +6 -0
- scrapling/engines/camo.py +2 -2
- scrapling/engines/pw.py +2 -2
- scrapling/engines/static.py +2 -2
- scrapling/engines/toolbelt/custom.py +3 -4
- scrapling/parser.py +11 -2
- {scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/METADATA +15 -4
- {scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/RECORD +12 -11
- {scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/LICENSE +0 -0
- {scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/WHEEL +0 -0
- {scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -4,7 +4,7 @@ from scrapling.parser import Adaptor, Adaptors
 from scrapling.core.custom_types import TextHandler, AttributesHandler
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
scrapling/defaults.py
ADDED
@@ -0,0 +1,6 @@
+from .fetchers import Fetcher, StealthyFetcher, PlayWrightFetcher
+
+# If you are going to use Fetchers with the default settings, import them from this file instead for a cleaner looking code
+Fetcher = Fetcher()
+StealthyFetcher = StealthyFetcher()
+PlayWrightFetcher = PlayWrightFetcher()
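Since the file added above is scrapling/defaults.py, the import path that matches it is `scrapling.defaults` (note that the README snippets later in this diff write `scrapling.default`). A minimal usage sketch of the prebuilt instances, reusing the exact `fetch` call from this release's README:

```python
# Usage sketch: these names are already-initialized instances exported at
# module level, so no constructor call is needed before fetching.
from scrapling.defaults import StealthyFetcher

page = StealthyFetcher.fetch('https://example.com', headless=True, network_idle=True)
print(page.status)  # Response attributes are available right away
```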
scrapling/engines/camo.py
CHANGED
@@ -114,14 +114,14 @@ class CamoufoxEngine:
             response = Response(
                 url=res.url,
                 text=page.content(),
-
+                body=res.body(),
                 status=res.status,
                 reason=res.status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
                 headers=res.all_headers(),
                 request_headers=res.request.all_headers(),
-
+                **self.adaptor_arguments
             )
             page.close()
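The swapped line feeds the raw response bytes into `Response` via `res.body()`. Camoufox exposes a Playwright-compatible page/response API, and the two sources differ in kind; a standalone illustration using plain Playwright (not Scrapling code):

```python
# Plain Playwright (sync API) illustration: res.body() is the raw HTTP
# response payload as bytes, while page.content() serializes the current
# DOM (after any JavaScript ran) as a string.
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    res = page.goto('https://example.com')
    raw = res.body()           # bytes, as received over the wire
    rendered = page.content()  # str, snapshot of the live DOM
    browser.close()
```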
scrapling/engines/pw.py
CHANGED
@@ -224,14 +224,14 @@ class PlaywrightEngine:
             response = Response(
                 url=res.url,
                 text=page.content(),
-
+                body=res.body(),
                 status=res.status,
                 reason=res.status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
                 headers=res.all_headers(),
                 request_headers=res.request.all_headers(),
-
+                **self.adaptor_arguments
             )
             page.close()
             return response
scrapling/engines/static.py
CHANGED
@@ -53,14 +53,14 @@ class StaticEngine:
         return Response(
             url=str(response.url),
             text=response.text,
-
+            body=response.content,
             status=response.status_code,
             reason=response.reason_phrase,
             encoding=response.encoding or 'utf-8',
             cookies=dict(response.cookies),
             headers=dict(response.headers),
             request_headers=dict(response.request.headers),
-
+            **self.adaptor_arguments
         )
 
     def get(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
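The attribute names used here (`status_code`, `reason_phrase`, raw `content` next to decoded `text`) match httpx's response object. A short sketch of those source fields, under the assumption that httpx is indeed the underlying HTTP client:

```python
# Assumed: an httpx response, as the attribute names in the diff suggest.
import httpx

response = httpx.get('https://example.com')
print(type(response.content))        # bytes -> forwarded as Response.body
print(type(response.text))           # str   -> forwarded as Response.text
print(response.status_code, response.reason_phrase)
print(response.encoding or 'utf-8')  # encoding can be None, hence the fallback
```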
scrapling/engines/toolbelt/custom.py
CHANGED
@@ -12,15 +12,14 @@ from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable
 class Response(Adaptor):
     """This class is returned by all engines as a way to unify response type between different libraries."""
 
-    def __init__(self, url: str, text: str,
+    def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict, encoding: str = 'utf-8', **adaptor_arguments: Dict):
         automatch_domain = adaptor_arguments.pop('automatch_domain', None)
-        super().__init__(text=text, body=content, url=automatch_domain or url, encoding=encoding, **adaptor_arguments)
-
         self.status = status
         self.reason = reason
         self.cookies = cookies
         self.headers = headers
         self.request_headers = request_headers
+        super().__init__(text=text, body=body, url=automatch_domain or url, encoding=encoding, **adaptor_arguments)
         # For back-ward compatibility
         self.adaptor = self
 
@@ -31,7 +30,7 @@ class Response(Adaptor):
 class BaseFetcher:
     def __init__(
             self, huge_tree: bool = True, keep_comments: Optional[bool] = False, auto_match: Optional[bool] = True,
-            storage: Any = SQLiteStorageSystem, storage_args: Optional[Dict] = None, debug: Optional[bool] =
+            storage: Any = SQLiteStorageSystem, storage_args: Optional[Dict] = None, debug: Optional[bool] = False,
             automatch_domain: Optional[str] = None,
     ):
         """Arguments below are the same from the Adaptor class so you can pass them directly, the rest of Adaptor's arguments
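Two related changes land here: the constructor now spells out every response field, and the `super().__init__` call passes the new `body` argument instead of the old `content` name, with the attribute assignments moved in front of it so `Adaptor`'s initializer can observe them. A hedged sketch of constructing a `Response` by hand, assuming the module path shown in this diff and default storage settings:

```python
# Sketch only: building a Response directly with the new 0.2.2 signature.
from scrapling.engines.toolbelt.custom import Response

html = '<html><body><h1>hi</h1></body></html>'
resp = Response(
    url='https://example.com',
    text=html,
    body=html.encode('utf-8'),  # raw bytes now travel separately from text
    status=200,
    reason='OK',
    cookies={},
    headers={},
    request_headers={},
)
# status/reason/etc. are set before Adaptor.__init__ runs, which is what
# lets the parser snapshot them (see the parser.py hunks below).
print(resp.status, resp.reason)
```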
scrapling/parser.py
CHANGED
@@ -32,6 +32,7 @@ class Adaptor(SelectorsGeneration):
             storage: Any = SQLiteStorageSystem,
             storage_args: Optional[Dict] = None,
             debug: Optional[bool] = True,
+            **kwargs
     ):
         """The main class that works as a wrapper for the HTML input data. Using this class, you can search for elements
         with expressions in CSS, XPath, or with simply text. Check the docs for more info.
@@ -117,6 +118,10 @@ class Adaptor(SelectorsGeneration):
         self.__attributes = None
         self.__tag = None
         self.__debug = debug
+        # No need to check if all response attributes exist or not because if `status` exist, then the rest exist (Save some CPU cycles for speed)
+        self.__response_data = {
+            key: getattr(self, key) for key in ('status', 'reason', 'cookies', 'headers', 'request_headers',)
+        } if hasattr(self, 'status') else {}
 
     # Node functionalities, I wanted to move to separate Mixin class but it had slight impact on performance
     @staticmethod
@@ -138,10 +143,14 @@ class Adaptor(SelectorsGeneration):
             return TextHandler(str(element))
         else:
             if issubclass(type(element), html.HtmlMixin):
+
                 return self.__class__(
-                    root=element,
+                    root=element,
+                    text='', body=b'',  # Since root argument is provided, both `text` and `body` will be ignored so this is just a filler
+                    url=self.url, encoding=self.encoding, auto_match=self.__auto_match_enabled,
                     keep_comments=True,  # if the comments are already removed in initialization, no need to try to delete them in sub-elements
-                    huge_tree=self.__huge_tree_enabled, debug=self.__debug
+                    huge_tree=self.__huge_tree_enabled, debug=self.__debug,
+                    **self.__response_data
                 )
             return element
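Taken together, the parser changes make response metadata travel with child elements: `__response_data` is captured once at construction time and re-applied whenever `self.__class__` builds a sub-element, so children report the same `status`, `headers`, and cookies as their page. A behavior sketch, assuming the classic `Fetcher().get` API from the README:

```python
# Behavior sketch: child elements created by selection now inherit the
# page's response attributes via **self.__response_data (new in 0.2.2).
from scrapling import Fetcher

page = Fetcher().get('https://example.com')
heading = page.css_first('h1')            # child built through self.__class__(...)
print(page.status)                        # e.g. 200
print(getattr(heading, 'status', None))   # now matches the page's status
```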
{scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.1
+Version: 0.2.2
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -52,9 +52,9 @@ Dealing with failing web scrapers due to anti-bot protections or website changes
 Scrapling is a high-performance, intelligent web scraping library for Python that automatically adapts to website changes while significantly outperforming popular alternatives. For both beginners and experts, Scrapling provides powerful features while maintaining simplicity.
 
 ```python
->> from scrapling import Fetcher, StealthyFetcher, PlayWrightFetcher
+>> from scrapling.default import Fetcher, StealthyFetcher, PlayWrightFetcher
 # Fetch websites' source under the radar!
->> page = StealthyFetcher
+>> page = StealthyFetcher.fetch('https://example.com', headless=True, network_idle=True)
 >> print(page.status)
 200
 >> products = page.css('.product', auto_save=True)  # Scrape data that survives website design changes!
@@ -257,12 +257,21 @@ python -m browserforge update
 ```
 
 ## Fetching Websites Features
-All fetcher-type classes are imported in the same way
+You might be a little bit confused by now so let me clear things up. All fetcher-type classes are imported in the same way
 ```python
 from scrapling import Fetcher, StealthyFetcher, PlayWrightFetcher
 ```
 And all of them can take these initialization arguments: `auto_match`, `huge_tree`, `keep_comments`, `storage`, `storage_args`, and `debug` which are the same ones you give to the `Adaptor` class.
 
+If you don't want to pass arguments to the generated `Adaptor` object and want to use the default values, you can use this import instead for cleaner code:
+```python
+from scrapling.default import Fetcher, StealthyFetcher, PlayWrightFetcher
+```
+then use it right away without initializing like:
+```python
+page = StealthyFetcher.fetch('https://example.com')
+```
+
 Also, the `Response` object returned from all fetchers is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`. All `cookies`, `headers`, and `request_headers` are always of type `dictionary`.
 > [!NOTE]
 > The `auto_match` argument is enabled by default which is the one you should care about the most as you will see later.
@@ -803,6 +812,8 @@ Yes, Scrapling instances are thread-safe. Each Adaptor instance maintains its state
 
 ## More Sponsors!
 [](https://www.capsolver.com/?utm_source=github&utm_medium=repo&utm_campaign=scraping&utm_term=Scrapling)
+<a href="https://serpapi.com/?utm_source=scrapling"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png" height="500" width="500" alt="SerpApi Banner" ></a>
+
 
 ## Contributing
 Everybody is invited and welcome to contribute to Scrapling. There is a lot to do!
{scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/RECORD
CHANGED
@@ -1,6 +1,7 @@
-scrapling/__init__.py,sha256=
+scrapling/__init__.py,sha256=lpRuPRo5y_KrUeY78qgX5H_C2dWFV33VqrTX0OafHO8,435
+scrapling/defaults.py,sha256=blYDLiuI5DgDSLRWnUgpp21WtFOsv1BsCRCmPeg8Xc4,287
 scrapling/fetchers.py,sha256=_6mL7XSTZE1fHXBqbxE2bBHnlQP1lH-4MCiQHQd5hQs,16017
-scrapling/parser.py,sha256=
+scrapling/parser.py,sha256=d2n00uF5i7W5lf0afLNRdk17ZFcNyiF9EzXLRQGA0NM,54111
 scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
 scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 scrapling/core/_types.py,sha256=nD2ZY_fitLohx3MfDmqoKJ9ZShrnRhQ8-d1SU1zEGAY,552
@@ -10,12 +11,12 @@ scrapling/core/storage_adaptors.py,sha256=Kbak0BOJX5e9I1PbUS_4sUJi2Wxw8Bv5XsaLHA
 scrapling/core/translator.py,sha256=oU-dQCkNQOccZPrXbPW_VSgC5ll10Bb89C3ezW2lI0o,5228
 scrapling/core/utils.py,sha256=fXdANUgRBbVbOerJ94fRY9vi7n5zsbm8t3G4qQ-F3ak,3792
 scrapling/engines/__init__.py,sha256=zwMqcSdNGh-IX0d4zXazrgAeHrkqIN_v5Ia7RU1g8W0,267
-scrapling/engines/camo.py,sha256=
+scrapling/engines/camo.py,sha256=41vp2Nh51kKuOSZ1PijsIpROpQZgFfUPybVbEX8pEXk,7530
 scrapling/engines/constants.py,sha256=jSDA6lgbvEIB8z2m2SFzCKkvFEZnp28Mondy2__FpkM,3721
-scrapling/engines/pw.py,sha256=
-scrapling/engines/static.py,sha256=
+scrapling/engines/pw.py,sha256=l5MrSW_WNBKAxAlyxbt09ka_lEGo61XKuaOgWpYmvHk,12102
+scrapling/engines/static.py,sha256=Wsp6_-soZUQJT6kHoKPkLOdHU9J50chLdYxDmQjO4FQ,7101
 scrapling/engines/toolbelt/__init__.py,sha256=BnBp34aDeohYgqdysEAAWnGZgA02YlExkc5FJLetMSo,367
-scrapling/engines/toolbelt/custom.py,sha256=
+scrapling/engines/toolbelt/custom.py,sha256=8lvGHWIZoOotSTF97KgPb3CbJquel2QFx8rP8Hf2sQ4,7469
 scrapling/engines/toolbelt/fingerprints.py,sha256=kkVtZKSt2ukc0CV0g6QUvSWR0Yx5p8Mv8xiqACAsMBo,2917
 scrapling/engines/toolbelt/navigation.py,sha256=Tde5_6Wv7lOeWXMzs9D6TRaxAbJ3b-zIX6-4HggZbCQ,4017
 tests/__init__.py,sha256=YHFB5ftzgLQVh6gbPfbYcY4yOS9DOBp5dBa6I-qtm8U,32
@@ -26,8 +27,8 @@ tests/fetchers/test_playwright.py,sha256=YOWn89urd9NwoCHfTFj8fY4xYrRY2BeszTt5Q-T
 tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
 tests/parser/test_general.py,sha256=NfTuGLgAm-LH0dVV0pvbRcYSNI-wSu05rdnuRzmB0m4,11664
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
+scrapling-0.2.2.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.2.2.dist-info/METADATA,sha256=gk7fij0BkRwA51dJlCbARlx_FW9_U9v9ptk3Mc5-YKQ,64784
+scrapling-0.2.2.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+scrapling-0.2.2.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
+scrapling-0.2.2.dist-info/RECORD,,
{scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/LICENSE
File without changes
{scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/WHEEL
File without changes
{scrapling-0.2.1.dist-info → scrapling-0.2.2.dist-info}/top_level.txt
File without changes