scrapling 0.2.9.tar.gz → 0.2.91.tar.gz
- {scrapling-0.2.9/scrapling.egg-info → scrapling-0.2.91}/PKG-INFO +2 -3
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/__init__.py +1 -1
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/_types.py +2 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/camo.py +44 -18
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/pw.py +47 -18
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/custom.py +1 -5
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/fetchers.py +7 -7
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/parser.py +1 -1
- {scrapling-0.2.9 → scrapling-0.2.91/scrapling.egg-info}/PKG-INFO +2 -3
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling.egg-info/requires.txt +1 -1
- {scrapling-0.2.9 → scrapling-0.2.91}/setup.cfg +1 -1
- {scrapling-0.2.9 → scrapling-0.2.91}/setup.py +2 -3
- {scrapling-0.2.9 → scrapling-0.2.91}/LICENSE +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/MANIFEST.in +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/README.md +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/custom_types.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/mixins.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/storage_adaptors.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/translator.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/utils.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/defaults.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/constants.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/static.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/bypasses/navigator_plugins.js +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/bypasses/notification_permission.js +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/bypasses/screen_props.js +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/bypasses/webdriver_fully.js +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/bypasses/window_chrome.js +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/fingerprints.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/navigation.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling/py.typed +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling.egg-info/SOURCES.txt +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling.egg-info/dependency_links.txt +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling.egg-info/not-zip-safe +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/scrapling.egg-info/top_level.txt +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/async/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/async/test_camoufox.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/async/test_httpx.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/async/test_playwright.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/sync/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/sync/test_camoufox.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/sync/test_httpx.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/sync/test_playwright.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/fetchers/test_utils.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/parser/__init__.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/parser/test_automatch.py +0 -0
- {scrapling-0.2.9 → scrapling-0.2.91}/tests/parser/test_general.py +0 -0
{scrapling-0.2.9/scrapling.egg-info → scrapling-0.2.91}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.9
+Version: 0.2.91
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -21,7 +21,6 @@ Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -38,7 +37,7 @@ Requires-Dist: cssselect>=1.2
 Requires-Dist: w3lib
 Requires-Dist: orjson>=3
 Requires-Dist: tldextract
-Requires-Dist: httpx[brotli,zstd]
+Requires-Dist: httpx[brotli,socks,zstd]
 Requires-Dist: playwright>=1.49.1
 Requires-Dist: rebrowser-playwright>=1.49.1
 Requires-Dist: camoufox[geoip]>=0.4.9
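The dependency change worth noting here is the new `socks` extra on httpx, which pulls in SOCKS-proxy support. A minimal sketch of what that unlocks, with a hypothetical local proxy URL:

```python
# Needs the new extra: pip install "httpx[brotli,socks,zstd]"
import httpx

# Hypothetical SOCKS endpoint (e.g. a local Tor client); socks5:// URLs
# only work once the `socks` extra is installed.
proxy = "socks5://127.0.0.1:9050"

# `proxy=` requires httpx >= 0.26; older releases used `proxies=`.
with httpx.Client(proxy=proxy) as client:
    r = client.get("https://example.com")
    print(r.status_code)
```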
{scrapling-0.2.9 → scrapling-0.2.91}/scrapling/__init__.py

@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
 from scrapling.parser import Adaptor, Adaptors
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.9"
+__version__ = "0.2.91"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
{scrapling-0.2.9 → scrapling-0.2.91}/scrapling/core/_types.py

@@ -5,6 +5,8 @@ Type definitions for type checking purposes.
 from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
                     List, Literal, Optional, Pattern, Tuple, Type, Union)
 
+SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]
+
 try:
     from typing import Protocol
 except ImportError:
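`SelectorWaitStates` matches the `state` values Playwright's `wait_for_selector` accepts, so a mistyped state becomes a static type error instead of a runtime surprise. A small self-contained sketch of how such a `Literal` alias behaves under a type checker:

```python
from typing import Literal

SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]

def wait_for(selector: str, state: SelectorWaitStates = "attached") -> None:
    # A real caller would forward this to page.wait_for_selector(selector, state=state)
    print(f"waiting until {selector!r} is {state}")

wait_for("#content", "visible")    # fine
# wait_for("#content", "visable")  # rejected by mypy/pyright: not a valid Literal value
```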
{scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/camo.py

@@ -3,7 +3,7 @@ from camoufox.async_api import AsyncCamoufox
 from camoufox.sync_api import Camoufox
 
 from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
-                                   Union)
+                                   SelectorWaitStates, Union)
 from scrapling.core.utils import log
 from scrapling.engines.toolbelt import (Response, StatusText,
                                         async_intercept_route,
@@ -18,7 +18,7 @@ class CamoufoxEngine:
             self, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
-            wait_selector_state:
+            wait_selector_state: Optional[SelectorWaitStates] = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
             proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
             geoip: Optional[bool] = False,
             adaptor_arguments: Dict = None,
@@ -84,6 +84,14 @@ class CamoufoxEngine:
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         addons = [] if self.disable_ads else [DefaultAddons.UBO]
+        # Store the final response
+        final_response = None
+
+        def handle_response(finished_response):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         with Camoufox(
             geoip=self.geoip,
             proxy=self.proxy,
@@ -100,13 +108,15 @@ class CamoufoxEngine:
             page = browser.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
             if self.disable_resources:
                 page.route("**/*", intercept_route)
 
             if self.extra_headers:
                 page.set_extra_http_headers(self.extra_headers)
 
-
+            first_response = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
@@ -123,21 +133,24 @@ class CamoufoxEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
 
+            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=
+                url=final_response.url,
                 text=page.content(),
-                body=
-                status=
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=
-                request_headers=
+                headers=final_response.all_headers(),
+                request_headers=final_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -151,6 +164,14 @@ class CamoufoxEngine:
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         addons = [] if self.disable_ads else [DefaultAddons.UBO]
+        # Store the final response
+        final_response = None
+
+        async def handle_response(finished_response):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         async with AsyncCamoufox(
             geoip=self.geoip,
             proxy=self.proxy,
@@ -167,13 +188,15 @@ class CamoufoxEngine:
             page = await browser.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
             if self.disable_resources:
                 await page.route("**/*", async_intercept_route)
 
             if self.extra_headers:
                 await page.set_extra_http_headers(self.extra_headers)
 
-
+            first_response = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             await page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
@@ -190,21 +213,24 @@ class CamoufoxEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
 
+            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=
+                url=final_response.url,
                 text=await page.content(),
-                body=
-                status=
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await
-                request_headers=await
+                headers=await final_response.all_headers(),
+                request_headers=await final_response.request.all_headers(),
                 **self.adaptor_arguments
            )
             await page.close()
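Both the sync and async Camoufox paths now share one idea: subscribe to every response with `page.on("response", ...)`, remember the last one whose request has `resource_type == "document"`, and fall back to the `goto()` return value if none was captured. That way redirects still yield the final page's real status, headers, and body. A standalone sketch of the pattern against plain Playwright (the URL is a placeholder):

```python
from playwright.sync_api import sync_playwright

final_response = None

def handle_response(finished_response):
    # Keep only main-document responses; images, scripts, and XHR are skipped.
    global final_response
    if finished_response.request.resource_type == "document":
        final_response = finished_response

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page()
    page.on("response", handle_response)
    first_response = page.goto("https://example.com")  # placeholder URL
    page.wait_for_load_state("domcontentloaded")

    # Same fallback the diff uses: if no document response was captured,
    # use the navigation response returned by goto().
    final_response = final_response or first_response
    print(final_response.status, final_response.url)
    print(len(final_response.body()), "bytes of raw body")
    browser.close()
```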
{scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/pw.py

@@ -1,6 +1,7 @@
 import json
 
-from scrapling.core._types import Callable, Dict, Optional,
+from scrapling.core._types import (Callable, Dict, Optional,
+                                   SelectorWaitStates, Union)
 from scrapling.core.utils import log, lru_cache
 from scrapling.engines.constants import (DEFAULT_STEALTH_FLAGS,
                                          NSTBROWSER_DEFAULT_QUERY)
@@ -23,7 +24,7 @@ class PlaywrightEngine:
             page_action: Callable = None,
             wait_selector: Optional[str] = None,
             locale: Optional[str] = 'en-US',
-            wait_selector_state:
+            wait_selector_state: SelectorWaitStates = 'attached',
             stealth: Optional[bool] = False,
             real_chrome: Optional[bool] = False,
             hide_canvas: Optional[bool] = False,
@@ -193,12 +194,21 @@ class PlaywrightEngine:
         :param url: Target url.
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
+        from playwright.sync_api import Response as PlaywrightResponse
         if not self.stealth or self.real_chrome:
             # Because rebrowser_playwright doesn't play well with real browsers
             from playwright.sync_api import sync_playwright
         else:
             from rebrowser_playwright.sync_api import sync_playwright
 
+        # Store the final response
+        final_response = None
+
+        def handle_response(finished_response: PlaywrightResponse):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         with sync_playwright() as p:
             # Creating the browser
             if self.cdp_url:
@@ -212,6 +222,8 @@ class PlaywrightEngine:
             page = context.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
 
             if self.extra_headers:
                 page.set_extra_http_headers(self.extra_headers)
@@ -223,7 +235,7 @@ class PlaywrightEngine:
                 for script in self.__stealth_scripts():
                     page.add_init_script(path=script)
 
-
+            first_response = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
@@ -240,21 +252,24 @@ class PlaywrightEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
 
+            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=
+                url=final_response.url,
                 text=page.content(),
-                body=
-                status=
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=
-                request_headers=
+                headers=final_response.all_headers(),
+                request_headers=final_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -266,12 +281,21 @@ class PlaywrightEngine:
         :param url: Target url.
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
+        from playwright.async_api import Response as PlaywrightResponse
         if not self.stealth or self.real_chrome:
             # Because rebrowser_playwright doesn't play well with real browsers
             from playwright.async_api import async_playwright
         else:
             from rebrowser_playwright.async_api import async_playwright
 
+        # Store the final response
+        final_response = None
+
+        async def handle_response(finished_response: PlaywrightResponse):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         async with async_playwright() as p:
             # Creating the browser
             if self.cdp_url:
@@ -285,6 +309,8 @@ class PlaywrightEngine:
             page = await context.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
 
             if self.extra_headers:
                 await page.set_extra_http_headers(self.extra_headers)
@@ -296,7 +322,7 @@ class PlaywrightEngine:
                 for script in self.__stealth_scripts():
                     await page.add_init_script(path=script)
 
-
+            first_response = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             await page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
@@ -313,21 +339,24 @@ class PlaywrightEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
 
+            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding =
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text =
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=
+                url=final_response.url,
                 text=await page.content(),
-                body=
-                status=
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await
-                request_headers=await
+                headers=await final_response.all_headers(),
+                request_headers=await final_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             await page.close()
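As in the Camoufox engine, `encoding` is set to the raw `Content-Type` header value (or `'utf-8'`) and, per the in-code comment, parsed later inside `Response`. A stand-in sketch of that parsing step using only the standard library; this illustrates the idea and is not Scrapling's actual `ResponseEncoding` code:

```python
from email.message import Message

def charset_from_content_type(content_type: str, default: str = "utf-8") -> str:
    """Extract the charset parameter from a raw Content-Type header value."""
    msg = Message()
    msg["Content-Type"] = content_type
    return msg.get_content_charset() or default

print(charset_from_content_type("text/html; charset=ISO-8859-1"))  # iso-8859-1
print(charset_from_content_type("text/html"))                      # utf-8 (no charset param)
print(charset_from_content_type("utf-8"))                          # utf-8 (fallback value passed through)
```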
{scrapling-0.2.9 → scrapling-0.2.91}/scrapling/engines/toolbelt/custom.py

@@ -84,8 +84,6 @@ class ResponseEncoding:
 class Response(Adaptor):
     """This class is returned by all engines as a way to unify response type between different libraries."""
 
-    _is_response_result_logged = False  # Class-level flag, initialized to False
-
     def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict,
                  encoding: str = 'utf-8', method: str = 'GET', **adaptor_arguments: Dict):
         automatch_domain = adaptor_arguments.pop('automatch_domain', None)
@@ -99,9 +97,7 @@ class Response(Adaptor):
         # For back-ward compatibility
         self.adaptor = self
         # For easier debugging while working from a Python shell
-
-        log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')
-        Response._is_response_result_logged = True
+        log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')
 
     # def __repr__(self):
     #     return f'<{self.__class__.__name__} [{self.status} {self.reason}]>'
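Dropping `_is_response_result_logged` means every constructed `Response` now emits its `Fetched (...)` line, not just the first one. If that proves noisy, standard `logging` filtering applies; a sketch, assuming the library registers its logger under the `scrapling` name:

```python
import logging

# Suppress the per-request INFO line while keeping warnings and errors.
# The "scrapling" logger name is an assumption about the library's setup.
logging.getLogger("scrapling").setLevel(logging.WARNING)
```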
{scrapling-0.2.9 → scrapling-0.2.91}/scrapling/fetchers.py

@@ -1,5 +1,5 @@
 from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
-                                   Union)
+                                   SelectorWaitStates, Union)
 from scrapling.engines import (CamoufoxEngine, PlaywrightEngine, StaticEngine,
                                check_if_engine_usable)
 from scrapling.engines.toolbelt import BaseFetcher, Response
@@ -176,8 +176,8 @@ class StealthyFetcher(BaseFetcher):
             self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
-            wait_selector_state:
-            os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -234,8 +234,8 @@ class StealthyFetcher(BaseFetcher):
             self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
-            wait_selector_state:
-            os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -308,7 +308,7 @@ class PlayWrightFetcher(BaseFetcher):
     def fetch(
             self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
             useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
-            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state:
+            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: SelectorWaitStates = 'attached',
             hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
             proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
             stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
@@ -368,7 +368,7 @@ class PlayWrightFetcher(BaseFetcher):
     async def async_fetch(
             self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
             useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
-            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state:
+            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: SelectorWaitStates = 'attached',
             hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
             proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
             stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
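A usage sketch exercising the retyped parameters, built from the `StealthyFetcher.fetch` signature shown in these hunks (the URL and selector are placeholders, and the default-constructed fetcher glosses over any constructor options):

```python
from scrapling.fetchers import StealthyFetcher

fetcher = StealthyFetcher()
page = fetcher.fetch(
    "https://example.com",            # placeholder URL
    wait_selector="#content",         # placeholder selector
    wait_selector_state="visible",    # must be one of the SelectorWaitStates values
    proxy="socks5://127.0.0.1:9050",  # hypothetical proxy; str or dict per the signature
)
print(page.status, page.url)
```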
{scrapling-0.2.9 → scrapling-0.2.91}/scrapling/parser.py

@@ -155,7 +155,7 @@ class Adaptor(SelectorsGeneration):
         else:
             if issubclass(type(element), html.HtmlMixin):
 
-                return
+                return Adaptor(
                     root=element,
                     text='', body=b'',  # Since root argument is provided, both `text` and `body` will be ignored so this is just a filler
                     url=self.url, encoding=self.encoding, auto_match=self.__auto_match_enabled,
{scrapling-0.2.9 → scrapling-0.2.91/scrapling.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.9
+Version: 0.2.91
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -21,7 +21,6 @@ Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -38,7 +37,7 @@ Requires-Dist: cssselect>=1.2
 Requires-Dist: w3lib
 Requires-Dist: orjson>=3
 Requires-Dist: tldextract
-Requires-Dist: httpx[brotli,zstd]
+Requires-Dist: httpx[brotli,socks,zstd]
 Requires-Dist: playwright>=1.49.1
 Requires-Dist: rebrowser-playwright>=1.49.1
 Requires-Dist: camoufox[geoip]>=0.4.9
{scrapling-0.2.9 → scrapling-0.2.91}/setup.py

@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="scrapling",
-    version="0.2.9",
+    version="0.2.91",
     description="""Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 simplifies the process of extracting data from websites, even when they undergo structural changes, and offers
 impressive speed improvements over many popular scraping tools.""",
@@ -37,7 +37,6 @@ setup(
         "Topic :: Software Development :: Libraries :: Python Modules",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
@@ -54,7 +53,7 @@ setup(
         "w3lib",
         "orjson>=3",
         "tldextract",
-        'httpx[brotli,zstd]',
+        'httpx[brotli,zstd, socks]',
         'playwright>=1.49.1',
         'rebrowser-playwright>=1.49.1',
         'camoufox[geoip]>=0.4.9'