scrapling 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +1 -1
- scrapling/engines/camo.py +8 -2
- scrapling/engines/pw.py +8 -2
- scrapling/engines/toolbelt/__init__.py +1 -0
- scrapling/engines/toolbelt/custom.py +79 -1
- {scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/METADATA +1 -1
- {scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/RECORD +10 -10
- {scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/LICENSE +0 -0
- {scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/WHEEL +0 -0
- {scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -4,7 +4,7 @@ from scrapling.parser import Adaptor, Adaptors
 from scrapling.core.custom_types import TextHandler, AttributesHandler

 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.3"
+__version__ = "0.2.4"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
scrapling/engines/camo.py
CHANGED
@@ -4,6 +4,7 @@ from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
 from scrapling.engines.toolbelt import (
     Response,
     do_nothing,
+    StatusText,
     get_os_name,
     intercept_route,
     check_type_validity,
@@ -111,12 +112,17 @@ class CamoufoxEngine:
         if 'charset=' in content_type.lower():
             encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()

+        status_text = res.status_text
+        # PlayWright API sometimes give empty status text for some reason!
+        if not status_text:
+            status_text = StatusText.get(res.status)
+
         response = Response(
             url=res.url,
             text=page.content(),
-            body=
+            body=page.content().encode('utf-8'),
             status=res.status,
-            reason=
+            reason=status_text,
             encoding=encoding,
             cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
             headers=res.all_headers(),
scrapling/engines/pw.py
CHANGED
@@ -6,6 +6,7 @@ from scrapling.engines.constants import DEFAULT_STEALTH_FLAGS, NSTBROWSER_DEFAUL
 from scrapling.engines.toolbelt import (
     Response,
     do_nothing,
+    StatusText,
     js_bypass_path,
     intercept_route,
     generate_headers,
@@ -221,12 +222,17 @@ class PlaywrightEngine:
         if 'charset=' in content_type.lower():
             encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()

+        status_text = res.status_text
+        # PlayWright API sometimes give empty status text for some reason!
+        if not status_text:
+            status_text = StatusText.get(res.status)
+
         response = Response(
             url=res.url,
             text=page.content(),
-            body=
+            body=page.content().encode('utf-8'),
             status=res.status,
-            reason=
+            reason=status_text,
             encoding=encoding,
             cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
             headers=res.all_headers(),
scrapling/engines/toolbelt/custom.py
CHANGED
@@ -4,8 +4,9 @@ Functions related to custom types or type checking
 import inspect
 import logging

-from scrapling.core.
+from scrapling.core.custom_types import MappingProxyType
 from scrapling.parser import Adaptor, SQLiteStorageSystem
+from scrapling.core.utils import setup_basic_logging, cache
 from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable


@@ -67,6 +68,83 @@ class BaseFetcher:
         self.adaptor_arguments.update({'automatch_domain': automatch_domain})


+class StatusText:
+    """A class that gets the status text of response status code.
+
+    Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
+    """
+    _phrases = MappingProxyType({
+        100: "Continue",
+        101: "Switching Protocols",
+        102: "Processing",
+        103: "Early Hints",
+        200: "OK",
+        201: "Created",
+        202: "Accepted",
+        203: "Non-Authoritative Information",
+        204: "No Content",
+        205: "Reset Content",
+        206: "Partial Content",
+        207: "Multi-Status",
+        208: "Already Reported",
+        226: "IM Used",
+        300: "Multiple Choices",
+        301: "Moved Permanently",
+        302: "Found",
+        303: "See Other",
+        304: "Not Modified",
+        305: "Use Proxy",
+        307: "Temporary Redirect",
+        308: "Permanent Redirect",
+        400: "Bad Request",
+        401: "Unauthorized",
+        402: "Payment Required",
+        403: "Forbidden",
+        404: "Not Found",
+        405: "Method Not Allowed",
+        406: "Not Acceptable",
+        407: "Proxy Authentication Required",
+        408: "Request Timeout",
+        409: "Conflict",
+        410: "Gone",
+        411: "Length Required",
+        412: "Precondition Failed",
+        413: "Payload Too Large",
+        414: "URI Too Long",
+        415: "Unsupported Media Type",
+        416: "Range Not Satisfiable",
+        417: "Expectation Failed",
+        418: "I'm a teapot",
+        421: "Misdirected Request",
+        422: "Unprocessable Entity",
+        423: "Locked",
+        424: "Failed Dependency",
+        425: "Too Early",
+        426: "Upgrade Required",
+        428: "Precondition Required",
+        429: "Too Many Requests",
+        431: "Request Header Fields Too Large",
+        451: "Unavailable For Legal Reasons",
+        500: "Internal Server Error",
+        501: "Not Implemented",
+        502: "Bad Gateway",
+        503: "Service Unavailable",
+        504: "Gateway Timeout",
+        505: "HTTP Version Not Supported",
+        506: "Variant Also Negotiates",
+        507: "Insufficient Storage",
+        508: "Loop Detected",
+        510: "Not Extended",
+        511: "Network Authentication Required"
+    })
+
+    @classmethod
+    @cache(maxsize=128)
+    def get(cls, status_code: int) -> str:
+        """Get the phrase for a given HTTP status code."""
+        return cls._phrases.get(status_code, "Unknown Status Code")
+
+
 def check_if_engine_usable(engine: Callable) -> Union[Callable, None]:
     """This function check if the passed engine can be used by a Fetcher-type class or not.
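The new class is a read-only lookup table: MappingProxyType keeps the phrase map immutable, and the @cache(maxsize=128) wrapper from scrapling.core.utils memoizes repeated lookups. Typical use, assuming it is imported the same way the engines above do:

from scrapling.engines.toolbelt import StatusText

print(StatusText.get(200))  # "OK"
print(StatusText.get(418))  # "I'm a teapot"
print(StatusText.get(599))  # "Unknown Status Code" (not in the table)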
{scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-scrapling/__init__.py,sha256=
+scrapling/__init__.py,sha256=Tj_pDeN1yhufhlxQ0bY7Qnuxntq_JaqBUCQZrz01EFA,435
 scrapling/defaults.py,sha256=blYDLiuI5DgDSLRWnUgpp21WtFOsv1BsCRCmPeg8Xc4,287
 scrapling/fetchers.py,sha256=_6mL7XSTZE1fHXBqbxE2bBHnlQP1lH-4MCiQHQd5hQs,16017
 scrapling/parser.py,sha256=d2n00uF5i7W5lf0afLNRdk17ZFcNyiF9EzXLRQGA0NM,54111
@@ -11,12 +11,12 @@ scrapling/core/storage_adaptors.py,sha256=Kbak0BOJX5e9I1PbUS_4sUJi2Wxw8Bv5XsaLHA
 scrapling/core/translator.py,sha256=oU-dQCkNQOccZPrXbPW_VSgC5ll10Bb89C3ezW2lI0o,5228
 scrapling/core/utils.py,sha256=fXdANUgRBbVbOerJ94fRY9vi7n5zsbm8t3G4qQ-F3ak,3792
 scrapling/engines/__init__.py,sha256=zwMqcSdNGh-IX0d4zXazrgAeHrkqIN_v5Ia7RU1g8W0,267
-scrapling/engines/camo.py,sha256=
+scrapling/engines/camo.py,sha256=WJNDR3OY5LLqNHRMD4YbwuqUdnEZ8U-Et_1YUn6vDiw,7773
 scrapling/engines/constants.py,sha256=jSDA6lgbvEIB8z2m2SFzCKkvFEZnp28Mondy2__FpkM,3721
-scrapling/engines/pw.py,sha256=
+scrapling/engines/pw.py,sha256=6iNdnNF9M45FJkazeCvFRicyTFD2EkxSISJJP__uOug,12345
 scrapling/engines/static.py,sha256=Wsp6_-soZUQJT6kHoKPkLOdHU9J50chLdYxDmQjO4FQ,7101
-scrapling/engines/toolbelt/__init__.py,sha256=
-scrapling/engines/toolbelt/custom.py,sha256=
+scrapling/engines/toolbelt/__init__.py,sha256=BbxfC0depVOV3i3BnBnyfjHtLcZrDbhz6c5rTRczZUc,383
+scrapling/engines/toolbelt/custom.py,sha256=6Ip-9t2G8TaXLGLARQAEcbnFqvAN7AmgN1ah0glRiMs,9953
 scrapling/engines/toolbelt/fingerprints.py,sha256=kkVtZKSt2ukc0CV0g6QUvSWR0Yx5p8Mv8xiqACAsMBo,2917
 scrapling/engines/toolbelt/navigation.py,sha256=Tde5_6Wv7lOeWXMzs9D6TRaxAbJ3b-zIX6-4HggZbCQ,4017
 scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
@@ -34,8 +34,8 @@ tests/fetchers/test_playwright.py,sha256=YOWn89urd9NwoCHfTFj8fY4xYrRY2BeszTt5Q-T
 tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
 tests/parser/test_general.py,sha256=NfTuGLgAm-LH0dVV0pvbRcYSNI-wSu05rdnuRzmB0m4,11664
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
+scrapling-0.2.4.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.2.4.dist-info/METADATA,sha256=uOp98w2qzOGqE4ofFFG_TgWgZGrscQHWhmP49pfIV3s,64785
+scrapling-0.2.4.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+scrapling-0.2.4.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
+scrapling-0.2.4.dist-info/RECORD,,
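For reference, RECORD entries follow the wheel format path,hash,size, where the hash is the unpadded URL-safe base64 encoding of the file's SHA-256 digest. A rough sketch of how one of the lines above could be reproduced from an unpacked copy of the wheel (the path passed in is whatever local copy you have):

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Build a "path,sha256=<urlsafe-b64 digest>,<size>" line as found in RECORD.
    data = Path(path).read_bytes()
    digest = hashlib.sha256(data).digest()
    b64 = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    return f"{path},sha256={b64},{len(data)}"

print(record_entry("scrapling/__init__.py"))  # should match the 0.2.4 RECORD line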
{scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/LICENSE
File without changes
{scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/WHEEL
File without changes
{scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/top_level.txt
File without changes