scrapling 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scrapling/__init__.py CHANGED
@@ -4,7 +4,7 @@ from scrapling.parser import Adaptor, Adaptors
4
4
  from scrapling.core.custom_types import TextHandler, AttributesHandler
5
5
 
6
6
  __author__ = "Karim Shoair (karim.shoair@pm.me)"
7
- __version__ = "0.2.3"
7
+ __version__ = "0.2.4"
8
8
  __copyright__ = "Copyright (c) 2024 Karim Shoair"
9
9
 
10
10
 
scrapling/engines/camo.py CHANGED
@@ -4,6 +4,7 @@ from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
4
4
  from scrapling.engines.toolbelt import (
5
5
  Response,
6
6
  do_nothing,
7
+ StatusText,
7
8
  get_os_name,
8
9
  intercept_route,
9
10
  check_type_validity,
@@ -111,12 +112,17 @@ class CamoufoxEngine:
111
112
  if 'charset=' in content_type.lower():
112
113
  encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()
113
114
 
115
+ status_text = res.status_text
116
+ # The Playwright API sometimes gives an empty status text, so fall back to the standard phrase for the status code.
117
+ if not status_text:
118
+ status_text = StatusText.get(res.status)
119
+
114
120
  response = Response(
115
121
  url=res.url,
116
122
  text=page.content(),
117
- body=res.body(),
123
+ body=page.content().encode('utf-8'),
118
124
  status=res.status,
119
- reason=res.status_text,
125
+ reason=status_text,
120
126
  encoding=encoding,
121
127
  cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
122
128
  headers=res.all_headers(),
scrapling/engines/pw.py CHANGED
@@ -6,6 +6,7 @@ from scrapling.engines.constants import DEFAULT_STEALTH_FLAGS, NSTBROWSER_DEFAUL
6
6
  from scrapling.engines.toolbelt import (
7
7
  Response,
8
8
  do_nothing,
9
+ StatusText,
9
10
  js_bypass_path,
10
11
  intercept_route,
11
12
  generate_headers,
@@ -221,12 +222,17 @@ class PlaywrightEngine:
221
222
  if 'charset=' in content_type.lower():
222
223
  encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()
223
224
 
225
+ status_text = res.status_text
226
+ # The Playwright API sometimes gives an empty status text, so fall back to the standard phrase for the status code.
227
+ if not status_text:
228
+ status_text = StatusText.get(res.status)
229
+
224
230
  response = Response(
225
231
  url=res.url,
226
232
  text=page.content(),
227
- body=res.body(),
233
+ body=page.content().encode('utf-8'),
228
234
  status=res.status,
229
- reason=res.status_text,
235
+ reason=status_text,
230
236
  encoding=encoding,
231
237
  cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
232
238
  headers=res.all_headers(),
@@ -6,6 +6,7 @@ from .fingerprints import (
6
6
  from .custom import (
7
7
  Response,
8
8
  do_nothing,
9
+ StatusText,
9
10
  BaseFetcher,
10
11
  get_variable_name,
11
12
  check_type_validity,
@@ -4,8 +4,9 @@ Functions related to custom types or type checking
4
4
  import inspect
5
5
  import logging
6
6
 
7
- from scrapling.core.utils import setup_basic_logging
7
+ from scrapling.core.custom_types import MappingProxyType
8
8
  from scrapling.parser import Adaptor, SQLiteStorageSystem
9
+ from scrapling.core.utils import setup_basic_logging, cache
9
10
  from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable
10
11
 
11
12
 
@@ -67,6 +68,83 @@ class BaseFetcher:
67
68
  self.adaptor_arguments.update({'automatch_domain': automatch_domain})
68
69
 
69
70
 
71
+ class StatusText:
72
+ """A class that maps an HTTP response status code to its standard status text.
73
+
74
+ Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
75
+ """
76
+ _phrases = MappingProxyType({
77
+ 100: "Continue",
78
+ 101: "Switching Protocols",
79
+ 102: "Processing",
80
+ 103: "Early Hints",
81
+ 200: "OK",
82
+ 201: "Created",
83
+ 202: "Accepted",
84
+ 203: "Non-Authoritative Information",
85
+ 204: "No Content",
86
+ 205: "Reset Content",
87
+ 206: "Partial Content",
88
+ 207: "Multi-Status",
89
+ 208: "Already Reported",
90
+ 226: "IM Used",
91
+ 300: "Multiple Choices",
92
+ 301: "Moved Permanently",
93
+ 302: "Found",
94
+ 303: "See Other",
95
+ 304: "Not Modified",
96
+ 305: "Use Proxy",
97
+ 307: "Temporary Redirect",
98
+ 308: "Permanent Redirect",
99
+ 400: "Bad Request",
100
+ 401: "Unauthorized",
101
+ 402: "Payment Required",
102
+ 403: "Forbidden",
103
+ 404: "Not Found",
104
+ 405: "Method Not Allowed",
105
+ 406: "Not Acceptable",
106
+ 407: "Proxy Authentication Required",
107
+ 408: "Request Timeout",
108
+ 409: "Conflict",
109
+ 410: "Gone",
110
+ 411: "Length Required",
111
+ 412: "Precondition Failed",
112
+ 413: "Payload Too Large",
113
+ 414: "URI Too Long",
114
+ 415: "Unsupported Media Type",
115
+ 416: "Range Not Satisfiable",
116
+ 417: "Expectation Failed",
117
+ 418: "I'm a teapot",
118
+ 421: "Misdirected Request",
119
+ 422: "Unprocessable Entity",
120
+ 423: "Locked",
121
+ 424: "Failed Dependency",
122
+ 425: "Too Early",
123
+ 426: "Upgrade Required",
124
+ 428: "Precondition Required",
125
+ 429: "Too Many Requests",
126
+ 431: "Request Header Fields Too Large",
127
+ 451: "Unavailable For Legal Reasons",
128
+ 500: "Internal Server Error",
129
+ 501: "Not Implemented",
130
+ 502: "Bad Gateway",
131
+ 503: "Service Unavailable",
132
+ 504: "Gateway Timeout",
133
+ 505: "HTTP Version Not Supported",
134
+ 506: "Variant Also Negotiates",
135
+ 507: "Insufficient Storage",
136
+ 508: "Loop Detected",
137
+ 510: "Not Extended",
138
+ 511: "Network Authentication Required"
139
+ })
140
+
141
+ @classmethod
142
+ @cache(maxsize=128)
143
+ def get(cls, status_code: int) -> str:
144
+ """Get the phrase for a given HTTP status code."""
145
+ return cls._phrases.get(status_code, "Unknown Status Code")
146
+
147
+
70
148
  def check_if_engine_usable(engine: Callable) -> Union[Callable, None]:
71
149
  """This function check if the passed engine can be used by a Fetcher-type class or not.
72
150
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scrapling
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
5
5
  Home-page: https://github.com/D4Vinci/Scrapling
6
6
  Author: Karim Shoair
@@ -1,4 +1,4 @@
1
- scrapling/__init__.py,sha256=tNB1LdlhamZYjlxqbEwo1BpsVE48Bt8acfjl8DNKpEE,435
1
+ scrapling/__init__.py,sha256=Tj_pDeN1yhufhlxQ0bY7Qnuxntq_JaqBUCQZrz01EFA,435
2
2
  scrapling/defaults.py,sha256=blYDLiuI5DgDSLRWnUgpp21WtFOsv1BsCRCmPeg8Xc4,287
3
3
  scrapling/fetchers.py,sha256=_6mL7XSTZE1fHXBqbxE2bBHnlQP1lH-4MCiQHQd5hQs,16017
4
4
  scrapling/parser.py,sha256=d2n00uF5i7W5lf0afLNRdk17ZFcNyiF9EzXLRQGA0NM,54111
@@ -11,12 +11,12 @@ scrapling/core/storage_adaptors.py,sha256=Kbak0BOJX5e9I1PbUS_4sUJi2Wxw8Bv5XsaLHA
11
11
  scrapling/core/translator.py,sha256=oU-dQCkNQOccZPrXbPW_VSgC5ll10Bb89C3ezW2lI0o,5228
12
12
  scrapling/core/utils.py,sha256=fXdANUgRBbVbOerJ94fRY9vi7n5zsbm8t3G4qQ-F3ak,3792
13
13
  scrapling/engines/__init__.py,sha256=zwMqcSdNGh-IX0d4zXazrgAeHrkqIN_v5Ia7RU1g8W0,267
14
- scrapling/engines/camo.py,sha256=41vp2Nh51kKuOSZ1PijsIpROpQZgFfUPybVbEX8pEXk,7530
14
+ scrapling/engines/camo.py,sha256=WJNDR3OY5LLqNHRMD4YbwuqUdnEZ8U-Et_1YUn6vDiw,7773
15
15
  scrapling/engines/constants.py,sha256=jSDA6lgbvEIB8z2m2SFzCKkvFEZnp28Mondy2__FpkM,3721
16
- scrapling/engines/pw.py,sha256=l5MrSW_WNBKAxAlyxbt09ka_lEGo61XKuaOgWpYmvHk,12102
16
+ scrapling/engines/pw.py,sha256=6iNdnNF9M45FJkazeCvFRicyTFD2EkxSISJJP__uOug,12345
17
17
  scrapling/engines/static.py,sha256=Wsp6_-soZUQJT6kHoKPkLOdHU9J50chLdYxDmQjO4FQ,7101
18
- scrapling/engines/toolbelt/__init__.py,sha256=BnBp34aDeohYgqdysEAAWnGZgA02YlExkc5FJLetMSo,367
19
- scrapling/engines/toolbelt/custom.py,sha256=8lvGHWIZoOotSTF97KgPb3CbJquel2QFx8rP8Hf2sQ4,7469
18
+ scrapling/engines/toolbelt/__init__.py,sha256=BbxfC0depVOV3i3BnBnyfjHtLcZrDbhz6c5rTRczZUc,383
19
+ scrapling/engines/toolbelt/custom.py,sha256=6Ip-9t2G8TaXLGLARQAEcbnFqvAN7AmgN1ah0glRiMs,9953
20
20
  scrapling/engines/toolbelt/fingerprints.py,sha256=kkVtZKSt2ukc0CV0g6QUvSWR0Yx5p8Mv8xiqACAsMBo,2917
21
21
  scrapling/engines/toolbelt/navigation.py,sha256=Tde5_6Wv7lOeWXMzs9D6TRaxAbJ3b-zIX6-4HggZbCQ,4017
22
22
  scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
@@ -34,8 +34,8 @@ tests/fetchers/test_playwright.py,sha256=YOWn89urd9NwoCHfTFj8fY4xYrRY2BeszTt5Q-T
34
34
  tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
36
36
  tests/parser/test_general.py,sha256=NfTuGLgAm-LH0dVV0pvbRcYSNI-wSu05rdnuRzmB0m4,11664
37
- scrapling-0.2.3.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
38
- scrapling-0.2.3.dist-info/METADATA,sha256=YGUnQmOdwDVs7rHLUCG9hpLg567s_5I0Cu8TIKKDc_Y,64785
39
- scrapling-0.2.3.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
40
- scrapling-0.2.3.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
41
- scrapling-0.2.3.dist-info/RECORD,,
37
+ scrapling-0.2.4.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
38
+ scrapling-0.2.4.dist-info/METADATA,sha256=uOp98w2qzOGqE4ofFFG_TgWgZGrscQHWhmP49pfIV3s,64785
39
+ scrapling-0.2.4.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
40
+ scrapling-0.2.4.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
41
+ scrapling-0.2.4.dist-info/RECORD,,