scrapling 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

scrapling/__init__.py CHANGED
@@ -4,7 +4,7 @@ from scrapling.parser import Adaptor, Adaptors
  from scrapling.core.custom_types import TextHandler, AttributesHandler
  
  __author__ = "Karim Shoair (karim.shoair@pm.me)"
- __version__ = "0.2.3"
+ __version__ = "0.2.4"
  __copyright__ = "Copyright (c) 2024 Karim Shoair"
  
  
scrapling/engines/camo.py CHANGED
@@ -4,6 +4,7 @@ from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
  from scrapling.engines.toolbelt import (
      Response,
      do_nothing,
+     StatusText,
      get_os_name,
      intercept_route,
      check_type_validity,
@@ -111,12 +112,17 @@ class CamoufoxEngine:
      if 'charset=' in content_type.lower():
          encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()
  
+     status_text = res.status_text
+     # PlayWright API sometimes give empty status text for some reason!
+     if not status_text:
+         status_text = StatusText.get(res.status)
+
      response = Response(
          url=res.url,
          text=page.content(),
-         body=res.body(),
+         body=page.content().encode('utf-8'),
          status=res.status,
-         reason=res.status_text,
+         reason=status_text,
          encoding=encoding,
          cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
          headers=res.all_headers(),
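The same two fixes land in both browser engines (camo.py above and pw.py below): the response body is now rebuilt from the rendered page content instead of `res.body()`, and the `reason` field falls back to a standard phrase whenever Playwright reports an empty `status_text` (which can happen, for example, because HTTP/2 responses carry no reason phrase). A minimal sketch of that fallback, assuming `res` stands in for a Playwright `Response` and with `resolve_reason` as a hypothetical helper name not present in the package:

```python
from scrapling.engines.toolbelt import StatusText


def resolve_reason(res) -> str:
    # Playwright sometimes returns an empty status_text, so fall back to the
    # standard reason phrase looked up by status code.
    return res.status_text or StatusText.get(res.status)
```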
scrapling/engines/pw.py CHANGED
@@ -6,6 +6,7 @@ from scrapling.engines.constants import DEFAULT_STEALTH_FLAGS, NSTBROWSER_DEFAUL
  from scrapling.engines.toolbelt import (
      Response,
      do_nothing,
+     StatusText,
      js_bypass_path,
      intercept_route,
      generate_headers,
@@ -221,12 +222,17 @@ class PlaywrightEngine:
      if 'charset=' in content_type.lower():
          encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()
  
+     status_text = res.status_text
+     # PlayWright API sometimes give empty status text for some reason!
+     if not status_text:
+         status_text = StatusText.get(res.status)
+
      response = Response(
          url=res.url,
          text=page.content(),
-         body=res.body(),
+         body=page.content().encode('utf-8'),
          status=res.status,
-         reason=res.status_text,
+         reason=status_text,
          encoding=encoding,
          cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
          headers=res.all_headers(),
scrapling/engines/toolbelt/__init__.py CHANGED
@@ -6,6 +6,7 @@ from .fingerprints import (
  from .custom import (
      Response,
      do_nothing,
+     StatusText,
      BaseFetcher,
      get_variable_name,
      check_type_validity,
scrapling/engines/toolbelt/custom.py CHANGED
@@ -4,8 +4,9 @@ Functions related to custom types or type checking
  import inspect
  import logging
  
- from scrapling.core.utils import setup_basic_logging
+ from scrapling.core.custom_types import MappingProxyType
  from scrapling.parser import Adaptor, SQLiteStorageSystem
+ from scrapling.core.utils import setup_basic_logging, cache
  from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable
  
  
@@ -67,6 +68,83 @@ class BaseFetcher:
          self.adaptor_arguments.update({'automatch_domain': automatch_domain})
  
  
+ class StatusText:
+     """A class that gets the status text of response status code.
+ 
+     Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
+     """
+     _phrases = MappingProxyType({
+         100: "Continue",
+         101: "Switching Protocols",
+         102: "Processing",
+         103: "Early Hints",
+         200: "OK",
+         201: "Created",
+         202: "Accepted",
+         203: "Non-Authoritative Information",
+         204: "No Content",
+         205: "Reset Content",
+         206: "Partial Content",
+         207: "Multi-Status",
+         208: "Already Reported",
+         226: "IM Used",
+         300: "Multiple Choices",
+         301: "Moved Permanently",
+         302: "Found",
+         303: "See Other",
+         304: "Not Modified",
+         305: "Use Proxy",
+         307: "Temporary Redirect",
+         308: "Permanent Redirect",
+         400: "Bad Request",
+         401: "Unauthorized",
+         402: "Payment Required",
+         403: "Forbidden",
+         404: "Not Found",
+         405: "Method Not Allowed",
+         406: "Not Acceptable",
+         407: "Proxy Authentication Required",
+         408: "Request Timeout",
+         409: "Conflict",
+         410: "Gone",
+         411: "Length Required",
+         412: "Precondition Failed",
+         413: "Payload Too Large",
+         414: "URI Too Long",
+         415: "Unsupported Media Type",
+         416: "Range Not Satisfiable",
+         417: "Expectation Failed",
+         418: "I'm a teapot",
+         421: "Misdirected Request",
+         422: "Unprocessable Entity",
+         423: "Locked",
+         424: "Failed Dependency",
+         425: "Too Early",
+         426: "Upgrade Required",
+         428: "Precondition Required",
+         429: "Too Many Requests",
+         431: "Request Header Fields Too Large",
+         451: "Unavailable For Legal Reasons",
+         500: "Internal Server Error",
+         501: "Not Implemented",
+         502: "Bad Gateway",
+         503: "Service Unavailable",
+         504: "Gateway Timeout",
+         505: "HTTP Version Not Supported",
+         506: "Variant Also Negotiates",
+         507: "Insufficient Storage",
+         508: "Loop Detected",
+         510: "Not Extended",
+         511: "Network Authentication Required"
+     })
+ 
+     @classmethod
+     @cache(maxsize=128)
+     def get(cls, status_code: int) -> str:
+         """Get the phrase for a given HTTP status code."""
+         return cls._phrases.get(status_code, "Unknown Status Code")
+ 
+ 
  def check_if_engine_usable(engine: Callable) -> Union[Callable, None]:
      """This function check if the passed engine can be used by a Fetcher-type class or not.
  
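The new StatusText helper is an immutable code-to-phrase table (a MappingProxyType) with a `get` classmethod whose repeated lookups are memoized by the `cache(maxsize=128)` decorator from `scrapling.core.utils`. A brief usage sketch, not taken from the package's test suite, illustrating the lookups the engines now rely on:

```python
from scrapling.engines.toolbelt import StatusText

# Known codes resolve to their standard reason phrases.
assert StatusText.get(200) == "OK"
assert StatusText.get(404) == "Not Found"

# Codes missing from the table fall back to a generic phrase.
assert StatusText.get(599) == "Unknown Status Code"
```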
{scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: scrapling
- Version: 0.2.3
+ Version: 0.2.4
  Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
  Home-page: https://github.com/D4Vinci/Scrapling
  Author: Karim Shoair
{scrapling-0.2.3.dist-info → scrapling-0.2.4.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
- scrapling/__init__.py,sha256=tNB1LdlhamZYjlxqbEwo1BpsVE48Bt8acfjl8DNKpEE,435
+ scrapling/__init__.py,sha256=Tj_pDeN1yhufhlxQ0bY7Qnuxntq_JaqBUCQZrz01EFA,435
  scrapling/defaults.py,sha256=blYDLiuI5DgDSLRWnUgpp21WtFOsv1BsCRCmPeg8Xc4,287
  scrapling/fetchers.py,sha256=_6mL7XSTZE1fHXBqbxE2bBHnlQP1lH-4MCiQHQd5hQs,16017
  scrapling/parser.py,sha256=d2n00uF5i7W5lf0afLNRdk17ZFcNyiF9EzXLRQGA0NM,54111
@@ -11,12 +11,12 @@ scrapling/core/storage_adaptors.py,sha256=Kbak0BOJX5e9I1PbUS_4sUJi2Wxw8Bv5XsaLHA
  scrapling/core/translator.py,sha256=oU-dQCkNQOccZPrXbPW_VSgC5ll10Bb89C3ezW2lI0o,5228
  scrapling/core/utils.py,sha256=fXdANUgRBbVbOerJ94fRY9vi7n5zsbm8t3G4qQ-F3ak,3792
  scrapling/engines/__init__.py,sha256=zwMqcSdNGh-IX0d4zXazrgAeHrkqIN_v5Ia7RU1g8W0,267
- scrapling/engines/camo.py,sha256=41vp2Nh51kKuOSZ1PijsIpROpQZgFfUPybVbEX8pEXk,7530
+ scrapling/engines/camo.py,sha256=WJNDR3OY5LLqNHRMD4YbwuqUdnEZ8U-Et_1YUn6vDiw,7773
  scrapling/engines/constants.py,sha256=jSDA6lgbvEIB8z2m2SFzCKkvFEZnp28Mondy2__FpkM,3721
- scrapling/engines/pw.py,sha256=l5MrSW_WNBKAxAlyxbt09ka_lEGo61XKuaOgWpYmvHk,12102
+ scrapling/engines/pw.py,sha256=6iNdnNF9M45FJkazeCvFRicyTFD2EkxSISJJP__uOug,12345
  scrapling/engines/static.py,sha256=Wsp6_-soZUQJT6kHoKPkLOdHU9J50chLdYxDmQjO4FQ,7101
- scrapling/engines/toolbelt/__init__.py,sha256=BnBp34aDeohYgqdysEAAWnGZgA02YlExkc5FJLetMSo,367
- scrapling/engines/toolbelt/custom.py,sha256=8lvGHWIZoOotSTF97KgPb3CbJquel2QFx8rP8Hf2sQ4,7469
+ scrapling/engines/toolbelt/__init__.py,sha256=BbxfC0depVOV3i3BnBnyfjHtLcZrDbhz6c5rTRczZUc,383
+ scrapling/engines/toolbelt/custom.py,sha256=6Ip-9t2G8TaXLGLARQAEcbnFqvAN7AmgN1ah0glRiMs,9953
  scrapling/engines/toolbelt/fingerprints.py,sha256=kkVtZKSt2ukc0CV0g6QUvSWR0Yx5p8Mv8xiqACAsMBo,2917
  scrapling/engines/toolbelt/navigation.py,sha256=Tde5_6Wv7lOeWXMzs9D6TRaxAbJ3b-zIX6-4HggZbCQ,4017
  scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
@@ -34,8 +34,8 @@ tests/fetchers/test_playwright.py,sha256=YOWn89urd9NwoCHfTFj8fY4xYrRY2BeszTt5Q-T
  tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
  tests/parser/test_general.py,sha256=NfTuGLgAm-LH0dVV0pvbRcYSNI-wSu05rdnuRzmB0m4,11664
- scrapling-0.2.3.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
- scrapling-0.2.3.dist-info/METADATA,sha256=YGUnQmOdwDVs7rHLUCG9hpLg567s_5I0Cu8TIKKDc_Y,64785
- scrapling-0.2.3.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
- scrapling-0.2.3.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
- scrapling-0.2.3.dist-info/RECORD,,
+ scrapling-0.2.4.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+ scrapling-0.2.4.dist-info/METADATA,sha256=uOp98w2qzOGqE4ofFFG_TgWgZGrscQHWhmP49pfIV3s,64785
+ scrapling-0.2.4.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+ scrapling-0.2.4.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
+ scrapling-0.2.4.dist-info/RECORD,,