scrapling 0.2.3__tar.gz → 0.2.4__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. {scrapling-0.2.3/scrapling.egg-info → scrapling-0.2.4}/PKG-INFO +1 -1
  2. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/__init__.py +1 -1
  3. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/camo.py +8 -2
  4. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/pw.py +8 -2
  5. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/__init__.py +1 -0
  6. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/custom.py +79 -1
  7. {scrapling-0.2.3 → scrapling-0.2.4/scrapling.egg-info}/PKG-INFO +1 -1
  8. {scrapling-0.2.3 → scrapling-0.2.4}/setup.cfg +1 -1
  9. {scrapling-0.2.3 → scrapling-0.2.4}/setup.py +1 -1
  10. {scrapling-0.2.3 → scrapling-0.2.4}/LICENSE +0 -0
  11. {scrapling-0.2.3 → scrapling-0.2.4}/MANIFEST.in +0 -0
  12. {scrapling-0.2.3 → scrapling-0.2.4}/README.md +0 -0
  13. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/core/__init__.py +0 -0
  14. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/core/_types.py +0 -0
  15. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/core/custom_types.py +0 -0
  16. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/core/mixins.py +0 -0
  17. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/core/storage_adaptors.py +0 -0
  18. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/core/translator.py +0 -0
  19. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/core/utils.py +0 -0
  20. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/defaults.py +0 -0
  21. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/__init__.py +0 -0
  22. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/constants.py +0 -0
  23. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/static.py +0 -0
  24. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/bypasses/navigator_plugins.js +0 -0
  25. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/bypasses/notification_permission.js +0 -0
  26. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -0
  27. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +0 -0
  28. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/bypasses/screen_props.js +0 -0
  29. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/bypasses/webdriver_fully.js +0 -0
  30. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/bypasses/window_chrome.js +0 -0
  31. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/fingerprints.py +0 -0
  32. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/engines/toolbelt/navigation.py +0 -0
  33. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/fetchers.py +0 -0
  34. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/parser.py +0 -0
  35. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling/py.typed +0 -0
  36. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling.egg-info/SOURCES.txt +0 -0
  37. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling.egg-info/dependency_links.txt +0 -0
  38. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling.egg-info/not-zip-safe +0 -0
  39. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling.egg-info/requires.txt +0 -0
  40. {scrapling-0.2.3 → scrapling-0.2.4}/scrapling.egg-info/top_level.txt +0 -0
  41. {scrapling-0.2.3 → scrapling-0.2.4}/tests/__init__.py +0 -0
  42. {scrapling-0.2.3 → scrapling-0.2.4}/tests/fetchers/__init__.py +0 -0
  43. {scrapling-0.2.3 → scrapling-0.2.4}/tests/fetchers/test_camoufox.py +0 -0
  44. {scrapling-0.2.3 → scrapling-0.2.4}/tests/fetchers/test_httpx.py +0 -0
  45. {scrapling-0.2.3 → scrapling-0.2.4}/tests/fetchers/test_playwright.py +0 -0
  46. {scrapling-0.2.3 → scrapling-0.2.4}/tests/parser/__init__.py +0 -0
  47. {scrapling-0.2.3 → scrapling-0.2.4}/tests/parser/test_automatch.py +0 -0
  48. {scrapling-0.2.3 → scrapling-0.2.4}/tests/parser/test_general.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scrapling
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
5
5
  Home-page: https://github.com/D4Vinci/Scrapling
6
6
  Author: Karim Shoair
@@ -4,7 +4,7 @@ from scrapling.parser import Adaptor, Adaptors
4
4
  from scrapling.core.custom_types import TextHandler, AttributesHandler
5
5
 
6
6
  __author__ = "Karim Shoair (karim.shoair@pm.me)"
7
- __version__ = "0.2.3"
7
+ __version__ = "0.2.4"
8
8
  __copyright__ = "Copyright (c) 2024 Karim Shoair"
9
9
 
10
10
 
@@ -4,6 +4,7 @@ from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
4
4
  from scrapling.engines.toolbelt import (
5
5
  Response,
6
6
  do_nothing,
7
+ StatusText,
7
8
  get_os_name,
8
9
  intercept_route,
9
10
  check_type_validity,
@@ -111,12 +112,17 @@ class CamoufoxEngine:
111
112
  if 'charset=' in content_type.lower():
112
113
  encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()
113
114
 
115
+ status_text = res.status_text
116
+ # PlayWright API sometimes give empty status text for some reason!
117
+ if not status_text:
118
+ status_text = StatusText.get(res.status)
119
+
114
120
  response = Response(
115
121
  url=res.url,
116
122
  text=page.content(),
117
- body=res.body(),
123
+ body=page.content().encode('utf-8'),
118
124
  status=res.status,
119
- reason=res.status_text,
125
+ reason=status_text,
120
126
  encoding=encoding,
121
127
  cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
122
128
  headers=res.all_headers(),
@@ -6,6 +6,7 @@ from scrapling.engines.constants import DEFAULT_STEALTH_FLAGS, NSTBROWSER_DEFAUL
6
6
  from scrapling.engines.toolbelt import (
7
7
  Response,
8
8
  do_nothing,
9
+ StatusText,
9
10
  js_bypass_path,
10
11
  intercept_route,
11
12
  generate_headers,
@@ -221,12 +222,17 @@ class PlaywrightEngine:
221
222
  if 'charset=' in content_type.lower():
222
223
  encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()
223
224
 
225
+ status_text = res.status_text
226
+ # PlayWright API sometimes give empty status text for some reason!
227
+ if not status_text:
228
+ status_text = StatusText.get(res.status)
229
+
224
230
  response = Response(
225
231
  url=res.url,
226
232
  text=page.content(),
227
- body=res.body(),
233
+ body=page.content().encode('utf-8'),
228
234
  status=res.status,
229
- reason=res.status_text,
235
+ reason=status_text,
230
236
  encoding=encoding,
231
237
  cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
232
238
  headers=res.all_headers(),
@@ -6,6 +6,7 @@ from .fingerprints import (
6
6
  from .custom import (
7
7
  Response,
8
8
  do_nothing,
9
+ StatusText,
9
10
  BaseFetcher,
10
11
  get_variable_name,
11
12
  check_type_validity,
@@ -4,8 +4,9 @@ Functions related to custom types or type checking
4
4
  import inspect
5
5
  import logging
6
6
 
7
- from scrapling.core.utils import setup_basic_logging
7
+ from scrapling.core.custom_types import MappingProxyType
8
8
  from scrapling.parser import Adaptor, SQLiteStorageSystem
9
+ from scrapling.core.utils import setup_basic_logging, cache
9
10
  from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable
10
11
 
11
12
 
@@ -67,6 +68,83 @@ class BaseFetcher:
67
68
  self.adaptor_arguments.update({'automatch_domain': automatch_domain})
68
69
 
69
70
 
71
+ class StatusText:
72
+ """A class that gets the status text of response status code.
73
+
74
+ Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
75
+ """
76
+ _phrases = MappingProxyType({
77
+ 100: "Continue",
78
+ 101: "Switching Protocols",
79
+ 102: "Processing",
80
+ 103: "Early Hints",
81
+ 200: "OK",
82
+ 201: "Created",
83
+ 202: "Accepted",
84
+ 203: "Non-Authoritative Information",
85
+ 204: "No Content",
86
+ 205: "Reset Content",
87
+ 206: "Partial Content",
88
+ 207: "Multi-Status",
89
+ 208: "Already Reported",
90
+ 226: "IM Used",
91
+ 300: "Multiple Choices",
92
+ 301: "Moved Permanently",
93
+ 302: "Found",
94
+ 303: "See Other",
95
+ 304: "Not Modified",
96
+ 305: "Use Proxy",
97
+ 307: "Temporary Redirect",
98
+ 308: "Permanent Redirect",
99
+ 400: "Bad Request",
100
+ 401: "Unauthorized",
101
+ 402: "Payment Required",
102
+ 403: "Forbidden",
103
+ 404: "Not Found",
104
+ 405: "Method Not Allowed",
105
+ 406: "Not Acceptable",
106
+ 407: "Proxy Authentication Required",
107
+ 408: "Request Timeout",
108
+ 409: "Conflict",
109
+ 410: "Gone",
110
+ 411: "Length Required",
111
+ 412: "Precondition Failed",
112
+ 413: "Payload Too Large",
113
+ 414: "URI Too Long",
114
+ 415: "Unsupported Media Type",
115
+ 416: "Range Not Satisfiable",
116
+ 417: "Expectation Failed",
117
+ 418: "I'm a teapot",
118
+ 421: "Misdirected Request",
119
+ 422: "Unprocessable Entity",
120
+ 423: "Locked",
121
+ 424: "Failed Dependency",
122
+ 425: "Too Early",
123
+ 426: "Upgrade Required",
124
+ 428: "Precondition Required",
125
+ 429: "Too Many Requests",
126
+ 431: "Request Header Fields Too Large",
127
+ 451: "Unavailable For Legal Reasons",
128
+ 500: "Internal Server Error",
129
+ 501: "Not Implemented",
130
+ 502: "Bad Gateway",
131
+ 503: "Service Unavailable",
132
+ 504: "Gateway Timeout",
133
+ 505: "HTTP Version Not Supported",
134
+ 506: "Variant Also Negotiates",
135
+ 507: "Insufficient Storage",
136
+ 508: "Loop Detected",
137
+ 510: "Not Extended",
138
+ 511: "Network Authentication Required"
139
+ })
140
+
141
+ @classmethod
142
+ @cache(maxsize=128)
143
+ def get(cls, status_code: int) -> str:
144
+ """Get the phrase for a given HTTP status code."""
145
+ return cls._phrases.get(status_code, "Unknown Status Code")
146
+
147
+
70
148
  def check_if_engine_usable(engine: Callable) -> Union[Callable, None]:
71
149
  """This function check if the passed engine can be used by a Fetcher-type class or not.
72
150
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scrapling
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
5
5
  Home-page: https://github.com/D4Vinci/Scrapling
6
6
  Author: Karim Shoair
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = scrapling
3
- version = 0.2.3
3
+ version = 0.2.4
4
4
  author = Karim Shoair
5
5
  author_email = karim.shoair@pm.me
6
6
  description = Scrapling is an undetectable, powerful, flexible, adaptive, and high-performance web scraping library for Python.
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
6
6
 
7
7
  setup(
8
8
  name="scrapling",
9
- version="0.2.3",
9
+ version="0.2.4",
10
10
  description="""Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
11
11
  simplifies the process of extracting data from websites, even when they undergo structural changes, and offers
12
12
  impressive speed improvements over many popular scraping tools.""",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes