abstract-webtools 0.1.6.14__py3-none-any.whl → 0.1.6.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,4 @@
1
- from abstract_webtools import * #.soupManager import *
2
-
3
-
1
+ from .soupManager import * #.soupManager import *
4
2
  from urllib.parse import urlparse, urljoin
5
3
  import os
6
4
  import xml.etree.ElementTree as ET
@@ -17,9 +15,8 @@ class crawlManager:
17
15
 
18
16
  def get_new_source_and_url(self, url=None):
19
17
  """Fetches new source code and response for a given URL."""
20
- if url is None:
21
- url = self.url
22
- self.req_mgr.set_url(url)
18
+ url = url
19
+ self.req_mgr = get_req_mgr(url=url)
23
20
  self.source_code = self.req_mgr.source_code
24
21
  self.response = self.req_mgr.response
25
22
 
@@ -41,8 +41,9 @@ class urlManager:
41
41
  url (str or None): The URL to manage (default is None).
42
42
  session (requests.Session): A custom requests session (default is the requests module's session).
43
43
  """
44
- self._url=url or 'www.example.com'
45
- self.url = url or 'www.example.com'
44
+ url = url or 'www.example.com'
45
+ self._url=url
46
+ self.url = url
46
47
  self.session= session or requests
47
48
  self.clean_urls = self.clean_url(url=url)
48
49
  self.url = self.get_correct_url(clean_urls=self.clean_urls)
@@ -68,8 +69,7 @@ class urlManager:
68
69
  Given a URL, return a list with potential URL versions including with and without 'www.',
69
70
  and with 'http://' and 'https://'.
70
71
  """
71
- if url == None:
72
- url=self.url
72
+ url = url or self.url
73
73
  urls=[]
74
74
  if url:
75
75
  # Remove http:// or https:// prefix
@@ -134,7 +134,8 @@ class urlManager:
134
134
  self.url =self.correct_url
135
135
  self.protocol,self.domain,self.path,self.query=self.url_to_pieces(url=self.url)
136
136
  self.all_urls = []
137
- def get_domain(self,url):
137
+ def get_domain(self,url=None):
138
+ url = url or self.url
138
139
  return urlparse(url).netloc
139
140
  def url_join(self,url,path):
140
141
  url = eatOuter(url,['/'])
@@ -150,16 +151,17 @@ class urlManager:
150
151
  @url.setter
151
152
  def url(self, new_url):
152
153
  self._url = new_url
153
- @staticmethod
154
- def is_valid_url(url):
154
+ def is_valid_url(self,url=None):
155
155
  """
156
156
  Check if the given URL is valid.
157
157
  """
158
+ url = url or self.url
158
159
  parsed = urlparse(url)
159
160
  return bool(parsed.netloc) and bool(parsed.scheme)
160
- @staticmethod
161
- def make_valid(href,url):
161
+
162
+ def make_valid(self,href,url=None):
162
163
  def is_valid_url(url):
164
+ url = url or self.url
163
165
  """
164
166
  Check if the given URL is valid.
165
167
  """
@@ -171,24 +173,27 @@ class urlManager:
171
173
  if is_valid_url(new_link):
172
174
  return new_link
173
175
  return False
174
- @staticmethod
175
- def get_relative_href(url,href):
176
+
177
+ def get_relative_href(self,url,href):
176
178
  # join the URL if it's relative (not an absolute link)
179
+ url = url or self.url
177
180
  href = urljoin(url, href)
178
181
  parsed_href = urlparse(href)
179
182
  # remove URL GET parameters, URL fragments, etc.
180
183
  href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
181
184
  return href
182
- def url_basename(url):
185
+ def url_basename(self,url=None):
186
+ url = url or self.url
183
187
  path = urllib.parse.urlparse(url).path
184
188
  return path.strip('/').split('/')[-1]
185
189
 
186
190
 
187
- def base_url(url):
191
+ def base_url(self,url=None):
192
+ url = url or self.url
188
193
  return re.match(r'https?://[^?#]+/', url).group()
189
194
 
190
195
 
191
- def urljoin(base, path):
196
+ def urljoin(self,base, path):
192
197
  if isinstance(path, bytes):
193
198
  path = path.decode()
194
199
  if not isinstance(path, str) or not path:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.14
3
+ Version: 0.1.6.16
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -7,7 +7,7 @@ abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE7
7
7
  abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f5GoZmw0,3454
8
8
  abstract_webtools/managers/__init__.py,sha256=5aIpbdUsDWTrhPUAjfIKnG54OULqOKan9LBL5EIUllo,407
9
9
  abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
10
- abstract_webtools/managers/crawlManager.py,sha256=Qc0mhurJgwRyduw9Jvv8nzMf5Gdxev-Nc3fh1mak0fE,7899
10
+ abstract_webtools/managers/crawlManager.py,sha256=VN57a7fOjAHGbRrqcz4KVfDxFdHgWUMTR88_JcMLBhU,7869
11
11
  abstract_webtools/managers/domainManager.py,sha256=95znOBv05W77mW_fbZAfl4RmlENDlYqhEOMkL02L220,3610
12
12
  abstract_webtools/managers/dynamicRateLimiter.py,sha256=gopQcQo50JG2D0KcyepNCIQ_1uDQEBIHBzWf4R2Wgy0,7617
13
13
  abstract_webtools/managers/linkManager.py,sha256=m6y9s8jknrTX8RtOAFKeHd4yd23G7Rgf0T7Sp7wmHUw,12180
@@ -18,11 +18,11 @@ abstract_webtools/managers/seleniumManager.py,sha256=qSY8gH3N5YJIMwE_Alj9HNQRip_
18
18
  abstract_webtools/managers/soupManager.py,sha256=7nDB_QKneGjyTZUzchfbdHNvxxYiTyIn8AHon8ObTSY,17148
19
19
  abstract_webtools/managers/sslManager.py,sha256=C-QgQw9CW84uOE5kx2MPjC3RsLbE2JQqdwdTs0H4ecc,1370
20
20
  abstract_webtools/managers/tlsAdapter.py,sha256=XZSMZz9EUOhv-h3_Waf6mjV1dA3oN_M_oWuoo4VZ_HE,1454
21
- abstract_webtools/managers/urlManager.py,sha256=XqMrCM84BeWEfWtHc_8UFpT91ZtG-okzdKdCuC49vsA,8678
21
+ abstract_webtools/managers/urlManager.py,sha256=Dvf-TiSo5j_YjZS2Eq6lFfbhveneD6NA_wEE0xUXy_E,8858
22
22
  abstract_webtools/managers/userAgentManager.py,sha256=33SB2p2FG7EYZl7l2iYm1U4gI9PcdkGTZHw5lg_Ogrw,1653
23
23
  abstract_webtools/managers/videoDownloader.py,sha256=6G_aLc05BTMUYUWc7iqYtHF_BaR7DnCNK_NJ-QnjsYY,10531
24
- abstract_webtools-0.1.6.14.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
25
- abstract_webtools-0.1.6.14.dist-info/METADATA,sha256=JmKFn-30rtiDahGCn_cpQ9TrCKrkjzrqZVCRBJi7bOE,15858
26
- abstract_webtools-0.1.6.14.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
27
- abstract_webtools-0.1.6.14.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
28
- abstract_webtools-0.1.6.14.dist-info/RECORD,,
24
+ abstract_webtools-0.1.6.16.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
25
+ abstract_webtools-0.1.6.16.dist-info/METADATA,sha256=E3Z9_JiEo0cYQ4uXnuvpeVtiTXFasutqkWFu17BdWmI,15858
26
+ abstract_webtools-0.1.6.16.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
27
+ abstract_webtools-0.1.6.16.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
28
+ abstract_webtools-0.1.6.16.dist-info/RECORD,,