abstract-webtools 0.1.4.1__py3-none-any.whl → 0.1.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/abstract_crawler.py +5 -3
- abstract_webtools/abstract_webtools.py +154 -91
- {abstract_webtools-0.1.4.1.dist-info → abstract_webtools-0.1.4.12.dist-info}/METADATA +1 -1
- abstract_webtools-0.1.4.12.dist-info/RECORD +11 -0
- abstract_webtools-0.1.4.1.dist-info/RECORD +0 -11
- {abstract_webtools-0.1.4.1.dist-info → abstract_webtools-0.1.4.12.dist-info}/LICENSE +0 -0
- {abstract_webtools-0.1.4.1.dist-info → abstract_webtools-0.1.4.12.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.4.1.dist-info → abstract_webtools-0.1.4.12.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
|
-
from
|
1
|
+
from abstract_webtools import *
|
2
|
+
response = SafeRequest()
|
2
3
|
def discover_classes_with_links(base_url):
|
3
4
|
"""
|
4
5
|
Discovers classes in the HTML content of the provided URL
|
@@ -10,9 +11,9 @@ def discover_classes_with_links(base_url):
|
|
10
11
|
Returns:
|
11
12
|
set: A set of unique class names.
|
12
13
|
"""
|
13
|
-
|
14
|
+
|
14
15
|
if response:
|
15
|
-
soup = BeautifulSoup(response
|
16
|
+
soup = BeautifulSoup(response, 'html.parser')
|
16
17
|
|
17
18
|
unique_classes = set()
|
18
19
|
|
@@ -228,4 +229,5 @@ def generate_sitemap(domain):
|
|
228
229
|
|
229
230
|
if __name__ == '__main__':
|
230
231
|
domain = "https://uuvo.com"
|
232
|
+
request_manager = SafeRequest(url=domain)
|
231
233
|
generate_sitemap(domain)
|
@@ -69,6 +69,7 @@ Version: 1.0
|
|
69
69
|
import ssl
|
70
70
|
import requests
|
71
71
|
from requests.adapters import HTTPAdapter
|
72
|
+
from typing import Optional, List
|
72
73
|
from requests.packages.urllib3.poolmanager import PoolManager
|
73
74
|
from requests.packages.urllib3.util import ssl_
|
74
75
|
from urllib.parse import urlparse
|
@@ -196,7 +197,6 @@ class DynamicRateLimiter:
|
|
196
197
|
print(f"Adjusted tokens to: {self.tokens} and high_limit to: {self.current_limit}")
|
197
198
|
class DynamicRateLimiterManagerSingleton:
|
198
199
|
_instance = None
|
199
|
-
|
200
200
|
@staticmethod
|
201
201
|
def get_instance():
|
202
202
|
if DynamicRateLimiterManagerSingleton._instance is None:
|
@@ -205,7 +205,7 @@ class DynamicRateLimiterManagerSingleton:
|
|
205
205
|
|
206
206
|
class CipherManager:
|
207
207
|
@staticmethod
|
208
|
-
def
|
208
|
+
def get_default_ciphers()-> list:
|
209
209
|
return [
|
210
210
|
"ECDHE-RSA-AES256-GCM-SHA384", "ECDHE-ECDSA-AES256-GCM-SHA384",
|
211
211
|
"ECDHE-RSA-AES256-SHA384", "ECDHE-ECDSA-AES256-SHA384",
|
@@ -215,19 +215,32 @@ class CipherManager:
|
|
215
215
|
"AES256-SHA", "AES128-SHA"
|
216
216
|
]
|
217
217
|
|
218
|
+
def __init__(self,cipher_list=None):
|
219
|
+
if cipher_list == None:
|
220
|
+
cipher_list=self.get_default_ciphers()
|
221
|
+
self.cipher_list = cipher_list
|
222
|
+
self.create_list()
|
223
|
+
self.ciphers_string = self.add_string_list()
|
224
|
+
def add_string_list(self):
|
225
|
+
if len(self.cipher_list)==0:
|
226
|
+
return ''
|
227
|
+
return','.join(self.cipher_list)
|
228
|
+
def create_list(self):
|
229
|
+
if self.cipher_list == None:
|
230
|
+
self.cipher_list= []
|
231
|
+
elif isinstance(self.cipher_list, str):
|
232
|
+
self.cipher_list=self.cipher_list.split(',')
|
233
|
+
if isinstance(self.cipher_list, str):
|
234
|
+
self.cipher_list=[self.cipher_list]
|
235
|
+
class CipherManagerSingleton:
|
236
|
+
_instance = None
|
218
237
|
@staticmethod
|
219
|
-
def
|
220
|
-
if
|
221
|
-
|
222
|
-
elif
|
223
|
-
|
224
|
-
return
|
225
|
-
|
226
|
-
@staticmethod
|
227
|
-
def add_string_list(cipher_list=[], delim=','):
|
228
|
-
return delim.join(cipher_list)
|
229
|
-
|
230
|
-
|
238
|
+
def get_instance(cipher_list=None):
|
239
|
+
if CipherManagerSingleton._instance is None:
|
240
|
+
CipherManagerSingleton._instance = CipherManager(cipher_list=cipher_list)
|
241
|
+
elif CipherManagerSingleton._instance.cipher_list != cipher_list:
|
242
|
+
CipherManagerSingleton._instance = CipherManager(cipher_list=cipher_list)
|
243
|
+
return CipherManagerSingleton._instance
|
231
244
|
class SSLManager:
|
232
245
|
@staticmethod
|
233
246
|
def get_default_certification():
|
@@ -320,40 +333,70 @@ class SSLManager:
|
|
320
333
|
"OP_ALL"
|
321
334
|
]
|
322
335
|
|
323
|
-
@staticmethod
|
324
|
-
def create_list(lst=None):
|
325
|
-
return CipherManager.create_list(lst)
|
326
|
-
|
327
|
-
@staticmethod
|
328
|
-
def combine_ssl_options(ssl_options_values=[]):
|
329
|
-
combined_options = 0
|
330
|
-
for option in ssl_options_values:
|
331
|
-
combined_options |= option
|
332
|
-
return combined_options
|
333
|
-
|
334
|
-
@staticmethod
|
335
|
-
def get_options_values(ssl_options_list=[]):
|
336
|
-
return [getattr(ssl, option_name) for option_name in ssl_options_list]
|
337
|
-
|
338
336
|
@staticmethod
|
339
337
|
def get_context(ciphers=None, options=None, cert_reqs=None):
|
338
|
+
|
340
339
|
return ssl_.create_urllib3_context(ciphers=ciphers, cert_reqs=cert_reqs, options=options)
|
341
340
|
|
342
341
|
def __init__(self, ciphers=None, ssl_options_list=None, certification=None):
|
343
|
-
self.ssl_options_list =
|
344
|
-
self.
|
345
|
-
self.
|
342
|
+
self.ssl_options_list = ssl_options_list
|
343
|
+
self.create_list()
|
344
|
+
self.ssl_options_values = self.get_options_values()
|
345
|
+
self.ssl_options = self.combine_ssl_options()
|
346
346
|
self.certification = certification or self.get_default_certification()
|
347
|
-
self.
|
347
|
+
self.cipher_manager = CipherManagerSingleton().get_instance(cipher_list=ciphers)
|
348
|
+
self.ssl_context = self.get_context(ciphers=self.cipher_manager.ciphers_string, options=self.ssl_options, cert_reqs=self.certification)
|
349
|
+
def create_list(self):
|
350
|
+
if self.ssl_options_list == None:
|
351
|
+
self.ssl_options_list= []
|
352
|
+
elif isinstance(self.ssl_options_list, str):
|
353
|
+
self.ssl_options_list=self.ssl_options_list.split(',')
|
354
|
+
if isinstance(self.ssl_options_list, str):
|
355
|
+
self.ssl_options_list=[self.ssl_options_list]
|
356
|
+
def get_options_values(self):
|
357
|
+
return [getattr(ssl, option_name) for option_name in self.ssl_options_list]
|
358
|
+
def combine_ssl_options(self):
|
359
|
+
combined_options = 0
|
360
|
+
for option in self.ssl_options_values:
|
361
|
+
combined_options |= option
|
362
|
+
return combined_options
|
363
|
+
class SSLManagerSingleton:
|
364
|
+
_instance = None
|
365
|
+
@staticmethod
|
366
|
+
def get_instance(ciphers=None, ssl_options_list=None, certification=None):
|
367
|
+
if SSLManagerSingleton._instance is None:
|
368
|
+
SSLManagerSingleton._instance = SSLManager(ciphers=ciphers, ssl_options_list=ssl_options_list, certification=certification)
|
369
|
+
elif SSLManagerSingleton._instance.cipher_manager.ciphers_string != ciphers or SSLManagerSingleton._instance.ssl_options_list !=ssl_options_list or SSLManagerSingleton._instance.certification !=certification:
|
370
|
+
SSLManagerSingleton._instance = SSLManager(ciphers=ciphers, ssl_options_list=ssl_options_list, certification=certification)
|
371
|
+
return SSLManagerSingleton._instance
|
348
372
|
class TLSAdapter(HTTPAdapter):
|
349
|
-
def __init__(self, ciphers=None, certification=None, ssl_options=None):
|
350
|
-
ssl_manager = SSLManager(ciphers, ssl_options, certification)
|
351
|
-
self.ssl_context = ssl_manager.ssl_context
|
373
|
+
def __init__(self, ciphers: Optional[List[str]] = None, certification: Optional[str] = None, ssl_options: Optional[List[str]] = None):
|
352
374
|
super().__init__()
|
375
|
+
self.ciphers = ciphers
|
376
|
+
self.certification = certification
|
377
|
+
self.ssl_options = ssl_options
|
378
|
+
|
379
|
+
self.cipher_manager = CipherManagerSingleton.get_instance(cipher_list=self.ciphers)
|
380
|
+
self.ssl_manager = SSLManagerSingleton.get_instance(
|
381
|
+
ciphers=self.cipher_manager.ciphers_string,
|
382
|
+
ssl_options_list=ssl_options,
|
383
|
+
certification=certification
|
384
|
+
)
|
385
|
+
self.ssl_context = self.ssl_manager.ssl_context
|
353
386
|
|
354
387
|
def init_poolmanager(self, *args, **kwargs):
|
355
|
-
kwargs['ssl_context'] = self.ssl_context
|
356
388
|
return super().init_poolmanager(*args, **kwargs)
|
389
|
+
|
390
|
+
|
391
|
+
class TLSAdapterSingleton:
|
392
|
+
_instance = None
|
393
|
+
@staticmethod
|
394
|
+
def get_instance(ciphers=None, certification=None, ssl_options=None):
|
395
|
+
if TLSAdapterSingleton._instance is None:
|
396
|
+
TLSAdapterSingleton._instance = TLSAdapter(ciphers=ciphers, certification=certification, ssl_options=ssl_options)
|
397
|
+
elif TLSAdapterSingleton._instance.ciphers != ciphers or SSLManagerSingleton._instance.certification !=certification or SSLManagerSingleton._instance.ssl_options_list !=ssl_options:
|
398
|
+
TLSAdapterSingleton._instance = TLSAdapter(ciphers=ciphers, certification=certification, ssl_options=ssl_options)
|
399
|
+
return TLSAdapterSingleton._instance
|
357
400
|
class UserAgentManager:
|
358
401
|
@staticmethod
|
359
402
|
def desktop_user_agents() -> list:
|
@@ -382,39 +425,49 @@ class UserAgentManager:
|
|
382
425
|
"""
|
383
426
|
return {"user-agent": user_agent}
|
384
427
|
def __init__(self,user_agent=desktop_user_agents()[0]):
|
385
|
-
self.user_agent=
|
428
|
+
self.user_agent = user_agent
|
429
|
+
self.user_agent_header=self.get_user_agent(user_agent=user_agent)
|
430
|
+
class UserAgentManagerSingleton:
|
431
|
+
_instance = None
|
432
|
+
@staticmethod
|
433
|
+
def get_instance(user_agent=UserAgentManager.desktop_user_agents()[0]):
|
434
|
+
if UserAgentManagerSingleton._instance is None:
|
435
|
+
UserAgentManagerSingleton._instance = UserAgentManager(user_agent=user_agent)
|
436
|
+
elif UserAgentManagerSingleton._instance.user_agent != user_agent:
|
437
|
+
UserAgentManagerSingleton._instance = UserAgentManager(user_agent=user_agent)
|
438
|
+
return UserAgentManagerSingleton._instance
|
386
439
|
class SafeRequest:
|
387
440
|
def __init__(self,
|
388
441
|
url=None,
|
389
|
-
headers:dict=
|
442
|
+
headers:dict=UserAgentManagerSingleton().get_instance().user_agent_header,
|
390
443
|
max_retries=3,
|
391
444
|
last_request_time=None,
|
392
445
|
request_wait_limit=1.5,
|
393
446
|
):
|
447
|
+
self.url =url
|
394
448
|
if isinstance(headers,str):
|
395
|
-
|
449
|
+
headers = UserAgentManagerSingleton().get_instance(user_agent=headers).user_agent_header
|
396
450
|
self.headers = headers
|
451
|
+
self.max_retries=max_retries
|
452
|
+
|
453
|
+
self.request_wait_limit = request_wait_limit
|
454
|
+
|
455
|
+
self.url_manager = URLManagerSingleton().get_instance(url=url)
|
397
456
|
self.session = self.initialize_session()
|
398
|
-
|
457
|
+
|
399
458
|
self.last_request_time = last_request_time
|
400
|
-
|
401
|
-
if url == None:
|
402
|
-
return
|
403
|
-
self.url = url
|
404
|
-
self.url_manager = URLManager(url=self.url)
|
459
|
+
|
405
460
|
self.response = self.make_request()
|
406
|
-
self.status_code = self.response.status_code
|
407
|
-
self.source_code = self.response.text if self.response else None
|
408
461
|
|
409
|
-
|
410
|
-
|
411
|
-
|
462
|
+
self.status_code = None if self.response == None else self.response.status_code
|
463
|
+
|
464
|
+
self.source_code = '' if self.response == None else self.response.text
|
465
|
+
def initialize_session(self):
|
412
466
|
s = requests.Session()
|
413
467
|
s.cookies["cf_clearance"] = "cb4c883efc59d0e990caf7508902591f4569e7bf-1617321078-0-150"
|
414
468
|
s.headers.update(self.headers)
|
415
469
|
# Add any other headers or cookie settings here
|
416
|
-
adapter =
|
417
|
-
|
470
|
+
adapter = TLSAdapterSingleton().get_instance()
|
418
471
|
s.mount('https://', adapter)
|
419
472
|
return s
|
420
473
|
|
@@ -462,18 +515,15 @@ class SafeRequest:
|
|
462
515
|
|
463
516
|
|
464
517
|
self.wait_between_requests()
|
465
|
-
|
466
|
-
cleaned_urls = self.clean_url(self.url)
|
518
|
+
|
467
519
|
for _ in range(self.max_retries):
|
468
|
-
cleaned_url = self.url_manager.correct_url
|
469
520
|
try:
|
470
|
-
response = self.session.get(
|
471
|
-
|
521
|
+
response = self.session.get(url=self.url_manager.correct_url, timeout=10) # 10 seconds timeout
|
472
522
|
if response.status_code == 200:
|
473
523
|
self.last_request_time = get_time_stamp()
|
474
524
|
return response
|
475
525
|
elif response.status_code == 429:
|
476
|
-
logging.warning(f"Rate limited by {
|
526
|
+
logging.warning(f"Rate limited by {self.url_manager.correct_url}. Retrying...")
|
477
527
|
get_sleep(5) # adjust this based on the server's rate limit reset time
|
478
528
|
except requests.Timeout as e:
|
479
529
|
logging.error(f"Request to {cleaned_url} timed out: {e}")
|
@@ -484,10 +534,8 @@ class SafeRequest:
|
|
484
534
|
except requests.RequestException as e:
|
485
535
|
logging.error(f"Request exception for URL {cleaned_url}: {e}")
|
486
536
|
|
487
|
-
logging.error(f"Failed to retrieve content from {
|
537
|
+
logging.error(f"Failed to retrieve content from {self.url_manager.correct_url} after {self.max_retries} retries.")
|
488
538
|
return None
|
489
|
-
|
490
|
-
|
491
539
|
@staticmethod
|
492
540
|
def is_valid_url(url):
|
493
541
|
"""
|
@@ -497,12 +545,21 @@ class SafeRequest:
|
|
497
545
|
return bool(parsed.netloc) and bool(parsed.scheme)
|
498
546
|
def get_source_code(self, url=None,response=None):
|
499
547
|
if self.response:
|
500
|
-
input(self.response)
|
501
548
|
return self.response.text if self.response else None
|
502
549
|
else:
|
503
550
|
logging.error(f"Invalid URL: {url}")
|
504
551
|
return None
|
505
|
-
self.clean_url(self.
|
552
|
+
self.clean_url(self.url_manager.correct_url)
|
553
|
+
class SafeRequestSingleton:
|
554
|
+
_instance = None
|
555
|
+
@staticmethod
|
556
|
+
|
557
|
+
def get_instance(url=None,headers:dict=UserAgentManager().user_agent_header,max_retries=3,last_request_time=None,request_wait_limit=1.5):
|
558
|
+
if SafeRequestSingleton._instance is None:
|
559
|
+
SafeRequestSingleton._instance = SafeRequest(url=url,headers=headers,max_retries=max_retries,last_request_time=last_request_time,request_wait_limit=request_wait_limit)
|
560
|
+
elif SafeRequestSingleton._instance.url != url or SafeRequestSingleton._instance.headers != headers or SafeRequestSingleton._instance.max_retries != max_retries or SafeRequestSingleton._instance.request_wait_limit != request_wait_limit:
|
561
|
+
SafeRequestSingleton._instance = SafeRequest(url=url,headers=headers,max_retries=max_retries,last_request_time=last_request_time,request_wait_limit=request_wait_limit)
|
562
|
+
return SafeRequestSingleton._instance
|
506
563
|
## ##
|
507
564
|
# Usage
|
508
565
|
## safe_requester = SafeRequest()
|
@@ -523,7 +580,7 @@ class SafeRequest:
|
|
523
580
|
class URLManager:
|
524
581
|
def __init__(self,url=None,session=requests):
|
525
582
|
self.url = url
|
526
|
-
self.session =
|
583
|
+
self.session = session
|
527
584
|
self.striped_url = None if url == None else self.strip_web()
|
528
585
|
self.clean_urls = None if url == None else self.clean_url()
|
529
586
|
self.correct_url = None if url == None else self.get_correct_url()
|
@@ -584,6 +641,16 @@ class URLManager:
|
|
584
641
|
except requests.exceptions.RequestException as e:
|
585
642
|
print(e)
|
586
643
|
return None
|
644
|
+
class URLManagerSingleton:
|
645
|
+
_instance = None
|
646
|
+
@staticmethod
|
647
|
+
def get_instance(url=None,session=requests):
|
648
|
+
if URLManagerSingleton._instance is None:
|
649
|
+
URLManagerSingleton._instance = URLManager(url=url,session=session)
|
650
|
+
elif URLManagerSingleton._instance.session != session or URLManagerSingleton._instance.url != url:
|
651
|
+
URLManagerSingleton._instance = URLManager(url=url,session=session)
|
652
|
+
return URLManagerSingleton._instance
|
653
|
+
|
587
654
|
def get_limited_request(request_url=str,service_name="default"):
|
588
655
|
manager = DynamicRateLimiterManagerSingleton.get_instance() # Get the singleton instance
|
589
656
|
unwanted_response=True
|
@@ -678,16 +745,15 @@ def get_Source_code(url: str ='https://www.example.com' , user_agent:str= UserAg
|
|
678
745
|
Returns:
|
679
746
|
str or None: The source code of the URL if the request is successful, or None if the request fails.
|
680
747
|
"""
|
681
|
-
|
682
|
-
if url is None:
|
683
|
-
return
|
684
|
-
|
748
|
+
url_manager = URLManagerSingleton().get_source_code(url=url)
|
749
|
+
if url_manager.url is None:
|
750
|
+
return []
|
685
751
|
s = requests.Session()
|
686
752
|
s.cookies["cf_clearance"] = "cb4c883efc59d0e990caf7508902591f4569e7bf-1617321078-0-150"
|
687
753
|
s.headers.update(get_user_agent(user_agent))
|
688
754
|
adapter = TLSAdapter()
|
689
755
|
s.mount('https://', adapter)
|
690
|
-
r = try_request(url=
|
756
|
+
r = try_request(url=url_manager.correct_url, session=s)
|
691
757
|
|
692
758
|
if r is None:
|
693
759
|
return None
|
@@ -703,12 +769,11 @@ def parse_react_source(url:str=None) -> list:
|
|
703
769
|
Returns:
|
704
770
|
list: A list of strings containing JavaScript and JSX source code found in <script> tags.
|
705
771
|
"""
|
706
|
-
|
707
|
-
if url is None:
|
772
|
+
url_manager = URLManagerSingleton().get_source_code(url=url)
|
773
|
+
if url_manager.url is None:
|
708
774
|
return []
|
709
|
-
|
710
|
-
|
711
|
-
soup = BeautifulSoup(data, 'html.parser')
|
775
|
+
request_manager = SafeRequestSingleton().get_instance(url=url_manager.url)
|
776
|
+
soup = BeautifulSoup(request_manager.source_code, "html.parser")
|
712
777
|
script_tags = soup.find_all('script', type=lambda t: t and ('javascript' in t or 'jsx' in t))
|
713
778
|
react_source_code = []
|
714
779
|
for script_tag in script_tags:
|
@@ -725,14 +790,15 @@ def get_all_website_links(url:str=None) -> list:
|
|
725
790
|
Returns:
|
726
791
|
list: A list of URLs that belong to the same website as the specified URL.
|
727
792
|
"""
|
728
|
-
|
729
|
-
if url is None:
|
793
|
+
request_manager = SafeRequestSingleton().get_instance(url=url)
|
794
|
+
if request_manager.url is None:
|
730
795
|
return []
|
731
796
|
|
732
|
-
|
797
|
+
|
798
|
+
urls = [request_manager.url]
|
733
799
|
# domain name of the URL without the protocol
|
734
|
-
domain_name = urlparse(url).netloc
|
735
|
-
soup = BeautifulSoup(
|
800
|
+
domain_name = urlparse(request_manager.url).netloc
|
801
|
+
soup = BeautifulSoup(request_manager.source_code, "html.parser")
|
736
802
|
for a_tag in soup.findAll("a"):
|
737
803
|
href = a_tag.attrs.get("href")
|
738
804
|
if href == "" or href is None:
|
@@ -765,11 +831,12 @@ def parse_all(url:str=None):
|
|
765
831
|
Returns:
|
766
832
|
dict: A dict containing keys: [element_types, attribute_values, attribute_names, class_names] with values as lists for keys element types, attribute values, attribute names, and class names found in the source code.
|
767
833
|
"""
|
768
|
-
|
769
|
-
if url is None:
|
834
|
+
request_manager = SafeRequest().get_source_code(url=url)
|
835
|
+
if request_manager.url is None:
|
770
836
|
return [], [], [], []
|
771
837
|
|
772
|
-
|
838
|
+
|
839
|
+
soup = BeautifulSoup(request_manager.source_code, 'html.parser')
|
773
840
|
element_types, attribute_values, attribute_names, class_names = [], [], [], []
|
774
841
|
data = str(data).split('<')
|
775
842
|
for k in range(1, len(data)):
|
@@ -801,13 +868,10 @@ def extract_elements(url:str=None, element_type:str=None, attribute_name:str=Non
|
|
801
868
|
Returns:
|
802
869
|
list: A list of strings containing portions of the source code that match the provided filters.
|
803
870
|
"""
|
804
|
-
|
805
|
-
if url is None:
|
806
|
-
return []
|
807
|
-
|
808
|
-
data = SafeRequest().get_source_code(url=url)
|
809
|
-
soup = BeautifulSoup(data, 'html.parser')
|
810
|
-
|
871
|
+
request_manager = SafeRequest().get_source_code(url=url)
|
872
|
+
if request_manager.url is None:
|
873
|
+
return [], [], [], []
|
874
|
+
soup = BeautifulSoup(request_manager.source_code, 'html.parser')
|
811
875
|
elements = []
|
812
876
|
|
813
877
|
# If no filters are provided, return the entire source code
|
@@ -832,4 +896,3 @@ def get_response(response):
|
|
832
896
|
if data:
|
833
897
|
return data.get("response", data)
|
834
898
|
return response.text
|
835
|
-
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: abstract-webtools
|
3
|
-
Version: 0.1.4.
|
3
|
+
Version: 0.1.4.12
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -0,0 +1,11 @@
|
|
1
|
+
abstract_webtools/__init__.py,sha256=2SWEfdPDHqqjUYsOQYlaOHF644ZYcO160nWKiAjga4w,34
|
2
|
+
abstract_webtools/abstract_crawler.py,sha256=e8jVVv1_EB8poqlrdQaJ19z9Z0t8un5uc-DKnj1Ud5s,8002
|
3
|
+
abstract_webtools/abstract_webtools.py,sha256=sjrsbUa4AkFD80usyU2D2Ez966N_kDt_j4hf2-c7AkQ,40159
|
4
|
+
abstract_webtools/abstract_webtools2.py,sha256=dlhhgmUTaN_NgkT6GcJMVBLuXjmW38gAOeCrKxYqytk,30685
|
5
|
+
abstract_webtools/grab_source_gui.py,sha256=w7vDsEu1IfOIzcsfWeP2IpdS3yhFA_x5IVIeLFlfppw,14708
|
6
|
+
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
7
|
+
abstract_webtools-0.1.4.12.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
|
8
|
+
abstract_webtools-0.1.4.12.dist-info/METADATA,sha256=d4kh-bQrfhjqlmyDnxiyoGbm7xAvsVR7ot5yOmiMcNY,8963
|
9
|
+
abstract_webtools-0.1.4.12.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
10
|
+
abstract_webtools-0.1.4.12.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
11
|
+
abstract_webtools-0.1.4.12.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
abstract_webtools/__init__.py,sha256=2SWEfdPDHqqjUYsOQYlaOHF644ZYcO160nWKiAjga4w,34
|
2
|
-
abstract_webtools/abstract_crawler.py,sha256=5mbFI9tFW0OEeESnC9FjL4uITv1Vvp5CoEH6glFuND8,7994
|
3
|
-
abstract_webtools/abstract_webtools.py,sha256=BIT-ZzoAOhGPPeXa75Qudnl_qQ0WZSU5R6a7x9HJXbo,34926
|
4
|
-
abstract_webtools/abstract_webtools2.py,sha256=dlhhgmUTaN_NgkT6GcJMVBLuXjmW38gAOeCrKxYqytk,30685
|
5
|
-
abstract_webtools/grab_source_gui.py,sha256=w7vDsEu1IfOIzcsfWeP2IpdS3yhFA_x5IVIeLFlfppw,14708
|
6
|
-
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
7
|
-
abstract_webtools-0.1.4.1.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
|
8
|
-
abstract_webtools-0.1.4.1.dist-info/METADATA,sha256=cjZwVqntAL286xh80p0sbzba5sATSFl1bzM75ENlPjU,8962
|
9
|
-
abstract_webtools-0.1.4.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
10
|
-
abstract_webtools-0.1.4.1.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
11
|
-
abstract_webtools-0.1.4.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|