abstract-webtools 0.1.6.90__py3-none-any.whl → 0.1.6.92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/domain_identifier.py +82 -0
- abstract_webtools/extention_list.py +11 -0
- abstract_webtools/find_dirs.py +81 -0
- {abstract_webtools-0.1.6.90.dist-info → abstract_webtools-0.1.6.92.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.90.dist-info → abstract_webtools-0.1.6.92.dist-info}/RECORD +7 -4
- {abstract_webtools-0.1.6.90.dist-info → abstract_webtools-0.1.6.92.dist-info}/WHEEL +1 -1
- {abstract_webtools-0.1.6.90.dist-info → abstract_webtools-0.1.6.92.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,82 @@
|
|
1
|
+
from extention_list import get_extention,popular_extentions
|
2
|
+
from urllib.parse import urlparse, urljoin
|
3
|
+
from abstract_utilities import *
|
4
|
+
def try_request(url, timeout=None):
    """Fetch *url* with ``requests.get`` and return the Response, or None on failure.

    Timeout semantics:
        * ``None``  -> default of 5 seconds
        * ``0``     -> no timeout at all (wait forever)
        * anything else is passed through to ``requests.get``.

    Any ``requests.exceptions.RequestException`` (connection error, timeout,
    invalid URL, ...) is reported to stdout and swallowed, returning None so
    callers can probe many candidate URLs without try/except of their own.

    NOTE(review): ``requests`` is not imported by name in this module —
    presumably provided by the ``abstract_utilities`` star import; confirm.
    """
    if timeout is None:
        timeout = 5
    elif timeout == 0:
        timeout = None  # requests interprets timeout=None as "no timeout"
    try:
        return requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Request failed for {url}: {e}")
        return None
|
15
|
+
def is_result_200(result):
    """Return True iff *result* looks like a response with HTTP status 200.

    Accepts the output of ``try_request``: either a Response object or None.
    The original used a bare ``except`` to absorb the AttributeError raised by
    ``None.status_code``; this checks explicitly instead of masking all errors.
    """
    return result is not None and getattr(result, "status_code", None) == 200
|
22
|
+
def url_to_pieces(url):
    """
    Split a URL into protocol, domain, path, and query components.

    Uses urlparse for robustness. Returns a dict with the keys
    'parsed', 'scheme', 'netloc', 'subdomain', 'domain', 'extention',
    'path', 'params', 'query', 'fragment'. On any failure the
    partially-filled dict is returned; 'domain' defaults to the raw
    input so callers always get something usable.
    """
    parsed_url = {'parsed':'', 'scheme':'', 'netloc':'', 'subdomain':'', 'domain':url,'extention':'', 'path':'', 'params':'', 'query':'', 'fragment':''}
    try:
        parsed = urlparse(url)
        parsed_url['parsed']= parsed
        parsed_url['scheme'] = parsed.scheme if parsed.scheme else ""
        parsed_url['netloc'] = parsed.netloc if parsed.netloc else ""
        parsed_url['path'] = parsed.path or ""
        parsed_url['params'] = parsed.params or ""
        parsed_url['query'] = parsed.query or ""
        parsed_url['fragment'] = parsed.fragment or ""
        # Scheme-less input such as 'example.com/page' is parsed by urlparse
        # entirely into 'path'; recover the host from the first path segment.
        if parsed_url['netloc'] == '' and parsed_url['path']:
            parsed_url['netloc'] = parsed_url['path']
            if '/' in parsed_url['path']:
                parsed_url['netloc'] = parsed_url['path'].split('/')[0]
                parsed_url['path'] = '/'+'/'.join(parsed_url['path'].split('/')[1:])
            else:
                parsed_url['path']=''
        if parsed_url['netloc']:
            # Peel a leading 'www.' off the host into 'subdomain'.
            if parsed_url['netloc'].startswith('www.'):
                parsed_url['subdomain']= 'www.'
                parsed_url['domain'] = parsed_url['netloc'][len('www.'):]
            else:
                parsed_url['domain'] = parsed_url['netloc']
        # get_extention overwrites 'domain' (name only) and 'extention' (TLD).
        parsed_url.update(get_extention(parsed_url['domain']))
    except Exception as e:
        # NOTE(review): message is misleading — no network request happens
        # here; this only signals a parsing failure.
        print(f'The URL {url} was not reachable: {e}')
    return parsed_url
|
54
|
+
def correct_domains(url):
    """Build the list of candidate URL variants to probe for *url*.

    Combines each scheme ('https', 'http', and scheme-less) with each
    subdomain prefix ('' and 'www.') and each candidate extension. When the
    input domain carries no recognizable extension, every popular extension
    is tried instead. The scheme/subdomain pair the input already uses is
    skipped so the original URL (always first in the result) is not repeated.
    """
    urls = [url]
    protocols = {'https': ['', 'www.'], 'http': ['', 'www.'], '': ['', 'www.']}
    parsed_url = url_to_pieces(url)
    scheme = parsed_url['scheme']
    subdomain = parsed_url['subdomain']
    # No extension detected -> fan out over all popular extensions.
    extentions = make_list(parsed_url['extention'] or popular_extentions)
    # Original crashed with TypeError here for unknown schemes (e.g. 'ftp')
    # because dict.get returned None; fall back to the full prefix list.
    subdomains = protocols.get(scheme, ['', 'www.'])
    if subdomain in subdomains:
        subdomains.remove(subdomain)
        protocols[scheme] = subdomains
    for extention in extentions:
        link = f"{parsed_url['domain']}{extention}{parsed_url['path']}{parsed_url['params']}"
        for key, values in protocols.items():
            for value in values:
                new_link = f"{value}{link}"
                if key:
                    new_link = f"{key}://{new_link}"
                urls.append(new_link)
    return urls
|
72
|
+
def tryAllDomains(url):
    """Probe every candidate variant of *url* and return the first that
    answers HTTP 200, or None when none of them do.

    Candidates come from ``correct_domains`` (the raw input is tried first).
    """
    for candidate in correct_domains(url):
        if is_result_200(try_request(candidate)):
            return candidate
    return None  # explicit: no variant responded with 200
|
78
|
+
def tryDomain(url):
    """Fetch *url* through a requestManager and return its ``source_code``.

    NOTE(review): ``requestManager`` is not defined or imported by name in
    this module — presumably exported by the ``abstract_utilities`` star
    import or missing an import; confirm before relying on this function.
    """
    request_mgr = requestManager(url)
    return request_mgr.source_code
|
81
|
+
# Demo entry point. Guarded so that importing this module no longer fires
# network requests and a blocking input() call as an import-time side effect.
if __name__ == '__main__':
    url = 'thedailydialectics'
    input(tryAllDomains(url))
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Known domain extensions (TLDs and common second-level combos).
# Fixed data bugs from the original list: '.luxury ' had a trailing space
# (so str.endswith could never match it), '.institue' -> '.institute',
# '.managment' -> '.management'.
# NOTE(review): '.buz' (next to '.buzz') looks suspicious but is kept as-is.
extentions = ['.ac', '.academy', '.accountant', '.actor', '.agency', '.ai', '.airforce', '.am', '.apartments', '.archi', '.army', '.art', '.asia', '.associates', '.at', '.attorney', '.auction', '.audio', '.baby', '.band', '.bar', '.bargains', '.be', '.beer', '.berlin', '.best', '.bet', '.bid', '.bike', '.bingo', '.bio', '.biz', '.black', '.blackfriday', '.blog', '.blue', '.boston', '.boutique', '.br.com', '.build', '.builders', '.business', '.buzz', '.buz', '.ca', '.cab', '.cafe', '.camera', '.camp', '.capital', '.cards', '.care', '.careers', '.casa', '.cash', '.casino', '.catering', '.cc', '.center', '.ceo', '.ch', '.charity', '.chat', '.cheap', '.christmas', '.church', '.city', '.claims', '.cleaning', '.click', '.clinic', '.clothing', '.cloud', '.club', '.cn.com', '.co', '.co.com', '.co.in', '.co.nz', '.co.uk', '.coach', '.codes', '.coffee', '.college', '.com', '.com.co', '.com.mx', '.com.tw', '.community', '.company', '.computer', '.condos', '.construction', '.consulting', '.contact', '.contractors', '.cooking', '.cool', '.coupons', '.courses', '.credit', '.creditcard', '.cricket', '.cruises', '.cymru', '.cz', '.dance', '.date', '.dating', '.de', '.de.com', '.deals', '.degree', '.delivery', '.democrat', '.dental', '.dentist', '.desi', '.design', '.diamonds', '.diet', '.digital', '.direct', '.directory', '.discount', '.doctor', '.dog', '.domains', '.download', '.earth', '.eco', '.education', '.email', '.energy', '.engineer', '.engineering', '.enterprises', '.equipment', '.estate', '.eu', '.eu.com', '.events', '.exchange', '.expert', '.exposed', '.express', '.fail', '.faith', '.family', '.fans', '.farm', '.fashion', '.film', '.finance', '.financial', '.fish', '.fishing', '.fit', '.fitness', '.flights', '.florist', '.flowers', '.fm', '.football', '.forsale', '.foundation', '.fun', '.fund', '.furniture', '.futbol', '.fyi', '.gallery', '.games', '.garden', '.gay', '.gift', '.gifts', '.gives', '.glass', '.global', '.gmbh', '.gold', '.golf', '.graphics', '.gratis', '.green', '.gripe', '.group', '.gs', '.guide', '.guitars', '.guru', '.haus', '.healthcare', '.help', '.hiphop', '.hn', '.hockey', '.holdings', '.holiday', '.horse', '.host', '.hosting', '.house', '.how', '.immo', '.in', '.industries', '.info', '.ink', '.institute', '.insure', '.international', '.investments', '.io', '.irish', '.it', '.jetzt', '.jewelry', '.jp', '.jpn.com', '.juegos', '.kaufen', '.kim', '.kitchen', '.kiwi', '.la', '.land', '.lawyer', '.lease', '.legal', '.lgbt', '.li', '.life', '.lighting', '.limited', '.limo', '.link', '.live', '.llc', '.loan', '.loans', '.lol', '.london', '.love', '.ltd', '.luxury', '.maison', '.management', '.market', '.marketing', '.mba', '.me', '.me.uk', '.media', '.memorial', '.men', '.menu', '.miami', '.mobi', '.moda', '.moe', '.money', '.monster', '.mortgage', '.mx', '.nagoya', '.navy', '.net', '.net.co', '.network', '.news', '.ngo', '.ninja', '.nl', '.nyc', '.okinawa', '.one', '.ong', '.online', '.org', '.org.in', '.org.uk', '.partners', '.parts', '.party', '.pet', '.ph', '.photo', '.photography', '.photos', '.physio', '.pics', '.pictures', '.pink', '.pizza', '.pl', '.place', '.plumbing', '.plus', '.poker', '.press', '.pro', '.productions', '.promo', '.properties', '.property', '.pub', '.qpon', '.quebec', '.racing', '.realty', '.recipes', '.red', '.rehab', '.reisen', '.rent', '.rentals', '.repair', '.report', '.republican', '.rest', '.restaurant', '.review', '.reviews', '.rip', '.rocks', '.rodeo', '.run', '.sa.com', '.sale', '.sarl', '.sc', '.school', '.schule', '.science', '.se.net', '.services', '.sexy', '.sg', '.shiksha', '.shoes', '.shop', '.shopping', '.show', '.singles', '.site', '.ski', '.soccer', '.social', '.software', '.solar', '.solutions', '.soy', '.space', '.srl', '.store', '.stream', '.studio', '.study', '.style', '.supplies', '.supply', '.support', '.surf', '.surgery', '.systems', '.tattoo', '.tax', '.taxi', '.team', '.tech', '.technology', '.tel', '.tennis', '.theater', '.tienda', '.tips', '.today', '.tokyo', '.tools', '.tours', '.town', '.toys', '.trade', '.training', '.tv', '.tw', '.uk', '.uk.com', '.university', '.uno', '.us', '.us.com', '.vacations', '.vc', '.vegas', '.ventures', '.vet', '.viajes', '.video', '.villas', '.vip', '.vision', '.vodka', '.vote', '.voting', '.voyage', '.watch', '.webcam', '.website', '.wedding', '.wiki', '.win', '.wine', '.work', '.works', '.world', '.ws', '.wtf', '.xyz', '.yoga', '.za.com', '.zone']
# The handful of extensions tried first when guessing a bare domain name.
popular_extentions = ['.com', '.net', '.org', '.co', '.us']
# Reorder so the popular extensions lead and are not duplicated.
extentions = popular_extentions + [extention for extention in extentions if extention not in popular_extentions]
|
4
|
+
def get_extention(domain):
    """Split a known extension off the end of *domain*.

    Returns ``{"domain": <name without extension>, "extention": <ext or ''>}``.
    Prefers the longest matching extension so compound TLDs win: the original
    scanned in popularity order, which mapped 'example.com.co' to '.co'
    instead of '.com.co'. No match leaves *domain* untouched with an empty
    'extention'.
    """
    matches = [ext for ext in extentions if domain.endswith(ext)]
    if not matches:
        return {"domain": domain, "extention": ""}
    best = max(matches, key=len)  # longest suffix is the real extension
    return {"domain": domain[:-len(best)], "extention": best}
|
@@ -0,0 +1,81 @@
|
|
1
|
+
import os
|
2
|
+
def get_dir_size(path):
    """Return the total size, in bytes, of every file under *path*.

    Entries that cannot be stat'ed are silently skipped; a failure of the
    top-level walk itself is reported to stdout and the running total
    (normally 0) is returned.
    """
    total_size = 0
    try:
        for dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                target = os.path.join(dirpath, filename)
                try:
                    total_size += os.path.getsize(target)
                except (OSError, PermissionError):
                    continue  # unreadable file: skip, keep summing the rest
    except (OSError, PermissionError) as e:
        print(f"Error accessing {path}: {e}")
    return total_size
|
17
|
+
|
18
|
+
def compare_dirs(dir1_path, dir2_path):
    """Print the byte sizes of two directories and report which is larger."""
    dir1_size, dir2_size = get_dir_size(dir1_path), get_dir_size(dir2_path)

    print(f"Size of {dir1_path}: {dir1_size} bytes")
    print(f"Size of {dir2_path}: {dir2_size} bytes")

    # Equality first, then a single ordered comparison decides the rest.
    if dir1_size == dir2_size:
        print("Both directories are the same size")
    elif dir1_size > dir2_size:
        print(f"{dir1_path} is larger than {dir2_path}")
    else:
        print(f"{dir2_path} is larger than {dir1_path}")
|
32
|
+
# Hard-coded inventory of backup directories on the 24T drive
# (one absolute path per line; split into a list below).
twentyfourT = """/mnt/24T/evo_970
/mnt/24T/main_drive
/mnt/24T/nvmeHeatSync-new
/mnt/24T/PNY_1T
/mnt/24T/serverBack
/mnt/24T/solcatcher_backup
/mnt/24T/transferDrive
/mnt/24T/wd_black
/mnt/24T/wd_black_980_home
/mnt/24T/wdBlack_970_evo
/mnt/24T/wd_main_980
/mnt/24T/wd_nvm
/mnt/24T/.Trash-1000
/mnt/24T/testfile.txt"""


# Mirror copies under /mnt/16T/24T/24T/ to compare against.
# NOTE(review): 'testfile.txtt' (double 't') looks like a typo — if so,
# the basename lookup below will never match 'testfile.txt'; confirm
# against the actual filesystem.
sixteenT = """/mnt/16T/24T/24T/evo980-new
/mnt/16T/24T/24T/500Gb_pny
/mnt/16T/24T/24T/wdBlack_970_evo
/mnt/16T/24T/24T/wd_nvm
/mnt/16T/24T/24T/wd_main_980
/mnt/16T/24T/24T/nvmeHeatSync-new
/mnt/16T/24T/24T/PNY_1T
/mnt/16T/24T/24T/serverBack
/mnt/16T/24T/24T/transferDrive
/mnt/16T/24T/24T/.Trash-1000
/mnt/16T/24T/24T/solcatcher_backup
/mnt/16T/24T/24T/wd_black_980_home
/mnt/16T/24T/24T/abstract_images-0.0.0.5-py3-none-any
/mnt/16T/24T/24T/evo_970
/mnt/16T/24T/24T/main_drive
/mnt/16T/24T/24T/wd_black
/mnt/16T/24T/24T/testfile.txtt"""
# Convert both blobs into lists of individual paths.
sixteenT = sixteenT.split('\n')
twentyfourT = twentyfourT.split('\n')
|
67
|
+
def is_dirname_in_sixteenT(dirname):
    """Return the first path in the module-level ``sixteenT`` list whose
    basename equals *dirname*, or None when there is no match."""
    for candidate in sixteenT:
        if os.path.basename(candidate) == dirname:
            return candidate
    return None
|
71
|
+
# Interactively compare each 24T backup directory against its same-named
# copy on the 16T drive.
# Fixed: the original had stray closing parens after both get_dir_size(...)
# calls (a SyntaxError that made the module unimportable), and passed None
# into get_dir_size when no matching 16T directory existed.
for directory in twentyfourT:
    dirname = os.path.basename(directory)

    size1 = get_dir_size(directory)
    sixteenT_dir = is_dirname_in_sixteenT(dirname)
    if sixteenT_dir is None:
        # No same-named directory on the 16T side; report and move on
        # instead of crashing inside get_dir_size(None).
        print(f"{directory}: no matching directory on 16T")
        continue
    size2 = get_dir_size(sixteenT_dir)
    print(directory)
    print(f"size == {size1}")
    print(sixteenT_dir)
    input(f"size == {size2}")
    input(compare_dirs(directory, sixteenT_dir))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.
|
3
|
+
Version: 0.1.6.92
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -3,6 +3,9 @@ abstract_webtools/abstract_userpit.py,sha256=Rg_0Orx79rxqEePt6Sf-evGslPq5KLlTiL-
|
|
3
3
|
abstract_webtools/abstract_usurpit.py,sha256=7PDUb5LNETjvU1rhfJaToKLIKmSXRkcJAmM4wOX7PsQ,7170
|
4
4
|
abstract_webtools/abstract_webtools.py,sha256=3NzGmJlZvrdVtEcUi2K5iUgWr1822IBPhIN9us2e2t0,3859
|
5
5
|
abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
|
6
|
+
abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
|
7
|
+
abstract_webtools/extention_list.py,sha256=gRSO4nMbuuXDYzd-ss4s64sS80ZHmUoazMCpgoKG5vE,4884
|
8
|
+
abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZCQ,2609
|
6
9
|
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
7
10
|
abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
|
8
11
|
abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
|
@@ -38,7 +41,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
|
|
38
41
|
abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
|
39
42
|
abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
|
40
43
|
abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
|
41
|
-
abstract_webtools-0.1.6.
|
42
|
-
abstract_webtools-0.1.6.
|
43
|
-
abstract_webtools-0.1.6.
|
44
|
-
abstract_webtools-0.1.6.
|
44
|
+
abstract_webtools-0.1.6.92.dist-info/METADATA,sha256=4aTdbNUjectbYqf27r46QeI8_9Fr_azCOJQTIxUdMoM,16029
|
45
|
+
abstract_webtools-0.1.6.92.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
46
|
+
abstract_webtools-0.1.6.92.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
47
|
+
abstract_webtools-0.1.6.92.dist-info/RECORD,,
|
File without changes
|