abstract-webtools 0.1.6.89__tar.gz → 0.1.6.91__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/PKG-INFO +1 -1
  2. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/setup.py +1 -1
  3. abstract_webtools-0.1.6.91/src/abstract_webtools/domain_identifier.py +82 -0
  4. abstract_webtools-0.1.6.91/src/abstract_webtools/extention_list.py +11 -0
  5. abstract_webtools-0.1.6.91/src/abstract_webtools/find_dirs.py +81 -0
  6. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/videoDownloader.py +111 -0
  7. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
  8. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools.egg-info/SOURCES.txt +3 -0
  9. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/README.md +0 -0
  10. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/pyproject.toml +0 -0
  11. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/setup.cfg +0 -0
  12. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/__init__.py +0 -0
  13. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/abstract_usurpit.py +0 -0
  14. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/abstract_webtools.py +0 -0
  15. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/big_user_agent_list.py +0 -0
  16. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/main.py +0 -0
  17. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/__init__.py +0 -0
  18. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/allss//.py" +0 -0
  19. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/cipherManager.py +0 -0
  20. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/crawlManager.py +0 -0
  21. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
  22. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/curlMgr.py +0 -0
  23. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/domainManager.py +0 -0
  24. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
  25. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/get_test.py +0 -0
  26. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
  27. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/linkManager/linkManager.py +0 -0
  28. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/mySocketClient.py +0 -0
  29. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/networkManager.py +0 -0
  30. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
  31. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/requestManager/requestManager.py +0 -0
  32. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/seleniumManager.py +0 -0
  33. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
  34. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
  35. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/soupManager/soupManager.py +0 -0
  36. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/sslManager.py +0 -0
  37. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
  38. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
  39. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
  40. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/userAgentManager.py +0 -0
  41. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
  42. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/soup_gui.py +0 -0
  43. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/url_grabber.py +0 -0
  44. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools/url_grabber_new.py +0 -0
  45. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  46. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools.egg-info/requires.txt +0 -0
  47. {abstract_webtools-0.1.6.89 → abstract_webtools-0.1.6.91}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.89
+Version: 0.1.6.91
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 setuptools.setup(
     name='abstract_webtools',
-    version='0.1.6.89',
+    version='0.1.6.91',
     author='putkoff',
     author_email='partners@abstractendeavors.com',
     description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
@@ -0,0 +1,82 @@
+from extention_list import get_extention,popular_extentions
+from urllib.parse import urlparse, urljoin
+from abstract_utilities import *
+def try_request(url,timeout=None):
+    if timeout == None:
+        timeout = 5
+    elif timeout == 0:
+        timeout = None
+    try:
+        result = requests.get(url, timeout=timeout)  # Set timeout to 5 seconds
+    except requests.exceptions.RequestException as e:
+        print(f"Request failed for {url}: {e}")
+        result = None
+    return result
+def is_result_200(result):
+    try:
+        if result.status_code == 200:
+            return True
+    except:
+        return False
+    return False
+def url_to_pieces(url):
+    """
+    Split a URL into protocol, domain, path, and query components.
+    Uses urlparse for robustness.
+    """
+    parsed_url = {'parsed':'', 'scheme':'', 'netloc':'', 'subdomain':'', 'domain':url,'extention':'', 'path':'', 'params':'', 'query':'', 'fragment':''}
+    try:
+        parsed = urlparse(url)
+        parsed_url['parsed']= parsed
+        parsed_url['scheme'] = parsed.scheme if parsed.scheme else ""
+        parsed_url['netloc'] = parsed.netloc if parsed.netloc else ""
+        parsed_url['path'] = parsed.path or ""
+        parsed_url['params'] = parsed.params or ""
+        parsed_url['query'] = parsed.query or ""
+        parsed_url['fragment'] = parsed.fragment or ""
+        if parsed_url['netloc'] == '' and parsed_url['path']:
+            parsed_url['netloc'] = parsed_url['path']
+            if '/' in parsed_url['path']:
+                parsed_url['netloc'] = parsed_url['path'].split('/')[0]
+                parsed_url['path'] = '/'+'/'.join(parsed_url['path'].split('/')[1:])
+            else:
+                parsed_url['path']=''
+        if parsed_url['netloc']:
+            if parsed_url['netloc'].startswith('www.'):
+                parsed_url['subdomain']= 'www.'
+                parsed_url['domain'] = parsed_url['netloc'][len('www.'):]
+            else:
+                parsed_url['domain'] = parsed_url['netloc']
+            parsed_url.update(get_extention(parsed_url['domain']))
+    except Exception as e:
+        print(f'The URL {url} was not reachable: {e}')
+    return parsed_url
+def correct_domains(url):
+    urls = [url]
+    protocols = {'https':['','www.'],'http':['','www.'],'':['','www.']}
+    parsed_url = url_to_pieces(url)
+    scheme,subdomain,extentions = parsed_url['scheme'], parsed_url['subdomain'],make_list(parsed_url['extention'] or popular_extentions)
+    subdomains = protocols.get(scheme)
+    if subdomain in subdomains:
+        subdomains.remove(subdomain)
+        protocols[scheme] = subdomains
+    for extention in extentions:
+        link = f"{parsed_url['domain']}{extention}{parsed_url['path']}{parsed_url['params']}"
+        for key,values in protocols.items():
+            for value in values:
+                new_link = f"{value}{link}"
+                if key:
+                    new_link = f"{key}://{new_link}"
+                urls.append(new_link)
+    return urls
+def tryAllDomains(url):
+    urls = correct_domains(url)
+    for i, url in enumerate(urls):
+        result = try_request(url)
+        if is_result_200(result):
+            return url
+def tryDomain(url):
+    request_mgr = requestManager(url)
+    return request_mgr.source_code
+url='thedailydialectics'
+input(tryAllDomains(url))
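Note: the new domain_identifier.py builds candidate URLs from a bare name by permuting scheme, www. prefix, and TLD, then probes each candidate until one answers with HTTP 200. It also calls input(tryAllDomains(url)) at import time, and it uses requests and requestManager without an explicit import (presumably expected to arrive via the abstract_utilities star import or the caller's scope). A minimal usage sketch, assuming those implicit dependencies are satisfied and the two functions are in scope:

    # Hypothetical sketch; correct_domains/tryAllDomains come from the new domain_identifier.py.
    candidates = correct_domains('thedailydialectics')
    # ['thedailydialectics', 'https://thedailydialectics.com', 'https://www.thedailydialectics.com', ...]
    live_url = tryAllDomains('thedailydialectics')  # first candidate returning HTTP 200, else None
    print(live_url)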
@@ -0,0 +1,11 @@
+extentions=['.ac', '.academy', '.accountant', '.actor', '.agency', '.ai', '.airforce', '.am', '.apartments', '.archi', '.army', '.art', '.asia', '.associates', '.at', '.attorney', '.auction', '.audio', '.baby', '.band', '.bar', '.bargains', '.be', '.beer', '.berlin', '.best', '.bet', '.bid', '.bike', '.bingo', '.bio', '.biz', '.black', '.blackfriday', '.blog', '.blue', '.boston', '.boutique', '.br.com', '.build', '.builders', '.business', '.buzz', '.buz', '.ca', '.cab', '.cafe', '.camera', '.camp', '.capital', '.cards', '.care', '.careers', '.casa', '.cash', '.casino', '.catering', '.cc', '.center', '.ceo', '.ch', '.charity', '.chat', '.cheap', '.christmas', '.church', '.city', '.claims', '.cleaning', '.click', '.clinic', '.clothing', '.cloud', '.club', '.cn.com', '.co', '.co.com', '.co.in', '.co.nz', '.co.uk', '.coach', '.codes', '.coffee', '.college', '.com', '.com.co', '.com.mx', '.com.tw', '.community', '.company', '.computer', '.condos', '.construction', '.consulting', '.contact', '.contractors', '.cooking', '.cool', '.coupons', '.courses', '.credit', '.creditcard', '.cricket', '.cruises', '.cymru', '.cz', '.dance', '.date', '.dating', '.de', '.de.com', '.deals', '.degree', '.delivery', '.democrat', '.dental', '.dentist', '.desi', '.design', '.diamonds', '.diet', '.digital', '.direct', '.directory', '.discount', '.doctor', '.dog', '.domains', '.download', '.earth', '.eco', '.education', '.email', '.energy', '.engineer', '.engineering', '.enterprises', '.equipment', '.estate', '.eu', '.eu.com', '.events', '.exchange', '.expert', '.exposed', '.express', '.fail', '.faith', '.family', '.fans', '.farm', '.fashion', '.film', '.finance', '.financial', '.fish', '.fishing', '.fit', '.fitness', '.flights', '.florist', '.flowers', '.fm', '.football', '.forsale', '.foundation', '.fun', '.fund', '.furniture', '.futbol', '.fyi', '.gallery', '.games', '.garden', '.gay', '.gift', '.gifts', '.gives', '.glass', '.global', '.gmbh', '.gold', '.golf', '.graphics', '.gratis', '.green', '.gripe', '.group', '.gs', '.guide', '.guitars', '.guru', '.haus', '.healthcare', '.help', '.hiphop', '.hn', '.hockey', '.holdings', '.holiday', '.horse', '.host', '.hosting', '.house', '.how', '.immo', '.in', '.industries', '.info', '.ink', '.institue', '.insure', '.international', '.investments', '.io', '.irish', '.it', '.jetzt', '.jewelry', '.jp', '.jpn.com', '.juegos', '.kaufen', '.kim', '.kitchen', '.kiwi', '.la', '.land', '.lawyer', '.lease', '.legal', '.lgbt', '.li', '.life', '.lighting', '.limited', '.limo', '.link', '.live', '.llc', '.loan', '.loans', '.lol', '.london', '.love', '.ltd', '.luxury ', '.maison', '.managment', '.market', '.marketing', '.mba', '.me', '.me.uk', '.media', '.memorial', '.men', '.menu', '.miami', '.mobi', '.moda', '.moe', '.money', '.monster', '.mortgage', '.mx', '.nagoya', '.navy', '.net', '.net.co', '.network', '.news', '.ngo', '.ninja', '.nl', '.nyc', '.okinawa', '.one', '.ong', '.online', '.org', '.org.in', '.org.uk', '.partners', '.parts', '.party', '.pet', '.ph', '.photo', '.photography', '.photos', '.physio', '.pics', '.pictures', '.pink', '.pizza', '.pl', '.place', '.plumbing', '.plus', '.poker', '.press', '.pro', '.productions', '.promo', '.properties', '.property', '.pub', '.qpon', '.quebec', '.racing', '.realty', '.recipes', '.red', '.rehab', '.reisen', '.rent', '.rentals', '.repair', '.report', '.republican', '.rest', '.restaurant', '.review', '.reviews', '.rip', '.rocks', '.rodeo', '.run', '.sa.com', '.sale', '.sarl', '.sc', '.school', '.schule', '.science', '.se.net', '.services', '.sexy', '.sg', '.shiksha', '.shoes', '.shop', '.shopping', '.show', '.singles', '.site', '.ski', '.soccer', '.social', '.software', '.solar', '.solutions', '.soy', '.space', '.srl', '.store', '.stream', '.studio', '.study', '.style', '.supplies', '.supply', '.support', '.surf', '.surgery', '.systems', '.tattoo', '.tax', '.taxi', '.team', '.tech', '.technology', '.tel', '.tennis', '.theater', '.tienda', '.tips', '.today', '.tokyo', '.tools', '.tours', '.town', '.toys', '.trade', '.training', '.tv', '.tw', '.uk', '.uk.com', '.university', '.uno', '.us', '.us.com', '.vacations', '.vc', '.vegas', '.ventures', '.vet', '.viajes', '.video', '.villas', '.vip', '.vision', '.vodka', '.vote', '.voting', '.voyage', '.watch', '.webcam', '.website', '.wedding', '.wiki', '.win', '.wine', '.work', '.works', '.world', '.ws', '.wtf', '.xyz', '.yoga', '.za.com', '.zone']
+popular_extentions = ['.com','.net','.org','.co','.us']
+extentions = popular_extentions+[extention for extention in extentions if extention not in popular_extentions]
+def get_extention(domain):
+    domain_js = {"domain":domain,"extention":''}
+    for extention in extentions:
+        if domain.endswith(extention):
+            domain_js["domain"] = domain[:-len(extention)]
+            domain_js["extention"] = extention
+            break
+    return domain_js
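Note: extention_list.py keeps the TLD table as one long list ('extention' is the package's own spelling), re-orders it so the five popular endings are tried first, and get_extention() strips the first matching suffix from a domain. A small illustrative call, assuming the module is importable as abstract_webtools.extention_list:

    from abstract_webtools.extention_list import get_extention

    print(get_extention('thedailydialectics.com'))
    # {'domain': 'thedailydialectics', 'extention': '.com'}
    print(get_extention('localhost'))
    # {'domain': 'localhost', 'extention': ''}  (no suffix in the list matches)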
@@ -0,0 +1,81 @@
+import os
+def get_dir_size(path):
+    """Calculate the total size of a directory in bytes."""
+    total_size = 0
+    try:
+        for dirpath, dirnames, filenames in os.walk(path):
+            for filename in filenames:
+                file_path = os.path.join(dirpath, filename)
+                try:
+                    total_size += os.path.getsize(file_path)
+                except (OSError, PermissionError) as e:
+                    #print(f"Error accessing {file_path}: {e}")
+                    pass
+    except (OSError, PermissionError) as e:
+        print(f"Error accessing {path}: {e}")
+    return total_size
+
+def compare_dirs(dir1_path, dir2_path):
+    """Compare the sizes of two directories."""
+    dir1_size = get_dir_size(dir1_path)
+    dir2_size = get_dir_size(dir2_path)
+
+    print(f"Size of {dir1_path}: {dir1_size} bytes")
+    print(f"Size of {dir2_path}: {dir2_size} bytes")
+
+    if dir1_size > dir2_size:
+        print(f"{dir1_path} is larger than {dir2_path}")
+    elif dir2_size > dir1_size:
+        print(f"{dir2_path} is larger than {dir1_path}")
+    else:
+        print("Both directories are the same size")
+twentyfourT = """/mnt/24T/evo_970
+/mnt/24T/main_drive
+/mnt/24T/nvmeHeatSync-new
+/mnt/24T/PNY_1T
+/mnt/24T/serverBack
+/mnt/24T/solcatcher_backup
+/mnt/24T/transferDrive
+/mnt/24T/wd_black
+/mnt/24T/wd_black_980_home
+/mnt/24T/wdBlack_970_evo
+/mnt/24T/wd_main_980
+/mnt/24T/wd_nvm
+/mnt/24T/.Trash-1000
+/mnt/24T/testfile.txt"""
+
+
+sixteenT = """/mnt/16T/24T/24T/evo980-new
+/mnt/16T/24T/24T/500Gb_pny
+/mnt/16T/24T/24T/wdBlack_970_evo
+/mnt/16T/24T/24T/wd_nvm
+/mnt/16T/24T/24T/wd_main_980
+/mnt/16T/24T/24T/nvmeHeatSync-new
+/mnt/16T/24T/24T/PNY_1T
+/mnt/16T/24T/24T/serverBack
+/mnt/16T/24T/24T/transferDrive
+/mnt/16T/24T/24T/.Trash-1000
+/mnt/16T/24T/24T/solcatcher_backup
+/mnt/16T/24T/24T/wd_black_980_home
+/mnt/16T/24T/24T/abstract_images-0.0.0.5-py3-none-any
+/mnt/16T/24T/24T/evo_970
+/mnt/16T/24T/24T/main_drive
+/mnt/16T/24T/24T/wd_black
+/mnt/16T/24T/24T/testfile.txtt"""
+sixteenT = sixteenT.split('\n')
+twentyfourT = twentyfourT.split('\n')
+def is_dirname_in_sixteenT(dirname):
+    basenames = [directory for directory in sixteenT if os.path.basename(directory) == dirname]
+    if basenames:
+        return basenames[0]
+for directory in twentyfourT:
+    dirname = os.path.basename(directory)
+
+    size1 = get_dir_size(directory)
+    sixteenT_dir = is_dirname_in_sixteenT(dirname)
+    size2 = get_dir_size(sixteenT_dir)
+    print(directory)
+    print(f"size == {size1}")
+    print(sixteenT_dir)
+    input(f"size == {size2}")
+    input(compare_dirs(directory, sixteenT_dir))
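Note: find_dirs.py reads as a one-off migration script: get_dir_size() and compare_dirs() are generic helpers, while the module-level loop walks two hard-coded mount listings and blocks on input() for every pair, so importing the module runs that loop. If the helpers were lifted out on their own, usage would look roughly like this (paths are placeholders):

    # Sketch only; assumes get_dir_size/compare_dirs have been copied into scope.
    docs = '/home/user/Documents'    # placeholder path
    back = '/home/user/Backups'      # placeholder path
    print(get_dir_size(docs))        # total bytes, silently skipping unreadable files
    compare_dirs(docs, back)         # prints both sizes and which directory is larger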
@@ -136,6 +136,117 @@ class VideoDownloader:
     def stop(self):
         self.monitoring = False
         self.pause_event.set()
+def download_image(url, save_path=None):
+    """
+    Downloads an image from a URL and saves it to the specified path.
+
+    Args:
+        url (str): The URL of the image to download
+        save_path (str, optional): Path to save the image. If None, uses the filename from URL
+
+    Returns:
+        str: Path where the image was saved, or None if download failed
+    """
+    try:
+        # Send GET request to the URL
+        response = requests.get(url, stream=True)
+
+        # Check if the request was successful
+        if response.status_code == 200:
+            # Set decode_content=True to automatically handle Content-Encoding
+            response.raw.decode_content = True
+
+            # If no save_path provided, extract filename from URL
+            if save_path is None:
+                # Get filename from URL
+                filename = url.split('/')[-1]
+                save_path = filename
+
+            # Ensure the directory exists
+            os.makedirs(os.path.dirname(save_path), exist_ok=True)
+
+            # Write the image content to file
+            with open(save_path, 'wb') as f:
+                f.write(response.content)
+
+            print(f"Image successfully downloaded to {save_path}")
+            return save_path
+        else:
+            print(f"Failed to download image. Status code: {response.status_code}")
+            return None
+
+    except requests.exceptions.RequestException as e:
+        print(f"Error downloading image: {str(e)}")
+        return None
+    except Exception as e:
+        print(f"An unexpected error occurred: {str(e)}")
+        return None
+def get_thumbnails(directory,info):
+    thumbnails_dir = os.path.join(directory,'thumbnails')
+    os.makedirs(thumbnails_dir, exist_ok=True)
+    thumbnails = info.get('thumbnails',[])
+    for i,thumbnail_info in enumerate(thumbnails):
+        thumbnail_url = thumbnail_info.get('url')
+        thumbnail_base_url = thumbnail_url.split('?')[0]
+        baseName = os.path.basename(thumbnail_base_url)
+        fileName,ext = os.path.splitext(baseName)
+        baseName = f"{fileName}{ext}"
+        resolution = info['thumbnails'][i].get('resolution')
+        if resolution:
+            baseName = f"{resolution}_{baseName}"
+        img_id = info['thumbnails'][i].get('id')
+        if img_id:
+            baseName = f"{img_id}_{baseName}"
+        thumbnail_path = os.path.join(thumbnails_dir,baseName)
+        info['thumbnails'][i]['path']=thumbnail_path
+        download_image(thumbnail_url, save_path=thumbnail_path)
+    return info
+def optimize_video_for_safari(input_file, reencode=False):
+    """
+    Optimizes an MP4 file for Safari by moving the 'moov' atom to the beginning.
+    Optionally, re-encodes the video for maximum compatibility.
+
+    Args:
+        input_file (str): Path to the original MP4 file.
+        reencode (bool): If True, re-encode the video for Safari compatibility.
+
+    Returns:
+        str: Path to the optimized MP4 file.
+    """
+    tmp_dir = tempfile.mkdtemp()
+    try:
+        local_input = os.path.join(tmp_dir, os.path.basename(input_file))
+        shutil.copy2(input_file, local_input)
+
+        base, ext = os.path.splitext(local_input)
+        local_output = f"{base}_optimized{ext}"
+
+        if reencode:
+            # Re-encoding command for maximum Safari compatibility
+            command = [
+                "ffmpeg", "-i", local_input,
+                "-c:v", "libx264", "-profile:v", "baseline", "-level", "3.0", "-pix_fmt", "yuv420p",
+                "-c:a", "aac", "-b:a", "128k",
+                "-movflags", "faststart",
+                local_output
+            ]
+        else:
+            # Simple faststart with stream copy
+            command = [
+                "ffmpeg", "-i", local_input,
+                "-c", "copy", "-movflags", "faststart",
+                local_output
+            ]
+
+        try:
+            subprocess.run(command, check=True)
+            shutil.copy2(local_output, input_file)
+            print(f"Optimized video saved as {input_file}")
+        except subprocess.CalledProcessError as e:
+            print(f"Error during optimization: {e}")
+        return input_file
+    finally:
+        shutil.rmtree(tmp_dir)
 def bool_or_default(obj,default=True):
     if obj == None:
         obj = default
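Note: the three additions to videoDownloader.py are plain functions rather than VideoDownloader methods: download_image() streams a file to disk, get_thumbnails() saves every thumbnail listed in a yt-dlp-style info dict into a thumbnails/ subfolder and records each saved path back into the dict, and optimize_video_for_safari() shells out to ffmpeg with -movflags faststart so the moov atom moves to the front of the MP4 and Safari can begin playback before the whole file arrives. A hedged usage sketch, assuming ffmpeg is on PATH, the example paths exist, and the functions are exposed at module level as the diff suggests:

    from abstract_webtools.managers.videoDownloader import download_image, optimize_video_for_safari

    # Fetch a poster image next to the video (save_path should include a directory,
    # since the helper calls os.makedirs on its dirname).
    download_image('https://example.com/poster.jpg', save_path='downloads/poster.jpg')

    # Remux in place for progressive playback, or re-encode to baseline H.264 + AAC.
    optimize_video_for_safari('downloads/video.mp4')
    optimize_video_for_safari('downloads/video.mp4', reencode=True)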
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.89
+Version: 0.1.6.91
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
@@ -6,6 +6,9 @@ src/abstract_webtools/__init__.py
 src/abstract_webtools/abstract_usurpit.py
 src/abstract_webtools/abstract_webtools.py
 src/abstract_webtools/big_user_agent_list.py
+src/abstract_webtools/domain_identifier.py
+src/abstract_webtools/extention_list.py
+src/abstract_webtools/find_dirs.py
 src/abstract_webtools/main.py
 src/abstract_webtools/soup_gui.py
 src/abstract_webtools/url_grabber.py