abstract-webtools 0.1.6.23__py3-none-any.whl → 0.1.6.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from urllib.parse import urljoin, urlparse
5
+
6
+ visited = set()
7
+
8
def download_page(url, destination_dir):
    """Download a single page and save its HTML into *destination_dir*.

    Args:
        url: Absolute URL of the page to fetch.
        destination_dir: Directory the HTML file is written to (created
            if missing).

    Returns:
        Tuple of ``(page_text, saved_file_path)``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # Create directory if needed
    os.makedirs(destination_dir, exist_ok=True)
    os.chmod(destination_dir, 0o755)  # optional: set directory perms

    # Download.  BUG FIX: the original call had no timeout, so a stalled
    # server would hang the crawler forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Build a safe file name for the HTML file.
    # E.g., for "https://example.com/about/", you might store "about.html"
    parsed = urlparse(url)
    if not parsed.path or parsed.path.endswith("/"):
        filename = "index.html"
    else:
        filename = os.path.basename(parsed.path)
    if not filename.endswith(".html"):
        filename += ".html"

    filepath = os.path.join(destination_dir, filename)
    with open(filepath, "wb") as f:
        f.write(response.content)

    return response.text, filepath
31
+
32
def crawl(url, destination_dir):
    """Download a site starting from *url*, following every ``<a href>``.

    BUG FIX: the original implementation recursed once per link, so any
    realistically-sized site exhausted Python's recursion limit.  This
    version walks an explicit stack instead.  Already-seen URLs are
    tracked in the module-level ``visited`` set.
    """
    stack = [url]
    while stack:
        current = stack.pop()
        if current in visited:
            continue
        visited.add(current)

        try:
            html, _ = download_page(current, destination_dir)
        except Exception as e:
            print(f"Failed to download {current}: {e}")
            continue

        soup = BeautifulSoup(html, "html.parser")

        # Find all <a> tags with an href and queue their targets.
        for link_tag in soup.find_all("a", href=True):
            # Convert a relative URL to an absolute one.
            absolute_link = urljoin(current, link_tag["href"])

            # Skips mailto:, javascript:, etc.
            # NOTE(review): there is no same-domain check, so this will
            # wander off to every externally linked site — confirm that
            # is intended before running it unattended.
            if absolute_link.startswith("http"):
                stack.append(absolute_link)
58
+
59
if __name__ == "__main__":
    # Script entry point: crawl the configured start page into the
    # destination directory.
    entry_url = "https://svscomics.com/category/giantess/page/24"
    target_dir = "/home/svc"
    crawl(entry_url, target_dir)
@@ -0,0 +1,48 @@
1
+ import os
2
+ import requests
3
+ import os
4
+ import subprocess
5
+ import stat
6
+
7
def get_site(website, destination_dir, filename):
    """Download *website* to ``destination_dir/filename`` using curl.

    Args:
        website: URL to fetch.
        destination_dir: Directory to save into (created if missing).
        filename: Name of the output file.
    """
    # Ensure the directory exists
    os.makedirs(destination_dir, exist_ok=True)

    # Adjust directory permissions if needed (e.g. rwxr-xr-x -> 0o755)
    os.chmod(destination_dir, 0o755)

    # Construct the complete file path
    destination_path = os.path.join(destination_dir, filename)

    # SECURITY FIX: the original built a shell command with an f-string
    # and ran it through os.system, so a crafted URL or path could inject
    # arbitrary shell commands.  Passing an argument list to
    # subprocess.run with no shell removes that vector.
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 "
        "Safari/537.36"
    )
    subprocess.run(
        [
            "curl", "-L",
            "--output", destination_path,
            "-H", f"User-Agent: {user_agent}",
            "-H", "Accept: */*",
            website,
        ],
        check=False,  # match os.system: do not raise when curl fails
    )
25
+
26
def download_site(website, destination_dir, filename):
    """Download *website* with requests and save the body to a file.

    Args:
        website: URL to fetch.
        destination_dir: Directory to save into (created if missing).
        filename: Name of the output file.

    Raises:
        requests.HTTPError: If the response status indicates failure.
    """
    os.makedirs(destination_dir, exist_ok=True)
    os.chmod(destination_dir, 0o755)  # set directory permissions if needed

    destination_path = os.path.join(destination_dir, filename)

    # GET the resource.  BUG FIX: added a timeout so a dead server cannot
    # hang the call indefinitely.
    response = requests.get(website, headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/91.0.4472.124 Safari/537.36",
        "Accept": "*/*"
    }, allow_redirects=True, timeout=30)

    # Raise an exception if the download fails
    response.raise_for_status()

    # Write content to file
    with open(destination_path, "wb") as f:
        f.write(response.content)
46
# Module-level driver: fetch the configured site into the destination
# directory via curl.
website = 'https://www.pornhub.com'
destination = '/home/computron/Documents/doge'
get_site(website, destination, 'doge')
@@ -340,13 +340,13 @@ class SoupManagerSingleton():
340
340
  elif parse_type != SoupManagerSingleton._instance.parse_type or source_code != SoupManagerSingleton._instance.source_code:
341
341
  SoupManagerSingleton._instance = SoupManager(url_mgr,requestManager,parse_type=parse_type,source_code=source_code)
342
342
  return SoupManagerSingleton._instance
343
- def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None):
343
+ def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,parse_type="html.parser"):
344
344
  url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
345
345
  url = get_url(url=url,url_mgr=url_mgr)
346
346
  req_mgr = get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
347
347
  soup_mgr = soup_mgr or soupManager(url_mgr=url_mgr,req_mgr=req_mgr,url=url,source_code=source_code)
348
348
  return soup_mgr
349
- def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,tags_list = None):
349
+ def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,tags_list = None,parse_type="html.parser"):
350
350
  soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
351
351
  return soup_mgr.get_all_attribute_values(tags_list=tags_list)
352
352
  def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,parse_type="html.parser"):
@@ -1,206 +1,218 @@
1
- import os
2
- from .soupManager import *
3
- class VideoDownloader:
4
- """
5
- VideoDownloader is a class for downloading videos from URLs using YouTube-DL.
6
-
7
- Args:
8
- link (str or list): The URL(s) of the video(s) to be downloaded.
9
- temp_directory (str or None): The directory to store temporary video files (default is None, uses video_directory/temp_files).
10
- video_directory (str or None): The directory to store downloaded videos (default is None, uses 'videos' in the current working directory).
11
- remove_existing (bool): Whether to remove existing video files with the same name (default is True).
12
-
13
- Methods:
14
- count_outliers(speed, threshold): Count speed outliers below the threshold.
15
- filter_outliers(speeds): Filter out speed outliers in the list of speeds.
16
- remove_temps(file_name): Remove temporary video files based on the file name.
17
- move_video(): Move the downloaded video to the final directory.
18
- yt_dlp_downloader(url, ydl_opts={}, download=True): Download video information using YouTube-DL.
19
- progress_callback(d): Callback function to monitor download progress.
20
- download(): Download video(s) based on the provided URL(s).
21
- monitor(): Monitor the download progress.
22
- start(): Start the download and monitoring threads.
1
+ from abstract_webtools import requestManager, urlManager, soupManager, requests, linkManager
2
+ import threading,os,re,yt_dlp,urllib.request,m3u8_To_MP4,subprocess
3
+ from abstract_utilities import get_logFile,safe_dump_to_file
23
4
 
24
- Note:
25
- - The VideoDownloader class uses YouTube-DL to download videos.
26
- - It allows downloading from multiple URLs.
27
- - You need to have YouTube-DL installed to use this class.
28
- """
29
- def __init__(self, link,temp_directory=None,video_directory=None,remove_existing=True):
30
- if video_directory==None:
31
- video_directory=os.path.join(os.getcwd(),'videos')
32
- if temp_directory == None:
33
- temp_directory=os.path.join(video_directory,'temp_files')
34
- self.thread_manager = ThreadManager()
35
- self.pause_event = self.thread_manager.add_thread('pause_event')
36
- self.link = link
37
- self.temp_directory = temp_directory
38
- self.video_directory = video_directory
39
- self.remove_existing=remove_existing
40
- self.video_urls=self.link if isinstance(self.link,list) else [self.link]
5
+ from m3u8 import M3U8 # Install: pip install m3u8
6
+ from urllib.parse import urljoin
7
+ from yt_dlp.postprocessor.ffmpeg import FFmpegFixupPostProcessor
8
+ from abstract_math import divide_it,add_it,multiply_it,subtract_it
9
+ from abstract_webtools import *
10
+ logger = get_logFile('video_bp')
11
+ class VideoDownloader:
12
+ def __init__(self, url, title=None, download_directory=os.getcwd(), user_agent=None, video_extention='mp4',
13
+ download_video=True, get_info=False, auto_file_gen=True, standalone_download=False, output_filename=None):
14
+ self.url = url
15
+ self.monitoring = True
16
+ self.pause_event = threading.Event()
17
+ self.get_download = download_video
18
+ self.get_info = get_info
19
+ self.user_agent = user_agent
20
+ self.title = title
21
+ self.auto_file_gen = auto_file_gen
22
+ self.standalone_download = standalone_download
23
+ self.video_extention = video_extention
24
+ self.download_directory = download_directory
25
+ self.output_filename = output_filename # New parameter for custom filename
26
+ self.header = {} # Placeholder for UserAgentManagerSingleton if needed
27
+ self.base_name = os.path.basename(self.url)
28
+ self.file_name, self.ext = os.path.splitext(self.base_name)
29
+ self.video_urls = [self.url]
30
+ self.info = {}
41
31
  self.starttime = None
42
32
  self.downloaded = 0
43
- self.time_interval=60
44
- self.monitoring=True
45
- self.temp_file_name = None
46
- self.file_name = None
47
- self.dl_speed = None
48
- self.dl_eta=None
49
- self.total_bytes_est=None
50
- self.percent_speed=None
51
- self.percent=None
52
- self.speed_track = []
53
- self.video_url=None
54
- self.last_checked = get_time_stamp()
55
- self.num=0
56
- self.start()
57
- def count_outliers(self,speed,threshold):
58
- if speed < threshold:
59
- self.outlier_count+=1
33
+ self.video_urls = url if isinstance(url, list) else [url]
34
+ self.send_to_dl()
35
+
36
+ def get_request(self, url):
37
+ self.request_manager = requestManagerSingleton.get_instance(url=url)
38
+ return self.request_manager
39
+
40
+ def send_to_dl(self):
41
+ if self.standalone_download:
42
+ self.standalone_downloader()
60
43
  else:
61
- self.outlier_count=0
62
- def filter_outliers(self,speeds):
63
- # Step 1: Compute initial average
64
- initial_avg = sum(speeds) / len(speeds)
65
-
66
- # Step 2: Remove speeds 25% under the average
67
- threshold = initial_avg * 0.75 # 25% under average
68
- filtered_speeds = [speed for speed in speeds if speed >= threshold]
69
-
70
- # Step 3: Compute the new average of the filtered list
71
- if filtered_speeds: # Ensure the list is not empty
72
- self.count_outliers(speeds[-1],threshold)
73
- return filtered_speeds
44
+ self.start()
45
+
46
+ def get_headers(self, url):
47
+ response = requests.get(url)
48
+ if response.status_code == 200:
49
+ return response.headers
74
50
  else:
75
- # This can happen if all values are outliers, it's up to you how to handle it
76
- self.outlier_count=0
77
- return speeds
78
- def remove_temps(self,file_name):
79
- for temp_vid in os.listdir(self.temp_directory):
80
- if len(file_name)<=len(temp_vid):
81
- if temp_vid[:len(file_name)] == file_name:
82
- os.remove(os.path.join(self.temp_directory,temp_vid))
83
- print(f"removing {temp_vid} from {self.temp_directory}")
84
- def move_video(self):
85
- if os.path.exists(self.temp_file_path):
86
- shutil.move(self.temp_file_path, self.video_directory)
87
- print(f"moving {self.file_name} from {self.temp_directory} to {self.video_directory}")
88
- self.remove_temps(self.file_name)
89
- return True
90
- if os.path.exists(self.complete_file_path):
91
- print(f"{self.file_name} already existed in {self.video_directory}; removing it from {self.temp_directory}")
92
- self.remove_temps(self.file_name)
93
- return True
94
- return False
95
- def yt_dlp_downloader(self,url,ydl_opts={},download=True):
96
- try:
97
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
98
- self.info_dict=ydl.extract_info(url=url, download=download)
99
- return True
100
- except:
101
- return False
102
- def progress_callback(self, d):
103
- self.status_dict = d
104
- keys = ['status',
105
- 'downloaded_bytes',
106
- 'fragment_index',
107
- 'fragment_count',
108
- 'filename',
109
- 'tmpfilename',
110
- 'max_progress',
111
- 'progress_idx',
112
- 'elapsed',
113
- 'total_bytes_estimate',
114
- 'speed',
115
- 'eta',
116
- '_eta_str',
117
- '_speed_str',
118
- '_percent_str',
119
- '_total_bytes_str',
120
- '_total_bytes_estimate_str',
121
- '_downloaded_bytes_str',
122
- '_elapsed_str',
123
- '_default_template']
124
- if self.status_dict['status'] == 'finished':
125
- print("Done downloading, moving video to final directory...")
126
- self.move_video()
127
- return
128
- if get_time_stamp()-self.last_checked>5:
129
- print(self.status_dict['_default_template'])
130
- self.last_checked = get_time_stamp()
131
- if (get_time_stamp()-self.start_time/5)>6:
132
- self.speed_track.append(self.status_dict['speed'])
133
- self.speed_track=self.filter_outliers(self.speed_track)
134
-
51
+ logger.error(f"Failed to retrieve headers for {url}. Status code: {response.status_code}")
52
+ return {}
53
+
54
+ @staticmethod
55
+ def get_directory_path(directory, name, video_extention):
56
+ file_path = os.path.join(directory, f"{name}.{video_extention}")
57
+ i = 0
58
+ while os.path.exists(file_path):
59
+ file_path = os.path.join(directory, f"{name}_{i}.{video_extention}")
60
+ i += 1
61
+ return file_path
62
+
63
+ def progress_callback(self, stream, chunk, bytes_remaining):
64
+ total_size = stream.filesize
65
+ self.downloaded = total_size - bytes_remaining
66
+
135
67
  def download(self):
136
- if not os.path.exists(self.video_directory):
137
- os.makedirs(self.video_directory,exist_ok=True)
138
- if not os.path.exists(self.temp_directory):
139
- os.makedirs(self.temp_directory,exist_ok=True)
140
- for self.num,video_url in enumerate(self.video_urls):
141
- if video_url != self.video_url or self.video_url == None:
142
- self.video_url=video_url
143
- self.info_dict=None
144
- result = self.yt_dlp_downloader(url=self.video_url,ydl_opts={'quiet': True, 'no_warnings': True},download=False)
145
- if self.info_dict != None and result:
146
- self.start_time = get_time_stamp()
147
- self.downloaded = 0
148
- self.video_title = self.info_dict.get('title', None)
149
- self.video_ext = self.info_dict.get('ext', 'mp4')
150
- self.file_name =f"{self.video_title}.{self.video_ext}"
151
- self.temp_file_path = os.path.join(self.temp_directory, self.file_name)
152
- self.complete_file_path = os.path.join(self.video_directory, self.file_name)
153
- if not self.move_video():
154
- self.dl_speed = []
155
- self.percent=None
156
- self.dl_eta=None
157
- self.total_bytes_est=None
158
- self.percent_speed=None
159
- self.speed_track = []
160
- self.outlier_count=0
161
- ydl_opts = {
162
- 'outtmpl': self.temp_file_path,
163
- 'noprogress':True,
164
- 'progress_hooks': [self.progress_callback]
165
- }
166
-
167
-
168
- print("Starting download...") # Check if this point in code is reached
169
- result = self.yt_dlp_downloader(url=self.video_url,ydl_opts=ydl_opts,download=True)
170
- if result:
171
- print("Download finished!") # Check if download completes
172
- else:
173
- print(f'error downloding {self.video_url}')
174
- self.move_video()
175
- else:
176
- print(f"The video from {self.video_url} already exists in the directory {self.video_directory}. Skipping download.")
177
- else:
178
- print(f"could not find video info from {self.video_url} Skipping download.")
179
- if self.num==len(self.video_urls)-1:
180
- self.monitoring=False
181
- self.time_interval=0
68
+ for video_url in self.video_urls:
69
+ # Use custom filename if provided, otherwise generate a short temporary one
70
+ if self.output_filename:
71
+ outtmpl = os.path.join(self.download_directory, self.output_filename)
72
+ else:
73
+ temp_id = re.sub(r'[^\w\d.-]', '_', video_url)[-20:] # Short temp ID from URL
74
+ outtmpl = os.path.join(self.download_directory, f"temp_{temp_id}.%(ext)s")
182
75
 
76
+ ydl_opts = {
77
+ 'external_downloader': 'ffmpeg',
78
+ 'outtmpl': outtmpl,
79
+ 'noprogress': True,
80
+ 'quiet': True, # Reduce verbosity in logs
81
+ }
82
+ try:
83
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
84
+ self.info = ydl.extract_info(video_url, download=self.get_download)
85
+ self.downloading = False
86
+ self.starttime = get_time_stamp() # Assuming get_time_stamp() exists
87
+ if self.auto_file_gen:
88
+ file_path = ydl.prepare_filename(self.info)
89
+ if self.get_info:
90
+ self.info['file_path'] = file_path # Fixed typo 'aath'
91
+ if self.get_info:
92
+ self.stop()
93
+ return self.info
94
+ except Exception as e:
95
+ logger.error(f"Failed to download {video_url}: {str(e)}")
96
+ self.stop()
97
+ return self.info
98
+
183
99
  def monitor(self):
184
100
  while self.monitoring:
185
- self.thread_manager.wait(name='pause_event',n=self.time_interval)# check every minute
186
- if self.monitoring:
187
- if 'eta' in self.status_dict:
188
- if self.outlier_count>=3 and (self.status_dict['eta']/60)>10:
189
- self.start()
101
+ logger.info("Monitoring...")
102
+ self.pause_event.wait(60) # Check every minute
103
+ if self.starttime:
104
+ elapsed_time = subtract_it(get_time_stamp(),self.starttime)
105
+ if self.downloaded != 0 and elapsed_time != 0:
106
+ cumulative_time = add_it(self.downloaded,elapsed_time)
107
+ percent = divide_it(self.downloaded,cumulative_time)
108
+ else:
109
+ percent = 0
110
+ if elapsed_time != 0:
111
+ try:
112
+ downloaded_minutes = divide_it(elapsed_time,60)
113
+ estimated_download_minutes = divide_it(downloaded_minutes,percent)
114
+ estimated_download_time = subtract_it(estimated_download_minutes,downloaded_minutes)
115
+ except ZeroDivisionError:
116
+ logger.warning("Caught a division by zero in monitor!")
117
+ continue
118
+ if downloaded_minutes != 0 and subtract_it(percent,downloaded_minutes) != 0:
119
+ estimated_download_minutes = divide_it(downloaded_minutes,percent)
120
+ estimated_download_time = subtract_it(estimated_download_minutes,downloaded_minutes)
121
+ logger.info(f"Estimated download time: {estimated_download_time} minutes")
122
+ if estimated_download_time >= 1.5:
123
+ logger.info("Restarting download due to slow speed...")
124
+ self.start() # Restart download
190
125
 
191
126
  def start(self):
192
- download_thread = self.thread_manager.add_thread(name='download_thread',target_function=self.download)
193
- monitor_thread = self.thread_manager.add_thread(name='monitor_thread',target_function=self.monitor)
194
- self.thread_manager.start(name='download_thread')
195
- self.thread_manager.start(name='monitor_thread')
196
- self.thread_manager.join(name='download_thread')
197
- self.thread_manager.join(name='monitor_thread')
198
- class VideoDownloaderSingleton():
199
- _instance = None
200
- @staticmethod
201
- def get_instance(url_manager,request_manager,title=None,video_extention='mp4',download_directory=os.getcwd(),user_agent=None,download=True,get_info=False):
202
- if VideoDownloaderSingleton._instance is None:
203
- VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
204
- elif VideoDownloaderSingleton._instance.title != title or video_extention != VideoDownloaderSingleton._instance.video_extention or url != VideoDownloaderSingleton._instance.url or download_directory != VideoDownloaderSingleton._instance.download_directory or user_agent != VideoDownloaderSingleton._instance.user_agent:
205
- VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
206
- return VideoDownloaderSingleton._instance
127
+ self.download_thread = threading.Thread(target=self.download)
128
+ self.download_thread.daemon = True
129
+ self.monitor_thread = threading.Thread(target=self.monitor)
130
+ self.download_thread.start()
131
+ self.monitor_thread.start()
132
+ self.download_thread.join()
133
+ self.monitor_thread.join()
134
+
135
+ def stop(self):
136
+ self.monitoring = False
137
+ self.pause_event.set()
138
def download_image(url, save_path=None):
    """
    Downloads an image from a URL and saves it to the specified path.

    Args:
        url (str): The URL of the image to download
        save_path (str, optional): Path to save the image. If None, uses the filename from URL

    Returns:
        str: Path where the image was saved, or None if download failed
    """
    try:
        # Send GET request to the URL
        response = requests.get(url, stream=True)

        # Check if the request was successful
        if response.status_code == 200:
            # Set decode_content=True to automatically handle Content-Encoding
            response.raw.decode_content = True

            # If no save_path provided, extract filename from URL
            if save_path is None:
                save_path = url.split('/')[-1]

            # Ensure the directory exists.  BUG FIX: os.path.dirname()
            # returns '' for a bare filename and os.makedirs('') raises,
            # so the default-save-path branch always failed; only create
            # the directory when there actually is one.
            parent = os.path.dirname(save_path)
            if parent:
                os.makedirs(parent, exist_ok=True)

            # Write the image content to file
            with open(save_path, 'wb') as f:
                f.write(response.content)

            print(f"Image successfully downloaded to {save_path}")
            return save_path
        else:
            print(f"Failed to download image. Status code: {response.status_code}")
            return None

    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {str(e)}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        return None
183
def get_thumbnails(directory, info):
    """Download every thumbnail listed in *info* into ``<directory>/thumbnails``.

    Each thumbnail entry gains a ``'path'`` key pointing at the saved
    file.  Returns the (mutated) *info* dict.
    """
    thumbnails_dir = os.path.join(directory, 'thumbnails')
    os.makedirs(thumbnails_dir, exist_ok=True)
    thumbnails = info.get('thumbnails', [])
    for i, thumbnail_info in enumerate(thumbnails):
        thumbnail_url = thumbnail_info.get('url')
        # BUG FIX: entries without a URL crashed the .split() below.
        if not thumbnail_url:
            continue
        # Drop the query string before deriving a file name.
        thumbnail_base_url = thumbnail_url.split('?')[0]
        baseName = os.path.basename(thumbnail_base_url)
        resolution = info['thumbnails'][i].get('resolution')
        if resolution:
            baseName = f"{resolution}_{baseName}"
        img_id = info['thumbnails'][i].get('id')
        if img_id:
            baseName = f"{img_id}_{baseName}"
        thumbnail_path = os.path.join(thumbnails_dir, baseName)
        info['thumbnails'][i]['path'] = thumbnail_path
        download_image(thumbnail_url, save_path=thumbnail_path)
    return info
203
def downloadvideo(url, directory=False, thumbnails=True):
    """Download *url* into *directory* (cwd by default) and return its info.

    When *thumbnails* is true, the thumbnail images are fetched as well.
    """
    target_dir = directory or os.getcwd()
    # Derive a short, filesystem-safe temp name from the tail of the URL.
    safe_tail = re.sub(r'[^\w\d.-]', '_', url)[-20:]
    downloader = VideoDownloader(
        url=url,
        download_directory=target_dir,
        download_video=True,
        get_info=True,
        output_filename=f"temp_{safe_tail}.mp4",
    )
    result = downloader.info
    if thumbnails:
        result = get_thumbnails(target_dir, result)
    return result
@@ -0,0 +1,291 @@
1
+ from abstract_webtools import requestManager, urlManager, soupManager, requests, linkManager
2
+ import threading,os,re,yt_dlp,urllib.request,m3u8_To_MP4,subprocess
3
+ from abstract_utilities import get_logFile,safe_dump_to_file
4
+
5
+ from m3u8 import M3U8 # Install: pip install m3u8
6
+ from urllib.parse import urljoin
7
+ from yt_dlp.postprocessor.ffmpeg import FFmpegFixupPostProcessor
8
+ from abstract_math import divide_it,add_it,multiply_it,subtract_it
9
+ from abstract_pandas import *
10
class VideoDownloader:
    """Threaded yt-dlp wrapper that downloads one or more video URLs.

    The constructor immediately starts the download (via ``send_to_dl``),
    so building an instance is a blocking operation.
    """
    def __init__(self, url, title=None, download_directory=os.getcwd(), user_agent=None, video_extention='mp4',
                 download_video=True, get_info=False, auto_file_gen=True, standalone_download=False, output_filename=None):
        self.url = url
        self.monitoring = True
        self.pause_event = threading.Event()
        self.get_download = download_video
        self.get_info = get_info
        self.user_agent = user_agent
        self.title = title
        self.auto_file_gen = auto_file_gen
        self.standalone_download = standalone_download
        self.video_extention = video_extention
        self.download_directory = download_directory
        self.output_filename = output_filename  # custom output filename, optional
        self.header = {}  # Placeholder for UserAgentManagerSingleton if needed
        self.base_name = os.path.basename(self.url)
        self.file_name, self.ext = os.path.splitext(self.base_name)
        self.info = {}
        self.starttime = None
        self.downloaded = 0
        # BUG FIX: self.downloading was only ever assigned inside
        # download(); initialize it so reads before a download are safe.
        self.downloading = False
        self.video_urls = url if isinstance(url, list) else [url]
        self.send_to_dl()

    def get_request(self, url):
        # Delegates to the project-wide singleton request manager.
        self.request_manager = requestManagerSingleton.get_instance(url=url)
        return self.request_manager

    def send_to_dl(self):
        """Dispatch to the standalone downloader or the threaded start()."""
        if self.standalone_download:
            # NOTE(review): standalone_downloader is not defined in this
            # file — calling with standalone_download=True raises
            # AttributeError unless it is provided elsewhere; confirm.
            self.standalone_downloader()
        else:
            self.start()

    def get_headers(self, url):
        """Return the response headers for *url*, or {} on failure."""
        response = requests.get(url)
        if response.status_code == 200:
            return response.headers
        else:
            logger.error(f"Failed to retrieve headers for {url}. Status code: {response.status_code}")
            return {}

    @staticmethod
    def get_directory_path(directory, name, video_extention):
        """Return a path under *directory* that does not yet exist,
        suffixing ``_0``, ``_1``, ... to *name* as needed."""
        file_path = os.path.join(directory, f"{name}.{video_extention}")
        i = 0
        while os.path.exists(file_path):
            file_path = os.path.join(directory, f"{name}_{i}.{video_extention}")
            i += 1
        return file_path

    def progress_callback(self, stream, chunk, bytes_remaining):
        # Track bytes downloaded so monitor() can estimate progress.
        total_size = stream.filesize
        self.downloaded = total_size - bytes_remaining

    def download(self):
        """Download every URL in self.video_urls with yt-dlp/ffmpeg.

        Returns the last yt-dlp info dict (or the one for the first URL
        when get_info is set, which returns early by design).
        """
        for video_url in self.video_urls:
            # Use custom filename if provided, otherwise generate a short temporary one
            if self.output_filename:
                outtmpl = os.path.join(self.download_directory, self.output_filename)
            else:
                temp_id = re.sub(r'[^\w\d.-]', '_', video_url)[-20:]  # Short temp ID from URL
                outtmpl = os.path.join(self.download_directory, f"temp_{temp_id}.%(ext)s")

            ydl_opts = {
                'external_downloader': 'ffmpeg',
                'outtmpl': outtmpl,
                'noprogress': True,
                'quiet': True,  # Reduce verbosity in logs
            }
            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    self.info = ydl.extract_info(video_url, download=self.get_download)
                    self.downloading = False
                    # assumes get_time_stamp() comes from a star import — TODO confirm
                    self.starttime = get_time_stamp()
                    if self.auto_file_gen:
                        file_path = ydl.prepare_filename(self.info)
                        if self.get_info:
                            self.info['file_path'] = file_path
                    if self.get_info:
                        self.stop()
                        return self.info
            except Exception as e:
                logger.error(f"Failed to download {video_url}: {str(e)}")
                self.stop()
        # BUG FIX: the original returned inside the loop, so only the
        # first URL of a list was ever processed.
        return self.info

    def monitor(self):
        """Periodically estimate progress and restart slow downloads.

        Runs until stop() clears self.monitoring.  BUG FIX: the original
        could reference downloaded_minutes / estimated_download_time
        before assignment (NameError when elapsed_time was 0); the guard
        clauses below make every path well-defined.
        """
        while self.monitoring:
            logger.info("Monitoring...")
            self.pause_event.wait(60)  # Check every minute
            if not self.starttime:
                continue
            elapsed_time = subtract_it(get_time_stamp(), self.starttime)
            if elapsed_time == 0:
                continue
            if self.downloaded != 0:
                cumulative_time = add_it(self.downloaded, elapsed_time)
                percent = divide_it(self.downloaded, cumulative_time)
            else:
                percent = 0
            try:
                downloaded_minutes = divide_it(elapsed_time, 60)
                estimated_download_minutes = divide_it(downloaded_minutes, percent)
                estimated_download_time = subtract_it(estimated_download_minutes, downloaded_minutes)
            except ZeroDivisionError:
                logger.warning("Caught a division by zero in monitor!")
                continue
            logger.info(f"Estimated download time: {estimated_download_time} minutes")
            if estimated_download_time >= 1.5:
                logger.info("Restarting download due to slow speed...")
                self.start()  # Restart download

    def start(self):
        """Run download and monitor threads; blocks until both finish."""
        self.download_thread = threading.Thread(target=self.download)
        self.download_thread.daemon = True
        self.monitor_thread = threading.Thread(target=self.monitor)
        self.download_thread.start()
        self.monitor_thread.start()
        self.download_thread.join()
        self.monitor_thread.join()

    def stop(self):
        """Stop monitoring and wake the monitor thread immediately."""
        self.monitoring = False
        self.pause_event.set()
138
def download_image(url, save_path=None):
    """
    Downloads an image from a URL and saves it to the specified path.

    Args:
        url (str): The URL of the image to download
        save_path (str, optional): Path to save the image. If None, uses the filename from URL

    Returns:
        str: Path where the image was saved, or None if download failed
    """
    try:
        # Send GET request to the URL
        response = requests.get(url, stream=True)

        # Check if the request was successful
        if response.status_code == 200:
            # Set decode_content=True to automatically handle Content-Encoding
            response.raw.decode_content = True

            # If no save_path provided, extract filename from URL
            if save_path is None:
                save_path = url.split('/')[-1]

            # Ensure the directory exists.  BUG FIX: os.path.dirname()
            # returns '' for a bare filename and os.makedirs('') raises,
            # so the default-save-path branch always failed; only create
            # the directory when there actually is one.
            parent = os.path.dirname(save_path)
            if parent:
                os.makedirs(parent, exist_ok=True)

            # Write the image content to file
            with open(save_path, 'wb') as f:
                f.write(response.content)

            print(f"Image successfully downloaded to {save_path}")
            return save_path
        else:
            print(f"Failed to download image. Status code: {response.status_code}")
            return None

    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {str(e)}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        return None
183
def get_thumbnails(directory, info):
    """Download every thumbnail listed in *info* into ``<directory>/thumbnails``.

    Each thumbnail entry gains a ``'path'`` key pointing at the saved
    file.  Returns the (mutated) *info* dict.
    """
    thumbnails_dir = os.path.join(directory, 'thumbnails')
    os.makedirs(thumbnails_dir, exist_ok=True)
    thumbnails = info.get('thumbnails', [])
    for i, thumbnail_info in enumerate(thumbnails):
        thumbnail_url = thumbnail_info.get('url')
        # BUG FIX: entries without a URL crashed the .split() below.
        if not thumbnail_url:
            continue
        # Drop the query string before deriving a file name.
        thumbnail_base_url = thumbnail_url.split('?')[0]
        baseName = os.path.basename(thumbnail_base_url)
        resolution = info['thumbnails'][i].get('resolution')
        if resolution:
            baseName = f"{resolution}_{baseName}"
        img_id = info['thumbnails'][i].get('id')
        if img_id:
            baseName = f"{img_id}_{baseName}"
        thumbnail_path = os.path.join(thumbnails_dir, baseName)
        info['thumbnails'][i]['path'] = thumbnail_path
        download_image(thumbnail_url, save_path=thumbnail_path)
    return info
203
def download_audio(directory, info):
    """
    Download the highest-quality audio (e.g., hls-audio-128000-Audio) from info.json and save it to a directory.

    Args:
        directory (str): Base directory for saving files
        info (dict): Dictionary containing video metadata from info.json, including 'formats' and 'video_id'

    Returns:
        dict: Updated info with the audio file path
    """
    # Create an 'audio' subdirectory
    audio_dir = os.path.join(directory, 'audio')
    os.makedirs(audio_dir, exist_ok=True)

    # Find the highest-quality audio format (e.g., hls-audio-128000-Audio)
    audio_formats = [f for f in info.get('formats', []) if f['format_id'].startswith('hls-audio')]
    if not audio_formats:
        logger.info("No audio formats found in info.json")
        return info
    # Sort by bitrate (tbr) to get the highest quality
    audio_format = max(audio_formats, key=lambda x: x.get('tbr', 0))
    audio_url = audio_format.get('url')
    audio_ext = audio_format.get('ext', 'mp4')  # Default to MP4 if not specified

    # Extract video_id and title for the output filename
    video_id = info.get('video_id', 'unknown_video')
    title = info.get('title', 'audio').replace(' ', '_')  # Clean title for filename
    filename = f"{title}_{video_id}.{audio_ext}"
    audio_path = os.path.join(audio_dir, filename)

    # Download and process the M3U8/HLS audio stream
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.19 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Sec-Fetch-Mode': 'navigate'
        }
        # Fetch the M3U8 playlist
        response = requests.get(audio_url, headers=headers)
        response.raise_for_status()

        # Parse the M3U8 playlist
        m3u8_obj = M3U8(response.text)
        base_url = '/'.join(audio_url.split('/')[:-1]) + '/'  # Base URL for relative segment paths

        # Download every TS segment straight to temporary files
        temp_dir = os.path.join(audio_dir, 'temp_segments')
        os.makedirs(temp_dir, exist_ok=True)
        segment_paths = []
        for i, segment in enumerate(m3u8_obj.segments):
            segment_url = urljoin(base_url, segment.uri)
            segment_response = requests.get(segment_url, headers=headers)
            segment_response.raise_for_status()
            segment_path = os.path.join(temp_dir, f'segment_{i}.ts')
            with open(segment_path, 'wb') as f:
                f.write(segment_response.content)
            segment_paths.append(segment_path)

        # Concatenate the TS segments into a single audio file.
        # BUG FIX: the original called an undefined name `ffmpeg`
        # (ffmpeg-python is never imported) and then read e.stderr on a
        # generic Exception; run the ffmpeg CLI through the
        # already-imported subprocess module instead.
        concat_input = 'concat:' + '|'.join(segment_paths)
        try:
            subprocess.run(
                ['ffmpeg', '-y', '-loglevel', 'quiet',
                 '-i', concat_input, '-c', 'copy', audio_path],
                check=True,
            )
        except subprocess.CalledProcessError as e:
            logger.info(f"FFmpeg error: {e}")

        # Clean up temporary segment files
        for segment_path in segment_paths:
            os.remove(segment_path)
        os.rmdir(temp_dir)

        # Update info with the audio path
        info['audio_path'] = audio_path
        info['audio_url'] = f"https://clownworld.biz/data/downloads/videos/videos/{video_id}/audio/(unknown)"

    except requests.RequestException as e:
        logger.info(f"Failed to download audio: {str(e)}")
    except Exception as e:
        logger.info(f"Error processing audio: {str(e)}")

    return info
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.23
3
+ Version: 0.1.6.25
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -13,10 +13,19 @@ Classifier: Programming Language :: Python :: 3.11
13
13
  Requires-Python: >=3.6
14
14
  Description-Content-Type: text/markdown
15
15
  License-File: LICENSE
16
- Requires-Dist: abstract-utilities >=0.2.2.30
17
- Requires-Dist: PySimpleGUI >=4.60.5
18
- Requires-Dist: urllib3 >=2.0.4
19
- Requires-Dist: requests >=2.31.0
16
+ Requires-Dist: abstract_utilities>=0.2.2.30
17
+ Requires-Dist: PySimpleGUI>=4.60.5
18
+ Requires-Dist: urllib3>=2.0.4
19
+ Requires-Dist: requests>=2.31.0
20
+ Dynamic: author
21
+ Dynamic: author-email
22
+ Dynamic: classifier
23
+ Dynamic: description
24
+ Dynamic: description-content-type
25
+ Dynamic: home-page
26
+ Dynamic: requires-dist
27
+ Dynamic: requires-python
28
+ Dynamic: summary
20
29
 
21
30
  # Abstract WebTools
22
31
  Provides utilities for inspecting and parsing web content, including React components and URL utilities, with enhanced capabilities for managing HTTP requests and TLS configurations.
@@ -8,6 +8,8 @@ abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f
8
8
  abstract_webtools/managers/__init__.py,sha256=5aIpbdUsDWTrhPUAjfIKnG54OULqOKan9LBL5EIUllo,407
9
9
  abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
10
10
  abstract_webtools/managers/crawlManager.py,sha256=62Ej6AQC6-qXX_EWOmcJ2szNvEjmebFGugMz65HF1qI,12983
11
+ abstract_webtools/managers/crawlmgr2.py,sha256=PvHas-FSlp98osc-2so9zw-2c7amUMdwIj6tmc6Rl00,1910
12
+ abstract_webtools/managers/curlMgr.py,sha256=ghi0QsSAxjZu3HALFST5Kv_262XhHSAPGlQLvmguxPY,1657
11
13
  abstract_webtools/managers/domainManager.py,sha256=95znOBv05W77mW_fbZAfl4RmlENDlYqhEOMkL02L220,3610
12
14
  abstract_webtools/managers/dynamicRateLimiter.py,sha256=gopQcQo50JG2D0KcyepNCIQ_1uDQEBIHBzWf4R2Wgy0,7617
13
15
  abstract_webtools/managers/get_test.py,sha256=nISrhUGdyvRv18wTGoifGhizBFoHeK0N3FymMASloFw,825
@@ -16,14 +18,16 @@ abstract_webtools/managers/mySocketClient.py,sha256=-j1Q8Ds9RCSbjZdx3ZF9mVpgwxaO
16
18
  abstract_webtools/managers/networkManager.py,sha256=Op2QDXrP-gmm0tCToe-Ryt9xuOtMppcN2KLKP1WZiu0,952
17
19
  abstract_webtools/managers/requestManager.py,sha256=zXD31WAYghV1OjnTQzRQnQGqZz6_J4mjHTdNLnBop_0,17343
18
20
  abstract_webtools/managers/seleniumManager.py,sha256=qSY8gH3N5YJIMwE_Alj9HNQRip_PziIo4_T9AZE_FQo,4273
19
- abstract_webtools/managers/soupManager.py,sha256=7nDB_QKneGjyTZUzchfbdHNvxxYiTyIn8AHon8ObTSY,17148
21
+ abstract_webtools/managers/soupManager.py,sha256=-_mRCWlyzfKlF64UU53WXBmCvJ98jQ4GyHh8S8Pw3xs,17198
20
22
  abstract_webtools/managers/sslManager.py,sha256=C-QgQw9CW84uOE5kx2MPjC3RsLbE2JQqdwdTs0H4ecc,1370
21
23
  abstract_webtools/managers/tlsAdapter.py,sha256=XZSMZz9EUOhv-h3_Waf6mjV1dA3oN_M_oWuoo4VZ_HE,1454
22
24
  abstract_webtools/managers/urlManager.py,sha256=Dvf-TiSo5j_YjZS2Eq6lFfbhveneD6NA_wEE0xUXy_E,8858
23
25
  abstract_webtools/managers/userAgentManager.py,sha256=33SB2p2FG7EYZl7l2iYm1U4gI9PcdkGTZHw5lg_Ogrw,1653
24
- abstract_webtools/managers/videoDownloader.py,sha256=4sPV0D8f3_S8qNYSySfB_b-aBP_xAm4Ex7MJ1WIFhHE,10567
25
- abstract_webtools-0.1.6.23.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
26
- abstract_webtools-0.1.6.23.dist-info/METADATA,sha256=3SlDHjHws2FzMhvMfr8jN48y37Ad5R50UlnTaGiTaws,15858
27
- abstract_webtools-0.1.6.23.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
28
- abstract_webtools-0.1.6.23.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
29
- abstract_webtools-0.1.6.23.dist-info/RECORD,,
26
+ abstract_webtools/managers/videoDownloader.py,sha256=oFmRsN84_GACAhVpk21SzFJbHfJZMBLQHMUVLcKK9OI,9388
27
+ abstract_webtools/managers/videoDownloader2.py,sha256=v3H6akdhvVWGrB-r35m3cp_-aKkNWadpfCiMylOnv6w,12748
28
+ abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
29
+ abstract_webtools-0.1.6.25.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
30
+ abstract_webtools-0.1.6.25.dist-info/METADATA,sha256=_Jl7eCzHpI7lehgiyjXQlXWQhtDNqpFFqSojUhxx0JY,16051
31
+ abstract_webtools-0.1.6.25.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
32
+ abstract_webtools-0.1.6.25.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
33
+ abstract_webtools-0.1.6.25.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.8.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5