abstract-webtools 0.1.6.105__py3-none-any.whl → 0.1.6.107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/k2s_downloader.py +117 -55
- {abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/RECORD +5 -5
- {abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/top_level.txt +0 -0
abstract_webtools/k2s_downloader.py

@@ -2,6 +2,7 @@ import os
 import re
 import time
 import requests
+import hashlib
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
 from selenium import webdriver
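Note: the only addition in this hunk is `import hashlib`, and none of the diffed hunks below show a call site, so its intended use is not visible in this diff. If it is meant for fingerprinting downloaded files (a common de-duplication pattern that would fit the metadata changes below), a minimal sketch would look like this; the `file_sha256` helper is hypothetical and not part of the package:

import hashlib

def file_sha256(path, chunk_size=8192):
    # Hash in chunks so large downloads never need to fit in memory.
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()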
@@ -11,10 +12,12 @@ from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from abstract_security import *
 from abstract_webtools import *
-from abstract_utilities import safe_dump_to_file,safe_load_from_json
+from abstract_utilities import safe_dump_to_file, safe_load_from_json
+
 DOWNLOAD_DIR = os.path.abspath("./downloads")
+
 class K2SDownloader:
-    def __init__(self,env_path=None,download_dir=None,json_file_path=None):
+    def __init__(self, env_path=None, download_dir=None, json_file_path=None):
         self.download_dir = download_dir or DOWNLOAD_DIR
         self.json_file_path = json_file_path
         os.makedirs(self.download_dir, exist_ok=True)
@@ -33,28 +36,63 @@ class K2SDownloader:
         return webdriver.Chrome(options=options)
 
     def login(self):
-        userName = get_env_value('userName',path=self.env_path)
-        passWord = get_env_value('passWord',path=self.env_path)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        userName = get_env_value('userName', path=self.env_path)
+        passWord = get_env_value('passWord', path=self.env_path)
+
+        try:
+            self.driver.get("https://k2s.cc/auth/login")
+            print("Navigating to login page")
+            time.sleep(3)
+
+            email_input = WebDriverWait(self.driver, 10).until(
+                EC.presence_of_element_located((By.NAME, "email"))
+            )
+            password_input = WebDriverWait(self.driver, 10).until(
+                EC.presence_of_element_located((By.NAME, "password"))  # Updated field name
+            )
+            email_input.send_keys(userName)
+            password_input.send_keys(passWord)
+            password_input.send_keys(Keys.RETURN)
+            print("Submitted login credentials")
+
+            WebDriverWait(self.driver, 15).until(
+                EC.url_contains("dashboard")  # Adjust based on post-login URL
+            )
+            self.logged_in = True
+            print("Login successful")
+        except Exception as e:
+            print(f"Login failed: {e}")
+            with open('login_error.html', 'w', encoding='utf-8') as f:
+                f.write(self.driver.page_source)
+            raise
+
+    def get_file_metadata(self, download_url):
+        """Fetch filename and metadata using a HEAD request or page inspection."""
+        metadata = {'url': download_url, 'filename': None, 'size': None}
+        try:
+            # Try HEAD request first
+            response = self.session.head(download_url, allow_redirects=True)
+            if response.status_code == 200:
+                cd = response.headers.get('Content-Disposition', '')
+                if 'filename=' in cd:
+                    metadata['filename'] = cd.split('filename=')[-1].strip('"')
+                metadata['size'] = response.headers.get('Content-Length')
+                if not metadata['filename']:
+                    metadata['filename'] = download_url.split('/')[-1].split('?')[0]
+            else:
+                # Fallback to page inspection if HEAD fails
+                self.driver.get(download_url)
+                WebDriverWait(self.driver, 10).until(
+                    EC.presence_of_element_located((By.TAG_NAME, "body"))
+                )
+                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
+                filename_tag = soup.select_one('a[href*="/download"]')
+                metadata['filename'] = filename_tag.text.strip() if filename_tag else download_url.split('/')[-1]
+                size_tag = soup.find(string=re.compile(r'\d+\.?\d*\s*(MB|GB|KB)'))
+                metadata['size'] = size_tag.strip() if size_tag else None
+        except Exception as e:
+            print(f"Failed to fetch metadata for {download_url}: {e}")
+        return metadata
 
     def download_file(self, url):
         if not self.logged_in:
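The new `get_file_metadata` parses `Content-Disposition` by splitting on `'filename='`, which mishandles headers that carry parameters after the filename (and RFC 5987 `filename*=` forms). A more robust standalone sketch using only the standard library; the `parse_cd_filename` name is illustrative, not part of this package:

from email.message import Message

def parse_cd_filename(cd_header):
    # Message.get_param understands quoted strings and RFC 2231/5987
    # encodings, unlike naive splitting on 'filename='.
    msg = Message()
    msg['Content-Disposition'] = cd_header
    value = msg.get_param('filename', header='content-disposition')
    if isinstance(value, tuple):
        # RFC 2231 encoded parameter: (charset, language, value)
        value = value[2]
    return value

# Example: for 'attachment; filename="movie.mp4"; size=123' the split-based
# version returns 'movie.mp4"; size=123', while this returns 'movie.mp4'.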
@@ -62,83 +100,107 @@ class K2SDownloader:
 
         print(f"Navigating to: {url}")
         self.driver.get(url)
-
+        WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
 
         if 'captcha' in self.driver.page_source.lower():
             print("CAPTCHA detected. Manual intervention required.")
-            return
+            return None
 
         try:
            download_button = WebDriverWait(self.driver, 30).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[href*="/download"], button[class*="download"]'))
            )
-            print("Download button found; attempting to
+            print("Download button found; attempting to fetch URL")
             download_url = download_button.get_attribute('href')
 
             if download_url:
-
-
+                # Get metadata before downloading
+                metadata = self.get_file_metadata(download_url)
+                file_name = metadata['filename'] or self._extract_filename(None, download_url)
                 file_path = os.path.join(self.download_dir, file_name)
 
+                # Download the file
+                response = self.session.get(download_url, stream=True)
+                response.raise_for_status()
+
                 with open(file_path, 'wb') as f:
                     for chunk in response.iter_content(chunk_size=8192):
                         f.write(chunk)
                 print(f"Downloaded: {file_path}")
-
+
+                # Update metadata with file size if not already set
+                if not metadata['size']:
+                    metadata['size'] = os.path.getsize(file_path)
+                metadata['file_path'] = file_path
+
+                return metadata
             else:
                 download_button.click()
                 print("Button clicked. Waiting for download...")
-                time.sleep(30)
+                time.sleep(30)
+                return None
         except Exception as e:
             print(f"Download failed for {url}: {e}")
+            return None
 
     def _extract_filename(self, response, url):
-
-
-
+        if response:
+            cd = response.headers.get('Content-Disposition', '')
+            if 'filename=' in cd:
+                return cd.split('filename=')[-1].strip('"')
         return url.split('/')[-1].split('?')[0]
-
-
+
+def get_json_key_value(json_data, key):
+    if json_data and isinstance(json_data, dict):
         return json_data.get(key)
-
-
-
-
-
-
-
+
+def compare_keys(json_data, comp_json_data, key):
+    json_key_value = get_json_key_value(json_data, key)
+    comp_json_key_value = get_json_key_value(comp_json_data, key)
+    return json_key_value and comp_json_key_value and json_key_value == comp_json_key_value
+
+def check_json_data(json_list, new_data):
+    keys = ['k2s', 'filename', 'size']  # Check k2s URL, filename, and size
     for json_data in json_list:
         for key in keys:
-
-
-
+            if compare_keys(json_data, new_data, key):
+                return True
+    return False
 
 class dlsManager:
     def __init__(self, downloader):
         self.downloader = downloader
         self.json_file_path = self.downloader.json_file_path
-        all_dls= None
+        all_dls = None
         if self.json_file_path:
             all_dls = safe_load_from_json(self.json_file_path)
-        self.all_dls = all_dls or
+        self.all_dls = all_dls or []
         self.last_data = None
+
     def is_prev_dl(self, data):
-
+        # Include metadata in data for duplicate checking
+        extended_data = data.copy()
+        if data.get('k2s'):
+            metadata = self.downloader.get_file_metadata(data['k2s'])
+            extended_data.update({
+                'filename': metadata['filename'],
+                'size': metadata['size']
+            })
+        if check_json_data(self.all_dls, extended_data):
             self.last_data = None
             return True
-        self.last_data =
+        self.last_data = extended_data
         return False
 
     def dl_k2s_link(self, k2s_link):
         if k2s_link:
             print(f"Downloading: {k2s_link}")
-            self.downloader.download_file(k2s_link)
+            metadata = self.downloader.download_file(k2s_link)
             time.sleep(10)
-            if self.json_file_path:
+            if metadata and self.json_file_path and self.last_data:
+                self.last_data.update(metadata)  # Merge download metadata
                 self.all_dls.append(self.last_data)
-                safe_dump_to_file(data=self.all_dls,
-                                  file_path=self.json_file_path)
-
+                safe_dump_to_file(data=self.all_dls, file_path=self.json_file_path)
 
 def get_soup(url):
     try:
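For reference, the duplicate check added above treats a new entry as already downloaded if any one of the keys `k2s`, `filename`, or `size` matches a stored record. A small usage sketch against the helpers defined in this hunk (the sample records are made up):

all_dls = [
    {'k2s': 'https://k2s.cc/file/abc123', 'filename': 'clip.mp4', 'size': '700 MB'},
]
new_data = {'k2s': 'https://k2s.cc/file/xyz789', 'filename': 'clip.mp4', 'size': None}

# Prints True: the filenames match even though the URLs differ. Matching on
# any single key is deliberately aggressive; two distinct files that happen
# to share a name (or a size) will be treated as duplicates and skipped.
print(check_json_data(all_dls, new_data))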
{abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.105
+Version: 0.1.6.107
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
{abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/RECORD

@@ -6,7 +6,7 @@ abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSj
 abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
 abstract_webtools/extention_list.py,sha256=gRSO4nMbuuXDYzd-ss4s64sS80ZHmUoazMCpgoKG5vE,4884
 abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZCQ,2609
-abstract_webtools/k2s_downloader.py,sha256=
+abstract_webtools/k2s_downloader.py,sha256=m2M1LlYdXGSOy3MNn8YPn0Gz70LLbXXDa_aUP3tvUm0,9213
 abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
 abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
 abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
@@ -42,7 +42,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
 abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
 abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
 abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
-abstract_webtools-0.1.6.
-abstract_webtools-0.1.6.
-abstract_webtools-0.1.6.
-abstract_webtools-0.1.6.
+abstract_webtools-0.1.6.107.dist-info/METADATA,sha256=std8u1_zW1pWxvRY0djcPaeCuUUX9yohGkY8fT-cwTs,7289
+abstract_webtools-0.1.6.107.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+abstract_webtools-0.1.6.107.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+abstract_webtools-0.1.6.107.dist-info/RECORD,,
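Background on the RECORD entries above: per the wheel spec (PEP 376/427), each line is `path,sha256=<digest>,<size>`, where the digest is the urlsafe base64 encoding of the file's SHA-256 hash with trailing `=` padding stripped. A minimal sketch that reproduces an entry:

import base64, hashlib, os

def record_entry(path):
    # Wheel RECORD digests: urlsafe base64, '=' padding removed.
    with open(path, 'rb') as f:
        digest = hashlib.sha256(f.read()).digest()
    encoded = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
    return '{},sha256={},{}'.format(path, encoded, os.path.getsize(path))

# record_entry('abstract_webtools/k2s_downloader.py')
# -> 'abstract_webtools/k2s_downloader.py,sha256=m2M1LlYdXGSO...,9213'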
{abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/WHEEL RENAMED
File without changes

{abstract_webtools-0.1.6.105.dist-info → abstract_webtools-0.1.6.107.dist-info}/top_level.txt RENAMED
File without changes