abstract-webtools 0.1.6.106__py3-none-any.whl → 0.1.6.107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import os
2
2
  import re
3
3
  import time
4
4
  import requests
5
+ import hashlib
5
6
  from bs4 import BeautifulSoup
6
7
  from urllib.parse import urljoin
7
8
  from selenium import webdriver
@@ -11,10 +12,12 @@ from selenium.webdriver.support.ui import WebDriverWait
11
12
  from selenium.webdriver.support import expected_conditions as EC
12
13
  from abstract_security import *
13
14
  from abstract_webtools import *
14
- from abstract_utilities import safe_dump_to_file,safe_load_from_json
15
+ from abstract_utilities import safe_dump_to_file, safe_load_from_json
16
+
15
17
  DOWNLOAD_DIR = os.path.abspath("./downloads")
18
+
16
19
  class K2SDownloader:
17
- def __init__(self,env_path=None,download_dir=None,json_file_path=None):
20
+ def __init__(self, env_path=None, download_dir=None, json_file_path=None):
18
21
  self.download_dir = download_dir or DOWNLOAD_DIR
19
22
  self.json_file_path = json_file_path
20
23
  os.makedirs(self.download_dir, exist_ok=True)
@@ -33,28 +36,63 @@ class K2SDownloader:
33
36
  return webdriver.Chrome(options=options)
34
37
 
35
38
  def login(self):
36
- userName = get_env_value('userName',path=self.env_path)
37
- passWord = get_env_value('passWord',path=self.env_path)
38
-
39
- self.driver.get("https://k2s.cc/auth/login")
40
- time.sleep(3)
41
-
42
-
43
- email_input = self.driver.find_element(By.NAME, "email")
44
- password_input = self.driver.find_element(By.NAME, "input-password-auto-complete-on")
45
- email_input.send_keys(userName)
46
- password_input.send_keys(passWord)
47
- password_input.send_keys(Keys.RETURN)
48
-
49
- #WebDriverWait(self.driver, 20).until(
50
- # EC.presence_of_element_located((By.XPATH, "//a[contains(text(), 'Logout')]"))
51
- #)
52
- self.logged_in = True
53
- print("Login successful")
54
- #except Exception as e:
55
- # print(f"Login failed: {e}")
56
- # with open('login_error.html', 'w', encoding='utf-8') as f:
57
- # f.write(self.driver.page_source)
39
+ userName = get_env_value('userName', path=self.env_path)
40
+ passWord = get_env_value('passWord', path=self.env_path)
41
+
42
+ try:
43
+ self.driver.get("https://k2s.cc/auth/login")
44
+ print("Navigating to login page")
45
+ time.sleep(3)
46
+
47
+ email_input = WebDriverWait(self.driver, 10).until(
48
+ EC.presence_of_element_located((By.NAME, "email"))
49
+ )
50
+ password_input = WebDriverWait(self.driver, 10).until(
51
+ EC.presence_of_element_located((By.NAME, "password")) # Updated field name
52
+ )
53
+ email_input.send_keys(userName)
54
+ password_input.send_keys(passWord)
55
+ password_input.send_keys(Keys.RETURN)
56
+ print("Submitted login credentials")
57
+
58
+ WebDriverWait(self.driver, 15).until(
59
+ EC.url_contains("dashboard") # Adjust based on post-login URL
60
+ )
61
+ self.logged_in = True
62
+ print("Login successful")
63
+ except Exception as e:
64
+ print(f"Login failed: {e}")
65
+ with open('login_error.html', 'w', encoding='utf-8') as f:
66
+ f.write(self.driver.page_source)
67
+ raise
68
+
69
+ def get_file_metadata(self, download_url):
70
+ """Fetch filename and metadata using a HEAD request or page inspection."""
71
+ metadata = {'url': download_url, 'filename': None, 'size': None}
72
+ try:
73
+ # Try HEAD request first
74
+ response = self.session.head(download_url, allow_redirects=True)
75
+ if response.status_code == 200:
76
+ cd = response.headers.get('Content-Disposition', '')
77
+ if 'filename=' in cd:
78
+ metadata['filename'] = cd.split('filename=')[-1].strip('"')
79
+ metadata['size'] = response.headers.get('Content-Length')
80
+ if not metadata['filename']:
81
+ metadata['filename'] = download_url.split('/')[-1].split('?')[0]
82
+ else:
83
+ # Fallback to page inspection if HEAD fails
84
+ self.driver.get(download_url)
85
+ WebDriverWait(self.driver, 10).until(
86
+ EC.presence_of_element_located((By.TAG_NAME, "body"))
87
+ )
88
+ soup = BeautifulSoup(self.driver.page_source, 'html.parser')
89
+ filename_tag = soup.select_one('a[href*="/download"]')
90
+ metadata['filename'] = filename_tag.text.strip() if filename_tag else download_url.split('/')[-1]
91
+ size_tag = soup.find(string=re.compile(r'\d+\.?\d*\s*(MB|GB|KB)'))
92
+ metadata['size'] = size_tag.strip() if size_tag else None
93
+ except Exception as e:
94
+ print(f"Failed to fetch metadata for {download_url}: {e}")
95
+ return metadata
58
96
 
59
97
  def download_file(self, url):
60
98
  if not self.logged_in:
@@ -62,56 +100,72 @@ class K2SDownloader:
62
100
 
63
101
  print(f"Navigating to: {url}")
64
102
  self.driver.get(url)
65
- time.sleep(5)
103
+ WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
66
104
 
67
105
  if 'captcha' in self.driver.page_source.lower():
68
106
  print("CAPTCHA detected. Manual intervention required.")
69
- return
107
+ return None
70
108
 
71
109
  try:
72
110
  download_button = WebDriverWait(self.driver, 30).until(
73
111
  EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[href*="/download"], button[class*="download"]'))
74
112
  )
75
- print("Download button found; attempting to click or fetch URL")
113
+ print("Download button found; attempting to fetch URL")
76
114
  download_url = download_button.get_attribute('href')
77
115
 
78
116
  if download_url:
79
- response = self.session.get(download_url, stream=True)
80
- file_name = self._extract_filename(response, download_url)
117
+ # Get metadata before downloading
118
+ metadata = self.get_file_metadata(download_url)
119
+ file_name = metadata['filename'] or self._extract_filename(None, download_url)
81
120
  file_path = os.path.join(self.download_dir, file_name)
82
121
 
122
+ # Download the file
123
+ response = self.session.get(download_url, stream=True)
124
+ response.raise_for_status()
125
+
83
126
  with open(file_path, 'wb') as f:
84
127
  for chunk in response.iter_content(chunk_size=8192):
85
128
  f.write(chunk)
86
129
  print(f"Downloaded: {file_path}")
87
- return file_path
130
+
131
+ # Update metadata with file size if not already set
132
+ if not metadata['size']:
133
+ metadata['size'] = os.path.getsize(file_path)
134
+ metadata['file_path'] = file_path
135
+
136
+ return metadata
88
137
  else:
89
138
  download_button.click()
90
139
  print("Button clicked. Waiting for download...")
91
- time.sleep(30) # adjust as needed
140
+ time.sleep(30)
141
+ return None
92
142
  except Exception as e:
93
143
  print(f"Download failed for {url}: {e}")
144
+ return None
94
145
 
95
146
  def _extract_filename(self, response, url):
96
- cd = response.headers.get('Content-Disposition', '')
97
- if 'filename=' in cd:
98
- return cd.split('filename=')[-1].strip('"')
147
+ if response:
148
+ cd = response.headers.get('Content-Disposition', '')
149
+ if 'filename=' in cd:
150
+ return cd.split('filename=')[-1].strip('"')
99
151
  return url.split('/')[-1].split('?')[0]
100
- def get_json_key_value(json_data,key):
101
- if json_data and isinstance(json_data,dict):
152
+
153
+ def get_json_key_value(json_data, key):
154
+ if json_data and isinstance(json_data, dict):
102
155
  return json_data.get(key)
103
- def compare_keys(json_data,comp_json_data,key):
104
- json_key_value = get_json_key_value(json_data,key)
105
- comp_json_key_value = get_json_key_value(comp_json_data,key)
106
- if json_key_value and comp_json_key_value and comp_json_key_value==json_key_value:
107
- return True
108
- def check_json_data(json_list,new_data):
109
- keys = ['k2s','link','name']
156
+
157
+ def compare_keys(json_data, comp_json_data, key):
158
+ json_key_value = get_json_key_value(json_data, key)
159
+ comp_json_key_value = get_json_key_value(comp_json_data, key)
160
+ return json_key_value and comp_json_key_value and json_key_value == comp_json_key_value
161
+
162
+ def check_json_data(json_list, new_data):
163
+ keys = ['k2s', 'filename', 'size'] # Check k2s URL, filename, and size
110
164
  for json_data in json_list:
111
165
  for key in keys:
112
- result = compare_keys(json_data,new_data,key)
113
- if result:
114
- return result
166
+ if compare_keys(json_data, new_data, key):
167
+ return True
168
+ return False
115
169
 
116
170
  class dlsManager:
117
171
  def __init__(self, downloader):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.106
3
+ Version: 0.1.6.107
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -6,7 +6,7 @@ abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSj
6
6
  abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
7
7
  abstract_webtools/extention_list.py,sha256=gRSO4nMbuuXDYzd-ss4s64sS80ZHmUoazMCpgoKG5vE,4884
8
8
  abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZCQ,2609
9
- abstract_webtools/k2s_downloader.py,sha256=aiUTLqFSNC_S9lOs98hfuUVj5agDbEiEVWmUqDLXdPU,6708
9
+ abstract_webtools/k2s_downloader.py,sha256=m2M1LlYdXGSOy3MNn8YPn0Gz70LLbXXDa_aUP3tvUm0,9213
10
10
  abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
11
11
  abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
12
12
  abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
@@ -42,7 +42,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
42
42
  abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
43
43
  abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
44
44
  abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
45
- abstract_webtools-0.1.6.106.dist-info/METADATA,sha256=Sxl5mcl7sgkg4FHIpNmaGT7cKHSoGp5DXOGpmkLJSWI,7289
46
- abstract_webtools-0.1.6.106.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
47
- abstract_webtools-0.1.6.106.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
48
- abstract_webtools-0.1.6.106.dist-info/RECORD,,
45
+ abstract_webtools-0.1.6.107.dist-info/METADATA,sha256=std8u1_zW1pWxvRY0djcPaeCuUUX9yohGkY8fT-cwTs,7289
46
+ abstract_webtools-0.1.6.107.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
47
+ abstract_webtools-0.1.6.107.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
48
+ abstract_webtools-0.1.6.107.dist-info/RECORD,,