abstract_webtools-0.1.6.105-py3-none-any.whl → abstract_webtools-0.1.6.107-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/abstract_webtools/k2s_downloader.py
+++ b/abstract_webtools/k2s_downloader.py
@@ -2,6 +2,7 @@ import os
 import re
 import time
 import requests
+import hashlib
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
 from selenium import webdriver
@@ -11,10 +12,12 @@ from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from abstract_security import *
 from abstract_webtools import *
-from abstract_utilities import safe_dump_to_file,safe_load_from_json
+from abstract_utilities import safe_dump_to_file, safe_load_from_json
+
 DOWNLOAD_DIR = os.path.abspath("./downloads")
+
 class K2SDownloader:
-    def __init__(self,env_path=None,download_dir=None,json_file_path=None):
+    def __init__(self, env_path=None, download_dir=None, json_file_path=None):
         self.download_dir = download_dir or DOWNLOAD_DIR
         self.json_file_path = json_file_path
         os.makedirs(self.download_dir, exist_ok=True)
@@ -33,28 +36,63 @@ class K2SDownloader:
         return webdriver.Chrome(options=options)
 
     def login(self):
-        userName = get_env_value('userName',path=self.env_path)
-        passWord = get_env_value('passWord',path=self.env_path)
-
-        self.driver.get("https://k2s.cc/auth/login")
-        time.sleep(3)
-
-
-        email_input = self.driver.find_element(By.NAME, "email")
-        password_input = self.driver.find_element(By.NAME, "input-password-auto-complete-on")
-        email_input.send_keys(userName)
-        password_input.send_keys(passWord)
-        password_input.send_keys(Keys.RETURN)
-
-        #WebDriverWait(self.driver, 20).until(
-        #    EC.presence_of_element_located((By.XPATH, "//a[contains(text(), 'Logout')]"))
-        #)
-        self.logged_in = True
-        print("Login successful")
-        #except Exception as e:
-        #    print(f"Login failed: {e}")
-        #    with open('login_error.html', 'w', encoding='utf-8') as f:
-        #        f.write(self.driver.page_source)
+        userName = get_env_value('userName', path=self.env_path)
+        passWord = get_env_value('passWord', path=self.env_path)
+
+        try:
+            self.driver.get("https://k2s.cc/auth/login")
+            print("Navigating to login page")
+            time.sleep(3)
+
+            email_input = WebDriverWait(self.driver, 10).until(
+                EC.presence_of_element_located((By.NAME, "email"))
+            )
+            password_input = WebDriverWait(self.driver, 10).until(
+                EC.presence_of_element_located((By.NAME, "password"))  # Updated field name
+            )
+            email_input.send_keys(userName)
+            password_input.send_keys(passWord)
+            password_input.send_keys(Keys.RETURN)
+            print("Submitted login credentials")
+
+            WebDriverWait(self.driver, 15).until(
+                EC.url_contains("dashboard")  # Adjust based on post-login URL
+            )
+            self.logged_in = True
+            print("Login successful")
+        except Exception as e:
+            print(f"Login failed: {e}")
+            with open('login_error.html', 'w', encoding='utf-8') as f:
+                f.write(self.driver.page_source)
+            raise
+
+    def get_file_metadata(self, download_url):
+        """Fetch filename and metadata using a HEAD request or page inspection."""
+        metadata = {'url': download_url, 'filename': None, 'size': None}
+        try:
+            # Try HEAD request first
+            response = self.session.head(download_url, allow_redirects=True)
+            if response.status_code == 200:
+                cd = response.headers.get('Content-Disposition', '')
+                if 'filename=' in cd:
+                    metadata['filename'] = cd.split('filename=')[-1].strip('"')
+                metadata['size'] = response.headers.get('Content-Length')
+                if not metadata['filename']:
+                    metadata['filename'] = download_url.split('/')[-1].split('?')[0]
+            else:
+                # Fallback to page inspection if HEAD fails
+                self.driver.get(download_url)
+                WebDriverWait(self.driver, 10).until(
+                    EC.presence_of_element_located((By.TAG_NAME, "body"))
+                )
+                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
+                filename_tag = soup.select_one('a[href*="/download"]')
+                metadata['filename'] = filename_tag.text.strip() if filename_tag else download_url.split('/')[-1]
+                size_tag = soup.find(string=re.compile(r'\d+\.?\d*\s*(MB|GB|KB)'))
+                metadata['size'] = size_tag.strip() if size_tag else None
+        except Exception as e:
+            print(f"Failed to fetch metadata for {download_url}: {e}")
+        return metadata
 
     def download_file(self, url):
         if not self.logged_in:
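The new get_file_metadata method resolves a filename and size before any bytes are downloaded: it issues a HEAD request and reads Content-Disposition and Content-Length, falling back to Selenium page inspection when the HEAD request does not return 200. A minimal standalone sketch of the HEAD path, assuming a bare requests call and a directly fetchable URL (the example URL is illustrative, not a real K2S link):

import requests

def probe_metadata(url):
    # Resolve filename and size from response headers, mirroring the HEAD path above.
    metadata = {'url': url, 'filename': None, 'size': None}
    response = requests.head(url, allow_redirects=True, timeout=10)
    if response.status_code == 200:
        cd = response.headers.get('Content-Disposition', '')
        if 'filename=' in cd:
            # Naive parse; RFC 6266 quoting/encoding would need more care
            metadata['filename'] = cd.split('filename=')[-1].strip('"')
        metadata['size'] = response.headers.get('Content-Length')
    if not metadata['filename']:
        # Fall back to the last URL path segment, dropping any query string
        metadata['filename'] = url.split('/')[-1].split('?')[0]
    return metadata

print(probe_metadata('https://example.com/files/report.pdf?token=x'))
# e.g. {'url': '...', 'filename': 'report.pdf', 'size': '104857600'} (size depends on the server)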
@@ -62,83 +100,107 @@ class K2SDownloader:
 
         print(f"Navigating to: {url}")
         self.driver.get(url)
-        time.sleep(5)
+        WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
 
         if 'captcha' in self.driver.page_source.lower():
             print("CAPTCHA detected. Manual intervention required.")
-            return
+            return None
 
         try:
             download_button = WebDriverWait(self.driver, 30).until(
                 EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[href*="/download"], button[class*="download"]'))
             )
-            print("Download button found; attempting to click or fetch URL")
+            print("Download button found; attempting to fetch URL")
             download_url = download_button.get_attribute('href')
 
             if download_url:
-                response = self.session.get(download_url, stream=True)
-                file_name = self._extract_filename(response, download_url)
+                # Get metadata before downloading
+                metadata = self.get_file_metadata(download_url)
+                file_name = metadata['filename'] or self._extract_filename(None, download_url)
                 file_path = os.path.join(self.download_dir, file_name)
 
+                # Download the file
+                response = self.session.get(download_url, stream=True)
+                response.raise_for_status()
+
                 with open(file_path, 'wb') as f:
                     for chunk in response.iter_content(chunk_size=8192):
                         f.write(chunk)
                 print(f"Downloaded: {file_path}")
-                return file_path
+
+                # Update metadata with file size if not already set
+                if not metadata['size']:
+                    metadata['size'] = os.path.getsize(file_path)
+                metadata['file_path'] = file_path
+
+                return metadata
             else:
                 download_button.click()
                 print("Button clicked. Waiting for download...")
-                time.sleep(30) # adjust as needed
+                time.sleep(30)
+                return None
         except Exception as e:
             print(f"Download failed for {url}: {e}")
+            return None
 
     def _extract_filename(self, response, url):
-        cd = response.headers.get('Content-Disposition', '')
-        if 'filename=' in cd:
-            return cd.split('filename=')[-1].strip('"')
+        if response:
+            cd = response.headers.get('Content-Disposition', '')
+            if 'filename=' in cd:
+                return cd.split('filename=')[-1].strip('"')
         return url.split('/')[-1].split('?')[0]
-def get_json_key_value(json_data,key):
-    if json_data and isinstance(json_data,dict):
+
+def get_json_key_value(json_data, key):
+    if json_data and isinstance(json_data, dict):
         return json_data.get(key)
-def compare_keys(json_data,comp_json_data,key):
-    json_key_value = get_json_key_value(json_data,key)
-    comp_json_key_value = get_json_key_value(comp_json_data,key)
-    if json_key_value and comp_json_key_value and comp_json_key_value==json_key_value:
-        return True
-def check_json_data(json_list,new_data):
-    keys = ['k2s','link','name']
+
+def compare_keys(json_data, comp_json_data, key):
+    json_key_value = get_json_key_value(json_data, key)
+    comp_json_key_value = get_json_key_value(comp_json_data, key)
+    return json_key_value and comp_json_key_value and json_key_value == comp_json_key_value
+
+def check_json_data(json_list, new_data):
+    keys = ['k2s', 'filename', 'size']  # Check k2s URL, filename, and size
     for json_data in json_list:
         for key in keys:
-            result = compare_keys(json_data,new_data,key)
-            if result:
-                return result
+            if compare_keys(json_data, new_data, key):
+                return True
+    return False
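Note the duplicate-detection semantics here: check_json_data flags a record as previously downloaded if any single key matches, so two different files that happen to report the same size string would collide. A small illustration using the helpers above (values are made up):

previous = [{'k2s': 'https://k2s.cc/file/aaa', 'filename': 'a.mp4', 'size': '1048576'}]
candidate = {'k2s': 'https://k2s.cc/file/bbb', 'filename': 'b.mp4', 'size': '1048576'}
print(check_json_data(previous, candidate))  # True: the shared 'size' alone marks it a duplicate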
 
 class dlsManager:
     def __init__(self, downloader):
         self.downloader = downloader
         self.json_file_path = self.downloader.json_file_path
-        all_dls= None
+        all_dls = None
         if self.json_file_path:
             all_dls = safe_load_from_json(self.json_file_path)
-            self.all_dls = all_dls or []
+        self.all_dls = all_dls or []
         self.last_data = None
+
     def is_prev_dl(self, data):
-        if check_json_data(self.all_dls,data):
+        # Include metadata in data for duplicate checking
+        extended_data = data.copy()
+        if data.get('k2s'):
+            metadata = self.downloader.get_file_metadata(data['k2s'])
+            extended_data.update({
+                'filename': metadata['filename'],
+                'size': metadata['size']
+            })
+        if check_json_data(self.all_dls, extended_data):
             self.last_data = None
             return True
-        self.last_data = data
+        self.last_data = extended_data
         return False
 
     def dl_k2s_link(self, k2s_link):
         if k2s_link:
             print(f"Downloading: {k2s_link}")
-            self.downloader.download_file(k2s_link)
+            metadata = self.downloader.download_file(k2s_link)
             time.sleep(10)
-            if self.json_file_path:
+            if metadata and self.json_file_path and self.last_data:
+                self.last_data.update(metadata)  # Merge download metadata
                 self.all_dls.append(self.last_data)
-                safe_dump_to_file(data=self.all_dls,
-                                  file_path=self.json_file_path)
-
+                safe_dump_to_file(data=self.all_dls, file_path=self.json_file_path)
 
 def get_soup(url):
     try:
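Taken together, download_file now returns a metadata dict (url, filename, size, file_path) instead of a bare file path, and dlsManager merges that dict into its JSON log so later runs can deduplicate against it. A hedged end-to-end sketch, assuming an .env file exposing 'userName'/'passWord', an illustrative K2S link, and that login() must be invoked explicitly (the constructor's wiring of self.driver and self.session is not shown in this diff):

downloader = K2SDownloader(env_path='.env', json_file_path='downloads.json')
downloader.login()  # sets self.logged_in; on failure dumps login_error.html and re-raises

manager = dlsManager(downloader)
link = 'https://k2s.cc/file/EXAMPLE'  # hypothetical link
if not manager.is_prev_dl({'k2s': link}):   # dedup on k2s URL, filename, and size
    manager.dl_k2s_link(link)               # downloads, merges metadata, appends to downloads.json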
--- a/abstract_webtools-0.1.6.105.dist-info/METADATA
+++ b/abstract_webtools-0.1.6.107.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.105
+Version: 0.1.6.107
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
--- a/abstract_webtools-0.1.6.105.dist-info/RECORD
+++ b/abstract_webtools-0.1.6.107.dist-info/RECORD
@@ -6,7 +6,7 @@ abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSj
 abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
 abstract_webtools/extention_list.py,sha256=gRSO4nMbuuXDYzd-ss4s64sS80ZHmUoazMCpgoKG5vE,4884
 abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZCQ,2609
-abstract_webtools/k2s_downloader.py,sha256=dN6wZNMiczjeqq6ISTQfoiS0ZMycvWzCfQCZMQl3Tn8,6272
+abstract_webtools/k2s_downloader.py,sha256=m2M1LlYdXGSOy3MNn8YPn0Gz70LLbXXDa_aUP3tvUm0,9213
 abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
 abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
 abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
@@ -42,7 +42,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
 abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
 abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
 abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
-abstract_webtools-0.1.6.105.dist-info/METADATA,sha256=uGmrjyRas-bDHSmOFnX9QZitnOY6b9o3sj8HmQorgx4,7289
-abstract_webtools-0.1.6.105.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-abstract_webtools-0.1.6.105.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
-abstract_webtools-0.1.6.105.dist-info/RECORD,,
+abstract_webtools-0.1.6.107.dist-info/METADATA,sha256=std8u1_zW1pWxvRY0djcPaeCuUUX9yohGkY8fT-cwTs,7289
+abstract_webtools-0.1.6.107.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+abstract_webtools-0.1.6.107.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+abstract_webtools-0.1.6.107.dist-info/RECORD,,