abstract-webtools 0.1.6.107__py3-none-any.whl → 0.1.6.109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@ import os
2
2
  import re
3
3
  import time
4
4
  import requests
5
- import hashlib
6
5
  from bs4 import BeautifulSoup
7
6
  from urllib.parse import urljoin
8
7
  from selenium import webdriver
@@ -12,12 +11,10 @@ from selenium.webdriver.support.ui import WebDriverWait
12
11
  from selenium.webdriver.support import expected_conditions as EC
13
12
  from abstract_security import *
14
13
  from abstract_webtools import *
15
- from abstract_utilities import safe_dump_to_file, safe_load_from_json
16
-
14
+ from abstract_utilities import safe_dump_to_file,safe_read_from_json
17
15
  DOWNLOAD_DIR = os.path.abspath("./downloads")
18
-
19
16
  class K2SDownloader:
20
- def __init__(self, env_path=None, download_dir=None, json_file_path=None):
17
+ def __init__(self,env_path=None,download_dir=None,json_file_path=None):
21
18
  self.download_dir = download_dir or DOWNLOAD_DIR
22
19
  self.json_file_path = json_file_path
23
20
  os.makedirs(self.download_dir, exist_ok=True)
@@ -36,63 +33,28 @@ class K2SDownloader:
36
33
  return webdriver.Chrome(options=options)
37
34
 
38
35
  def login(self):
39
- userName = get_env_value('userName', path=self.env_path)
40
- passWord = get_env_value('passWord', path=self.env_path)
41
-
42
- try:
43
- self.driver.get("https://k2s.cc/auth/login")
44
- print("Navigating to login page")
45
- time.sleep(3)
46
-
47
- email_input = WebDriverWait(self.driver, 10).until(
48
- EC.presence_of_element_located((By.NAME, "email"))
49
- )
50
- password_input = WebDriverWait(self.driver, 10).until(
51
- EC.presence_of_element_located((By.NAME, "password")) # Updated field name
52
- )
53
- email_input.send_keys(userName)
54
- password_input.send_keys(passWord)
55
- password_input.send_keys(Keys.RETURN)
56
- print("Submitted login credentials")
57
-
58
- WebDriverWait(self.driver, 15).until(
59
- EC.url_contains("dashboard") # Adjust based on post-login URL
60
- )
61
- self.logged_in = True
62
- print("Login successful")
63
- except Exception as e:
64
- print(f"Login failed: {e}")
65
- with open('login_error.html', 'w', encoding='utf-8') as f:
66
- f.write(self.driver.page_source)
67
- raise
68
-
69
- def get_file_metadata(self, download_url):
70
- """Fetch filename and metadata using a HEAD request or page inspection."""
71
- metadata = {'url': download_url, 'filename': None, 'size': None}
72
- try:
73
- # Try HEAD request first
74
- response = self.session.head(download_url, allow_redirects=True)
75
- if response.status_code == 200:
76
- cd = response.headers.get('Content-Disposition', '')
77
- if 'filename=' in cd:
78
- metadata['filename'] = cd.split('filename=')[-1].strip('"')
79
- metadata['size'] = response.headers.get('Content-Length')
80
- if not metadata['filename']:
81
- metadata['filename'] = download_url.split('/')[-1].split('?')[0]
82
- else:
83
- # Fallback to page inspection if HEAD fails
84
- self.driver.get(download_url)
85
- WebDriverWait(self.driver, 10).until(
86
- EC.presence_of_element_located((By.TAG_NAME, "body"))
87
- )
88
- soup = BeautifulSoup(self.driver.page_source, 'html.parser')
89
- filename_tag = soup.select_one('a[href*="/download"]')
90
- metadata['filename'] = filename_tag.text.strip() if filename_tag else download_url.split('/')[-1]
91
- size_tag = soup.find(string=re.compile(r'\d+\.?\d*\s*(MB|GB|KB)'))
92
- metadata['size'] = size_tag.strip() if size_tag else None
93
- except Exception as e:
94
- print(f"Failed to fetch metadata for {download_url}: {e}")
95
- return metadata
36
+ userName = get_env_value('userName',path=self.env_path)
37
+ passWord = get_env_value('passWord',path=self.env_path)
38
+
39
+ self.driver.get("https://k2s.cc/auth/login")
40
+ time.sleep(3)
41
+
42
+
43
+ email_input = self.driver.find_element(By.NAME, "email")
44
+ password_input = self.driver.find_element(By.NAME, "input-password-auto-complete-on")
45
+ email_input.send_keys(userName)
46
+ password_input.send_keys(passWord)
47
+ password_input.send_keys(Keys.RETURN)
48
+
49
+ #WebDriverWait(self.driver, 20).until(
50
+ # EC.presence_of_element_located((By.XPATH, "//a[contains(text(), 'Logout')]"))
51
+ #)
52
+ self.logged_in = True
53
+ print("Login successful")
54
+ #except Exception as e:
55
+ # print(f"Login failed: {e}")
56
+ # with open('login_error.html', 'w', encoding='utf-8') as f:
57
+ # f.write(self.driver.page_source)
96
58
 
97
59
  def download_file(self, url):
98
60
  if not self.logged_in:
@@ -100,107 +62,83 @@ class K2SDownloader:
100
62
 
101
63
  print(f"Navigating to: {url}")
102
64
  self.driver.get(url)
103
- WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
65
+ time.sleep(5)
104
66
 
105
67
  if 'captcha' in self.driver.page_source.lower():
106
68
  print("CAPTCHA detected. Manual intervention required.")
107
- return None
69
+ return
108
70
 
109
71
  try:
110
72
  download_button = WebDriverWait(self.driver, 30).until(
111
73
  EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[href*="/download"], button[class*="download"]'))
112
74
  )
113
- print("Download button found; attempting to fetch URL")
75
+ print("Download button found; attempting to click or fetch URL")
114
76
  download_url = download_button.get_attribute('href')
115
77
 
116
78
  if download_url:
117
- # Get metadata before downloading
118
- metadata = self.get_file_metadata(download_url)
119
- file_name = metadata['filename'] or self._extract_filename(None, download_url)
120
- file_path = os.path.join(self.download_dir, file_name)
121
-
122
- # Download the file
123
79
  response = self.session.get(download_url, stream=True)
124
- response.raise_for_status()
80
+ file_name = self._extract_filename(response, download_url)
81
+ file_path = os.path.join(self.download_dir, file_name)
125
82
 
126
83
  with open(file_path, 'wb') as f:
127
84
  for chunk in response.iter_content(chunk_size=8192):
128
85
  f.write(chunk)
129
86
  print(f"Downloaded: {file_path}")
130
-
131
- # Update metadata with file size if not already set
132
- if not metadata['size']:
133
- metadata['size'] = os.path.getsize(file_path)
134
- metadata['file_path'] = file_path
135
-
136
- return metadata
87
+ return file_path
137
88
  else:
138
89
  download_button.click()
139
90
  print("Button clicked. Waiting for download...")
140
- time.sleep(30)
141
- return None
91
+ time.sleep(30) # adjust as needed
142
92
  except Exception as e:
143
93
  print(f"Download failed for {url}: {e}")
144
- return None
145
94
 
146
95
  def _extract_filename(self, response, url):
147
- if response:
148
- cd = response.headers.get('Content-Disposition', '')
149
- if 'filename=' in cd:
150
- return cd.split('filename=')[-1].strip('"')
96
+ cd = response.headers.get('Content-Disposition', '')
97
+ if 'filename=' in cd:
98
+ return cd.split('filename=')[-1].strip('"')
151
99
  return url.split('/')[-1].split('?')[0]
152
-
153
- def get_json_key_value(json_data, key):
154
- if json_data and isinstance(json_data, dict):
100
+ def get_json_key_value(json_data,key):
101
+ if json_data and isinstance(json_data,dict):
155
102
  return json_data.get(key)
156
-
157
- def compare_keys(json_data, comp_json_data, key):
158
- json_key_value = get_json_key_value(json_data, key)
159
- comp_json_key_value = get_json_key_value(comp_json_data, key)
160
- return json_key_value and comp_json_key_value and json_key_value == comp_json_key_value
161
-
162
- def check_json_data(json_list, new_data):
163
- keys = ['k2s', 'filename', 'size'] # Check k2s URL, filename, and size
103
+ def compare_keys(json_data,comp_json_data,key):
104
+ json_key_value = get_json_key_value(json_data,key)
105
+ comp_json_key_value = get_json_key_value(comp_json_data,key)
106
+ if json_key_value and comp_json_key_value and comp_json_key_value==json_key_value:
107
+ return True
108
+ def check_json_data(json_list,new_data):
109
+ keys = ['k2s','link','name']
164
110
  for json_data in json_list:
165
111
  for key in keys:
166
- if compare_keys(json_data, new_data, key):
167
- return True
168
- return False
112
+ result = compare_keys(json_data,new_data,key)
113
+ if result:
114
+ return result
169
115
 
170
116
  class dlsManager:
171
117
  def __init__(self, downloader):
172
118
  self.downloader = downloader
173
119
  self.json_file_path = self.downloader.json_file_path
174
- all_dls = None
120
+ all_dls= None
175
121
  if self.json_file_path:
176
- all_dls = safe_load_from_json(self.json_file_path)
177
- self.all_dls = all_dls or []
122
+ all_dls = safe_read_from_json(self.json_file_path)
123
+ self.all_dls = all_dls or []
178
124
  self.last_data = None
179
-
180
125
  def is_prev_dl(self, data):
181
- # Include metadata in data for duplicate checking
182
- extended_data = data.copy()
183
- if data.get('k2s'):
184
- metadata = self.downloader.get_file_metadata(data['k2s'])
185
- extended_data.update({
186
- 'filename': metadata['filename'],
187
- 'size': metadata['size']
188
- })
189
- if check_json_data(self.all_dls, extended_data):
126
+ if check_json_data(self.all_dls,data):
190
127
  self.last_data = None
191
128
  return True
192
- self.last_data = extended_data
129
+ self.last_data = data
193
130
  return False
194
131
 
195
132
  def dl_k2s_link(self, k2s_link):
196
133
  if k2s_link:
197
134
  print(f"Downloading: {k2s_link}")
198
- metadata = self.downloader.download_file(k2s_link)
135
+ self.downloader.download_file(k2s_link)
199
136
  time.sleep(10)
200
- if metadata and self.json_file_path and self.last_data:
201
- self.last_data.update(metadata) # Merge download metadata
137
+ if self.json_file_path:
202
138
  self.all_dls.append(self.last_data)
203
- safe_dump_to_file(data=self.all_dls, file_path=self.json_file_path)
139
+ safe_dump_to_file(data=self.all_dls,
140
+ file_path=self.json_file_path)
141
+
204
142
 
205
143
  def get_soup(url):
206
144
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.107
3
+ Version: 0.1.6.109
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -6,7 +6,7 @@ abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSj
6
6
  abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
7
7
  abstract_webtools/extention_list.py,sha256=gRSO4nMbuuXDYzd-ss4s64sS80ZHmUoazMCpgoKG5vE,4884
8
8
  abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZCQ,2609
9
- abstract_webtools/k2s_downloader.py,sha256=m2M1LlYdXGSOy3MNn8YPn0Gz70LLbXXDa_aUP3tvUm0,9213
9
+ abstract_webtools/k2s_downloader.py,sha256=rwbV7854DLP5YDpVfeayD8TzYraO-aKQgHqcP2UfFI0,6272
10
10
  abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
11
11
  abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
12
12
  abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
@@ -42,7 +42,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
42
42
  abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
43
43
  abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
44
44
  abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
45
- abstract_webtools-0.1.6.107.dist-info/METADATA,sha256=std8u1_zW1pWxvRY0djcPaeCuUUX9yohGkY8fT-cwTs,7289
46
- abstract_webtools-0.1.6.107.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
47
- abstract_webtools-0.1.6.107.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
48
- abstract_webtools-0.1.6.107.dist-info/RECORD,,
45
+ abstract_webtools-0.1.6.109.dist-info/METADATA,sha256=2Gl7FRH5k93PPJcVx4XlTCqaOa3r_AHIbrtzsTbUhJM,7289
46
+ abstract_webtools-0.1.6.109.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
47
+ abstract_webtools-0.1.6.109.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
48
+ abstract_webtools-0.1.6.109.dist-info/RECORD,,