abstract-webtools 0.1.6.105__tar.gz → 0.1.6.107__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/PKG-INFO +1 -1
  2. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/setup.py +1 -1
  3. abstract_webtools-0.1.6.107/src/abstract_webtools/k2s_downloader.py +227 -0
  4. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
  5. abstract_webtools-0.1.6.105/src/abstract_webtools/k2s_downloader.py +0 -165
  6. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/README.md +0 -0
  7. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/pyproject.toml +0 -0
  8. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/setup.cfg +0 -0
  9. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/__init__.py +0 -0
  10. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/abstract_usurpit.py +0 -0
  11. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/abstract_webtools.py +0 -0
  12. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/big_user_agent_list.py +0 -0
  13. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/domain_identifier.py +0 -0
  14. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/extention_list.py +0 -0
  15. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/find_dirs.py +0 -0
  16. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/main.py +0 -0
  17. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/__init__.py +0 -0
  18. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/allss//.py" +0 -0
  19. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/cipherManager.py +0 -0
  20. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/crawlManager.py +0 -0
  21. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
  22. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/curlMgr.py +0 -0
  23. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/domainManager.py +0 -0
  24. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
  25. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/get_test.py +0 -0
  26. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
  27. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/linkManager/linkManager.py +0 -0
  28. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/mySocketClient.py +0 -0
  29. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/networkManager.py +0 -0
  30. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
  31. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/requestManager/requestManager.py +0 -0
  32. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/seleniumManager.py +0 -0
  33. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
  34. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
  35. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/soupManager/soupManager.py +0 -0
  36. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/sslManager.py +0 -0
  37. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
  38. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
  39. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
  40. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/userAgentManager.py +0 -0
  41. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/videoDownloader.py +0 -0
  42. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
  43. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/soup_gui.py +0 -0
  44. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/url_grabber.py +0 -0
  45. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools/url_grabber_new.py +0 -0
  46. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
  47. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  48. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools.egg-info/requires.txt +0 -0
  49. {abstract_webtools-0.1.6.105 → abstract_webtools-0.1.6.107}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: abstract_webtools
- Version: 0.1.6.105
+ Version: 0.1.6.107
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
  Author: putkoff
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
      long_description = fh.read()
  setuptools.setup(
      name='abstract_webtools',
-     version='0.1.6.105',
+     version='0.1.6.107',
      author='putkoff',
      author_email='partners@abstractendeavors.com',
      description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
@@ -0,0 +1,227 @@
+ import os
+ import re
+ import time
+ import requests
+ import hashlib
+ from bs4 import BeautifulSoup
+ from urllib.parse import urljoin
+ from selenium import webdriver
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.common.keys import Keys
+ from selenium.webdriver.support.ui import WebDriverWait
+ from selenium.webdriver.support import expected_conditions as EC
+ from abstract_security import *
+ from abstract_webtools import *
+ from abstract_utilities import safe_dump_to_file, safe_load_from_json
+
+ DOWNLOAD_DIR = os.path.abspath("./downloads")
+
+ class K2SDownloader:
+     def __init__(self, env_path=None, download_dir=None, json_file_path=None):
+         self.download_dir = download_dir or DOWNLOAD_DIR
+         self.json_file_path = json_file_path
+         os.makedirs(self.download_dir, exist_ok=True)
+         self.env_path = env_path
+         self.session = requests.Session()
+         self.session.headers.update({
+             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+         })
+         self.driver = self._init_driver()
+         self.logged_in = False
+
+     def _init_driver(self):
+         options = webdriver.ChromeOptions()
+         options.add_argument("--disable-blink-features=AutomationControlled")
+         options.add_argument("--headless")
+         return webdriver.Chrome(options=options)
+
+     def login(self):
+         userName = get_env_value('userName', path=self.env_path)
+         passWord = get_env_value('passWord', path=self.env_path)
+
+         try:
+             self.driver.get("https://k2s.cc/auth/login")
+             print("Navigating to login page")
+             time.sleep(3)
+
+             email_input = WebDriverWait(self.driver, 10).until(
+                 EC.presence_of_element_located((By.NAME, "email"))
+             )
+             password_input = WebDriverWait(self.driver, 10).until(
+                 EC.presence_of_element_located((By.NAME, "password"))  # Updated field name
+             )
+             email_input.send_keys(userName)
+             password_input.send_keys(passWord)
+             password_input.send_keys(Keys.RETURN)
+             print("Submitted login credentials")
+
+             WebDriverWait(self.driver, 15).until(
+                 EC.url_contains("dashboard")  # Adjust based on post-login URL
+             )
+             self.logged_in = True
+             print("Login successful")
+         except Exception as e:
+             print(f"Login failed: {e}")
+             with open('login_error.html', 'w', encoding='utf-8') as f:
+                 f.write(self.driver.page_source)
+             raise
+
+     def get_file_metadata(self, download_url):
+         """Fetch filename and metadata using a HEAD request or page inspection."""
+         metadata = {'url': download_url, 'filename': None, 'size': None}
+         try:
+             # Try HEAD request first
+             response = self.session.head(download_url, allow_redirects=True)
+             if response.status_code == 200:
+                 cd = response.headers.get('Content-Disposition', '')
+                 if 'filename=' in cd:
+                     metadata['filename'] = cd.split('filename=')[-1].strip('"')
+                 metadata['size'] = response.headers.get('Content-Length')
+                 if not metadata['filename']:
+                     metadata['filename'] = download_url.split('/')[-1].split('?')[0]
+             else:
+                 # Fallback to page inspection if HEAD fails
+                 self.driver.get(download_url)
+                 WebDriverWait(self.driver, 10).until(
+                     EC.presence_of_element_located((By.TAG_NAME, "body"))
+                 )
+                 soup = BeautifulSoup(self.driver.page_source, 'html.parser')
+                 filename_tag = soup.select_one('a[href*="/download"]')
+                 metadata['filename'] = filename_tag.text.strip() if filename_tag else download_url.split('/')[-1]
+                 size_tag = soup.find(string=re.compile(r'\d+\.?\d*\s*(MB|GB|KB)'))
+                 metadata['size'] = size_tag.strip() if size_tag else None
+         except Exception as e:
+             print(f"Failed to fetch metadata for {download_url}: {e}")
+         return metadata
+
+     def download_file(self, url):
+         if not self.logged_in:
+             self.login()
+
+         print(f"Navigating to: {url}")
+         self.driver.get(url)
+         WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
+
+         if 'captcha' in self.driver.page_source.lower():
+             print("CAPTCHA detected. Manual intervention required.")
+             return None
+
+         try:
+             download_button = WebDriverWait(self.driver, 30).until(
+                 EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[href*="/download"], button[class*="download"]'))
+             )
+             print("Download button found; attempting to fetch URL")
+             download_url = download_button.get_attribute('href')
+
+             if download_url:
+                 # Get metadata before downloading
+                 metadata = self.get_file_metadata(download_url)
+                 file_name = metadata['filename'] or self._extract_filename(None, download_url)
+                 file_path = os.path.join(self.download_dir, file_name)
+
+                 # Download the file
+                 response = self.session.get(download_url, stream=True)
+                 response.raise_for_status()
+
+                 with open(file_path, 'wb') as f:
+                     for chunk in response.iter_content(chunk_size=8192):
+                         f.write(chunk)
+                 print(f"Downloaded: {file_path}")
+
+                 # Update metadata with file size if not already set
+                 if not metadata['size']:
+                     metadata['size'] = os.path.getsize(file_path)
+                 metadata['file_path'] = file_path
+
+                 return metadata
+             else:
+                 download_button.click()
+                 print("Button clicked. Waiting for download...")
+                 time.sleep(30)
+                 return None
+         except Exception as e:
+             print(f"Download failed for {url}: {e}")
+             return None
+
+     def _extract_filename(self, response, url):
+         if response:
+             cd = response.headers.get('Content-Disposition', '')
+             if 'filename=' in cd:
+                 return cd.split('filename=')[-1].strip('"')
+         return url.split('/')[-1].split('?')[0]
+
+ def get_json_key_value(json_data, key):
+     if json_data and isinstance(json_data, dict):
+         return json_data.get(key)
+
+ def compare_keys(json_data, comp_json_data, key):
+     json_key_value = get_json_key_value(json_data, key)
+     comp_json_key_value = get_json_key_value(comp_json_data, key)
+     return json_key_value and comp_json_key_value and json_key_value == comp_json_key_value
+
+ def check_json_data(json_list, new_data):
+     keys = ['k2s', 'filename', 'size']  # Check k2s URL, filename, and size
+     for json_data in json_list:
+         for key in keys:
+             if compare_keys(json_data, new_data, key):
+                 return True
+     return False
+
+ class dlsManager:
+     def __init__(self, downloader):
+         self.downloader = downloader
+         self.json_file_path = self.downloader.json_file_path
+         all_dls = None
+         if self.json_file_path:
+             all_dls = safe_load_from_json(self.json_file_path)
+         self.all_dls = all_dls or []
+         self.last_data = None
+
+     def is_prev_dl(self, data):
+         # Include metadata in data for duplicate checking
+         extended_data = data.copy()
+         if data.get('k2s'):
+             metadata = self.downloader.get_file_metadata(data['k2s'])
+             extended_data.update({
+                 'filename': metadata['filename'],
+                 'size': metadata['size']
+             })
+         if check_json_data(self.all_dls, extended_data):
+             self.last_data = None
+             return True
+         self.last_data = extended_data
+         return False
+
+     def dl_k2s_link(self, k2s_link):
+         if k2s_link:
+             print(f"Downloading: {k2s_link}")
+             metadata = self.downloader.download_file(k2s_link)
+             time.sleep(10)
+             if metadata and self.json_file_path and self.last_data:
+                 self.last_data.update(metadata)  # Merge download metadata
+                 self.all_dls.append(self.last_data)
+                 safe_dump_to_file(data=self.all_dls, file_path=self.json_file_path)
+
+ def get_soup(url):
+     try:
+         resp = requests.get(url)
+         resp.raise_for_status()
+         return BeautifulSoup(resp.text, 'html.parser')
+     except Exception as e:
+         print(f"Failed to fetch soup for {url}: {e}")
+         return None
+
+ def get_k2s_link(soup):
+     match = re.search(r'https://k2s\.cc/file/[^"<]+', str(soup))
+     return match.group(0) if match else None
+
+ def get_sections_content(content,get_post_attribute,dls_mgr):
+     results=[]
+     if not content:
+         return []
+     for section in content:
+         data = get_post_attribute(section)
+         if data and data.get('k2s') and not dls_mgr.is_prev_dl(data):
+             dls_mgr.dl_k2s_link(data['k2s'])
+             results.append(data)
+     return results
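
A minimal usage sketch of the rewritten k2s_downloader module follows. It assumes the module is importable as abstract_webtools.k2s_downloader and that an env file readable by get_env_value defines 'userName' and 'passWord'; the env path, "downloads.json" history file, listing URL, and parse_post() callback are hypothetical placeholders, not values from the package.

# Minimal usage sketch; all paths, URLs, and the parse_post() callback below are
# hypothetical placeholders rather than values shipped with the package.
from abstract_webtools.k2s_downloader import (
    K2SDownloader, dlsManager, get_soup, get_k2s_link, get_sections_content
)

# Hypothetical env file providing 'userName' and 'passWord' for get_env_value,
# plus a JSON file used as the download-history store.
downloader = K2SDownloader(env_path="k2s.env", json_file_path="downloads.json")
dls_mgr = dlsManager(downloader)

def parse_post(section):
    # Hypothetical callback: pull the k2s file link out of one post section.
    return {'k2s': get_k2s_link(section)}

soup = get_soup("https://example.com/posts")  # placeholder listing page
sections = soup.find_all("article") if soup else []
new_items = get_sections_content(sections, parse_post, dls_mgr)
print(f"Fetched {len(new_items)} new files into {downloader.download_dir}")
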
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: abstract_webtools
- Version: 0.1.6.105
+ Version: 0.1.6.107
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
  Author: putkoff
@@ -1,165 +0,0 @@
- import os
- import re
- import time
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urljoin
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from abstract_security import *
- from abstract_webtools import *
- from abstract_utilities import safe_dump_to_file,safe_load_from_json
- DOWNLOAD_DIR = os.path.abspath("./downloads")
- class K2SDownloader:
-     def __init__(self,env_path=None,download_dir=None,json_file_path=None):
-         self.download_dir = download_dir or DOWNLOAD_DIR
-         self.json_file_path = json_file_path
-         os.makedirs(self.download_dir, exist_ok=True)
-         self.env_path = env_path
-         self.session = requests.Session()
-         self.session.headers.update({
-             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-         })
-         self.driver = self._init_driver()
-         self.logged_in = False
-
-     def _init_driver(self):
-         options = webdriver.ChromeOptions()
-         options.add_argument("--disable-blink-features=AutomationControlled")
-         options.add_argument("--headless")
-         return webdriver.Chrome(options=options)
-
-     def login(self):
-         userName = get_env_value('userName',path=self.env_path)
-         passWord = get_env_value('passWord',path=self.env_path)
-
-         self.driver.get("https://k2s.cc/auth/login")
-         time.sleep(3)
-
-
-         email_input = self.driver.find_element(By.NAME, "email")
-         password_input = self.driver.find_element(By.NAME, "input-password-auto-complete-on")
-         email_input.send_keys(userName)
-         password_input.send_keys(passWord)
-         password_input.send_keys(Keys.RETURN)
-
-         #WebDriverWait(self.driver, 20).until(
-         #    EC.presence_of_element_located((By.XPATH, "//a[contains(text(), 'Logout')]"))
-         #)
-         self.logged_in = True
-         print("Login successful")
-         #except Exception as e:
-         #    print(f"Login failed: {e}")
-         #    with open('login_error.html', 'w', encoding='utf-8') as f:
-         #        f.write(self.driver.page_source)
-
-     def download_file(self, url):
-         if not self.logged_in:
-             self.login()
-
-         print(f"Navigating to: {url}")
-         self.driver.get(url)
-         time.sleep(5)
-
-         if 'captcha' in self.driver.page_source.lower():
-             print("CAPTCHA detected. Manual intervention required.")
-             return
-
-         try:
-             download_button = WebDriverWait(self.driver, 30).until(
-                 EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[href*="/download"], button[class*="download"]'))
-             )
-             print("Download button found; attempting to click or fetch URL")
-             download_url = download_button.get_attribute('href')
-
-             if download_url:
-                 response = self.session.get(download_url, stream=True)
-                 file_name = self._extract_filename(response, download_url)
-                 file_path = os.path.join(self.download_dir, file_name)
-
-                 with open(file_path, 'wb') as f:
-                     for chunk in response.iter_content(chunk_size=8192):
-                         f.write(chunk)
-                 print(f"Downloaded: {file_path}")
-                 return file_path
-             else:
-                 download_button.click()
-                 print("Button clicked. Waiting for download...")
-                 time.sleep(30)  # adjust as needed
-         except Exception as e:
-             print(f"Download failed for {url}: {e}")
-
-     def _extract_filename(self, response, url):
-         cd = response.headers.get('Content-Disposition', '')
-         if 'filename=' in cd:
-             return cd.split('filename=')[-1].strip('"')
-         return url.split('/')[-1].split('?')[0]
- def get_json_key_value(json_data,key):
-     if json_data and isinstance(json_data,dict):
-         return json_data.get(key)
- def compare_keys(json_data,comp_json_data,key):
-     json_key_value = get_json_key_value(json_data,key)
-     comp_json_key_value = get_json_key_value(comp_json_data,key)
-     if json_key_value and comp_json_key_value and comp_json_key_value==json_key_value:
-         return True
- def check_json_data(json_list,new_data):
-     keys = ['k2s','link','name']
-     for json_data in json_list:
-         for key in keys:
-             result = compare_keys(json_data,new_data,key)
-             if result:
-                 return result
-
- class dlsManager:
-     def __init__(self, downloader):
-         self.downloader = downloader
-         self.json_file_path = self.downloader.json_file_path
-         all_dls= None
-         if self.json_file_path:
-             all_dls = safe_load_from_json(self.json_file_path)
-         self.all_dls = all_dls or []
-         self.last_data = None
-     def is_prev_dl(self, data):
-         if check_json_data(self.all_dls,data):
-             self.last_data = None
-             return True
-         self.last_data = data
-         return False
-
-     def dl_k2s_link(self, k2s_link):
-         if k2s_link:
-             print(f"Downloading: {k2s_link}")
-             self.downloader.download_file(k2s_link)
-             time.sleep(10)
-             if self.json_file_path:
-                 self.all_dls.append(self.last_data)
-                 safe_dump_to_file(data=self.all_dls,
-                                   file_path=self.json_file_path)
-
-
- def get_soup(url):
-     try:
-         resp = requests.get(url)
-         resp.raise_for_status()
-         return BeautifulSoup(resp.text, 'html.parser')
-     except Exception as e:
-         print(f"Failed to fetch soup for {url}: {e}")
-         return None
-
- def get_k2s_link(soup):
-     match = re.search(r'https://k2s\.cc/file/[^"<]+', str(soup))
-     return match.group(0) if match else None
-
- def get_sections_content(content,get_post_attribute,dls_mgr):
-     results=[]
-     if not content:
-         return []
-     for section in content:
-         data = get_post_attribute(section)
-         if data and data.get('k2s') and not dls_mgr.is_prev_dl(data):
-             dls_mgr.dl_k2s_link(data['k2s'])
-             results.append(data)
-     return results