quasarr 1.26.2__py3-none-any.whl → 1.26.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of quasarr might be problematic. Click here for more details.
- quasarr/downloads/sources/dl.py +333 -46
- quasarr/providers/obfuscated.py +6 -6
- quasarr/providers/version.py +1 -1
- {quasarr-1.26.2.dist-info → quasarr-1.26.4.dist-info}/METADATA +2 -2
- {quasarr-1.26.2.dist-info → quasarr-1.26.4.dist-info}/RECORD +9 -9
- {quasarr-1.26.2.dist-info → quasarr-1.26.4.dist-info}/WHEEL +0 -0
- {quasarr-1.26.2.dist-info → quasarr-1.26.4.dist-info}/entry_points.txt +0 -0
- {quasarr-1.26.2.dist-info → quasarr-1.26.4.dist-info}/licenses/LICENSE +0 -0
- {quasarr-1.26.2.dist-info → quasarr-1.26.4.dist-info}/top_level.txt +0 -0
quasarr/downloads/sources/dl.py
CHANGED
|
@@ -3,14 +3,35 @@
|
|
|
3
3
|
# Project by https://github.com/rix1337
|
|
4
4
|
|
|
5
5
|
import re
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
from io import BytesIO
|
|
6
8
|
|
|
7
|
-
from
|
|
9
|
+
from PIL import Image
|
|
10
|
+
from bs4 import BeautifulSoup, NavigableString
|
|
8
11
|
|
|
9
12
|
from quasarr.providers.log import info, debug
|
|
10
13
|
from quasarr.providers.sessions.dl import retrieve_and_validate_session, fetch_via_requests_session, invalidate_session
|
|
11
14
|
|
|
12
15
|
hostname = "dl"
|
|
13
16
|
|
|
17
|
+
# Domain suffixes that normalize_mirror_name() cuts off before mirror names are compared
COMMON_TLDS = {
    '.com', '.net', '.io', '.cc', '.to', '.me',
    '.org', '.co', '.de', '.eu', '.info',
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def normalize_mirror_name(name):
    """
    Turn a mirror name into a key suitable for comparison.

    The name is lowercased and stripped of surrounding whitespace. If the
    result ends with one of the suffixes in COMMON_TLDS, that one suffix is
    removed, e.g. "DDownload.com" -> "ddownload", "Rapidgator.net" -> "rapidgator".
    Falsy input (None, "") yields "".
    """
    if not name:
        return ""

    key = name.lower().strip()
    # At most one suffix is removed: take the first one that matches, if any
    matched = next((suffix for suffix in COMMON_TLDS if key.endswith(suffix)), None)
    if matched is None:
        return key
    return key[:-len(matched)]
|
|
34
|
+
|
|
14
35
|
|
|
15
36
|
def extract_password_from_post(soup, host):
|
|
16
37
|
"""
|
|
@@ -60,52 +81,289 @@ def extract_mirror_name_from_link(link_element):
|
|
|
60
81
|
'1fichier': ['1fichier'],
|
|
61
82
|
}
|
|
62
83
|
|
|
63
|
-
if
|
|
84
|
+
# Skip if link text is a URL
|
|
85
|
+
if link_text and len(link_text) > 2 and not link_text.startswith('http'):
|
|
64
86
|
cleaned = re.sub(r'[^\w\s-]', '', link_text).strip().lower()
|
|
65
87
|
if cleaned and cleaned not in common_non_hosters:
|
|
66
88
|
main_part = cleaned.split()[0] if ' ' in cleaned else cleaned
|
|
67
89
|
if 2 < len(main_part) < 30:
|
|
68
90
|
return main_part
|
|
69
91
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
92
|
+
# Check previous siblings including text nodes
|
|
93
|
+
for sibling in link_element.previous_siblings:
|
|
94
|
+
# Handle text nodes (NavigableString)
|
|
95
|
+
if isinstance(sibling, NavigableString):
|
|
96
|
+
text = sibling.strip()
|
|
97
|
+
if text:
|
|
98
|
+
# Remove common separators like @ : -
|
|
99
|
+
cleaned = re.sub(r'[@:\-–—\s]+$', '', text).strip().lower()
|
|
100
|
+
cleaned = re.sub(r'[^\w\s.-]', '', cleaned).strip()
|
|
101
|
+
if cleaned and len(cleaned) > 2 and cleaned not in common_non_hosters:
|
|
102
|
+
# Take the last word as mirror name (e.g., "Rapidgator" from "Rapidgator @")
|
|
103
|
+
parts = cleaned.split()
|
|
104
|
+
if parts:
|
|
105
|
+
mirror = parts[-1]
|
|
106
|
+
if 2 < len(mirror) < 30:
|
|
107
|
+
return mirror
|
|
108
|
+
continue
|
|
76
109
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
110
|
+
# Skip non-Tag elements
|
|
111
|
+
if not hasattr(sibling, 'name') or sibling.name is None:
|
|
112
|
+
continue
|
|
113
|
+
|
|
114
|
+
# Skip spoiler elements entirely
|
|
115
|
+
classes = sibling.get('class', [])
|
|
116
|
+
if classes and any('spoiler' in str(c).lower() for c in classes):
|
|
117
|
+
continue
|
|
118
|
+
|
|
119
|
+
# Check for images with hoster names in src/alt/data-url
|
|
120
|
+
img = sibling.find('img') if sibling.name != 'img' else sibling
|
|
121
|
+
if img:
|
|
122
|
+
img_identifiers = (img.get('src', '') + img.get('alt', '') + img.get('data-url', '')).lower()
|
|
123
|
+
for hoster, patterns in known_hosters.items():
|
|
124
|
+
if any(pattern in img_identifiers for pattern in patterns):
|
|
125
|
+
return hoster
|
|
126
|
+
|
|
127
|
+
sibling_text = sibling.get_text(strip=True).lower()
|
|
128
|
+
# Skip if text is too long - likely NFO content or other non-mirror text
|
|
129
|
+
if len(sibling_text) > 30:
|
|
130
|
+
continue
|
|
131
|
+
if sibling_text and len(sibling_text) > 2 and sibling_text not in common_non_hosters:
|
|
132
|
+
cleaned = re.sub(r'[^\w\s-]', '', sibling_text).strip()
|
|
133
|
+
if cleaned and 2 < len(cleaned) < 30:
|
|
134
|
+
return cleaned.split()[0] if ' ' in cleaned else cleaned
|
|
135
|
+
|
|
136
|
+
return None
|
|
81
137
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
138
|
+
|
|
139
|
+
def generate_status_url(href, crypter_type):
    """
    Generate a status URL for crypters whose status endpoint follows from the link.

    Supported crypter types:
      - "hide":   https://hide.cx/folder/{UUID} (the "folder/" part is optional)
                  -> https://hide.cx/state/{UUID}
      - "tolink": https://tolink.to/f/{ID} -> https://tolink.to/f/{ID}/s/status.png

    Args:
        href: The crypter link. Anything that is not a non-empty string gives None.
        crypter_type: Crypter identifier; only "hide" and "tolink" are handled.

    Returns:
        The status URL as a string, or None if the crypter type is not handled
        or the link does not match the expected pattern.
    """
    # Guard against None / non-string hrefs, which would make re.search raise TypeError
    if not href or not isinstance(href, str):
        return None

    if crypter_type == "hide":
        # The captured ID is inserted as matched (case is preserved)
        match = re.search(r'hide\.cx/(?:folder/)?([a-f0-9-]{36})', href, re.IGNORECASE)
        if match:
            folder_id = match.group(1)
            return f"https://hide.cx/state/{folder_id}"

    elif crypter_type == "tolink":
        match = re.search(r'tolink\.to/f/([a-zA-Z0-9]+)', href, re.IGNORECASE)
        if match:
            link_id = match.group(1)
            return f"https://tolink.to/f/{link_id}/s/status.png"

    return None
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _filecrypt_status_url_from_img(img):
    """Return the FileCrypt status URL found in an image's src/data-url attribute, or None."""
    for attr in ('src', 'data-url'):
        # "or ''" tolerates an attribute that is absent or has no value
        url = img.get(attr) or ''
        if 'filecrypt.cc/Stat/' in url:
            return url
    return None


def extract_status_url_from_html(link_element, crypter_type):
    """
    Extract a FileCrypt status image URL from the HTML around a link element.

    Only crypter_type "filecrypt" is handled; every other type returns None
    without inspecting link_element.

    Lookup order:
      1. the first <img> found inside the link itself,
      2. the elements following the link (its next siblings): a sibling that
         is an <img>, or the first <img> nested in a sibling.

    The sibling scan stops once a sibling <a> has been examined.

    Args:
        link_element: The link element to search around; it must offer find()
            and next_siblings (presumably a BeautifulSoup Tag - confirm against caller).
        crypter_type: Crypter identifier string.

    Returns:
        The first URL containing 'filecrypt.cc/Stat/', or None if none is found.
    """
    if crypter_type != "filecrypt":
        return None

    # Status image wrapped by the link itself
    img = link_element.find('img')
    if img:
        url = _filecrypt_status_url_from_img(img)
        if url:
            return url

    # Status image somewhere after the link
    for sibling in link_element.next_siblings:
        # Nodes without a tag name (e.g. text nodes) cannot carry an image
        if getattr(sibling, 'name', None) is None:
            continue

        if sibling.name == 'img':
            url = _filecrypt_status_url_from_img(sibling)
            if url:
                return url

        nested_img = sibling.find('img') if hasattr(sibling, 'find') else None
        if nested_img:
            url = _filecrypt_status_url_from_img(nested_img)
            if url:
                return url

        # Stop at the next link. This runs after the nested-image lookup, so an
        # image wrapped by that next link has already been considered above.
        if sibling.name == 'a':
            break

    return None
|
|
100
198
|
|
|
101
199
|
|
|
200
|
+
def _nearest_text_before_status_img(img):
    """Return the closest non-empty text preceding a status image, or "" if there is none."""
    found = ""

    # Candidate 1: nearest non-empty text among the previous siblings of the image's parent
    parent = img.parent
    if parent:
        for prev in parent.previous_siblings:
            if hasattr(prev, 'get_text'):
                found = prev.get_text(strip=True)
            elif isinstance(prev, NavigableString):
                found = prev.strip()
            if found:
                break

    # Candidate 2: text directly before the image inside its parent.
    # When present it replaces candidate 1.
    for prev in img.previous_siblings:
        if isinstance(prev, NavigableString) and prev.strip():
            found = prev.strip()
            break
        elif hasattr(prev, 'get_text'):
            text = prev.get_text(strip=True)
            if text:
                found = text
                break

    return found


def _mirror_name_from_text(text):
    """Derive a lowercase mirror name from free text, or None if nothing usable remains."""
    cleaned = re.sub(r'[^\w\s.-]', '', text).strip().lower()
    parts = cleaned.split()
    if not parts:
        return None
    # The last word is taken as the mirror name; if it is 2 characters or
    # shorter, the whole cleaned text is used instead
    return parts[-1] if len(parts[-1]) > 2 else cleaned


def build_filecrypt_status_map(soup):
    """
    Build a map of mirror names to FileCrypt status URLs.

    Handles posts where the status images sit in a separate section from the
    links: every <img> whose src or data-url contains 'filecrypt.cc/Stat/' is
    paired with the nearest text found before it, and that text is reduced to
    a mirror name.

    Args:
        soup: Parsed post content offering find_all('img')
            (presumably a BeautifulSoup object - confirm against caller).

    Returns:
        dict: {mirror_name_lowercase: status_url}. If several images resolve
        to the same mirror name, the first one encountered is kept.
    """
    status_map = {}

    for img in soup.find_all('img'):
        # 'src' is checked before 'data-url'; "or ''" tolerates a missing value
        status_url = next(
            (
                url
                for url in ((img.get(attr) or '') for attr in ('src', 'data-url'))
                if 'filecrypt.cc/Stat/' in url
            ),
            None,
        )
        if not status_url:
            continue

        prev_text = _nearest_text_before_status_img(img)
        mirror_name = _mirror_name_from_text(prev_text) if prev_text else None

        if mirror_name and mirror_name not in status_map:
            status_map[mirror_name] = status_url
            debug(f"Mapped status image for mirror: {mirror_name} -> {status_url}")

    return status_map
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def image_has_green(image_data):
    """
    Check whether an image contains at least one greenish pixel.

    The image is decoded from raw bytes and converted to RGB. A pixel counts
    as greenish when its green channel is above 100 and more than 1.3 times
    both its red and its blue channel. The caller treats a True result as
    "online".

    Args:
        image_data: Raw encoded image bytes.

    Returns:
        True as soon as one greenish pixel is found, False if none is found.
        If the image cannot be opened or analyzed, the error is logged via
        debug() and True is returned so that valid links are not skipped.
    """
    try:
        img = Image.open(BytesIO(image_data)).convert('RGB')

        # Iterate the pixel data lazily: any() stops at the first greenish pixel
        # and no intermediate list of all pixels is built
        return any(
            g > 100 and g > r * 1.3 and g > b * 1.3
            for r, g, b in img.getdata()
        )
    except Exception as e:
        debug(f"Error analyzing status image: {e}")
        # If we can't analyze, assume online to not skip valid links
        return True
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def fetch_status_image(status_url):
    """
    Download a status image over HTTP (10 second timeout).

    Returns a (status_url, image_bytes) tuple when the response status code
    is 200. In every other case - a different status code, or an exception
    while importing requests or performing the request (logged via debug()) -
    the result is (status_url, None).
    """
    image_bytes = None
    try:
        import requests
        reply = requests.get(status_url, timeout=10)
        if reply.status_code == 200:
            image_bytes = reply.content
    except Exception as e:
        debug(f"Error fetching status image {status_url}: {e}")
    return (status_url, image_bytes)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def check_links_online_status(links_with_status):
    """
    Check online status for links that have status URLs.

    Each distinct status URL is fetched once with fetch_status_image() and
    judged with image_has_green(); at most 10 fetches run concurrently.

    A link is kept when:
      - it has no status URL (nothing to check),
      - its status image was judged online,
      - its status image could not be fetched (assumed online), or
      - the status check raised (no verdict recorded, link is kept).
    A link is dropped only when its status image was fetched and judged offline.

    Args:
        links_with_status: list of [href, identifier, status_url] where
            status_url can be None.

    Returns:
        list of [href, identifier] for the links that are kept, in input order.
    """
    max_parallel_fetches = 10

    # Deduplicate so a status image shared by several links is downloaded once
    status_urls = list({link[2] for link in links_with_status if link[2]})

    status_results = {}  # status_url -> True (online) / False (offline)
    if status_urls:
        # One bounded pool for all URLs: same concurrency cap as fixed-size
        # batches, but without waiting for a whole batch before starting the next
        with ThreadPoolExecutor(max_workers=min(max_parallel_fetches, len(status_urls))) as executor:
            futures = [executor.submit(fetch_status_image, url) for url in status_urls]
            for future in as_completed(futures):
                try:
                    status_url, image_data = future.result()
                    # Could not fetch -> assume online
                    status_results[status_url] = image_has_green(image_data) if image_data else True
                except Exception as e:
                    debug(f"Error checking status: {e}")

    online_links = []
    for link in links_with_status:
        href, identifier, status_url = link[0], link[1], link[2]

        if not status_url:
            # No status URL, include link (keeplinks case)
            online_links.append([href, identifier])
        elif status_url not in status_results:
            # Status check failed, include link
            online_links.append([href, identifier])
        elif status_results[status_url]:
            online_links.append([href, identifier])
            debug(f"Link online: {identifier} ({href})")
        else:
            debug(f"Link offline: {identifier} ({href})")

    return online_links
|
|
354
|
+
|
|
355
|
+
|
|
102
356
|
def extract_links_and_password_from_post(post_content, host):
|
|
103
357
|
"""
|
|
104
358
|
Extract download links and password from a forum post.
|
|
359
|
+
Returns links with status URLs for online checking.
|
|
105
360
|
"""
|
|
106
|
-
links = []
|
|
361
|
+
links = [] # [href, identifier, status_url]
|
|
107
362
|
soup = BeautifulSoup(post_content, 'html.parser')
|
|
108
363
|
|
|
364
|
+
# Build status map for FileCrypt links (handles separated status images)
|
|
365
|
+
filecrypt_status_map = build_filecrypt_status_map(soup)
|
|
366
|
+
|
|
109
367
|
for link in soup.find_all('a', href=True):
|
|
110
368
|
href = link.get('href')
|
|
111
369
|
|
|
@@ -127,12 +385,29 @@ def extract_links_and_password_from_post(post_content, host):
|
|
|
127
385
|
mirror_name = extract_mirror_name_from_link(link)
|
|
128
386
|
identifier = mirror_name if mirror_name else crypter_type
|
|
129
387
|
|
|
130
|
-
|
|
131
|
-
|
|
388
|
+
# Get status URL - try extraction first, then status map, then generation
|
|
389
|
+
status_url = extract_status_url_from_html(link, crypter_type)
|
|
390
|
+
|
|
391
|
+
if not status_url and crypter_type == "filecrypt" and mirror_name:
|
|
392
|
+
# Try to find in status map by mirror name (normalized, case-insensitive, TLD-stripped)
|
|
393
|
+
mirror_normalized = normalize_mirror_name(mirror_name)
|
|
394
|
+
for map_key, map_url in filecrypt_status_map.items():
|
|
395
|
+
map_key_normalized = normalize_mirror_name(map_key)
|
|
396
|
+
if mirror_normalized in map_key_normalized or map_key_normalized in mirror_normalized:
|
|
397
|
+
status_url = map_url
|
|
398
|
+
break
|
|
399
|
+
|
|
400
|
+
if not status_url:
|
|
401
|
+
status_url = generate_status_url(href, crypter_type)
|
|
402
|
+
|
|
403
|
+
# Avoid duplicates (check href and identifier)
|
|
404
|
+
if not any(l[0] == href and l[1] == identifier for l in links):
|
|
405
|
+
links.append([href, identifier, status_url])
|
|
406
|
+
status_info = f"status: {status_url}" if status_url else "no status URL"
|
|
132
407
|
if mirror_name:
|
|
133
|
-
debug(f"Found {crypter_type} link for mirror: {mirror_name}")
|
|
408
|
+
debug(f"Found {crypter_type} link for mirror: {mirror_name} ({status_info})")
|
|
134
409
|
else:
|
|
135
|
-
debug(f"Found {crypter_type} link (
|
|
410
|
+
debug(f"Found {crypter_type} link ({status_info})")
|
|
136
411
|
|
|
137
412
|
password = ""
|
|
138
413
|
if links:
|
|
@@ -146,6 +421,7 @@ def get_dl_download_links(shared_state, url, mirror, title, password):
|
|
|
146
421
|
KEEP THE SIGNATURE EVEN IF SOME PARAMETERS ARE UNUSED!
|
|
147
422
|
|
|
148
423
|
DL source handler - extracts links and password from forum thread.
|
|
424
|
+
Iterates through posts to find one with online links.
|
|
149
425
|
|
|
150
426
|
Note: The password parameter is unused intentionally - password must be extracted from the post.
|
|
151
427
|
"""
|
|
@@ -166,24 +442,35 @@ def get_dl_download_links(shared_state, url, mirror, title, password):
|
|
|
166
442
|
|
|
167
443
|
soup = BeautifulSoup(response.text, 'html.parser')
|
|
168
444
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
445
|
+
# Get all posts in thread
|
|
446
|
+
posts = soup.select('article.message--post')
|
|
447
|
+
if not posts:
|
|
448
|
+
info(f"Could not find any posts in thread: {url}")
|
|
172
449
|
return {"links": [], "password": ""}
|
|
173
450
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
451
|
+
# Iterate through posts to find one with online links
|
|
452
|
+
for post_index, post in enumerate(posts):
|
|
453
|
+
post_content = post.select_one('div.bbWrapper')
|
|
454
|
+
if not post_content:
|
|
455
|
+
continue
|
|
178
456
|
|
|
179
|
-
|
|
457
|
+
links_with_status, extracted_password = extract_links_and_password_from_post(str(post_content), host)
|
|
180
458
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
459
|
+
if not links_with_status:
|
|
460
|
+
continue
|
|
461
|
+
|
|
462
|
+
# Check which links are online
|
|
463
|
+
online_links = check_links_online_status(links_with_status)
|
|
184
464
|
|
|
185
|
-
|
|
186
|
-
|
|
465
|
+
if online_links:
|
|
466
|
+
post_info = "first post" if post_index == 0 else f"post #{post_index + 1}"
|
|
467
|
+
debug(f"Found {len(online_links)} online link(s) in {post_info} for: {title}")
|
|
468
|
+
return {"links": online_links, "password": extracted_password}
|
|
469
|
+
else:
|
|
470
|
+
debug(f"All links in post #{post_index + 1} are offline, checking next post...")
|
|
471
|
+
|
|
472
|
+
info(f"No online download links found in any post: {url}")
|
|
473
|
+
return {"links": [], "password": ""}
|
|
187
474
|
|
|
188
475
|
except Exception as e:
|
|
189
476
|
info(f"Error extracting download links from {url}: {e}")
|