quasarr-2.4.8-py3-none-any.whl → quasarr-2.4.10-py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry and is provided for informational purposes only.
Potentially problematic release.
This version of quasarr might be problematic.
- quasarr/__init__.py +134 -70
- quasarr/api/__init__.py +40 -31
- quasarr/api/arr/__init__.py +116 -108
- quasarr/api/captcha/__init__.py +262 -137
- quasarr/api/config/__init__.py +76 -46
- quasarr/api/packages/__init__.py +138 -102
- quasarr/api/sponsors_helper/__init__.py +29 -16
- quasarr/api/statistics/__init__.py +19 -19
- quasarr/downloads/__init__.py +165 -72
- quasarr/downloads/linkcrypters/al.py +35 -18
- quasarr/downloads/linkcrypters/filecrypt.py +107 -52
- quasarr/downloads/linkcrypters/hide.py +5 -6
- quasarr/downloads/packages/__init__.py +342 -177
- quasarr/downloads/sources/al.py +191 -100
- quasarr/downloads/sources/by.py +31 -13
- quasarr/downloads/sources/dd.py +27 -14
- quasarr/downloads/sources/dj.py +1 -3
- quasarr/downloads/sources/dl.py +126 -71
- quasarr/downloads/sources/dt.py +11 -5
- quasarr/downloads/sources/dw.py +28 -14
- quasarr/downloads/sources/he.py +32 -24
- quasarr/downloads/sources/mb.py +19 -9
- quasarr/downloads/sources/nk.py +14 -10
- quasarr/downloads/sources/nx.py +8 -18
- quasarr/downloads/sources/sf.py +45 -20
- quasarr/downloads/sources/sj.py +1 -3
- quasarr/downloads/sources/sl.py +9 -5
- quasarr/downloads/sources/wd.py +32 -12
- quasarr/downloads/sources/wx.py +35 -21
- quasarr/providers/auth.py +42 -37
- quasarr/providers/cloudflare.py +28 -30
- quasarr/providers/hostname_issues.py +2 -1
- quasarr/providers/html_images.py +2 -2
- quasarr/providers/html_templates.py +22 -14
- quasarr/providers/imdb_metadata.py +149 -80
- quasarr/providers/jd_cache.py +131 -39
- quasarr/providers/log.py +1 -1
- quasarr/providers/myjd_api.py +260 -196
- quasarr/providers/notifications.py +53 -41
- quasarr/providers/obfuscated.py +9 -4
- quasarr/providers/sessions/al.py +71 -55
- quasarr/providers/sessions/dd.py +21 -14
- quasarr/providers/sessions/dl.py +30 -19
- quasarr/providers/sessions/nx.py +23 -14
- quasarr/providers/shared_state.py +292 -141
- quasarr/providers/statistics.py +75 -43
- quasarr/providers/utils.py +33 -27
- quasarr/providers/version.py +45 -14
- quasarr/providers/web_server.py +10 -5
- quasarr/search/__init__.py +30 -18
- quasarr/search/sources/al.py +124 -73
- quasarr/search/sources/by.py +110 -59
- quasarr/search/sources/dd.py +57 -35
- quasarr/search/sources/dj.py +69 -48
- quasarr/search/sources/dl.py +159 -100
- quasarr/search/sources/dt.py +110 -74
- quasarr/search/sources/dw.py +121 -61
- quasarr/search/sources/fx.py +108 -62
- quasarr/search/sources/he.py +78 -49
- quasarr/search/sources/mb.py +96 -48
- quasarr/search/sources/nk.py +80 -50
- quasarr/search/sources/nx.py +91 -62
- quasarr/search/sources/sf.py +171 -106
- quasarr/search/sources/sj.py +69 -48
- quasarr/search/sources/sl.py +115 -71
- quasarr/search/sources/wd.py +67 -44
- quasarr/search/sources/wx.py +188 -123
- quasarr/storage/config.py +65 -52
- quasarr/storage/setup.py +238 -140
- quasarr/storage/sqlite_database.py +10 -4
- {quasarr-2.4.8.dist-info → quasarr-2.4.10.dist-info}/METADATA +4 -3
- quasarr-2.4.10.dist-info/RECORD +81 -0
- quasarr-2.4.8.dist-info/RECORD +0 -81
- {quasarr-2.4.8.dist-info → quasarr-2.4.10.dist-info}/WHEEL +0 -0
- {quasarr-2.4.8.dist-info → quasarr-2.4.10.dist-info}/entry_points.txt +0 -0
- {quasarr-2.4.8.dist-info → quasarr-2.4.10.dist-info}/licenses/LICENSE +0 -0
quasarr/downloads/sources/by.py
CHANGED

@@ -11,7 +11,7 @@ import requests
 from bs4 import BeautifulSoup

 from quasarr.providers.hostname_issues import mark_hostname_issue
-from quasarr.providers.log import
+from quasarr.providers.log import debug, info

 hostname = "by"

@@ -25,7 +25,7 @@ def get_by_download_links(shared_state, url, mirror, title, password):

 by = shared_state.values["config"]("Hostnames").get("by")
 headers = {
-
+"User-Agent": shared_state.values["user_agent"],
 }

 mirror_lower = mirror.lower() if mirror else None
@@ -35,9 +35,11 @@ def get_by_download_links(shared_state, url, mirror, title, password):
 r = requests.get(url, headers=headers, timeout=10)
 r.raise_for_status()
 soup = BeautifulSoup(r.text, "html.parser")
-frames = [
+frames = [
+iframe.get("src") for iframe in soup.find_all("iframe") if iframe.get("src")
+]

-frame_urls = [src for src in frames if f
+frame_urls = [src for src in frames if f"https://{by}" in src]
 if not frame_urls:
 debug(f"No iframe hosts found on {url} for {title}.")
 return []
@@ -64,8 +66,12 @@ def get_by_download_links(shared_state, url, mirror, title, password):
 url_hosters = []
 for content, source in async_results:
 host_soup = BeautifulSoup(content, "html.parser")
-link = host_soup.find(
-
+link = host_soup.find(
+"a",
+href=re.compile(
+r"https?://(?:www\.)?(?:hide\.cx|filecrypt\.(?:cc|co|to))/container/"
+),
+)

 # Fallback to the old format
 if not link:
@@ -79,7 +85,9 @@ def get_by_download_links(shared_state, url, mirror, title, password):
 hostname_lower = link_hostname.lower()

 if mirror_lower and mirror_lower not in hostname_lower:
-debug(
+debug(
+f'Skipping link from "{link_hostname}" (not the desired mirror "{mirror}")!'
+)
 continue

 url_hosters.append((href, link_hostname))
@@ -87,7 +95,9 @@ def get_by_download_links(shared_state, url, mirror, title, password):
 def resolve_redirect(href_hostname):
 href, hostname = href_hostname
 try:
-rq = requests.get(
+rq = requests.get(
+href, headers=headers, timeout=10, allow_redirects=True
+)
 rq.raise_for_status()
 if "/404.html" in rq.url:
 info(f"Link leads to 404 page for {hostname}: {r.url}")
@@ -96,7 +106,9 @@ def get_by_download_links(shared_state, url, mirror, title, password):
 return rq.url
 except Exception as e:
 info(f"Error resolving link for {hostname}: {e}")
-mark_hostname_issue(
+mark_hostname_issue(
+hostname, "download", str(e) if "e" in dir() else "Download error"
+)
 return None

 for pair in url_hosters:
@@ -106,16 +118,22 @@ def get_by_download_links(shared_state, url, mirror, title, password):
 if not link_hostname:
 link_hostname = urlparse(resolved_url).hostname

-if
-
+if (
+resolved_url
+and link_hostname
+and link_hostname.startswith(
+("ddownload", "rapidgator", "turbobit", "filecrypt")
+)
+):
 if "rapidgator" in link_hostname:
 links.insert(0, [resolved_url, link_hostname])
 else:
 links.append([resolved_url, link_hostname])

-
 except Exception as e:
 info(f"Error loading BY download links: {e}")
-mark_hostname_issue(
+mark_hostname_issue(
+hostname, "download", str(e) if "e" in dir() else "Download error"
+)

 return {"links": links}
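For context, a minimal runnable sketch of the iframe/container extraction pattern these hunks introduce, using inline sample HTML instead of a live request; the "by" value and the HTML are placeholders, not taken from the package.

```python
import re
from bs4 import BeautifulSoup

by = "example-host.tld"  # assumed placeholder for the configured "by" hostname
html = f"""
<iframe src="https://{by}/frame/1"></iframe>
<iframe src="https://other.tld/frame/2"></iframe>
<a href="https://filecrypt.cc/container/abc123.html">Mirror</a>
"""

soup = BeautifulSoup(html, "html.parser")

# Collect iframe sources, then keep only those pointing at the configured host
frames = [
    iframe.get("src") for iframe in soup.find_all("iframe") if iframe.get("src")
]
frame_urls = [src for src in frames if f"https://{by}" in src]

# The updated anchor lookup accepts hide.cx as well as filecrypt.* container links
link = soup.find(
    "a",
    href=re.compile(r"https?://(?:www\.)?(?:hide\.cx|filecrypt\.(?:cc|co|to))/container/"),
)

print(frame_urls)                      # ['https://example-host.tld/frame/1']
print(link["href"] if link else None)  # https://filecrypt.cc/container/abc123.html
```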
quasarr/downloads/sources/dd.py
CHANGED

@@ -3,8 +3,11 @@
 # Project by https://github.com/rix1337

 from quasarr.providers.hostname_issues import mark_hostname_issue
-from quasarr.providers.log import
-from quasarr.providers.sessions.dd import
+from quasarr.providers.log import debug, info
+from quasarr.providers.sessions.dd import (
+create_and_persist_session,
+retrieve_and_validate_session,
+)

 hostname = "dd"

@@ -35,17 +38,17 @@ def get_dd_download_links(shared_state, url, mirror, title, password):
 "web-1080p-x265",
 "web-2160p-x265-hdr",
 "movie-1080p-x265",
-"movie-2160p-webdl-x265-hdr"
+"movie-2160p-webdl-x265-hdr",
 ]

 headers = {
-
+"User-Agent": shared_state.values["user_agent"],
 }

 try:
 release_list = []
 for page in range(0, 100, 20):
-api_url = f
+api_url = f"https://{dd}/index/search/keyword/{title}/qualities/{','.join(qualities)}/from/{page}/search"

 r = dd_session.get(api_url, headers=headers, timeout=10)
 r.raise_for_status()
@@ -56,23 +59,29 @@ def get_dd_download_links(shared_state, url, mirror, title, password):
 for release in release_list:
 try:
 if release.get("fake"):
-debug(
+debug(
+f"Release {release.get('release')} marked as fake. Invalidating DD session..."
+)
 create_and_persist_session(shared_state)
 return {"links": []}
 elif release.get("release") == title:
 filtered_links = []
 for link in release["links"]:
 if mirror and mirror not in link["hostname"]:
-debug(
+debug(
+f'Skipping link from "{link["hostname"]}" (not the desired mirror "{mirror}")!'
+)
 continue

 if any(
-
-
-
-
+existing_link["hostname"] == link["hostname"]
+and existing_link["url"].endswith(".mkv")
+and link["url"].endswith(".mkv")
+for existing_link in filtered_links
 ):
-debug(
+debug(
+f"Skipping duplicate `.mkv` link from {link['hostname']}"
+)
 continue
 filtered_links.append(link)

@@ -81,11 +90,15 @@ def get_dd_download_links(shared_state, url, mirror, title, password):
 break
 except Exception as e:
 info(f"Error parsing DD download: {e}")
-mark_hostname_issue(
+mark_hostname_issue(
+hostname, "download", str(e) if "e" in dir() else "Download error"
+)
 continue

 except Exception as e:
 info(f"Error loading DD download: {e}")
-mark_hostname_issue(
+mark_hostname_issue(
+hostname, "download", str(e) if "e" in dir() else "Download error"
+)

 return {"links": links}
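A minimal sketch of the duplicate-".mkv" filtering visible in the reformatted "if any(...)" block above: per hostname, only the first direct .mkv link is kept. The link data below is invented for illustration.

```python
# Sample link list standing in for release["links"]
links = [
    {"hostname": "rapidgator", "url": "https://rg.example/a.mkv"},
    {"hostname": "rapidgator", "url": "https://rg.example/b.mkv"},  # duplicate .mkv, skipped
    {"hostname": "ddownload", "url": "https://ddl.example/a.mkv"},
]

filtered_links = []
for link in links:
    # Skip a link if the same hostname already contributed a direct .mkv link
    if any(
        existing_link["hostname"] == link["hostname"]
        and existing_link["url"].endswith(".mkv")
        and link["url"].endswith(".mkv")
        for existing_link in filtered_links
    ):
        continue
    filtered_links.append(link)

print([l["url"] for l in filtered_links])
# ['https://rg.example/a.mkv', 'https://ddl.example/a.mkv']
```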
quasarr/downloads/sources/dj.py
CHANGED
quasarr/downloads/sources/dl.py
CHANGED

@@ -7,14 +7,30 @@ import re
 from bs4 import BeautifulSoup, NavigableString

 from quasarr.providers.hostname_issues import mark_hostname_issue
-from quasarr.providers.log import
-from quasarr.providers.sessions.dl import
-
+from quasarr.providers.log import debug, info
+from quasarr.providers.sessions.dl import (
+fetch_via_requests_session,
+invalidate_session,
+retrieve_and_validate_session,
+)
+from quasarr.providers.utils import check_links_online_status, generate_status_url

 hostname = "dl"

 # Common TLDs to strip for mirror name comparison
-COMMON_TLDS = {
+COMMON_TLDS = {
+".com",
+".net",
+".io",
+".cc",
+".to",
+".me",
+".org",
+".co",
+".de",
+".eu",
+".info",
+}


 def normalize_mirror_name(name):
@@ -27,7 +43,7 @@ def normalize_mirror_name(name):
 normalized = name.lower().strip()
 for tld in COMMON_TLDS:
 if normalized.endswith(tld):
-normalized = normalized[
+normalized = normalized[: -len(tld)]
 break
 return normalized

@@ -38,21 +54,24 @@ def extract_password_from_post(soup, host):
 Returns empty string if no password found or if explicitly marked as 'no password'.
 """
 post_text = soup.get_text()
-post_text = re.sub(r
+post_text = re.sub(r"\s+", " ", post_text).strip()

-password_pattern = r
+password_pattern = r"(?:passwort|password|pass|pw)[\s:]+([a-zA-Z0-9._-]{2,50})"
 match = re.search(password_pattern, post_text, re.IGNORECASE)

 if match:
 password = match.group(1).strip()
-if not re.match(
-
+if not re.match(
+r"^(?:download|mirror|link|episode|info|mediainfo|spoiler|hier|click|klick|kein|none|no)",
+password,
+re.IGNORECASE,
+):
 debug(f"Found password: {password}")
 return password

 no_password_patterns = [
-r
-r
+r"(?:passwort|password|pass|pw)[\s:]*(?:kein(?:es)?|none|no|nicht|not|nein|-|–|—)",
+r"(?:kein(?:es)?|none|no|nicht|not|nein)\s*(?:passwort|password|pass|pw)",
 ]

 for pattern in no_password_patterns:
@@ -70,21 +89,30 @@ def extract_mirror_name_from_link(link_element):
 Extract the mirror/hoster name from the link text or nearby text.
 """
 link_text = link_element.get_text(strip=True)
-common_non_hosters = {
+common_non_hosters = {
+"download",
+"mirror",
+"link",
+"hier",
+"click",
+"klick",
+"code",
+"spoiler",
+}

 # Known hoster patterns for image detection
 known_hosters = {
-
-
-
-
+"rapidgator": ["rapidgator", "rg"],
+"ddownload": ["ddownload", "ddl"],
+"turbobit": ["turbobit"],
+"1fichier": ["1fichier"],
 }

 # Skip if link text is a URL
-if link_text and len(link_text) > 2 and not link_text.startswith(
-cleaned = re.sub(r
+if link_text and len(link_text) > 2 and not link_text.startswith("http"):
+cleaned = re.sub(r"[^\w\s-]", "", link_text).strip().lower()
 if cleaned and cleaned not in common_non_hosters:
-main_part = cleaned.split()[0] if
+main_part = cleaned.split()[0] if " " in cleaned else cleaned
 if 2 < len(main_part) < 30:
 return main_part

@@ -95,8 +123,8 @@ def extract_mirror_name_from_link(link_element):
 text = sibling.strip()
 if text:
 # Remove common separators like @ : -
-cleaned = re.sub(r
-cleaned = re.sub(r
+cleaned = re.sub(r"[@:\-–—\s]+$", "", text).strip().lower()
+cleaned = re.sub(r"[^\w\s.-]", "", cleaned).strip()
 if cleaned and len(cleaned) > 2 and cleaned not in common_non_hosters:
 # Take the last word as mirror name (e.g., "Rapidgator" from "Rapidgator @")
 parts = cleaned.split()
@@ -107,18 +135,20 @@ def extract_mirror_name_from_link(link_element):
 continue

 # Skip non-Tag elements
-if not hasattr(sibling,
+if not hasattr(sibling, "name") or sibling.name is None:
 continue

 # Skip spoiler elements entirely
-classes = sibling.get(
-if classes and any(
+classes = sibling.get("class", [])
+if classes and any("spoiler" in str(c).lower() for c in classes):
 continue

 # Check for images with hoster names in src/alt/data-url
-img = sibling.find(
+img = sibling.find("img") if sibling.name != "img" else sibling
 if img:
-img_identifiers = (
+img_identifiers = (
+img.get("src", "") + img.get("alt", "") + img.get("data-url", "")
+).lower()
 for hoster, patterns in known_hosters.items():
 if any(pattern in img_identifiers for pattern in patterns):
 return hoster
@@ -127,10 +157,14 @@ def extract_mirror_name_from_link(link_element):
 # Skip if text is too long - likely NFO content or other non-mirror text
 if len(sibling_text) > 30:
 continue
-if
-
+if (
+sibling_text
+and len(sibling_text) > 2
+and sibling_text not in common_non_hosters
+):
+cleaned = re.sub(r"[^\w\s-]", "", sibling_text).strip()
 if cleaned and 2 < len(cleaned) < 30:
-return cleaned.split()[0] if
+return cleaned.split()[0] if " " in cleaned else cleaned

 return None

@@ -144,31 +178,31 @@ def extract_status_url_from_html(link_element, crypter_type):
 return None

 # Look for status image in the link itself
-img = link_element.find(
+img = link_element.find("img")
 if img:
-for attr in [
-url = img.get(attr,
-if
+for attr in ["src", "data-url"]:
+url = img.get(attr, "")
+if "filecrypt.cc/Stat/" in url:
 return url

 # Look in siblings
 for sibling in link_element.next_siblings:
-if not hasattr(sibling,
+if not hasattr(sibling, "name") or sibling.name is None:
 continue
-if sibling.name ==
-for attr in [
-url = sibling.get(attr,
-if
+if sibling.name == "img":
+for attr in ["src", "data-url"]:
+url = sibling.get(attr, "")
+if "filecrypt.cc/Stat/" in url:
 return url
 # Check nested images
-nested_img = sibling.find(
+nested_img = sibling.find("img") if hasattr(sibling, "find") else None
 if nested_img:
-for attr in [
-url = nested_img.get(attr,
-if
+for attr in ["src", "data-url"]:
+url = nested_img.get(attr, "")
+if "filecrypt.cc/Stat/" in url:
 return url
 # Stop at next link
-if sibling.name ==
+if sibling.name == "a":
 break

 return None
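Before the remaining dl.py hunks, a minimal sketch of two helpers reconstructed above: the TLD stripping in normalize_mirror_name and the password regex from extract_password_from_post. The test strings are illustrative only.

```python
import re

# TLD set as shown in the diff
COMMON_TLDS = {".com", ".net", ".io", ".cc", ".to", ".me", ".org", ".co", ".de", ".eu", ".info"}

def normalize_mirror_name(name):
    """Lowercase the mirror name and strip one trailing common TLD, as the diff does."""
    normalized = name.lower().strip()
    for tld in COMMON_TLDS:
        if normalized.endswith(tld):
            normalized = normalized[: -len(tld)]
            break
    return normalized

# Password pattern as shown in the diff
password_pattern = r"(?:passwort|password|pass|pw)[\s:]+([a-zA-Z0-9._-]{2,50})"

print(normalize_mirror_name("Rapidgator.net"))            # rapidgator
match = re.search(password_pattern, "Passwort: scene-pw1", re.IGNORECASE)
print(match.group(1) if match else None)                   # scene-pw1
```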
@@ -183,11 +217,11 @@ def build_filecrypt_status_map(soup):
 status_map = {}

 # Find all FileCrypt status images in the post
-for img in soup.find_all(
+for img in soup.find_all("img"):
 status_url = None
-for attr in [
-url = img.get(attr,
-if
+for attr in ["src", "data-url"]:
+url = img.get(attr, "")
+if "filecrypt.cc/Stat/" in url:
 status_url = url
 break

@@ -203,7 +237,7 @@ def build_filecrypt_status_map(soup):
 # Get all previous text content before this image
 prev_text = ""
 for prev in parent.previous_siblings:
-if hasattr(prev,
+if hasattr(prev, "get_text"):
 prev_text = prev.get_text(strip=True)
 elif isinstance(prev, NavigableString):
 prev_text = prev.strip()
@@ -215,7 +249,7 @@ def build_filecrypt_status_map(soup):
 if isinstance(prev, NavigableString) and prev.strip():
 prev_text = prev.strip()
 break
-elif hasattr(prev,
+elif hasattr(prev, "get_text"):
 text = prev.get_text(strip=True)
 if text:
 prev_text = text
@@ -223,7 +257,7 @@ def build_filecrypt_status_map(soup):

 if prev_text:
 # Clean up the text to get mirror name
-cleaned = re.sub(r
+cleaned = re.sub(r"[^\w\s.-]", "", prev_text).strip().lower()
 # Take last word/phrase as it's likely the mirror name
 parts = cleaned.split()
 if parts:
@@ -242,24 +276,24 @@ def extract_links_and_password_from_post(post_content, host):
 Returns links with status URLs for online checking.
 """
 links = []  # [href, identifier, status_url]
-soup = BeautifulSoup(post_content,
+soup = BeautifulSoup(post_content, "html.parser")

 # Build status map for FileCrypt links (handles separated status images)
 filecrypt_status_map = build_filecrypt_status_map(soup)

-for link in soup.find_all(
-href = link.get(
+for link in soup.find_all("a", href=True):
+href = link.get("href")

-if href.startswith(
+if href.startswith("/") or host in href:
 continue

-if re.search(r
+if re.search(r"filecrypt\.", href, re.IGNORECASE):
 crypter_type = "filecrypt"
-elif re.search(r
+elif re.search(r"hide\.", href, re.IGNORECASE):
 crypter_type = "hide"
-elif re.search(r
+elif re.search(r"keeplinks\.", href, re.IGNORECASE):
 crypter_type = "keeplinks"
-elif re.search(r
+elif re.search(r"tolink\.", href, re.IGNORECASE):
 crypter_type = "tolink"
 else:
 debug(f"Unsupported link crypter/hoster found: {href}")
@@ -276,7 +310,10 @@ def extract_links_and_password_from_post(post_content, host):
 mirror_normalized = normalize_mirror_name(mirror_name)
 for map_key, map_url in filecrypt_status_map.items():
 map_key_normalized = normalize_mirror_name(map_key)
-if
+if (
+mirror_normalized in map_key_normalized
+or map_key_normalized in mirror_normalized
+):
 status_url = map_url
 break

@@ -288,7 +325,9 @@ def extract_links_and_password_from_post(post_content, host):
 links.append([href, identifier, status_url])
 status_info = f"status: {status_url}" if status_url else "no status URL"
 if mirror_name:
-debug(
+debug(
+f"Found {crypter_type} link for mirror: {mirror_name} ({status_info})"
+)
 else:
 debug(f"Found {crypter_type} link ({status_info})")

@@ -318,16 +357,18 @@ def get_dl_download_links(shared_state, url, mirror, title, password):
 return {"links": [], "password": ""}

 try:
-response = fetch_via_requests_session(
+response = fetch_via_requests_session(
+shared_state, method="GET", target_url=url, timeout=30
+)

 if response.status_code != 200:
 info(f"Failed to load thread page: {url} (Status: {response.status_code})")
 return {"links": [], "password": ""}

-soup = BeautifulSoup(response.text,
+soup = BeautifulSoup(response.text, "html.parser")

 # Get all posts in thread
-posts = soup.select(
+posts = soup.select("article.message--post")
 if not posts:
 info(f"Could not find any posts in thread: {url}")
 return {"links": [], "password": ""}
@@ -338,11 +379,13 @@ def get_dl_download_links(shared_state, url, mirror, title, password):

 # Iterate through posts to find one with verified online links
 for post_index, post in enumerate(posts):
-post_content = post.select_one(
+post_content = post.select_one("div.bbWrapper")
 if not post_content:
 continue

-links_with_status, extracted_password =
+links_with_status, extracted_password = (
+extract_links_and_password_from_post(str(post_content), host)
+)

 if not links_with_status:
 continue
@@ -355,22 +398,32 @@ def get_dl_download_links(shared_state, url, mirror, title, password):
 if fallback_links is None:
 fallback_links = [[link[0], link[1]] for link in links_with_status]
 fallback_password = extracted_password
-debug(
+debug(
+f"Post #{post_index + 1} has links but no status URLs, saving as fallback..."
+)
 continue

 # Check which links are online
 online_links = check_links_online_status(links_with_status, shared_state)

 if online_links:
-post_info =
-
+post_info = (
+"first post" if post_index == 0 else f"post #{post_index + 1}"
+)
+debug(
+f"Found {len(online_links)} verified online link(s) in {post_info} for: {title}"
+)
 return {"links": online_links, "password": extracted_password}
 else:
-debug(
+debug(
+f"All links in post #{post_index + 1} are offline, checking next post..."
+)

 # No verified online links found - return fallback if available
 if fallback_links:
-debug(
+debug(
+f"No verified online links found, returning unverified fallback links for: {title}"
+)
 return {"links": fallback_links, "password": fallback_password}

 info(f"No online download links found in any post: {url}")
@@ -378,6 +431,8 @@ def get_dl_download_links(shared_state, url, mirror, title, password):

 except Exception as e:
 info(f"Error extracting download links from {url}: {e}")
-mark_hostname_issue(
+mark_hostname_issue(
+hostname, "download", str(e) if "e" in dir() else "Download error"
+)
 invalidate_session(shared_state)
 return {"links": [], "password": ""}
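A minimal sketch of the crypter-type classification shown in extract_links_and_password_from_post above; classify_crypter is a hypothetical wrapper name, since the diff performs these checks inline.

```python
import re

def classify_crypter(href):
    """Sort container links into the four crypter buckets the diff recognizes."""
    if re.search(r"filecrypt\.", href, re.IGNORECASE):
        return "filecrypt"
    elif re.search(r"hide\.", href, re.IGNORECASE):
        return "hide"
    elif re.search(r"keeplinks\.", href, re.IGNORECASE):
        return "keeplinks"
    elif re.search(r"tolink\.", href, re.IGNORECASE):
        return "tolink"
    return None  # unsupported link crypter/hoster

print(classify_crypter("https://filecrypt.cc/container/abc.html"))  # filecrypt
print(classify_crypter("https://hide.cx/container/xyz"))            # hide
```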
quasarr/downloads/sources/dt.py
CHANGED

@@ -8,7 +8,7 @@ from urllib.parse import urlparse
 import requests
 from bs4 import BeautifulSoup

-from quasarr.providers.hostname_issues import
+from quasarr.providers.hostname_issues import clear_hostname_issue, mark_hostname_issue
 from quasarr.providers.log import info

 hostname = "dt"
@@ -18,9 +18,9 @@ def derive_mirror_from_url(url):
 """Extract hoster name from URL hostname."""
 try:
 mirror_hostname = urlparse(url).netloc.lower()
-if mirror_hostname.startswith(
+if mirror_hostname.startswith("www."):
 mirror_hostname = mirror_hostname[4:]
-parts = mirror_hostname.split(
+parts = mirror_hostname.split(".")
 if len(parts) >= 2:
 return parts[-2]
 return mirror_hostname
@@ -58,7 +58,9 @@ def get_dt_download_links(shared_state, url, mirror, title, password):
 anchors = body.find_all("a", href=True)

 except Exception as e:
-info(
+info(
+f"DT site has been updated. Grabbing download links for {title} not possible! ({e})"
+)
 mark_hostname_issue(hostname, "download", str(e))
 return None

@@ -87,7 +89,11 @@ def get_dt_download_links(shared_state, url, mirror, title, password):
 if u not in seen:
 seen.add(u)
 low = u.lower()
-if
+if (
+low.startswith(("http://", "https://"))
+and "imdb.com" not in low
+and "?ref=" not in low
+):
 if not mirror or mirror in u:
 mirror_name = derive_mirror_from_url(u)
 filtered.append([u, mirror_name])