quasarr 1.20.8__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quasarr/__init__.py +7 -0
- quasarr/api/arr/__init__.py +4 -1
- quasarr/downloads/__init__.py +93 -27
- quasarr/downloads/sources/dl.py +196 -0
- quasarr/downloads/sources/wx.py +127 -0
- quasarr/providers/html_images.py +2 -0
- quasarr/providers/myjd_api.py +35 -4
- quasarr/providers/sessions/dl.py +175 -0
- quasarr/providers/shared_state.py +21 -5
- quasarr/providers/version.py +1 -1
- quasarr/search/__init__.py +9 -0
- quasarr/search/sources/dl.py +316 -0
- quasarr/search/sources/wx.py +342 -0
- quasarr/storage/config.py +7 -1
- quasarr/storage/setup.py +10 -2
- {quasarr-1.20.8.dist-info → quasarr-1.21.0.dist-info}/METADATA +3 -1
- {quasarr-1.20.8.dist-info → quasarr-1.21.0.dist-info}/RECORD +21 -16
- {quasarr-1.20.8.dist-info → quasarr-1.21.0.dist-info}/WHEEL +0 -0
- {quasarr-1.20.8.dist-info → quasarr-1.21.0.dist-info}/entry_points.txt +0 -0
- {quasarr-1.20.8.dist-info → quasarr-1.21.0.dist-info}/licenses/LICENSE +0 -0
- {quasarr-1.20.8.dist-info → quasarr-1.21.0.dist-info}/top_level.txt +0 -0
quasarr/providers/sessions/dl.py
@@ -0,0 +1,175 @@
+# -*- coding: utf-8 -*-
+# Quasarr
+# Project by https://github.com/rix1337
+
+import base64
+import pickle
+
+import requests
+from bs4 import BeautifulSoup
+
+from quasarr.providers.log import info, debug
+
+hostname = "dl"
+
+
+def create_and_persist_session(shared_state):
+    """
+    Create and persist a session using user and password.
+
+    Args:
+        shared_state: Shared state object
+
+    Returns:
+        requests.Session or None
+    """
+    cfg = shared_state.values["config"]("Hostnames")
+    host = cfg.get(hostname)
+    credentials_cfg = shared_state.values["config"](hostname.upper())
+
+    user = credentials_cfg.get("user")
+    password = credentials_cfg.get("password")
+
+    if not user or not password:
+        info(f'Missing credentials for: "{hostname}" - user and password are required')
+        return None
+
+    sess = requests.Session()
+
+    # Set user agent
+    ua = shared_state.values["user_agent"]
+    sess.headers.update({'User-Agent': ua})
+
+    try:
+        # Step 1: Get login page to retrieve CSRF token
+        login_page_url = f'https://www.{host}/login/'
+        login_page = sess.get(login_page_url, timeout=30)
+
+        if login_page.status_code != 200:
+            info(f'Failed to load login page for: "{hostname}" - Status {login_page.status_code}')
+            return None
+
+        # Extract CSRF token from login form
+        soup = BeautifulSoup(login_page.text, 'html.parser')
+        csrf_input = soup.find('input', {'name': '_xfToken'})
+
+        if not csrf_input or not csrf_input.get('value'):
+            info(f'Could not find CSRF token on login page for: "{hostname}"')
+            return None
+
+        csrf_token = csrf_input['value']
+
+        # Step 2: Submit login form
+        login_data = {
+            'login': user,
+            'password': password,
+            '_xfToken': csrf_token,
+            'remember': '1',
+            '_xfRedirect': f'https://www.{host}/'
+        }
+
+        login_url = f'https://www.{host}/login/login'
+        login_response = sess.post(login_url, data=login_data, timeout=30)
+
+        # Step 3: Verify login success
+        # Check if we're logged in by accessing the main page
+        verify_response = sess.get(f'https://www.{host}/', timeout=30)
+
+        if 'data-logged-in="true"' not in verify_response.text:
+            info(f'Login verification failed for: "{hostname}" - invalid credentials or login failed')
+            return None
+
+        info(f'Session successfully created for: "{hostname}" using user/password')
+    except Exception as e:
+        info(f'Failed to create session for: "{hostname}" - {e}')
+        return None
+
+    # Persist session to database
+    blob = pickle.dumps(sess)
+    token = base64.b64encode(blob).decode("utf-8")
+    shared_state.values["database"]("sessions").update_store(hostname, token)
+
+    return sess
+
+
+def retrieve_and_validate_session(shared_state):
+    """
+    Retrieve session from database or create a new one.
+
+    Args:
+        shared_state: Shared state object
+
+    Returns:
+        requests.Session or None
+    """
+    db = shared_state.values["database"]("sessions")
+    token = db.retrieve(hostname)
+    if not token:
+        return create_and_persist_session(shared_state)
+
+    try:
+        blob = base64.b64decode(token.encode("utf-8"))
+        sess = pickle.loads(blob)
+        if not isinstance(sess, requests.Session):
+            raise ValueError("Not a Session")
+    except Exception as e:
+        debug(f"{hostname}: session load failed: {e}")
+        return create_and_persist_session(shared_state)
+
+    return sess
+
+
+def invalidate_session(shared_state):
+    """
+    Invalidate the current session.
+
+    Args:
+        shared_state: Shared state object
+    """
+    db = shared_state.values["database"]("sessions")
+    db.delete(hostname)
+    debug(f'Session for "{hostname}" marked as invalid!')
+
+
+def _persist_session_to_db(shared_state, sess):
+    """
+    Serialize & store the given requests.Session into the database under `hostname`.
+
+    Args:
+        shared_state: Shared state object
+        sess: requests.Session to persist
+    """
+    blob = pickle.dumps(sess)
+    token = base64.b64encode(blob).decode("utf-8")
+    shared_state.values["database"]("sessions").update_store(hostname, token)
+
+
+def fetch_via_requests_session(shared_state, method: str, target_url: str, post_data: dict = None, get_params: dict = None, timeout: int = 30):
+    """
+    Execute request using the session.
+
+    Args:
+        shared_state: Shared state object
+        method: "GET" or "POST"
+        target_url: URL to fetch
+        post_data: POST data (for POST requests)
+        get_params: URL parameters (for GET requests)
+        timeout: Request timeout in seconds
+
+    Returns:
+        Response object
+    """
+    sess = retrieve_and_validate_session(shared_state)
+    if not sess:
+        raise Exception(f"Could not retrieve valid session for {hostname}")
+
+    # Execute request
+    if method.upper() == "GET":
+        resp = sess.get(target_url, params=get_params, timeout=timeout)
+    else:  # POST
+        resp = sess.post(target_url, data=post_data, timeout=timeout)
+
+    # Re-persist cookies, since the site might have modified them during the request
+    _persist_session_to_db(shared_state, sess)
+
+    return resp
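
Note on the persistence scheme above: the module keeps a live requests.Session by pickling it and base64-encoding the bytes before writing them to the "sessions" store, then reverses the process on load. A minimal, self-contained sketch of that round-trip (the dict-based store below is a stand-in assumption, not Quasarr's actual database helper):

import base64
import pickle

import requests

fake_store = {}  # hypothetical stand-in for shared_state.values["database"]("sessions")

def save_session(key, sess):
    # pickle the live Session (cookies and headers included), base64-encode for text storage
    fake_store[key] = base64.b64encode(pickle.dumps(sess)).decode("utf-8")

def load_session(key):
    token = fake_store.get(key)
    if not token:
        return None
    sess = pickle.loads(base64.b64decode(token.encode("utf-8")))
    # guard against garbage in the store, mirroring retrieve_and_validate_session
    return sess if isinstance(sess, requests.Session) else None

s = requests.Session()
s.cookies.set("xf_session", "example")
save_session("dl", s)
restored = load_session("dl")
print(restored.cookies.get("xf_session"))  # -> "example"

Because pickle will load arbitrary objects, the isinstance check after unpickling is what keeps a corrupted store entry from being handed back as a usable session.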
quasarr/providers/shared_state.py
CHANGED
@@ -188,6 +188,7 @@ def connect_device():
 
 def get_device():
     attempts = 0
+    last_backoff_change = 0  # Track when we last changed backoff strategy
 
     while True:
         try:
@@ -199,14 +200,30 @@ def get_device():
 
         update("device", False)
 
-
-
-
-
+        # Determine sleep time based on failure count
+        if attempts <= 10:
+            # First 10 failures: 3 seconds
+            sleep_time = 3
+            if attempts == 10:
+                info(f"WARNING: {attempts} consecutive JDownloader connection errors. Switching to 1-minute intervals.")
+        elif attempts <= 15:
+            # Next 5 failures (11-15): 1 minute
+            sleep_time = 60
+            if attempts % 10 == 0:
+                info(f"WARNING: {attempts} consecutive JDownloader connection errors. Please check your credentials!")
+            if attempts == 15:
+                info(f"WARNING: Still failing after {attempts} attempts. Switching to 5-minute intervals.")
+        else:
+            # After 15 failures: 5 minutes
+            sleep_time = 300
+            if attempts % 10 == 0:
+                info(f"WARNING: {attempts} consecutive JDownloader connection errors. Please check your credentials!")
 
         if connect_device():
             break
 
+        time.sleep(sleep_time)
+
    return values["device"]
 
 
@@ -608,7 +625,6 @@ def is_valid_release(title: str,
         debug(f"Skipping {title!r} as it doesn't match sanitized search string: {search_string!r}")
         return False
 
-
     # if it's a movie search, don't allow any TV show titles (check for NO season or episode tags in the title)
     if is_movie_search:
         if not MOVIE_REGEX.match(title):
quasarr/providers/version.py
CHANGED
quasarr/search/__init__.py
CHANGED
@@ -10,6 +10,7 @@ from quasarr.search.sources.al import al_feed, al_search
 from quasarr.search.sources.by import by_feed, by_search
 from quasarr.search.sources.dd import dd_search, dd_feed
 from quasarr.search.sources.dj import dj_search, dj_feed
+from quasarr.search.sources.dl import dl_search, dl_feed
 from quasarr.search.sources.dt import dt_feed, dt_search
 from quasarr.search.sources.dw import dw_feed, dw_search
 from quasarr.search.sources.fx import fx_feed, fx_search
@@ -21,6 +22,7 @@ from quasarr.search.sources.sf import sf_feed, sf_search
 from quasarr.search.sources.sj import sj_search, sj_feed
 from quasarr.search.sources.sl import sl_feed, sl_search
 from quasarr.search.sources.wd import wd_feed, wd_search
+from quasarr.search.sources.wx import wx_feed, wx_search
 
 
 def get_search_results(shared_state, request_from, imdb_id="", search_phrase="", mirror=None, season="", episode=""):
@@ -34,6 +36,7 @@ def get_search_results(shared_state, request_from, imdb_id="", search_phrase="",
     al = shared_state.values["config"]("Hostnames").get("al")
     by = shared_state.values["config"]("Hostnames").get("by")
     dd = shared_state.values["config"]("Hostnames").get("dd")
+    dl = shared_state.values["config"]("Hostnames").get("dl")
     dt = shared_state.values["config"]("Hostnames").get("dt")
     dj = shared_state.values["config"]("Hostnames").get("dj")
     dw = shared_state.values["config"]("Hostnames").get("dw")
@@ -46,6 +49,7 @@ def get_search_results(shared_state, request_from, imdb_id="", search_phrase="",
     sj = shared_state.values["config"]("Hostnames").get("sj")
     sl = shared_state.values["config"]("Hostnames").get("sl")
     wd = shared_state.values["config"]("Hostnames").get("wd")
+    wx = shared_state.values["config"]("Hostnames").get("wx")
 
     start_time = time.time()
 
@@ -56,6 +60,7 @@ def get_search_results(shared_state, request_from, imdb_id="", search_phrase="",
         (al, al_search),
         (by, by_search),
         (dd, dd_search),
+        (dl, dl_search),
         (dt, dt_search),
         (dj, dj_search),
         (dw, dw_search),
@@ -68,11 +73,13 @@ def get_search_results(shared_state, request_from, imdb_id="", search_phrase="",
         (sj, sj_search),
         (sl, sl_search),
         (wd, wd_search),
+        (wx, wx_search),
     ]
 
     # LazyLibrarian uses search_phrase for searches
     phrase_map = [
         (by, by_search),
+        (dl, dl_search),
         (dt, dt_search),
         (nx, nx_search),
         (sl, sl_search),
@@ -85,6 +92,7 @@ def get_search_results(shared_state, request_from, imdb_id="", search_phrase="",
         (by, by_feed),
         (dd, dd_feed),
         (dj, dj_feed),
+        (dl, dl_feed),
         (dt, dt_feed),
         (dw, dw_feed),
         (fx, fx_feed),
@@ -96,6 +104,7 @@ def get_search_results(shared_state, request_from, imdb_id="", search_phrase="",
         (sj, sj_feed),
         (sl, sl_feed),
         (wd, wd_feed),
+        (wx, wx_feed),
     ]
 
     if imdb_id:  # only Radarr/Sonarr are using imdb_id
quasarr/search/sources/dl.py
@@ -0,0 +1,316 @@
+# -*- coding: utf-8 -*-
+# Quasarr
+# Project by https://github.com/rix1337
+
+import re
+import time
+import warnings
+from base64 import urlsafe_b64encode
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+from html import unescape
+
+from bs4 import BeautifulSoup
+from bs4 import XMLParsedAsHTMLWarning
+
+from quasarr.providers.imdb_metadata import get_localized_title
+from quasarr.providers.log import info, debug
+from quasarr.providers.sessions.dl import retrieve_and_validate_session, invalidate_session, fetch_via_requests_session
+
+warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)  # we dont want to use lxml
+
+hostname = "dl"
+supported_mirrors = []
+
+
+def normalize_title_for_sonarr(title):
+    """
+    Normalize title for Sonarr by replacing spaces with dots.
+    """
+    title = title.replace(' ', '.')
+    title = re.sub(r'\s*-\s*', '-', title)
+    title = re.sub(r'\.\-\.', '-', title)
+    title = re.sub(r'\.{2,}', '.', title)
+    title = title.strip('.')
+    return title
+
+
+def dl_feed(shared_state, start_time, request_from, mirror=None):
+    """
+    Parse the RSS feed and return releases.
+    """
+    releases = []
+    host = shared_state.values["config"]("Hostnames").get(hostname)
+
+    if not host:
+        debug(f"{hostname}: hostname not configured")
+        return releases
+
+    try:
+        sess = retrieve_and_validate_session(shared_state)
+        if not sess:
+            info(f"Could not retrieve valid session for {host}")
+            return releases
+
+        # Instead we should parse the HTML for the correct *arr client
+        rss_url = f'https://www.{host}/forums/-/index.rss'
+        response = sess.get(rss_url, timeout=30)
+
+        if response.status_code != 200:
+            info(f"{hostname}: RSS feed returned status {response.status_code}")
+            return releases
+
+        soup = BeautifulSoup(response.content, 'html.parser')
+        items = soup.find_all('item')
+
+        if not items:
+            debug(f"{hostname}: No entries found in RSS feed")
+            return releases
+
+        for item in items:
+            try:
+                title_tag = item.find('title')
+                if not title_tag:
+                    continue
+
+                title = title_tag.get_text(strip=True)
+                if not title:
+                    continue
+
+                title = unescape(title)
+                title = title.replace(']]>', '').replace('<![CDATA[', '')
+                title = normalize_title_for_sonarr(title)
+
+                item_text = item.get_text()
+                thread_url = None
+                match = re.search(r'https://[^\s]+/threads/[^\s]+', item_text)
+                if match:
+                    thread_url = match.group(0)
+                if not thread_url:
+                    continue
+
+                pub_date = item.find('pubdate')
+                if pub_date:
+                    date_str = pub_date.get_text(strip=True)
+                else:
+                    # Fallback: use current time if no pubDate found
+                    date_str = datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000")
+
+                mb = 0
+                imdb_id = None
+                password = ""
+
+                payload = urlsafe_b64encode(
+                    f"{title}|{thread_url}|{mirror}|{mb}|{password}|{imdb_id or ''}".encode("utf-8")
+                ).decode("utf-8")
+                link = f"{shared_state.values['internal_address']}/download/?payload={payload}"
+
+                releases.append({
+                    "details": {
+                        "title": title,
+                        "hostname": hostname,
+                        "imdb_id": imdb_id,
+                        "link": link,
+                        "mirror": mirror,
+                        "size": mb * 1024 * 1024,
+                        "date": date_str,
+                        "source": thread_url
+                    },
+                    "type": "protected"
+                })
+
+            except Exception as e:
+                debug(f"{hostname}: error parsing RSS entry: {e}")
+                continue
+
+    except Exception as e:
+        info(f"{hostname}: RSS feed error: {e}")
+        invalidate_session(shared_state)
+
+    elapsed = time.time() - start_time
+    debug(f"Time taken: {elapsed:.2f}s ({hostname})")
+    return releases
+
+
+def _search_single_page(shared_state, host, search_string, search_id, page_num, imdb_id, mirror, request_from, season,
+                        episode):
+    """
+    Search a single page. This function is called in parallel for each page.
+    """
+    page_releases = []
+
+    try:
+        if page_num == 1:
+            search_params = {
+                'keywords': search_string,
+                'c[title_only]': 1
+            }
+            search_url = f'https://www.{host}/search/search'
+        else:
+            if not search_id:
+                return page_releases, None
+
+            search_params = {
+                'page': page_num,
+                'q': search_string,
+                'o': 'relevance'
+            }
+            search_url = f'https://www.{host}/search/{search_id}/'
+
+        search_response = fetch_via_requests_session(shared_state, method="GET",
+                                                     target_url=search_url,
+                                                     get_params=search_params,
+                                                     timeout=10)
+
+        if search_response.status_code != 200:
+            debug(f"{hostname}: [Page {page_num}] returned status {search_response.status_code}")
+            return page_releases, None
+
+        # Extract search ID from first page
+        extracted_search_id = None
+        if page_num == 1:
+            match = re.search(r'/search/(\d+)/', search_response.url)
+            if match:
+                extracted_search_id = match.group(1)
+                debug(f"{hostname}: [Page 1] Extracted search ID: {extracted_search_id}")
+
+        soup = BeautifulSoup(search_response.text, 'html.parser')
+        result_items = soup.select('li.block-row')
+
+        if not result_items:
+            debug(f"{hostname}: [Page {page_num}] found 0 results")
+            return page_releases, extracted_search_id
+
+        debug(f"{hostname}: [Page {page_num}] found {len(result_items)} results")
+
+        for item in result_items:
+            try:
+                title_elem = item.select_one('h3.contentRow-title a')
+                if not title_elem:
+                    continue
+
+                title = title_elem.get_text(separator=' ', strip=True)
+                title = re.sub(r'\s+', ' ', title)
+                title = unescape(title)
+                title_normalized = normalize_title_for_sonarr(title)
+
+                thread_url = title_elem.get('href')
+                if thread_url.startswith('/'):
+                    thread_url = f"https://www.{host}{thread_url}"
+
+                if not shared_state.is_valid_release(title_normalized, request_from, search_string, season, episode):
+                    continue
+
+                minor_info = item.select_one('div.contentRow-minor')
+                date_str = ""
+                if minor_info:
+                    date_elem = minor_info.select_one('time.u-dt')
+                    if date_elem:
+                        date_str = date_elem.get('datetime', '')
+
+                # Fallback: use current time if no date found
+                if not date_str:
+                    date_str = datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000")
+
+                mb = 0
+                password = ""
+
+                payload = urlsafe_b64encode(
+                    f"{title_normalized}|{thread_url}|{mirror}|{mb}|{password}|{imdb_id or ''}".encode("utf-8")
+                ).decode("utf-8")
+                link = f"{shared_state.values['internal_address']}/download/?payload={payload}"
+
+                page_releases.append({
+                    "details": {
+                        "title": title_normalized,
+                        "hostname": hostname,
+                        "imdb_id": imdb_id,
+                        "link": link,
+                        "mirror": mirror,
+                        "size": mb * 1024 * 1024,
+                        "date": date_str,
+                        "source": thread_url
+                    },
+                    "type": "protected"
+                })
+
+            except Exception as e:
+                debug(f"{hostname}: [Page {page_num}] error parsing item: {e}")
+
+        return page_releases, extracted_search_id
+
+    except Exception as e:
+        info(f"{hostname}: [Page {page_num}] error: {e}")
+        return page_releases, None
+
+
+def dl_search(shared_state, start_time, request_from, search_string,
+              mirror=None, season=None, episode=None):
+    """
+    Search with parallel pagination (max 5 pages) to find best quality releases.
+    Requests are fired in parallel to minimize search time.
+    """
+    releases = []
+    host = shared_state.values["config"]("Hostnames").get(hostname)
+
+    imdb_id = shared_state.is_imdb_id(search_string)
+    if imdb_id:
+        title = get_localized_title(shared_state, imdb_id, 'de')
+        if not title:
+            info(f"{hostname}: no title for IMDb {imdb_id}")
+            return releases
+        search_string = title
+
+    search_string = unescape(search_string)
+    max_pages = 5
+
+    info(
+        f"{hostname}: Starting parallel paginated search for '{search_string}' (Season: {season}, Episode: {episode}) - up to {max_pages} pages")
+
+    try:
+        sess = retrieve_and_validate_session(shared_state)
+        if not sess:
+            info(f"Could not retrieve valid session for {host}")
+            return releases
+
+        # First, do page 1 to get the search ID
+        page_1_releases, search_id = _search_single_page(
+            shared_state, host, search_string, None, 1,
+            imdb_id, mirror, request_from, season, episode
+        )
+        releases.extend(page_1_releases)
+
+        if not search_id:
+            info(f"{hostname}: Could not extract search ID, stopping pagination")
+            return releases
+
+        # Now fire remaining pages in parallel
+        with ThreadPoolExecutor(max_workers=4) as executor:
+            futures = {}
+            for page_num in range(2, max_pages + 1):
+                future = executor.submit(
+                    _search_single_page,
+                    shared_state, host, search_string, search_id, page_num,
+                    imdb_id, mirror, request_from, season, episode
+                )
+                futures[future] = page_num
+
+            for future in as_completed(futures):
+                page_num = futures[future]
+                try:
+                    page_releases, _ = future.result()
+                    releases.extend(page_releases)
+                    debug(f"{hostname}: [Page {page_num}] completed with {len(page_releases)} valid releases")
+                except Exception as e:
+                    info(f"{hostname}: [Page {page_num}] failed: {e}")
+
+    except Exception as e:
+        info(f"{hostname}: search error: {e}")
+        invalidate_session(shared_state)
+
+    info(f"{hostname}: FINAL - Found {len(releases)} valid releases - providing to {request_from}")
+
+    elapsed = time.time() - start_time
+    debug(f"Time taken: {elapsed:.2f}s ({hostname})")
+
+    return releases
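
Both dl_feed and dl_search hand results to Quasarr's download endpoint as a urlsafe-base64 payload of six pipe-separated fields (title, thread URL, mirror, size in MB, password, IMDb id). A small sketch of that encoding plus a matching decoder (the decoder is illustrative only and assumes none of the fields contain a literal "|"):

from base64 import urlsafe_b64decode, urlsafe_b64encode

def encode_payload(title, url, mirror=None, mb=0, password="", imdb_id=None):
    # Same field order as in dl_feed/dl_search above
    raw = f"{title}|{url}|{mirror}|{mb}|{password}|{imdb_id or ''}"
    return urlsafe_b64encode(raw.encode("utf-8")).decode("utf-8")

def decode_payload(payload):
    fields = urlsafe_b64decode(payload.encode("utf-8")).decode("utf-8").split("|")
    return dict(zip(("title", "url", "mirror", "mb", "password", "imdb_id"), fields))

payload = encode_payload("Some.Show.S01E01.German.1080p.WEB.h264-GROUP",
                         "https://www.example.org/threads/12345/")
print(decode_payload(payload)["title"])  # Some.Show.S01E01.German.1080p.WEB.h264-GROUP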