quasarr 2.1.5__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of quasarr might be problematic; see the registry's advisory page for more details.

Files changed (60)
  1. quasarr/__init__.py +38 -29
  2. quasarr/api/__init__.py +94 -23
  3. quasarr/api/captcha/__init__.py +0 -12
  4. quasarr/api/config/__init__.py +22 -11
  5. quasarr/api/packages/__init__.py +26 -34
  6. quasarr/api/statistics/__init__.py +15 -15
  7. quasarr/downloads/__init__.py +9 -1
  8. quasarr/downloads/packages/__init__.py +2 -2
  9. quasarr/downloads/sources/al.py +6 -0
  10. quasarr/downloads/sources/by.py +29 -20
  11. quasarr/downloads/sources/dd.py +9 -1
  12. quasarr/downloads/sources/dl.py +3 -0
  13. quasarr/downloads/sources/dt.py +16 -7
  14. quasarr/downloads/sources/dw.py +22 -17
  15. quasarr/downloads/sources/he.py +11 -6
  16. quasarr/downloads/sources/mb.py +9 -3
  17. quasarr/downloads/sources/nk.py +9 -3
  18. quasarr/downloads/sources/nx.py +21 -17
  19. quasarr/downloads/sources/sf.py +21 -13
  20. quasarr/downloads/sources/sl.py +10 -2
  21. quasarr/downloads/sources/wd.py +18 -9
  22. quasarr/downloads/sources/wx.py +7 -11
  23. quasarr/providers/auth.py +1 -1
  24. quasarr/providers/cloudflare.py +1 -1
  25. quasarr/providers/hostname_issues.py +63 -0
  26. quasarr/providers/html_images.py +1 -18
  27. quasarr/providers/html_templates.py +104 -12
  28. quasarr/providers/imdb_metadata.py +288 -75
  29. quasarr/providers/obfuscated.py +11 -11
  30. quasarr/providers/sessions/al.py +27 -11
  31. quasarr/providers/sessions/dd.py +12 -4
  32. quasarr/providers/sessions/dl.py +19 -11
  33. quasarr/providers/sessions/nx.py +12 -4
  34. quasarr/providers/version.py +1 -1
  35. quasarr/search/__init__.py +5 -0
  36. quasarr/search/sources/al.py +12 -1
  37. quasarr/search/sources/by.py +15 -4
  38. quasarr/search/sources/dd.py +22 -3
  39. quasarr/search/sources/dj.py +12 -1
  40. quasarr/search/sources/dl.py +12 -6
  41. quasarr/search/sources/dt.py +17 -4
  42. quasarr/search/sources/dw.py +15 -4
  43. quasarr/search/sources/fx.py +19 -6
  44. quasarr/search/sources/he.py +22 -3
  45. quasarr/search/sources/mb.py +15 -4
  46. quasarr/search/sources/nk.py +19 -3
  47. quasarr/search/sources/nx.py +15 -4
  48. quasarr/search/sources/sf.py +25 -8
  49. quasarr/search/sources/sj.py +14 -1
  50. quasarr/search/sources/sl.py +17 -2
  51. quasarr/search/sources/wd.py +15 -4
  52. quasarr/search/sources/wx.py +16 -18
  53. quasarr/storage/setup.py +150 -35
  54. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/METADATA +6 -3
  55. quasarr-2.3.0.dist-info/RECORD +82 -0
  56. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/WHEEL +1 -1
  57. quasarr-2.1.5.dist-info/RECORD +0 -81
  58. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/entry_points.txt +0 -0
  59. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/licenses/LICENSE +0 -0
  60. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@
5
5
  import html
6
6
  import re
7
7
  from datetime import datetime, timedelta
8
- from json import loads
8
+ from json import loads, dumps
9
9
  from urllib.parse import quote
10
10
 
11
11
  import requests
@@ -14,20 +14,168 @@ from bs4 import BeautifulSoup
14
14
  from quasarr.providers.log import info, debug
15
15
 
16
16
 
17
def _get_db(table_name):
    """Return a DataBase handle bound to *table_name*.

    The import happens inside the function body on purpose: importing
    quasarr.storage.sqlite_database at module load time would create a
    circular dependency.
    """
    from quasarr.storage.sqlite_database import DataBase
    return DataBase(table_name)
21
+
22
+
23
class IMDbAPI:
    """Thin wrapper around the api.imdbapi.dev REST endpoints."""

    BASE_URL = "https://api.imdbapi.dev"

    @staticmethod
    def get_title(imdb_id):
        """Fetch the full title record for *imdb_id*; None on any failure."""
        url = f"{IMDbAPI.BASE_URL}/titles/{imdb_id}"
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            info(f"Error loading imdbapi.dev for {imdb_id}: {e}")
            return None

    @staticmethod
    def get_akas(imdb_id):
        """Fetch alternative (localized) titles for *imdb_id*; [] on failure."""
        url = f"{IMDbAPI.BASE_URL}/titles/{imdb_id}/akas"
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.json().get("akas", [])
        except Exception as e:
            info(f"Error loading localized titles from IMDbAPI.dev for {imdb_id}: {e}")
            return []

    @staticmethod
    def search_titles(query):
        """Search for titles matching *query* (max 5 results); [] on failure."""
        url = f"{IMDbAPI.BASE_URL}/search/titles?query={quote(query)}&limit=5"
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.json().get("titles", [])
        except Exception as e:
            debug(f"Request on IMDbAPI failed: {e}")
            return []
56
+
57
+
58
class IMDbWeb:
    """Fallback metadata source that scrapes imdb.com directly."""

    BASE_URL = "https://www.imdb.com"

    @staticmethod
    def get_poster(imdb_id, user_agent):
        """Return the highest-resolution poster URL for *imdb_id*, or None."""
        headers = {'User-Agent': user_agent}
        try:
            page = requests.get(f"{IMDbWeb.BASE_URL}/title/{imdb_id}/", headers=headers, timeout=10).text
            soup = BeautifulSoup(page, "html.parser")
            # srcset lists poster URLs in ascending resolution.
            srcset = soup.find('div', class_='ipc-poster').div.img["srcset"]
            # Keep the URL tokens, drop the short resolution descriptors.
            links = [token for token in srcset.split(" ") if len(token) > 10]
            return links[-1]  # last entry = highest resolution
        except Exception as e:
            debug(f"Could not get poster title for {imdb_id} from IMDb: {e}")
            return None

    @staticmethod
    def get_localized_title(imdb_id, language, user_agent):
        """Scrape the page <title> for *imdb_id* under *language*; None on failure."""
        headers = {
            'Accept-Language': language,
            'User-Agent': user_agent
        }
        try:
            response = requests.get(f"{IMDbWeb.BASE_URL}/title/{imdb_id}/", headers=headers, timeout=10)
            response.raise_for_status()

            # Try the "(year)" form first, then the plain "- IMDb" form.
            for pattern in (r'<title>(.*?) \(.*?</title>', r'<title>(.*?) - IMDb</title>'):
                match = re.search(pattern, response.text)
                if match:
                    return match.group(1)
        except Exception as e:
            info(f"Error loading IMDb metadata for {imdb_id}: {e}")

        return None

    @staticmethod
    def search_titles(query, ttype, language, user_agent):
        """Scrape IMDb's find page; return its embedded result list, or []."""
        headers = {
            'Accept-Language': language,
            'User-Agent': user_agent
        }
        url = f"{IMDbWeb.BASE_URL}/find/?q={quote(query)}&s=tt&ttype={ttype}&ref_=fn_{ttype}"
        try:
            results = requests.get(url, headers=headers, timeout=10)

            if results.status_code != 200:
                debug(f"Request on IMDb failed: {results.status_code}")
            else:
                soup = BeautifulSoup(results.text, "html.parser")
                # The page embeds its data as JSON in a <script> containing "props".
                props = soup.find("script", text=re.compile("props"))
                if props:
                    details = loads(props.string)
                    return details['props']['pageProps']['titleResults']['results']
        except Exception as e:
            debug(f"IMDb scraping fallback failed: {e}")

        return []
118
+
119
+
120
class TitleCleaner:
    """String helpers for normalizing titles and release names."""

    @staticmethod
    def sanitize(title):
        """Normalize *title* for comparisons; returns "" for falsy input."""
        if not title:
            return ""
        result = html.unescape(title)
        # NOTE: `&-'` inside the class is a character *range* (0x26-0x27),
        # so only & and ' survive — literal hyphens are replaced by spaces.
        result = re.sub(r"[^a-zA-Z0-9äöüÄÖÜß&-']", ' ', result).strip()
        result = result.replace(" - ", "-")
        return re.sub(r'\s{2,}', ' ', result)

    @staticmethod
    def clean(title):
        """Extract the bare title from a release name, '+'-joined for search.

        On any error the original *title* is returned unchanged.
        """
        try:
            # Cut the name before common release tags. The separator-plus-two-
            # digits branch has a (?!19|20) lookahead, so 4-digit years are
            # kept in the extracted part; language tags, resolutions and
            # season markers end it.
            boundary = r"(.*?)(?:[\.\s](?!19|20)\d{2}|[\.\s]German|[\.\s]GERMAN|[\.\s]\d{3,4}p|[\.\s]S(?:\d{1,3}))"
            match = re.search(boundary, title)
            extracted = match.group(1) if match else title

            # Strip edition/cut tags that may still trail the title part.
            for tag in (
                r'[\.\s]UNRATED.*', r'[\.\s]Unrated.*', r'[\.\s]Uncut.*', r'[\.\s]UNCUT.*',
                r'[\.\s]Directors[\.\s]Cut.*', r'[\.\s]Final[\.\s]Cut.*', r'[\.\s]DC.*',
                r'[\.\s]REMASTERED.*', r'[\.\s]EXTENDED.*', r'[\.\s]Extended.*',
                r'[\.\s]Theatrical.*', r'[\.\s]THEATRICAL.*'
            ):
                extracted = re.sub(tag, "", extracted, flags=re.IGNORECASE)

            # Dots -> spaces, collapse whitespace, then join with '+'.
            extracted = extracted.replace(".", " ").strip()
            extracted = re.sub(r'\s+', ' ', extracted)
            return extracted.replace(" ", "+")
        except Exception as e:
            debug(f"Error cleaning title '{title}': {e}")
            return title
167
+
168
+
17
169
def get_poster_link(shared_state, imdb_id):
    """Best-effort poster URL for *imdb_id*.

    Order: cached/API metadata first, then scraping imdb.com.
    Returns the URL string, or None when nothing could be found.
    """
    # Guard: the previous flow called get_imdb_metadata(None), causing a
    # pointless API round-trip (and a cache entry keyed on None).
    if not imdb_id:
        debug(f"Could not get poster title for {imdb_id} from IMDb")
        return None

    imdb_metadata = get_imdb_metadata(imdb_id)
    if imdb_metadata:
        poster_link = imdb_metadata.get("poster_link")
        if poster_link:
            return poster_link

    # Fallback: scrape the IMDb title page directly.
    poster_link = IMDbWeb.get_poster(imdb_id, shared_state.values["user_agent"])

    if not poster_link:
        debug(f"Could not get poster title for {imdb_id} from IMDb")

    return poster_link
36
184
 
37
185
 
38
- def get_localized_title(shared_state, imdb_id, language='de'):
39
- localized_title = None
186
def get_imdb_metadata(imdb_id):
    """Return metadata for *imdb_id*, cached in the local DB.

    Result dict keys: "title", "year", "poster_link",
    "localized" (language code -> title) and "ttl" (unix timestamp until
    which the entry is considered fresh). Fresh cache hits get a sliding
    30-day TTL; on API failure a stale cache entry is returned as fallback.
    """
    db = _get_db("imdb_metadata")
    now = datetime.now().timestamp()

    # Try the local cache first.
    cached_metadata = None
    try:
        cached_data = db.retrieve(imdb_id)
        if cached_data:
            cached_metadata = loads(cached_data)
            # Still fresh: extend the TTL (sliding expiration) and return.
            if cached_metadata.get("ttl") and cached_metadata["ttl"] > now:
                cached_metadata["ttl"] = now + timedelta(days=30).total_seconds()
                db.update_store(imdb_id, dumps(cached_metadata))
                return cached_metadata
    except Exception as e:
        debug(f"Error retrieving IMDb metadata from DB for {imdb_id}: {e}")

    # Fresh metadata skeleton.
    imdb_metadata = {
        "title": None,
        "year": None,
        "poster_link": None,
        "localized": {},
        "ttl": 0
    }

    response_json = IMDbAPI.get_title(imdb_id)

    if not response_json:
        # API failed: stale cached data beats nothing.
        if cached_metadata:
            debug(f"IMDb API failed for {imdb_id}, returning stale cached data.")
            return cached_metadata
        return imdb_metadata

    imdb_metadata["title"] = TitleCleaner.sanitize(response_json.get("primaryTitle", ""))
    imdb_metadata["year"] = response_json.get("startYear")
    imdb_metadata["ttl"] = now + timedelta(days=30).total_seconds()

    # Fix: handle a missing "primaryImage" explicitly instead of relying on
    # the AttributeError raised by .get("primaryImage").get("url").
    poster_link = (response_json.get("primaryImage") or {}).get("url")
    if poster_link:
        imdb_metadata["poster_link"] = poster_link
    else:
        debug(f"Could not find poster link for {imdb_id} from imdbapi.dev: missing primaryImage/url")
        # Incomplete data: shorten the TTL so we retry sooner.
        imdb_metadata["ttl"] = now + timedelta(days=1).total_seconds()

    akas = IMDbAPI.get_akas(imdb_id)
    if akas:
        for aka in akas:
            if aka.get("language"):
                continue  # skip entries with specific language tags
            if aka.get("country", {}).get("code", "").lower() == "de":
                imdb_metadata["localized"]["de"] = TitleCleaner.sanitize(aka.get("text"))
                break
    else:
        # AKA lookup failed (or empty): shorten the TTL so we retry sooner.
        imdb_metadata["ttl"] = now + timedelta(days=1).total_seconds()

    db.update_store(imdb_id, dumps(imdb_metadata))
    return imdb_metadata
64
249
 
65
- localized_title = html.unescape(localized_title)
66
- localized_title = re.sub(r"[^a-zA-Z0-9äöüÄÖÜß&-']", ' ', localized_title).strip()
67
- localized_title = localized_title.replace(" - ", "-")
68
- localized_title = re.sub(r'\s{2,}', ' ', localized_title)
69
250
 
70
- return localized_title
251
def get_year(imdb_id):
    """Return the start/release year for *imdb_id*, or None when unavailable."""
    metadata = get_imdb_metadata(imdb_id)
    return metadata.get("year") if metadata else None
71
256
 
72
257
 
73
- def get_clean_title(title):
74
- try:
75
- extracted_title = re.findall(r"(.*?)(?:.(?!19|20)\d{2}|\.German|.GERMAN|\.\d{3,4}p|\.S(?:\d{1,3}))", title)[0]
76
- leftover_tags_removed = re.sub(
77
- r'(|.UNRATED.*|.Unrated.*|.Uncut.*|.UNCUT.*)(|.Directors.Cut.*|.Final.Cut.*|.DC.*|.REMASTERED.*|.EXTENDED.*|.Extended.*|.Theatrical.*|.THEATRICAL.*)',
78
- "", extracted_title)
79
- clean_title = leftover_tags_removed.replace(".", " ").strip().replace(" ", "+")
258
def get_localized_title(shared_state, imdb_id, language='de'):
    """Return the title for *imdb_id* localized to *language*.

    Preference order: cached localized title, cached primary title, then a
    direct imdb.com scrape. Returns None if every source fails.
    """
    imdb_metadata = get_imdb_metadata(imdb_id)
    if imdb_metadata:
        # `or {}` guards against malformed cache entries missing "localized".
        localized_title = (imdb_metadata.get("localized") or {}).get(language)
        if localized_title:
            return localized_title
        # Fix: only return the primary title when it is actually usable.
        # Previously a None/empty title was returned here unconditionally,
        # which made the IMDbWeb fallback below unreachable.
        title = imdb_metadata.get("title")
        if title:
            return title

    # Fallback: scrape imdb.com directly.
    localized_title = IMDbWeb.get_localized_title(imdb_id, language, shared_state.values["user_agent"])

    if not localized_title:
        debug(f"Could not get localized title for {imdb_id} in {language} from IMDb")
        return None
    return TitleCleaner.sanitize(localized_title)
84
273
 
85
274
 
86
275
  def get_imdb_id_from_title(shared_state, title, language="de"):
87
276
  imdb_id = None
88
277
 
89
278
  if re.search(r"S\d{1,3}(E\d{1,3})?", title, re.IGNORECASE):
90
- ttype = "tv"
279
+ ttype_api = "TV_SERIES"
280
+ ttype_web = "tv"
91
281
  else:
92
- ttype = "ft"
282
+ ttype_api = "MOVIE"
283
+ ttype_web = "ft"
284
+
285
+ title = TitleCleaner.clean(title)
93
286
 
94
- title = get_clean_title(title)
287
+ # Check Search Cache (DB)
288
+ db = _get_db("imdb_searches")
289
+ try:
290
+ cached_data = db.retrieve(title)
291
+ if cached_data:
292
+ data = loads(cached_data)
293
+ # Check TTL (48 hours)
294
+ if data.get("timestamp") and datetime.fromtimestamp(data["timestamp"]) > datetime.now() - timedelta(
295
+ hours=48):
296
+ return data.get("imdb_id")
297
+ except Exception as e:
298
+ debug(f"Error retrieving search cache for {title}: {e}")
95
299
 
96
- threshold = 60 * 60 * 48 # 48 hours
97
- context = "recents_imdb"
98
- recently_searched = shared_state.get_recently_searched(shared_state, context, threshold)
99
- if title in recently_searched:
100
- title_item = recently_searched[title]
101
- if title_item["timestamp"] > datetime.now() - timedelta(seconds=threshold):
102
- return title_item["imdb_id"]
300
+ # Try IMDbAPI.dev first
301
+ search_results = IMDbAPI.search_titles(title)
302
+ if search_results:
303
+ for result in search_results:
304
+ found_title = result.get("primaryTitle")
305
+ found_id = result.get("id")
306
+ found_type = result.get("type")
103
307
 
104
- headers = {
105
- 'Accept-Language': language,
106
- 'User-Agent': shared_state.values["user_agent"]
107
- }
308
+ # Basic type filtering if possible from result data
309
+ if ttype_api == "TV_SERIES" and found_type not in ["tvSeries", "tvMiniSeries"]:
310
+ continue
311
+ if ttype_api == "MOVIE" and found_type not in ["movie", "tvMovie"]:
312
+ continue
108
313
 
109
- results = requests.get(f"https://www.imdb.com/find/?q={quote(title)}&s=tt&ttype={ttype}&ref_=fn_{ttype}",
110
- headers=headers, timeout=10)
314
+ if shared_state.search_string_in_sanitized_title(title, found_title):
315
+ imdb_id = found_id
316
+ break
111
317
 
112
- if results.status_code == 200:
113
- soup = BeautifulSoup(results.text, "html.parser")
114
- props = soup.find("script", text=re.compile("props"))
115
- details = loads(props.string)
116
- search_results = details['props']['pageProps']['titleResults']['results']
318
+ # If no exact match found with type filtering, try relaxed matching
319
+ if not imdb_id:
320
+ for result in search_results:
321
+ found_title = result.get("primaryTitle")
322
+ found_id = result.get("id")
323
+ if shared_state.search_string_in_sanitized_title(title, found_title):
324
+ imdb_id = found_id
325
+ break
117
326
 
118
- if len(search_results) > 0:
327
+ # Fallback to IMDb scraping if API failed or returned no results
328
+ if not imdb_id:
329
+ search_results = IMDbWeb.search_titles(title, ttype_web, language, shared_state.values["user_agent"])
330
+ if search_results:
119
331
  for result in search_results:
120
332
  try:
121
333
  found_title = result["listItem"]["titleText"]
@@ -127,14 +339,15 @@ def get_imdb_id_from_title(shared_state, title, language="de"):
127
339
  if shared_state.search_string_in_sanitized_title(title, found_title):
128
340
  imdb_id = found_id
129
341
  break
130
- else:
131
- debug(f"Request on IMDb failed: {results.status_code}")
132
342
 
133
- recently_searched[title] = {
134
- "imdb_id": imdb_id,
135
- "timestamp": datetime.now()
136
- }
137
- shared_state.update(context, recently_searched)
343
+ # Update Search Cache
344
+ try:
345
+ db.update_store(title, dumps({
346
+ "imdb_id": imdb_id,
347
+ "timestamp": datetime.now().timestamp()
348
+ }))
349
+ except Exception as e:
350
+ debug(f"Error updating search cache for {title}: {e}")
138
351
 
139
352
  if not imdb_id:
140
353
  debug(f"No IMDb-ID found for {title}")