quasarr 2.3.1__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of quasarr might be problematic. Click here for more details.
- {quasarr-2.3.1 → quasarr-2.3.2}/PKG-INFO +1 -1
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/__init__.py +24 -6
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/api/config/__init__.py +2 -2
- quasarr-2.3.2/quasarr/providers/imdb_metadata.py +577 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/version.py +1 -1
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr.egg-info/PKG-INFO +1 -1
- quasarr-2.3.1/quasarr/providers/imdb_metadata.py +0 -353
- {quasarr-2.3.1 → quasarr-2.3.2}/LICENSE +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/README.md +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/api/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/api/arr/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/api/captcha/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/api/packages/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/api/sponsors_helper/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/api/statistics/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/linkcrypters/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/linkcrypters/al.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/linkcrypters/filecrypt.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/linkcrypters/hide.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/packages/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/al.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/by.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/dd.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/dj.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/dl.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/dt.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/dw.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/he.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/mb.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/nk.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/nx.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/sf.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/sj.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/sl.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/wd.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/downloads/sources/wx.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/auth.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/cloudflare.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/hostname_issues.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/html_images.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/html_templates.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/jd_cache.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/log.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/myjd_api.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/notifications.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/obfuscated.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/sessions/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/sessions/al.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/sessions/dd.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/sessions/dl.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/sessions/nx.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/shared_state.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/statistics.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/utils.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/providers/web_server.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/al.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/by.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/dd.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/dj.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/dl.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/dt.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/dw.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/fx.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/he.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/mb.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/nk.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/nx.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/sf.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/sj.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/sl.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/wd.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/search/sources/wx.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/storage/__init__.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/storage/config.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/storage/setup.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr/storage/sqlite_database.py +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr.egg-info/SOURCES.txt +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr.egg-info/dependency_links.txt +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr.egg-info/entry_points.txt +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr.egg-info/not-zip-safe +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr.egg-info/requires.txt +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/quasarr.egg-info/top_level.txt +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/setup.cfg +0 -0
- {quasarr-2.3.1 → quasarr-2.3.2}/setup.py +0 -0
|
@@ -169,6 +169,14 @@ def run():
|
|
|
169
169
|
else:
|
|
170
170
|
hostname_credentials_config(shared_state, site.upper(), hostname)
|
|
171
171
|
|
|
172
|
+
# Check FlareSolverr configuration
|
|
173
|
+
skip_flaresolverr_db = DataBase("skip_flaresolverr")
|
|
174
|
+
flaresolverr_skipped = skip_flaresolverr_db.retrieve("skipped")
|
|
175
|
+
flaresolverr_url = Config('FlareSolverr').get('url')
|
|
176
|
+
|
|
177
|
+
if not flaresolverr_url and not flaresolverr_skipped:
|
|
178
|
+
flaresolverr_config(shared_state)
|
|
179
|
+
|
|
172
180
|
config = Config('JDownloader')
|
|
173
181
|
user = config.get('user')
|
|
174
182
|
password = config.get('password')
|
|
@@ -249,23 +257,33 @@ def flaresolverr_checker(shared_state_dict, shared_state_lock):
|
|
|
249
257
|
flaresolverr_skipped = skip_flaresolverr_db.retrieve("skipped")
|
|
250
258
|
|
|
251
259
|
flaresolverr_url = Config('FlareSolverr').get('url')
|
|
260
|
+
|
|
261
|
+
# If FlareSolverr is not configured and not skipped, it means it's the first run
|
|
262
|
+
# and the user needs to be prompted via the WebUI.
|
|
263
|
+
# This background process should NOT block or prompt the user.
|
|
264
|
+
# It should only check and log the status.
|
|
252
265
|
if not flaresolverr_url and not flaresolverr_skipped:
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
flaresolverr_url = Config('FlareSolverr').get('url')
|
|
266
|
+
info('FlareSolverr URL not configured. Please configure it via the WebUI.')
|
|
267
|
+
info('Some sites (AL) will not work without FlareSolverr.')
|
|
268
|
+
return # Exit the checker, it will be re-checked if user configures it later
|
|
257
269
|
|
|
258
270
|
if flaresolverr_skipped:
|
|
259
271
|
info('FlareSolverr setup skipped by user preference')
|
|
260
272
|
info('Some sites (AL) will not work without FlareSolverr. Configure it later in the web UI.')
|
|
261
273
|
elif flaresolverr_url:
|
|
262
|
-
|
|
274
|
+
info(f'Checking FlareSolverr at URL: "{flaresolverr_url}"')
|
|
263
275
|
flaresolverr_check = check_flaresolverr(shared_state, flaresolverr_url)
|
|
264
276
|
if flaresolverr_check:
|
|
265
|
-
|
|
277
|
+
info(f'FlareSolverr connection successful. Using User-Agent: "{shared_state.values["user_agent"]}"')
|
|
278
|
+
else:
|
|
279
|
+
info('FlareSolverr check failed - using fallback user agent')
|
|
280
|
+
# Fallback user agent is already set in main process, but we log it
|
|
281
|
+
info(f'User Agent (fallback): "{FALLBACK_USER_AGENT}"')
|
|
266
282
|
|
|
267
283
|
except KeyboardInterrupt:
|
|
268
284
|
pass
|
|
285
|
+
except Exception as e:
|
|
286
|
+
info(f"An unexpected error occurred in FlareSolverr checker: {e}")
|
|
269
287
|
|
|
270
288
|
|
|
271
289
|
def update_checker(shared_state_dict, shared_state_lock):
|
|
@@ -288,8 +288,8 @@ def setup_config(app, shared_state):
|
|
|
288
288
|
"FlareSolverr URL saved successfully! A restart is recommended.")
|
|
289
289
|
else:
|
|
290
290
|
return render_fail(f"FlareSolverr returned unexpected status: {json_data.get('status')}")
|
|
291
|
-
except requests.RequestException
|
|
292
|
-
return render_fail(f"Could not reach FlareSolverr
|
|
291
|
+
except requests.RequestException:
|
|
292
|
+
return render_fail(f"Could not reach FlareSolverr!")
|
|
293
293
|
|
|
294
294
|
return render_fail("Could not reach FlareSolverr at that URL (expected HTTP 200).")
|
|
295
295
|
|
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Quasarr
|
|
3
|
+
# Project by https://github.com/rix1337
|
|
4
|
+
|
|
5
|
+
import html
|
|
6
|
+
import re
|
|
7
|
+
from datetime import datetime, timedelta
|
|
8
|
+
from json import loads, dumps
|
|
9
|
+
from urllib.parse import quote
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
from bs4 import BeautifulSoup
|
|
13
|
+
|
|
14
|
+
from quasarr.providers.log import info, debug
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _get_db(table_name):
    """Return a ``DataBase`` handle for ``table_name``.

    The storage module is imported here, at call time, instead of at module
    import time in order to avoid a circular dependency.
    """
    from quasarr.storage.sqlite_database import DataBase as _DataBase

    handle = _DataBase(table_name)
    return handle
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _get_config(section):
    """Return a ``Config`` accessor for the given configuration ``section``.

    The config module is imported here, at call time, instead of at module
    import time in order to avoid a circular dependency.
    """
    from quasarr.storage.config import Config as _Config

    accessor = _Config(section)
    return accessor
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TitleCleaner:
    """Static helpers that normalise titles before they are searched or cached."""

    # Characters outside this set are replaced by a space in ``sanitize``.
    # The hyphen is written last so that it is a literal: spelled ``&-'`` it
    # formed the range ``&``..``'``, every hyphen was stripped and the
    # " - " -> "-" step in ``sanitize`` could never match.
    _DISALLOWED = re.compile(r"[^a-zA-Z0-9äöüÄÖÜß&'-]")

    # Lazily captures everything before the first release marker: a two-digit
    # number not starting with 19/20, "German", a resolution such as "1080p",
    # or a season tag such as "S01".
    _TITLE_PART = re.compile(
        r"(.*?)(?:[\.\s](?!19|20)\d{2}|[\.\s]German|[\.\s]GERMAN|[\.\s]\d{3,4}p|[\.\s]S(?:\d{1,3}))"
    )

    # Edition tags; each one is removed together with everything after it.
    # They are matched case-insensitively, so one spelling per tag is enough.
    _EDITION_TAGS = tuple(
        re.compile(tag, re.IGNORECASE)
        for tag in (
            r'[\.\s]UNRATED.*',
            r'[\.\s]Uncut.*',
            r'[\.\s]Directors[\.\s]Cut.*',
            r'[\.\s]Final[\.\s]Cut.*',
            r'[\.\s]DC.*',
            r'[\.\s]REMASTERED.*',
            r'[\.\s]EXTENDED.*',
            r'[\.\s]Theatrical.*',
        )
    )

    @staticmethod
    def sanitize(title):
        """Return ``title`` reduced to letters, digits, umlauts, ``&``, ``'`` and ``-``.

        HTML entities are decoded first, every other character becomes a
        space, " - " is tightened to "-" and runs of whitespace collapse to a
        single space. A falsy ``title`` yields an empty string.
        """
        if not title:
            return ""
        sanitized_title = html.unescape(title)
        sanitized_title = TitleCleaner._DISALLOWED.sub(' ', sanitized_title).strip()
        sanitized_title = sanitized_title.replace(" - ", "-")
        sanitized_title = re.sub(r'\s{2,}', ' ', sanitized_title)
        return sanitized_title

    @staticmethod
    def clean(title):
        """Extract the title part of a release name as a ``+``-joined search string.

        Everything from the first release marker onwards is cut off, edition
        tags are removed, dots become spaces and the words are joined with
        ``+``. If anything goes wrong the input is returned unchanged.
        """
        try:
            match = TitleCleaner._TITLE_PART.search(title)
            clean_title = match.group(1) if match else title

            for tag in TitleCleaner._EDITION_TAGS:
                clean_title = tag.sub("", clean_title)

            clean_title = clean_title.replace(".", " ").strip()
            clean_title = re.sub(r'\s+', ' ', clean_title)
            return clean_title.replace(" ", "+")
        except Exception as e:
            debug(f"Error cleaning title '{title}': {e}")
            return title
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class IMDbAPI:
    """Tier 1: api.imdbapi.dev - Primary, fast, comprehensive."""
    BASE_URL = "https://api.imdbapi.dev"

    @staticmethod
    def get_title(imdb_id):
        """Return the decoded JSON of ``/titles/<imdb_id>``; ``None`` on any error."""
        endpoint = f"{IMDbAPI.BASE_URL}/titles/{imdb_id}"
        try:
            reply = requests.get(endpoint, timeout=30)
            reply.raise_for_status()
            payload = reply.json()
        except Exception as e:
            info(f"IMDbAPI get_title failed for {imdb_id}: {e}")
            return None
        return payload

    @staticmethod
    def get_akas(imdb_id):
        """Return the ``akas`` list of ``/titles/<imdb_id>/akas``; ``[]`` on any error."""
        endpoint = f"{IMDbAPI.BASE_URL}/titles/{imdb_id}/akas"
        try:
            reply = requests.get(endpoint, timeout=30)
            reply.raise_for_status()
            akas = reply.json().get("akas", [])
        except Exception as e:
            info(f"IMDbAPI get_akas failed for {imdb_id}: {e}")
            return []
        return akas

    @staticmethod
    def search_titles(query):
        """Return up to five entries from the title search; ``[]`` on any error."""
        endpoint = f"{IMDbAPI.BASE_URL}/search/titles?query={quote(query)}&limit=5"
        try:
            reply = requests.get(endpoint, timeout=30)
            reply.raise_for_status()
            titles = reply.json().get("titles", [])
        except Exception as e:
            debug(f"IMDbAPI search_titles failed: {e}")
            return []
        return titles
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class IMDbCDN:
    """Tier 2: v2.sg.media-imdb.com - Fast fallback for English data."""
    CDN_URL = "https://v2.sg.media-imdb.com/suggestion"

    @staticmethod
    def _get_cdn_data(imdb_id, language, user_agent):
        """Fetch the suggestion entry for ``imdb_id``.

        Returns the entry whose ``id`` equals ``imdb_id``, otherwise the first
        entry of the response, otherwise ``None`` (no entries, an ID shorter
        than two characters, or any error).
        """
        try:
            if not imdb_id or len(imdb_id) < 2:
                return None

            request_headers = {
                'Accept-Language': f'{language},en;q=0.9',
                'User-Agent': user_agent,
                'Accept': 'application/json'
            }

            # The suggestion files are addressed by the first character of the key.
            bucket = imdb_id[0].lower()
            endpoint = f"{IMDbCDN.CDN_URL}/{bucket}/{imdb_id}.json"

            reply = requests.get(endpoint, headers=request_headers, timeout=5)
            reply.raise_for_status()
            payload = reply.json()

            if "d" in payload and len(payload["d"]) > 0:
                entries = payload["d"]
                exact = (entry for entry in entries if entry.get("id") == imdb_id)
                return next(exact, entries[0])

        except Exception as e:
            debug(f"IMDbCDN request failed for {imdb_id}: {e}")

        return None

    @staticmethod
    def get_poster(imdb_id, user_agent):
        """Return the poster URL (``i.imageUrl``) of the entry, or ``None``."""
        entry = IMDbCDN._get_cdn_data(imdb_id, 'en', user_agent)
        if not entry:
            return None
        image_node = entry.get("i")
        if image_node and "imageUrl" in image_node:
            return image_node["imageUrl"]
        return None

    @staticmethod
    def get_title(imdb_id, user_agent):
        """Returns the English title from CDN."""
        entry = IMDbCDN._get_cdn_data(imdb_id, 'en', user_agent)
        if entry and "l" in entry:
            return entry["l"]
        return None

    @staticmethod
    def search_titles(query, ttype, language, user_agent):
        """Search the suggestion endpoint for ``query``.

        Returns a list of dicts with the keys ``id``, ``titleNameText`` and
        ``titleReleaseText``; ``[]`` for an empty query, a non-200 response or
        any error. ``ttype`` is accepted but not used by this tier.
        """
        try:
            clean_query = quote(query.lower().replace(" ", "_"))
            if not clean_query:
                return []

            request_headers = {
                'Accept-Language': f'{language},en;q=0.9',
                'User-Agent': user_agent
            }

            bucket = clean_query[0]
            endpoint = f"{IMDbCDN.CDN_URL}/{bucket}/{clean_query}.json"

            reply = requests.get(endpoint, headers=request_headers, timeout=5)
            if reply.status_code == 200:
                payload = reply.json()
                if "d" not in payload:
                    return []
                return [
                    {
                        'id': item.get('id'),
                        'titleNameText': item.get('l'),
                        'titleReleaseText': item.get('y')
                    }
                    for item in payload["d"]
                ]

        except Exception as e:
            debug(f"IMDb CDN search failed: {e}")

        return []
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class IMDbFlareSolverr:
    """Tier 3: FlareSolverr - Robust fallback using browser automation."""
    WEB_URL = "https://www.imdb.com"

    # Map language codes to country names commonly used in IMDb AKAs.
    _COUNTRIES_BY_LANGUAGE = {
        'de': ['Germany', 'Austria', 'Switzerland', 'West Germany'],
        'fr': ['France', 'Canada', 'Belgium'],
        'es': ['Spain', 'Mexico', 'Argentina'],
        'it': ['Italy'],
        'pt': ['Portugal', 'Brazil'],
        'ru': ['Russia', 'Soviet Union'],
        'ja': ['Japan'],
        'hi': ['India']
    }

    @staticmethod
    def _request(url):
        """Fetch ``url`` through FlareSolverr and return the page body.

        Returns ``solution.response`` of a successful FlareSolverr reply, or
        ``None`` when no FlareSolverr URL is configured, the user skipped the
        setup, the reply is not HTTP 200 / status "ok", or the request fails.
        """
        flaresolverr_url = _get_config('FlareSolverr').get('url')
        flaresolverr_skipped = _get_db("skip_flaresolverr").retrieve("skipped")

        if not flaresolverr_url or flaresolverr_skipped:
            return None

        try:
            post_data = {
                "cmd": "request.get",
                "url": url,
                "maxTimeout": 60000,
            }

            response = requests.post(flaresolverr_url, json=post_data, headers={"Content-Type": "application/json"},
                                     timeout=60)
            if response.status_code == 200:
                json_response = response.json()
                if json_response.get("status") == "ok":
                    return json_response.get("solution", {}).get("response", "")
        except Exception as e:
            debug(f"FlareSolverr request failed for {url}: {e}")

        return None

    @staticmethod
    def get_poster(imdb_id):
        """Return the last URL of the poster ``srcset`` on the title page, or ``None``."""
        html_content = IMDbFlareSolverr._request(f"{IMDbFlareSolverr.WEB_URL}/title/{imdb_id}/")
        if not html_content:
            return None

        try:
            soup = BeautifulSoup(html_content, "html.parser")
            poster_div = soup.find('div', class_='ipc-poster')
            if poster_div and poster_div.div and poster_div.div.img:
                poster_set = poster_div.div.img.get("srcset")
                if poster_set:
                    # srcset alternates URLs and short width descriptors ("190w");
                    # the length filter keeps only the URLs.
                    poster_links = [x for x in poster_set.split(" ") if len(x) > 10]
                    if poster_links:
                        return poster_links[-1]
        except Exception as e:
            debug(f"FlareSolverr poster parsing failed: {e}")
        return None

    @staticmethod
    def get_localized_title(imdb_id, language):
        """Return the AKA title listed for a country that belongs to ``language``.

        FlareSolverr doesn't reliably support headers for localization, so the
        release info page, which lists AKAs, is scraped instead. Returns
        ``None`` when the language has no country mapping, the page cannot be
        fetched or no matching entry is found.
        """
        target_countries = IMDbFlareSolverr._COUNTRIES_BY_LANGUAGE.get(language, [])
        if not target_countries:
            # Nothing on the page could match, so skip the slow request entirely.
            return None

        url = f"{IMDbFlareSolverr.WEB_URL}/title/{imdb_id}/releaseinfo"
        html_content = IMDbFlareSolverr._request(url)
        if not html_content:
            return None

        try:
            soup = BeautifulSoup(html_content, "html.parser")

            # The AKAs are list items that carry a country label and a title.
            for item in soup.find_all("li", class_="ipc-metadata-list__item"):
                label = item.find("span", class_="ipc-metadata-list-item__label")
                if not label:
                    # Sometimes it's an anchor if it's a link
                    label = item.find("a", class_="ipc-metadata-list-item__label")
                if not label:
                    continue

                country = label.get_text(strip=True)
                if any(c in country for c in target_countries):
                    title_span = item.find("span", class_="ipc-metadata-list-item__list-content-item")
                    if title_span:
                        return title_span.get_text(strip=True)

        except Exception as e:
            debug(f"FlareSolverr localized title parsing failed: {e}")

        return None

    @staticmethod
    def _results_from_embedded_json(soup):
        """Map the search results embedded as JSON in a "props" script tag.

        Returns a list of result dicts, or ``None`` when no such script exists
        or its content does not have the expected shape.
        """
        try:
            # ``string=`` is the current name of the deprecated ``text=`` argument.
            props = soup.find("script", string=re.compile("props"))
            if not props:
                return None

            details = loads(props.string)
            results = details['props']['pageProps']['titleResults']['results']

            mapped_results = []
            for result in results:
                try:
                    mapped_results.append({
                        'id': result["listItem"]["titleId"],
                        'titleNameText': result["listItem"]["titleText"],
                        'titleReleaseText': result["listItem"].get("releaseYear")
                    })
                except KeyError:
                    # Flat result layout without the "listItem" wrapper.
                    mapped_results.append({
                        'id': result.get('id'),
                        'titleNameText': result.get("titleNameText"),
                        'titleReleaseText': result.get("titleReleaseText")
                    })
            return mapped_results
        except Exception as e:
            debug(f"FlareSolverr search parsing failed: {e}")
            return None

    @staticmethod
    def _results_from_markup(soup):
        """Map the search results found as summary list items in the HTML."""
        results = []
        try:
            for item in soup.find_all("li", class_="ipc-metadata-list-summary-item"):
                a_tag = item.find("a", class_="ipc-metadata-list-summary-item__t")
                if not a_tag:
                    continue
                id_match = re.search(r"(tt\d+)", a_tag.get("href", ""))
                if id_match:
                    results.append({
                        'id': id_match.group(1),
                        'titleNameText': a_tag.get_text(strip=True),
                        'titleReleaseText': ""
                    })
        except Exception as e:
            debug(f"FlareSolverr search parsing failed: {e}")
            return []
        return results

    @staticmethod
    def search_titles(query, ttype):
        """Search imdb.com for ``query`` restricted to title type ``ttype``.

        Returns a list of dicts with the keys ``id``, ``titleNameText`` and
        ``titleReleaseText``. The embedded JSON is preferred; if it is missing
        or unusable the result list in the markup is parsed instead. Returns
        ``[]`` when the page cannot be fetched or parsed.
        """
        url = f"{IMDbFlareSolverr.WEB_URL}/find/?q={quote(query)}&s=tt&ttype={ttype}&ref_=fn_{ttype}"
        html_content = IMDbFlareSolverr._request(url)
        if not html_content:
            return []

        try:
            soup = BeautifulSoup(html_content, "html.parser")
        except Exception as e:
            debug(f"FlareSolverr search parsing failed: {e}")
            return []

        mapped_results = IMDbFlareSolverr._results_from_embedded_json(soup)
        if mapped_results is not None:
            return mapped_results
        return IMDbFlareSolverr._results_from_markup(soup)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
# =============================================================================
|
|
338
|
+
# Main Functions (Chain of Responsibility)
|
|
339
|
+
# =============================================================================
|
|
340
|
+
|
|
341
|
+
def _update_cache(imdb_id, key, value, language=None):
    """Write one field into the cached metadata of ``imdb_id``.

    With ``key == "localized"`` and a truthy ``language`` the value is stored
    under ``localized[language]``; otherwise it is stored under ``key``. The
    entry's ``ttl`` is refreshed on every write: seven days once both title
    and year are known, one day otherwise. Errors are logged, never raised.
    """
    db = _get_db("imdb_metadata")
    try:
        raw_entry = db.retrieve(imdb_id)
        if raw_entry:
            entry = loads(raw_entry)
        else:
            entry = {
                "title": None,
                "year": None,
                "poster_link": None,
                "localized": {},
                "ttl": 0
            }

        if key == "localized" and language:
            has_localized_dict = "localized" in entry and isinstance(entry["localized"], dict)
            if not has_localized_dict:
                entry["localized"] = {}
            entry["localized"][language] = value
        else:
            entry[key] = value

        now = datetime.now().timestamp()
        if entry.get("title") and entry.get("year"):
            lifetime = timedelta(days=7)
        else:
            lifetime = timedelta(days=1)
        entry["ttl"] = now + lifetime.total_seconds()

        db.update_store(imdb_id, dumps(entry))
    except Exception as e:
        debug(f"Error updating IMDb metadata cache for {imdb_id}: {e}")
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def get_poster_link(shared_state, imdb_id):
    """Return a poster URL for ``imdb_id``, or ``None`` if no source has one.

    Order of lookup: cached/API metadata, then the CDN, then FlareSolverr.
    A poster found by the CDN or FlareSolverr is written back to the cache.
    """
    imdb_metadata = get_imdb_metadata(imdb_id)
    known_link = imdb_metadata.get("poster_link") if imdb_metadata else None
    if known_link:
        return known_link

    user_agent = shared_state.values["user_agent"]

    fallbacks = (
        lambda: IMDbCDN.get_poster(imdb_id, user_agent),
        lambda: IMDbFlareSolverr.get_poster(imdb_id),
    )
    for fetch_poster in fallbacks:
        poster = fetch_poster()
        if poster:
            _update_cache(imdb_id, "poster_link", poster)
            return poster

    debug(f"Could not get poster title for {imdb_id}")
    return None
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def get_localized_title(shared_state, imdb_id, language='de'):
    """Return the sanitized title of ``imdb_id`` in ``language``.

    Order of lookup: cached/API metadata, the CDN (English only), the AKA list
    scraped through FlareSolverr, and finally the English CDN title as a last
    resort. Titles found outside the cache are written back to it. Returns
    ``None`` when every source fails.
    """
    imdb_metadata = get_imdb_metadata(imdb_id)
    if imdb_metadata:
        # The cached value is not guaranteed to be a dict (``_update_cache``
        # repairs non-dict values on write), so check before indexing into it.
        localized_titles = imdb_metadata.get("localized")
        if isinstance(localized_titles, dict):
            localized = localized_titles.get(language)
            if localized:
                return localized
        if language == 'en' and imdb_metadata.get("title"):
            return imdb_metadata.get("title")

    user_agent = shared_state.values["user_agent"]

    if language == 'en':
        title = IMDbCDN.get_title(imdb_id, user_agent)
        if title:
            sanitized_title = TitleCleaner.sanitize(title)
            _update_cache(imdb_id, "title", sanitized_title)
            return sanitized_title

    title = IMDbFlareSolverr.get_localized_title(imdb_id, language)
    if title:
        sanitized_title = TitleCleaner.sanitize(title)
        _update_cache(imdb_id, "localized", sanitized_title, language)
        return sanitized_title

    # Final fallback: Try CDN for English title if localization failed
    title = IMDbCDN.get_title(imdb_id, user_agent)
    if title:
        sanitized_title = TitleCleaner.sanitize(title)
        _update_cache(imdb_id, "title", sanitized_title)
        return sanitized_title

    debug(f"Could not get localized title for {imdb_id} in {language}")
    return None
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def get_imdb_metadata(imdb_id):
    """Return the metadata dict for ``imdb_id``.

    The dict has the keys ``title``, ``year``, ``poster_link``, ``localized``
    (language code -> title) and ``ttl`` (epoch seconds).

    A cached entry whose ``ttl`` lies in the future is returned as is.
    Otherwise the API is queried and the result is cached: seven days when
    both title and year are known, one day otherwise. If the API fails, a
    stale cache entry is returned when one exists, else a dict whose fields
    are all empty.
    """
    db = _get_db("imdb_metadata")
    now = datetime.now().timestamp()
    cached_metadata = None

    # 0. Check Cache
    try:
        cached_data = db.retrieve(imdb_id)
        if cached_data:
            cached_metadata = loads(cached_data)
            if cached_metadata.get("ttl") and cached_metadata["ttl"] > now:
                return cached_metadata
    except Exception as e:
        debug(f"Error retrieving IMDb metadata from DB for {imdb_id}: {e}")
        cached_metadata = None

    imdb_metadata = {
        "title": None,
        "year": None,
        "poster_link": None,
        "localized": {},
        "ttl": 0
    }

    # 1. Try API
    response_json = IMDbAPI.get_title(imdb_id)

    if response_json:
        imdb_metadata["title"] = TitleCleaner.sanitize(response_json.get("primaryTitle", ""))
        imdb_metadata["year"] = response_json.get("startYear")

        days = 7 if imdb_metadata.get("title") and imdb_metadata.get("year") else 1
        imdb_metadata["ttl"] = now + timedelta(days=days).total_seconds()

        # "primaryImage" may be absent or null; only read the URL from a dict.
        primary_image = response_json.get("primaryImage")
        if isinstance(primary_image, dict):
            imdb_metadata["poster_link"] = primary_image.get("url")

        for aka in IMDbAPI.get_akas(imdb_id) or []:
            # Entries that carry a language are skipped.
            if aka.get("language"):
                continue
            # "country" and "code" may be null in the payload.
            country = aka.get("country")
            country_code = country.get("code") if isinstance(country, dict) else None
            if (country_code or "").lower() == "de":
                imdb_metadata["localized"]["de"] = TitleCleaner.sanitize(aka.get("text"))
                break

        # A failing cache write must not discard metadata that was fetched successfully.
        try:
            db.update_store(imdb_id, dumps(imdb_metadata))
        except Exception as e:
            debug(f"Error updating IMDb metadata cache for {imdb_id}: {e}")
        return imdb_metadata

    # API Failed. If we have stale cache, return it.
    if cached_metadata:
        return cached_metadata

    # No CDN fallback here: it would need a user agent, and this function does
    # not receive shared_state. get_poster_link and get_localized_title handle
    # their own fallbacks, so return the empty metadata.
    return imdb_metadata
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def get_imdb_id_from_title(shared_state, title, language="de"):
    """Resolve a release title to an IMDb ID, or ``None`` if nothing matches.

    A season/episode tag (``S01``, ``S01E02``) in ``title`` selects a series
    search, anything else a movie search. The cleaned title is looked up in
    the search cache first (entries are valid for 48 hours, including cached
    misses), then searched via the API, the CDN and FlareSolverr in that
    order. The outcome is written to the search cache.
    """
    imdb_id = None

    if re.search(r"S\d{1,3}(E\d{1,3})?", title, re.IGNORECASE):
        ttype_api = "TV_SERIES"
        ttype_web = "tv"
    else:
        ttype_api = "MOVIE"
        ttype_web = "ft"

    title = TitleCleaner.clean(title)

    # 0. Check Search Cache
    db = _get_db("imdb_searches")
    try:
        cached_data = db.retrieve(title)
        if cached_data:
            data = loads(cached_data)
            if data.get("timestamp") and datetime.fromtimestamp(data["timestamp"]) > datetime.now() - timedelta(
                    hours=48):
                return data.get("imdb_id")
    except Exception as e:
        # The cache is best effort: log the problem and search again.
        debug(f"Error reading IMDb search cache for {title}: {e}")

    user_agent = shared_state.values["user_agent"]

    # Search tiers in order of preference; each later tier only runs while
    # no ID has been found. Only API results carry a type to filter on.
    search_tiers = (
        (lambda: IMDbAPI.search_titles(title), True),
        (lambda: IMDbCDN.search_titles(title, ttype_web, language, user_agent), False),
        (lambda: IMDbFlareSolverr.search_titles(title, ttype_web), False),
    )
    for run_search, is_api in search_tiers:
        search_results = run_search()
        if search_results:
            imdb_id = _match_result(shared_state, title, search_results, ttype_api, is_api=is_api)
        if imdb_id:
            break

    # Update Cache
    try:
        db.update_store(title, dumps({
            "imdb_id": imdb_id,
            "timestamp": datetime.now().timestamp()
        }))
    except Exception as e:
        debug(f"Error updating IMDb search cache for {title}: {e}")

    if not imdb_id:
        debug(f"No IMDb-ID found for {title}")

    return imdb_id
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _match_result(shared_state, title, results, ttype_api, is_api=False):
|
|
552
|
+
for result in results:
|
|
553
|
+
found_title = result.get("primaryTitle") if is_api else result.get("titleNameText")
|
|
554
|
+
found_id = result.get("id")
|
|
555
|
+
|
|
556
|
+
if is_api:
|
|
557
|
+
found_type = result.get("type")
|
|
558
|
+
if ttype_api == "TV_SERIES" and found_type not in ["tvSeries", "tvMiniSeries"]: continue
|
|
559
|
+
if ttype_api == "MOVIE" and found_type not in ["movie", "tvMovie"]: continue
|
|
560
|
+
|
|
561
|
+
if shared_state.search_string_in_sanitized_title(title, found_title):
|
|
562
|
+
return found_id
|
|
563
|
+
|
|
564
|
+
for result in results:
|
|
565
|
+
found_title = result.get("primaryTitle") if is_api else result.get("titleNameText")
|
|
566
|
+
found_id = result.get("id")
|
|
567
|
+
if shared_state.search_string_in_sanitized_title(title, found_title):
|
|
568
|
+
return found_id
|
|
569
|
+
|
|
570
|
+
return None
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def get_year(imdb_id):
    """Return the ``year`` field of the metadata for ``imdb_id``, or ``None``."""
    metadata = get_imdb_metadata(imdb_id)
    return metadata.get("year") if metadata else None
|