quasarr 2.1.5__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of quasarr might be problematic; see the registry's advisory page for more details.

Files changed (60)
  1. quasarr/__init__.py +38 -29
  2. quasarr/api/__init__.py +94 -23
  3. quasarr/api/captcha/__init__.py +0 -12
  4. quasarr/api/config/__init__.py +22 -11
  5. quasarr/api/packages/__init__.py +26 -34
  6. quasarr/api/statistics/__init__.py +15 -15
  7. quasarr/downloads/__init__.py +9 -1
  8. quasarr/downloads/packages/__init__.py +2 -2
  9. quasarr/downloads/sources/al.py +6 -0
  10. quasarr/downloads/sources/by.py +29 -20
  11. quasarr/downloads/sources/dd.py +9 -1
  12. quasarr/downloads/sources/dl.py +3 -0
  13. quasarr/downloads/sources/dt.py +16 -7
  14. quasarr/downloads/sources/dw.py +22 -17
  15. quasarr/downloads/sources/he.py +11 -6
  16. quasarr/downloads/sources/mb.py +9 -3
  17. quasarr/downloads/sources/nk.py +9 -3
  18. quasarr/downloads/sources/nx.py +21 -17
  19. quasarr/downloads/sources/sf.py +21 -13
  20. quasarr/downloads/sources/sl.py +10 -2
  21. quasarr/downloads/sources/wd.py +18 -9
  22. quasarr/downloads/sources/wx.py +7 -11
  23. quasarr/providers/auth.py +1 -1
  24. quasarr/providers/cloudflare.py +1 -1
  25. quasarr/providers/hostname_issues.py +63 -0
  26. quasarr/providers/html_images.py +1 -18
  27. quasarr/providers/html_templates.py +104 -12
  28. quasarr/providers/imdb_metadata.py +288 -75
  29. quasarr/providers/obfuscated.py +11 -11
  30. quasarr/providers/sessions/al.py +27 -11
  31. quasarr/providers/sessions/dd.py +12 -4
  32. quasarr/providers/sessions/dl.py +19 -11
  33. quasarr/providers/sessions/nx.py +12 -4
  34. quasarr/providers/version.py +1 -1
  35. quasarr/search/__init__.py +5 -0
  36. quasarr/search/sources/al.py +12 -1
  37. quasarr/search/sources/by.py +15 -4
  38. quasarr/search/sources/dd.py +22 -3
  39. quasarr/search/sources/dj.py +12 -1
  40. quasarr/search/sources/dl.py +12 -6
  41. quasarr/search/sources/dt.py +17 -4
  42. quasarr/search/sources/dw.py +15 -4
  43. quasarr/search/sources/fx.py +19 -6
  44. quasarr/search/sources/he.py +22 -3
  45. quasarr/search/sources/mb.py +15 -4
  46. quasarr/search/sources/nk.py +19 -3
  47. quasarr/search/sources/nx.py +15 -4
  48. quasarr/search/sources/sf.py +25 -8
  49. quasarr/search/sources/sj.py +14 -1
  50. quasarr/search/sources/sl.py +17 -2
  51. quasarr/search/sources/wd.py +15 -4
  52. quasarr/search/sources/wx.py +16 -18
  53. quasarr/storage/setup.py +150 -35
  54. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/METADATA +6 -3
  55. quasarr-2.3.0.dist-info/RECORD +82 -0
  56. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/WHEEL +1 -1
  57. quasarr-2.1.5.dist-info/RECORD +0 -81
  58. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/entry_points.txt +0 -0
  59. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/licenses/LICENSE +0 -0
  60. {quasarr-2.1.5.dist-info → quasarr-2.3.0.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@
5
5
  import html
6
6
  import re
7
7
  from datetime import datetime, timedelta
8
- from json import loads
8
+ from json import loads, dumps
9
9
  from urllib.parse import quote
10
10
 
11
11
  import requests
@@ -14,20 +14,168 @@ from bs4 import BeautifulSoup
14
14
  from quasarr.providers.log import info, debug
15
15
 
16
16
 
17
def _get_db(table_name):
    """Return a DataBase handle bound to *table_name*.

    The import happens inside the function body on purpose: importing
    quasarr.storage.sqlite_database at module load time would create a
    circular dependency.
    """
    from quasarr.storage.sqlite_database import DataBase
    return DataBase(table_name)
21
+
22
+
23
class IMDbAPI:
    """Thin wrapper around the api.imdbapi.dev REST endpoints."""

    BASE_URL = "https://api.imdbapi.dev"

    @staticmethod
    def get_title(imdb_id):
        """Fetch the full title record for *imdb_id*; None on any failure."""
        url = f"{IMDbAPI.BASE_URL}/titles/{imdb_id}"
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            info(f"Error loading imdbapi.dev for {imdb_id}: {e}")
            return None

    @staticmethod
    def get_akas(imdb_id):
        """Fetch alternative (localized) titles for *imdb_id*; [] on failure."""
        url = f"{IMDbAPI.BASE_URL}/titles/{imdb_id}/akas"
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.json().get("akas", [])
        except Exception as e:
            info(f"Error loading localized titles from IMDbAPI.dev for {imdb_id}: {e}")
            return []

    @staticmethod
    def search_titles(query):
        """Search for titles matching *query* (max 5 results); [] on failure."""
        url = f"{IMDbAPI.BASE_URL}/search/titles?query={quote(query)}&limit=5"
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.json().get("titles", [])
        except Exception as e:
            debug(f"Request on IMDbAPI failed: {e}")
            return []
56
+
57
+
58
class IMDbWeb:
    """Fallback metadata source that scrapes imdb.com directly."""

    BASE_URL = "https://www.imdb.com"

    @staticmethod
    def get_poster(imdb_id, user_agent):
        """Return the highest-resolution poster URL for *imdb_id*, or None."""
        headers = {'User-Agent': user_agent}
        try:
            page = requests.get(f"{IMDbWeb.BASE_URL}/title/{imdb_id}/", headers=headers, timeout=10).text
            soup = BeautifulSoup(page, "html.parser")
            # srcset lists poster URLs in ascending resolution.
            srcset = soup.find('div', class_='ipc-poster').div.img["srcset"]
            # Keep the URL tokens, drop the short resolution descriptors.
            links = [token for token in srcset.split(" ") if len(token) > 10]
            return links[-1]  # last entry = highest resolution
        except Exception as e:
            debug(f"Could not get poster title for {imdb_id} from IMDb: {e}")
            return None

    @staticmethod
    def get_localized_title(imdb_id, language, user_agent):
        """Scrape the page <title> for *imdb_id* under *language*; None on failure."""
        headers = {
            'Accept-Language': language,
            'User-Agent': user_agent
        }
        try:
            response = requests.get(f"{IMDbWeb.BASE_URL}/title/{imdb_id}/", headers=headers, timeout=10)
            response.raise_for_status()

            # Try the "(year)" form first, then the plain "- IMDb" form.
            for pattern in (r'<title>(.*?) \(.*?</title>', r'<title>(.*?) - IMDb</title>'):
                match = re.search(pattern, response.text)
                if match:
                    return match.group(1)
        except Exception as e:
            info(f"Error loading IMDb metadata for {imdb_id}: {e}")

        return None

    @staticmethod
    def search_titles(query, ttype, language, user_agent):
        """Scrape IMDb's find page; return its embedded result list, or []."""
        headers = {
            'Accept-Language': language,
            'User-Agent': user_agent
        }
        url = f"{IMDbWeb.BASE_URL}/find/?q={quote(query)}&s=tt&ttype={ttype}&ref_=fn_{ttype}"
        try:
            results = requests.get(url, headers=headers, timeout=10)

            if results.status_code != 200:
                debug(f"Request on IMDb failed: {results.status_code}")
            else:
                soup = BeautifulSoup(results.text, "html.parser")
                # The page embeds its data as JSON in a <script> containing "props".
                props = soup.find("script", text=re.compile("props"))
                if props:
                    details = loads(props.string)
                    return details['props']['pageProps']['titleResults']['results']
        except Exception as e:
            debug(f"IMDb scraping fallback failed: {e}")

        return []
118
+
119
+
120
class TitleCleaner:
    """String helpers for normalizing titles and release names."""

    @staticmethod
    def sanitize(title):
        """Normalize *title* for comparisons; returns "" for falsy input."""
        if not title:
            return ""
        result = html.unescape(title)
        # NOTE: `&-'` inside the class is a character *range* (0x26-0x27),
        # so only & and ' survive — literal hyphens are replaced by spaces.
        result = re.sub(r"[^a-zA-Z0-9äöüÄÖÜß&-']", ' ', result).strip()
        result = result.replace(" - ", "-")
        return re.sub(r'\s{2,}', ' ', result)

    @staticmethod
    def clean(title):
        """Extract the bare title from a release name, '+'-joined for search.

        On any error the original *title* is returned unchanged.
        """
        try:
            # Cut the name before common release tags. The separator-plus-two-
            # digits branch has a (?!19|20) lookahead, so 4-digit years are
            # kept in the extracted part; language tags, resolutions and
            # season markers end it.
            boundary = r"(.*?)(?:[\.\s](?!19|20)\d{2}|[\.\s]German|[\.\s]GERMAN|[\.\s]\d{3,4}p|[\.\s]S(?:\d{1,3}))"
            match = re.search(boundary, title)
            extracted = match.group(1) if match else title

            # Strip edition/cut tags that may still trail the title part.
            for tag in (
                r'[\.\s]UNRATED.*', r'[\.\s]Unrated.*', r'[\.\s]Uncut.*', r'[\.\s]UNCUT.*',
                r'[\.\s]Directors[\.\s]Cut.*', r'[\.\s]Final[\.\s]Cut.*', r'[\.\s]DC.*',
                r'[\.\s]REMASTERED.*', r'[\.\s]EXTENDED.*', r'[\.\s]Extended.*',
                r'[\.\s]Theatrical.*', r'[\.\s]THEATRICAL.*'
            ):
                extracted = re.sub(tag, "", extracted, flags=re.IGNORECASE)

            # Dots -> spaces, collapse whitespace, then join with '+'.
            extracted = extracted.replace(".", " ").strip()
            extracted = re.sub(r'\s+', ' ', extracted)
            return extracted.replace(" ", "+")
        except Exception as e:
            debug(f"Error cleaning title '{title}': {e}")
            return title
167
+
168
+
17
169
def get_poster_link(shared_state, imdb_id):
    """Best-effort poster URL for *imdb_id*.

    Order: cached/API metadata first, then scraping imdb.com.
    Returns the URL string, or None when nothing could be found.
    """
    # Guard: the previous flow called get_imdb_metadata(None), causing a
    # pointless API round-trip (and a cache entry keyed on None).
    if not imdb_id:
        debug(f"Could not get poster title for {imdb_id} from IMDb")
        return None

    imdb_metadata = get_imdb_metadata(imdb_id)
    if imdb_metadata:
        poster_link = imdb_metadata.get("poster_link")
        if poster_link:
            return poster_link

    # Fallback: scrape the IMDb title page directly.
    poster_link = IMDbWeb.get_poster(imdb_id, shared_state.values["user_agent"])

    if not poster_link:
        debug(f"Could not get poster title for {imdb_id} from IMDb")

    return poster_link
36
184
 
37
185
 
38
- def get_localized_title(shared_state, imdb_id, language='de'):
39
- localized_title = None
186
def get_imdb_metadata(imdb_id):
    """Return metadata for *imdb_id*, cached in the local DB.

    Result dict keys: "title", "year", "poster_link",
    "localized" (language code -> title) and "ttl" (unix timestamp until
    which the entry is considered fresh). Fresh cache hits get a sliding
    30-day TTL; on API failure a stale cache entry is returned as fallback.
    """
    db = _get_db("imdb_metadata")
    now = datetime.now().timestamp()

    # Try the local cache first.
    cached_metadata = None
    try:
        cached_data = db.retrieve(imdb_id)
        if cached_data:
            cached_metadata = loads(cached_data)
            # Still fresh: extend the TTL (sliding expiration) and return.
            if cached_metadata.get("ttl") and cached_metadata["ttl"] > now:
                cached_metadata["ttl"] = now + timedelta(days=30).total_seconds()
                db.update_store(imdb_id, dumps(cached_metadata))
                return cached_metadata
    except Exception as e:
        debug(f"Error retrieving IMDb metadata from DB for {imdb_id}: {e}")

    # Fresh metadata skeleton.
    imdb_metadata = {
        "title": None,
        "year": None,
        "poster_link": None,
        "localized": {},
        "ttl": 0
    }

    response_json = IMDbAPI.get_title(imdb_id)

    if not response_json:
        # API failed: stale cached data beats nothing.
        if cached_metadata:
            debug(f"IMDb API failed for {imdb_id}, returning stale cached data.")
            return cached_metadata
        return imdb_metadata

    imdb_metadata["title"] = TitleCleaner.sanitize(response_json.get("primaryTitle", ""))
    imdb_metadata["year"] = response_json.get("startYear")
    imdb_metadata["ttl"] = now + timedelta(days=30).total_seconds()

    # Fix: handle a missing "primaryImage" explicitly instead of relying on
    # the AttributeError raised by .get("primaryImage").get("url").
    poster_link = (response_json.get("primaryImage") or {}).get("url")
    if poster_link:
        imdb_metadata["poster_link"] = poster_link
    else:
        debug(f"Could not find poster link for {imdb_id} from imdbapi.dev: missing primaryImage/url")
        # Incomplete data: shorten the TTL so we retry sooner.
        imdb_metadata["ttl"] = now + timedelta(days=1).total_seconds()

    akas = IMDbAPI.get_akas(imdb_id)
    if akas:
        for aka in akas:
            if aka.get("language"):
                continue  # skip entries with specific language tags
            if aka.get("country", {}).get("code", "").lower() == "de":
                imdb_metadata["localized"]["de"] = TitleCleaner.sanitize(aka.get("text"))
                break
    else:
        # AKA lookup failed (or empty): shorten the TTL so we retry sooner.
        imdb_metadata["ttl"] = now + timedelta(days=1).total_seconds()

    db.update_store(imdb_id, dumps(imdb_metadata))
    return imdb_metadata
64
249
 
65
- localized_title = html.unescape(localized_title)
66
- localized_title = re.sub(r"[^a-zA-Z0-9äöüÄÖÜß&-']", ' ', localized_title).strip()
67
- localized_title = localized_title.replace(" - ", "-")
68
- localized_title = re.sub(r'\s{2,}', ' ', localized_title)
69
250
 
70
- return localized_title
251
def get_year(imdb_id):
    """Return the start/release year for *imdb_id*, or None when unavailable."""
    metadata = get_imdb_metadata(imdb_id)
    return metadata.get("year") if metadata else None
71
256
 
72
257
 
73
- def get_clean_title(title):
74
- try:
75
- extracted_title = re.findall(r"(.*?)(?:.(?!19|20)\d{2}|\.German|.GERMAN|\.\d{3,4}p|\.S(?:\d{1,3}))", title)[0]
76
- leftover_tags_removed = re.sub(
77
- r'(|.UNRATED.*|.Unrated.*|.Uncut.*|.UNCUT.*)(|.Directors.Cut.*|.Final.Cut.*|.DC.*|.REMASTERED.*|.EXTENDED.*|.Extended.*|.Theatrical.*|.THEATRICAL.*)',
78
- "", extracted_title)
79
- clean_title = leftover_tags_removed.replace(".", " ").strip().replace(" ", "+")
258
def get_localized_title(shared_state, imdb_id, language='de'):
    """Return the title for *imdb_id* localized to *language*.

    Preference order: cached localized title, cached primary title, then a
    direct imdb.com scrape. Returns None if every source fails.
    """
    imdb_metadata = get_imdb_metadata(imdb_id)
    if imdb_metadata:
        # `or {}` guards against malformed cache entries missing "localized".
        localized_title = (imdb_metadata.get("localized") or {}).get(language)
        if localized_title:
            return localized_title
        # Fix: only return the primary title when it is actually usable.
        # Previously a None/empty title was returned here unconditionally,
        # which made the IMDbWeb fallback below unreachable.
        title = imdb_metadata.get("title")
        if title:
            return title

    # Fallback: scrape imdb.com directly.
    localized_title = IMDbWeb.get_localized_title(imdb_id, language, shared_state.values["user_agent"])

    if not localized_title:
        debug(f"Could not get localized title for {imdb_id} in {language} from IMDb")
        return None
    return TitleCleaner.sanitize(localized_title)
84
273
 
85
274
 
86
275
  def get_imdb_id_from_title(shared_state, title, language="de"):
87
276
  imdb_id = None
88
277
 
89
278
  if re.search(r"S\d{1,3}(E\d{1,3})?", title, re.IGNORECASE):
90
- ttype = "tv"
279
+ ttype_api = "TV_SERIES"
280
+ ttype_web = "tv"
91
281
  else:
92
- ttype = "ft"
282
+ ttype_api = "MOVIE"
283
+ ttype_web = "ft"
284
+
285
+ title = TitleCleaner.clean(title)
93
286
 
94
- title = get_clean_title(title)
287
+ # Check Search Cache (DB)
288
+ db = _get_db("imdb_searches")
289
+ try:
290
+ cached_data = db.retrieve(title)
291
+ if cached_data:
292
+ data = loads(cached_data)
293
+ # Check TTL (48 hours)
294
+ if data.get("timestamp") and datetime.fromtimestamp(data["timestamp"]) > datetime.now() - timedelta(
295
+ hours=48):
296
+ return data.get("imdb_id")
297
+ except Exception as e:
298
+ debug(f"Error retrieving search cache for {title}: {e}")
95
299
 
96
- threshold = 60 * 60 * 48 # 48 hours
97
- context = "recents_imdb"
98
- recently_searched = shared_state.get_recently_searched(shared_state, context, threshold)
99
- if title in recently_searched:
100
- title_item = recently_searched[title]
101
- if title_item["timestamp"] > datetime.now() - timedelta(seconds=threshold):
102
- return title_item["imdb_id"]
300
+ # Try IMDbAPI.dev first
301
+ search_results = IMDbAPI.search_titles(title)
302
+ if search_results:
303
+ for result in search_results:
304
+ found_title = result.get("primaryTitle")
305
+ found_id = result.get("id")
306
+ found_type = result.get("type")
103
307
 
104
- headers = {
105
- 'Accept-Language': language,
106
- 'User-Agent': shared_state.values["user_agent"]
107
- }
308
+ # Basic type filtering if possible from result data
309
+ if ttype_api == "TV_SERIES" and found_type not in ["tvSeries", "tvMiniSeries"]:
310
+ continue
311
+ if ttype_api == "MOVIE" and found_type not in ["movie", "tvMovie"]:
312
+ continue
108
313
 
109
- results = requests.get(f"https://www.imdb.com/find/?q={quote(title)}&s=tt&ttype={ttype}&ref_=fn_{ttype}",
110
- headers=headers, timeout=10)
314
+ if shared_state.search_string_in_sanitized_title(title, found_title):
315
+ imdb_id = found_id
316
+ break
111
317
 
112
- if results.status_code == 200:
113
- soup = BeautifulSoup(results.text, "html.parser")
114
- props = soup.find("script", text=re.compile("props"))
115
- details = loads(props.string)
116
- search_results = details['props']['pageProps']['titleResults']['results']
318
+ # If no exact match found with type filtering, try relaxed matching
319
+ if not imdb_id:
320
+ for result in search_results:
321
+ found_title = result.get("primaryTitle")
322
+ found_id = result.get("id")
323
+ if shared_state.search_string_in_sanitized_title(title, found_title):
324
+ imdb_id = found_id
325
+ break
117
326
 
118
- if len(search_results) > 0:
327
+ # Fallback to IMDb scraping if API failed or returned no results
328
+ if not imdb_id:
329
+ search_results = IMDbWeb.search_titles(title, ttype_web, language, shared_state.values["user_agent"])
330
+ if search_results:
119
331
  for result in search_results:
120
332
  try:
121
333
  found_title = result["listItem"]["titleText"]
@@ -127,14 +339,15 @@ def get_imdb_id_from_title(shared_state, title, language="de"):
127
339
  if shared_state.search_string_in_sanitized_title(title, found_title):
128
340
  imdb_id = found_id
129
341
  break
130
- else:
131
- debug(f"Request on IMDb failed: {results.status_code}")
132
342
 
133
- recently_searched[title] = {
134
- "imdb_id": imdb_id,
135
- "timestamp": datetime.now()
136
- }
137
- shared_state.update(context, recently_searched)
343
+ # Update Search Cache
344
+ try:
345
+ db.update_store(title, dumps({
346
+ "imdb_id": imdb_id,
347
+ "timestamp": datetime.now().timestamp()
348
+ }))
349
+ except Exception as e:
350
+ debug(f"Error updating search cache for {title}: {e}")
138
351
 
139
352
  if not imdb_id:
140
353
  debug(f"No IMDb-ID found for {title}")