PyPI - mkv-episode-matcher - Versions diffs - 0.1.0__py2.py3-none-any.whl - Mend

mkv-episode-matcher 0.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mkv-episode-matcher might be problematic. Click here for more details.

Files changed (12) hide show

mkv_episode_matcher/.gitattributes +2 -0
mkv_episode_matcher/__init__.py +0 -0
mkv_episode_matcher/__main__.py +177 -0
mkv_episode_matcher/config.py +79 -0
mkv_episode_matcher/episode_matcher.py +235 -0
mkv_episode_matcher/mkv_to_srt.py +178 -0
mkv_episode_matcher/requirements.txt +8 -0
mkv_episode_matcher/tmdb_client.py +132 -0
mkv_episode_matcher/utils.py +226 -0
mkv_episode_matcher-0.1.0.dist-info/METADATA +90 -0
mkv_episode_matcher-0.1.0.dist-info/RECORD +12 -0
mkv_episode_matcher-0.1.0.dist-info/WHEEL +5 -0

mkv_episode_matcher/.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # Auto detect text files and perform LF normalization
2	+ * text=auto

mkv_episode_matcher/__init__.py ADDED Viewed

File without changes

mkv_episode_matcher/__main__.py ADDED Viewed

@@ -0,0 +1,177 @@
+# __main__.py
+import argparse
+import os
+from loguru import logger
+from .config import set_config, get_config
# Create the log directory before registering the file sinks. exist_ok avoids
# the check-then-create race when two instances start at the same time.
os.makedirs("./logs", exist_ok=True)
# Add a new handler for stdout logs
logger.add("./logs/file_stdout.log", format="{time} {level} {message}", level="DEBUG", rotation="10 MB")
# Add a new handler for error logs
logger.add("./logs/file_errors.log", level="ERROR", rotation="10 MB")
# Per-user application directory that holds both the configuration file and
# the cache directory.
_APP_DIR = os.path.join(os.path.expanduser("~"), ".mkv-episode-matcher")
# Define the paths for the configuration file and cache directory
CONFIG_FILE = os.path.join(_APP_DIR, "config.ini")
CACHE_DIR = os.path.join(_APP_DIR, "cache")
# Creating the cache directory also creates the application directory above it.
os.makedirs(CACHE_DIR, exist_ok=True)
def _parse_bool_flag(value):
    """
    Convert a command-line token into a boolean.
    Args:
        value (str | bool): The raw token given after the flag.
    Returns:
        bool: True for tokens such as "true"/"1"/"yes", False for "false"/"0"/"no" or an empty string.
    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in {"1", "true", "t", "yes", "y", "on"}:
        return True
    if token in {"", "0", "false", "f", "no", "n", "off"}:
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")


@logger.catch
def main():
    """
    Entry point of the application.
    This function parses the command-line arguments, fills in missing settings from the
    cached configuration file or by prompting the user, writes the resulting configuration
    back to disk and then processes the show.
    Command-line arguments:
    --tmdb-api-key: The API key for the TMDb API. If not provided, the cached value is used, otherwise the user is prompted.
    --show-dir: The main directory of the show. If not provided, the cached value is used, otherwise the user is prompted; an empty answer selects the current working directory.
    --season: The season number to be processed. If not provided, all seasons will be processed.
    --dry-run: Do not rename any files. May be given bare (`--dry-run`) or with an explicit value (`--dry-run false`).
    --get-subs: Download subtitles for the show. May be given bare or with an explicit value.
    --tesseract-path: The path to the tesseract executable. If not provided, the cached value is used, otherwise the user is prompted.
    """
    # Log the start of the application
    logger.info("Starting the application")
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="Process shows with TMDb API")
    parser.add_argument("--tmdb-api-key", help="TMDb API key")
    parser.add_argument("--show-dir", help="Main directory of the show")
    parser.add_argument(
        "--season",
        type=int,
        default=None,
        nargs="?",
        help="Specify the season number to be processed (default: None)",
    )
    # type=bool would turn every non-empty token (including "False") into True
    # and a bare flag into None; const=True makes the bare flag mean "enabled".
    parser.add_argument(
        "--dry-run",
        type=_parse_bool_flag,
        default=None,
        nargs="?",
        const=True,
        help="Don't rename any files (default: None)",
    )
    parser.add_argument(
        "--get-subs",
        type=_parse_bool_flag,
        default=None,
        nargs="?",
        const=True,
        help="Download subtitles for the show (default: None)",
    )
    parser.add_argument(
        "--tesseract-path",
        type=str,
        default=None,
        nargs="?",
        help="Path to the tesseract executable (default: None)",
    )
    args = parser.parse_args()
    logger.debug(f"Command-line arguments: {args}")
    # Load the cached configuration once, up front, so every lookup below has
    # a mapping to read from regardless of which flags were supplied.
    cached_config = get_config(CONFIG_FILE) or {}
    # Command line first, then cache, then prompt
    tmdb_api_key = args.tmdb_api_key or cached_config.get("tmdb_api_key")
    if not tmdb_api_key:
        tmdb_api_key = input("Enter your TMDb API key: ")
    # NOTE(review): this writes the API key to the debug log file.
    logger.debug(f"TMDb API Key: {tmdb_api_key}")
    # Start from the cached OpenSubtitles settings so that a run without
    # --get-subs does not overwrite stored credentials with empty strings.
    open_subtitles_api_key = cached_config.get("open_subtitles_api_key") or ""
    open_subtitles_user_agent = cached_config.get("open_subtitles_user_agent") or ""
    open_subtitles_username = cached_config.get("open_subtitles_username") or ""
    open_subtitles_password = cached_config.get("open_subtitles_password") or ""
    if args.get_subs:
        logger.debug("Getting OpenSubtitles API key")
        if not open_subtitles_api_key:
            open_subtitles_api_key = input("Enter your OpenSubtitles API key: ")
        if not open_subtitles_user_agent:
            open_subtitles_user_agent = input("Enter your OpenSubtitles User Agent: ")
        if not open_subtitles_username:
            open_subtitles_username = input("Enter your OpenSubtitles Username: ")
        if not open_subtitles_password:
            open_subtitles_password = input("Enter your OpenSubtitles Password: ")
    # Command line first, then cache, then prompt
    show_dir = args.show_dir or cached_config.get("show_dir")
    if not show_dir:
        show_dir = input("Enter the main directory of the show:")
        # An empty answer selects the current working directory
        if not show_dir:
            show_dir = os.getcwd()
        logger.info(f"Show Directory: {show_dir}")
    tesseract_path = args.tesseract_path or cached_config.get("tesseract_path")
    if not tesseract_path:
        tesseract_path = input(
            r"Enter the path to the tesseract executable: ['C:\Program Files\Tesseract-OCR\tesseract.exe']"
        )
    logger.debug(f"Teesseract Path: {tesseract_path}")
    logger.debug(f"Show Directory: {show_dir}")
    # Set the configuration
    set_config(
        tmdb_api_key,
        open_subtitles_api_key,
        open_subtitles_user_agent,
        open_subtitles_username,
        open_subtitles_password,
        show_dir,
        CONFIG_FILE,
        tesseract_path=tesseract_path,
    )
    logger.info("Configuration set")
    # Imported here rather than at module top because episode_matcher imports
    # CONFIG_FILE and CACHE_DIR from this module.
    from .episode_matcher import process_show
    process_show(args.season, dry_run=args.dry_run, get_subs=args.get_subs)
    logger.info("Show processing completed")


# Run the main function if the script is run directly
if __name__ == "__main__":
    main()

mkv_episode_matcher/config.py ADDED Viewed

@@ -0,0 +1,79 @@
+# config.py
+import os
+import configparser
+import multiprocessing
+from loguru import logger
# Thread limit that set_config writes to the configuration file.
MAX_THREADS = 4


def get_total_threads():
    """Return the CPU count reported by multiprocessing.cpu_count()."""
    cpu_total = multiprocessing.cpu_count()
    return cpu_total


# Evaluated once at import time and only used for the log line below.
total_threads = get_total_threads()
logger.info(f"Total available threads: {total_threads} -> Setting max to {MAX_THREADS}")
def set_config(
    tmdb_api_key,
    open_subtitles_api_key,
    open_subtitles_user_agent,
    open_subtitles_username,
    open_subtitles_password,
    show_dir,
    file,
    tesseract_path=None,
):
    """
    Sets the configuration values and writes them to a file.
    Values are stored in a single "Config" section. None is stored as an empty string, and
    percent signs are doubled so they read back unchanged through ConfigParser's default
    interpolation.
    Args:
        tmdb_api_key (str): The API key for TMDB (The Movie Database).
        open_subtitles_api_key (str): The API key for OpenSubtitles.
        open_subtitles_user_agent (str): The user agent for OpenSubtitles.
        open_subtitles_username (str): The username for OpenSubtitles.
        open_subtitles_password (str): The password for OpenSubtitles.
        show_dir (str): The directory where the TV show episodes are located.
        file (str): The path to the configuration file.
        tesseract_path (str, optional): The path to the Tesseract OCR executable.
    Returns:
        None
    """

    def _as_text(value):
        # str(None) would persist the truthy text "None"; an empty string lets
        # readers detect "not set" with a plain truthiness test.
        if value is None:
            return ""
        # A lone "%" is rejected by ConfigParser's interpolation when the value
        # is set; "%%" is its escape and reads back as "%".
        return str(value).replace("%", "%%")

    config = configparser.ConfigParser()
    config["Config"] = {
        "tmdb_api_key": _as_text(tmdb_api_key),
        "show_dir": _as_text(show_dir),
        "max_threads": str(int(MAX_THREADS)),
        "open_subtitles_api_key": _as_text(open_subtitles_api_key),
        "open_subtitles_user_agent": _as_text(open_subtitles_user_agent),
        "open_subtitles_username": _as_text(open_subtitles_username),
        "open_subtitles_password": _as_text(open_subtitles_password),
        "tesseract_path": _as_text(tesseract_path),
    }
    # NOTE(review): this writes the TMDb API key to the log at INFO level.
    logger.info(
        f"Setting config with API:{tmdb_api_key}, show_dir: {show_dir}, and max_threads: {MAX_THREADS}"
    )
    with open(file, "w") as configfile:
        config.write(configfile)
def get_config(file):
    """
    Read and return the configuration from the specified file.
    Args:
        file (str): The path to the configuration file.
    Returns:
        Mapping: The "Config" section of the file, or an empty dict when the file or
        the section does not exist. The result always supports `.get`.
    """
    logger.info(f"Loading config from {file}")
    config = configparser.ConfigParser()
    if os.path.exists(file):
        config.read(file)
        if "Config" in config:
            return config["Config"]
    # Same empty mapping for "no file" and "no section" so callers can call
    # .get() on the result without checking for None first.
    return {}

mkv_episode_matcher/episode_matcher.py ADDED Viewed

@@ -0,0 +1,235 @@
+# episode_matcher.py
+import os
+from mkv_episode_matcher.config import get_config
+from mkv_episode_matcher.tmdb_client import fetch_show_id
+from mkv_episode_matcher.utils import get_subtitles, cleanup_ocr_files,check_filename
+from loguru import logger
+from mkv_episode_matcher.__main__ import CONFIG_FILE, CACHE_DIR
+from mkv_episode_matcher.mkv_to_srt import convert_mkv_to_srt
+import re
+# hash_data = {}
@logger.catch
def process_show(season=None, dry_run=False, get_subs=False):
    """
    Process the show: extract subtitles from unprocessed MKV files, compare them with the
    reference subtitles and rename the files that match.
    Args:
        season (int, optional): The season number to process. If provided, only the "Season {season}" folder is processed. Defaults to None (all season folders).
        dry_run (bool, optional): Whether to skip the actual renaming of files. Defaults to False.
        get_subs (bool, optional): Whether to call get_subtitles for the seasons found on disk first. Defaults to False.
    """
    config = get_config(CONFIG_FILE)
    show_dir = config.get("show_dir")
    show_name = os.path.basename(show_dir)
    logger.info(f"Processing show '{show_name}'...")
    show_id = fetch_show_id(show_name)
    if show_id is None:
        logger.error(f"Could not find show '{os.path.basename(show_dir)}' on TMDb.")
        return
    season_paths = sorted(
        os.path.join(show_dir, entry)
        for entry in os.listdir(show_dir)
        if os.path.isdir(os.path.join(show_dir, entry))
    )
    logger.info(
        f"Found {len(season_paths)} seasons for show '{os.path.basename(show_dir)}'"
    )
    if get_subs:
        # Season numbers come from the last word of each folder name
        # ("Season 3" -> 3); folders that do not end in a number are skipped.
        seasons_to_process = set()
        for path in season_paths:
            try:
                seasons_to_process.add(int(os.path.basename(path).split()[-1]))
            except (IndexError, ValueError):
                logger.warning(f"Ignoring non-season directory {path}")
        get_subtitles(show_id, seasons=seasons_to_process)
    if season is not None:
        target_paths = [os.path.join(show_dir, f"Season {season}")]
    else:
        target_paths = season_paths
    converted_any = False
    for season_path in target_paths:
        if not os.path.isdir(season_path):
            logger.warning(f"Season directory {season_path} not found, skipping")
            continue
        # Build a fresh list instead of removing from a list while iterating
        # over it, which silently skips elements.
        pending = []
        for name in sorted(os.listdir(season_path)):
            if not name.endswith(".mkv"):
                continue
            mkv_path = os.path.join(season_path, name)
            # Test the bare file name so folder names cannot trigger a match
            if check_filename(name):
                logger.info(f"Skipping {mkv_path}, already processed")
            else:
                pending.append(mkv_path)
        if not pending:
            continue
        # Each season is converted against its own folder
        convert_mkv_to_srt(season_path, pending)
        converted_any = True
    if not converted_any:
        logger.info("No new files to process")
        return
    reference_text_dict = process_reference_srt_files(show_name)
    srt_text_dict = process_srt_files(show_dir)
    compare_and_rename_files(srt_text_dict, reference_text_dict, dry_run=dry_run)
    cleanup_ocr_files(show_dir)
def check_filename(filename):
    """
    Check if the filename already contains a season/episode tag such as S01E02.
    Args:
        filename (str): The file name or path to check.
    Returns:
        bool: True if the final path component contains an S<digits>E<digits> tag, False otherwise.
    """
    # Only the final path component is tested, so a parent directory whose name
    # happens to contain an SxxExx token does not mark every file inside it as
    # already processed.
    match = re.match(r".*S\d+E\d+", os.path.basename(filename))
    return match is not None
def extract_srt_text(filepath):
    """
    Extracts the text from an SRT file.
    Args:
        filepath (str): The path to the SRT file.
    Returns:
        list: A list of lists, where each inner list represents a block of text from the SRT file.
              Each inner list contains the non-empty lines of text for that block with <i> and </i>
              tags removed. Blocks without any text are omitted.
    """
    with open(filepath, "r") as f:
        content = f.read()
    text_lines = []
    # Split once; subtitle blocks are separated by a blank line
    for block in content.split("\n\n"):
        # The first two lines of a block are its index and its timestamp
        # range; everything after that is subtitle text.
        lines = [re.sub(r"</?i>", "", line) for line in block.split("\n")[2:]]
        # Drop empty lines after tag removal so a tag-only line does not
        # survive as an empty string
        lines = [line for line in lines if line]
        if lines:
            text_lines.append(lines)
    return text_lines
def compare_text(text1, text2):
    """
    Count the distinct lines that occur in both subtitle texts.
    Args:
        text1 (list): Blocks of text lines from the first source (a list of lists).
        text2 (list): Blocks of text lines from the second source (a list of lists).
    Returns:
        int: Number of distinct lines present in both sources.
    """
    # Collapse each list of blocks into the set of lines it contains
    first_lines = {line for block in text1 for line in block}
    second_lines = {line for block in text2 for line in block}
    return len(first_lines & second_lines)
def extract_season_episode(filename):
    """
    Pull the season and episode numbers out of an S<digits>E<digits> tag.
    Args:
        filename (str): The filename to search.
    Returns:
        tuple: (season, episode) as integers, or (None, None) when the filename has no such tag.
    """
    found = re.search(r"S(\d+)E(\d+)", filename)
    if not found:
        return None, None
    season_text, episode_text = found.groups()
    return int(season_text), int(episode_text)
def process_reference_srt_files(series_name):
    """
    Process reference SRT files for a given series.
    The files are read from the "data/<series_name>" folder below CACHE_DIR, including subfolders.
    Args:
        series_name (str): The name of the series.
    Returns:
        dict: A dictionary containing the reference files where the keys are the MKV filenames
              ("<series_name> - SxxEyy.mkv") and the values are the corresponding SRT texts.
              SRT files whose name carries no SxxEyy tag are skipped.
    """
    reference_files = {}
    reference_dir = os.path.join(CACHE_DIR, "data", series_name)
    for dirpath, _, filenames in os.walk(reference_dir):
        for filename in filenames:
            if not filename.lower().endswith(".srt"):
                continue
            season, episode = extract_season_episode(filename)
            if season is None or episode is None:
                # Without both numbers the target name cannot be built;
                # formatting None with ":02" would raise TypeError.
                continue
            srt_file = os.path.join(dirpath, filename)
            print(f"Processing {srt_file}")
            mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
            reference_files[mkv_filename] = extract_srt_text(srt_file)
    return reference_files
def process_srt_files(show_dir):
    """
    Collect the text of every SRT file found below a directory.
    Args:
        show_dir (str): The directory to walk, including all of its subdirectories.
    Returns:
        dict: Maps each SRT file path to the text blocks returned by extract_srt_text.
    """
    collected = {}
    for root, _subdirs, names in os.walk(show_dir):
        for name in names:
            # The extension check is case-insensitive
            if not name.lower().endswith(".srt"):
                continue
            full_path = os.path.join(root, name)
            print(f"Processing {full_path}")
            collected[full_path] = extract_srt_text(full_path)
    return collected
def compare_and_rename_files(srt_files, reference_files, dry_run=False):
    """
    Compare the srt files with the reference files and rename the matching mkv files.
    An srt file matches a reference when the number of shared lines reaches 10% of the
    reference's block count, with a minimum of one shared line. The mkv file is expected
    one directory above the srt file, under the same base name.
    Args:
        srt_files (dict): A dictionary containing the srt file paths as keys and their contents as values.
        reference_files (dict): A dictionary containing the reference (target) file names as keys and their contents as values.
        dry_run (bool, optional): If True, the function will only log the matches without actually renaming the files. Defaults to False.
    """
    logger.info(
        f"Comparing {len(srt_files)} srt files with {len(reference_files)} reference files"
    )
    for srt_path, srt_text in srt_files.items():
        parent_dir = os.path.dirname(os.path.dirname(srt_path))
        # splitext swaps only the real extension, whatever its letter case
        stem = os.path.splitext(os.path.basename(srt_path))[0]
        mkv_file = os.path.join(parent_dir, stem + ".mkv")
        for reference, reference_text in reference_files.items():
            matching_lines = compare_text(reference_text, srt_text)
            # At least one shared line is required; otherwise a reference with
            # fewer than ten blocks has a threshold of 0 and matches anything.
            threshold = max(1, int(len(reference_text) * 0.1))
            if matching_lines < threshold:
                continue
            logger.info(f"Matching lines: {matching_lines}")
            logger.info(f"Found matching file: {mkv_file} ->{reference}")
            new_filename = os.path.join(parent_dir, reference)
            if os.path.exists(new_filename):
                logger.info(f"File {new_filename} already exists, skipping")
                continue
            if os.path.exists(mkv_file) and not dry_run:
                logger.info(f"Renaming {mkv_file} to {new_filename}")
                os.rename(mkv_file, new_filename)
                # The source file is gone, so no other reference can apply
                break

mkv_episode_matcher/mkv_to_srt.py ADDED Viewed

@@ -0,0 +1,178 @@
+import os
+import subprocess
+import sys
# Absolute path of the directory that contains this module.
parent_dir = os.path.dirname(os.path.abspath(__file__))
# Append, in this order: the module directory, its 'libraries' subdirectory,
# and the sibling 'libraries/pgs2srt' directory one level up.
# NOTE(review): presumably where pgsreader and imagemaker are found — confirm.
sys.path.extend(
    [
        parent_dir,
        os.path.join(parent_dir, "libraries"),
        os.path.join(parent_dir, "..", "libraries", "pgs2srt"),
    ]
)
+import pytesseract
+import re
+from PIL import Image, ImageOps
+from mkv_episode_matcher.__main__ import CONFIG_FILE
+from mkv_episode_matcher.config import get_config
+from datetime import datetime, timedelta
+from concurrent.futures import ThreadPoolExecutor
+from pgsreader import PGSReader
+from imagemaker import make_image
+from loguru import logger
def convert_mkv_to_sup(mkv_file, output_dir):
    """
    Extract the first subtitle stream of an .mkv file into a .sup file using FFmpeg.
    Args:
        mkv_file (str): Path to the .mkv file.
        output_dir (str): Path to the directory where the .sup file will be saved.
    Returns:
        str: Path of the .sup file. The path is returned even when the conversion failed,
        in which case the file does not exist.
    """
    # Get the base name of the .mkv file without the extension
    base_name = os.path.splitext(os.path.basename(mkv_file))[0]
    # Construct the output .sup file path
    sup_file = os.path.join(output_dir, f"{base_name}.sup")
    if os.path.exists(sup_file):
        logger.info(f"File {sup_file} already exists, skipping")
        return sup_file
    logger.info(f"Processing {mkv_file} to {sup_file}")
    # "-map 0:s:0" selects the first subtitle stream; "-c copy" copies it
    # without re-encoding.
    ffmpeg_cmd = ["ffmpeg", "-i", mkv_file, "-map", "0:s:0", "-c", "copy", sup_file]
    try:
        subprocess.run(ffmpeg_cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg executable
        logger.error(f"Error converting {mkv_file}: {e}")
        # Remove any partial output so the next run retries instead of
        # skipping the file as "already exists".
        if os.path.exists(sup_file):
            os.remove(sup_file)
    else:
        logger.info(f"Converted {mkv_file} to {sup_file}")
    return sup_file
@logger.catch
def perform_ocr(sup_file_path):
    """
    Perform OCR on a .sup file and save the extracted text to a .srt file.
    The .srt file is written next to the .sup file under the same base name. Nothing is
    done when that file already exists.
    Args:
        sup_file_path (str): Path to the .sup file.
    """
    # Get the base name of the .sup file without the extension
    base_name = os.path.splitext(os.path.basename(sup_file_path))[0]
    output_dir = os.path.dirname(sup_file_path)
    # Construct the output .srt file path
    srt_file = os.path.join(output_dir, f"{base_name}.srt")
    if os.path.exists(srt_file):
        # Checked before the subtitle file is loaded
        logger.info(f"File {srt_file} already exists, skipping OCR")
        return
    logger.info(f"Performing OCR on {sup_file_path}")
    # Load a PGS/SUP file.
    pgs = PGSReader(sup_file_path)
    # Running subtitle index written to the SRT output
    si = 0
    tesseract_lang = "eng"
    tesseract_config = "-c tessedit_char_blacklist=[] --psm 6 --oem {}".format(1)
    config = get_config(CONFIG_FILE)
    tesseract_path = config.get("tesseract_path")
    logger.debug(f"Setting Teesseract Path to {tesseract_path}")
    pytesseract.pytesseract.tesseract_cmd = str(tesseract_path)
    # Timestamps are offsets from the start of the stream. Adding them to a
    # fixed naive epoch keeps the output independent of the local time zone.
    # NOTE(review): the division by 1000 assumes presentation_timestamp is in
    # milliseconds — confirm against pgsreader.
    epoch = datetime(1970, 1, 1)
    # Initialised so a stream whose first display set has no image does not
    # hit unbound names below.
    start = end = text = None
    # SubRip output, one entry per subtitle
    entries = []
    # Iterate the pgs generator
    for ds in pgs.iter_displaysets():
        # If set has image, parse the image
        if ds.has_image:
            # Get Palette Display Segment
            pds = ds.pds[0]
            # Get Object Display Segment
            ods = ds.ods[0]
            if pds and ods:
                # Create the bitmap image and convert it to RGBA
                src = make_image(ods, pds).convert("RGBA")
                # Create grayscale image with black background
                img = Image.new("L", src.size, "BLACK")
                # Paste the subtitle bitmap
                img.paste(src, (0, 0), src)
                # Invert images so the text is readable by Tesseract
                img = ImageOps.invert(img)
                # Parse the image with tesseract
                text = pytesseract.image_to_string(
                    img, lang=tesseract_lang, config=tesseract_config
                ).strip()
                # Replace "|" with "I"
                # Works better than blacklisting "|" in Tesseract,
                # which results in I becoming "!" "i" and "1"
                text = re.sub(r"[|/\\]", "I", text)
                text = re.sub(r"[_]", "L", text)
                start = epoch + timedelta(seconds=ods.presentation_timestamp / 1000)
        else:
            # Get Presentation Composition Segment
            pcs = ds.pcs[0]
            if pcs:
                end = epoch + timedelta(seconds=pcs.presentation_timestamp / 1000)
                if isinstance(start, datetime) and isinstance(end, datetime) and text:
                    si = si + 1
                    # "%f" yields microseconds; slicing to 12 characters
                    # keeps HH:MM:SS,mmm.
                    entries.append(
                        str(si)
                        + "\n"
                        + start.strftime("%H:%M:%S,%f")[0:12]
                        + " --> "
                        + end.strftime("%H:%M:%S,%f")[0:12]
                        + "\n"
                        + text
                        + "\n\n"
                    )
                    start = end = text = None
    with open(srt_file, "w") as f:
        f.write("".join(entries))
    logger.info(f"Saved to: {srt_file}")
def convert_mkv_to_srt(season_path, mkv_files):
    """
    Turn the subtitle track of each MKV file into an SRT file.
    Every file is first converted to a .sup file inside "<season_path>/ocr"; OCR is then
    run on each .sup file from a thread pool, and the function returns once the pool has
    finished.
    Args:
        season_path (str): The path to the season directory.
        mkv_files (list): List of MKV files to convert.
    Returns:
        None
    """
    logger.info(f"Converting {len(mkv_files)} files to SRT")
    ocr_dir = os.path.join(season_path, "ocr")
    os.makedirs(ocr_dir, exist_ok=True)
    # The .sup extraction runs sequentially, one file after another
    sup_files = [convert_mkv_to_sup(mkv_path, ocr_dir) for mkv_path in mkv_files]
    # Leaving the with-block waits for every submitted OCR job
    with ThreadPoolExecutor() as pool:
        for sup_path in sup_files:
            pool.submit(perform_ocr, sup_path)

mkv_episode_matcher/requirements.txt ADDED Viewed

@@ -0,0 +1,8 @@
+requests
+loguru
+pillow
+imagehash
+configparser
+tmdb_client
+pytesseract
+opensubtitlescom

mkv_episode_matcher/tmdb_client.py ADDED Viewed

@@ -0,0 +1,132 @@
+# tmdb_client.py
+import requests
+from loguru import logger
+from mkv_episode_matcher.config import get_config
+from mkv_episode_matcher.__main__ import CONFIG_FILE
+from threading import Lock
+import time
BASE_IMAGE_URL = "https://image.tmdb.org/t/p/original"


class RateLimitedRequest:
    """
    A class that represents a rate-limited request object.
    Attributes:
        rate_limit (int): Maximum number of requests allowed per period.
        period (int): Period in seconds.
        requests_made (int): Counter for requests made in the current period.
        start_time (float): Monotonic clock reading taken at the start of the current period.
        lock (Lock): Lock guarding the counter and the start time.
    """

    def __init__(self, rate_limit=30, period=1):
        self.rate_limit = rate_limit
        self.period = period
        self.requests_made = 0
        # A monotonic clock is used for interval measurement because it is
        # unaffected by system clock adjustments.
        self.start_time = time.monotonic()
        self.lock = Lock()

    def _acquire_slot(self):
        """
        Reserve one request slot, sleeping out the rest of the period when the limit is reached.
        """
        with self.lock:
            if self.requests_made >= self.rate_limit:
                sleep_time = self.period - (time.monotonic() - self.start_time)
                # The sleep happens while holding the lock, so other threads
                # wait for the new period as well.
                if sleep_time > 0:
                    time.sleep(sleep_time)
                self.requests_made = 0
                self.start_time = time.monotonic()
            self.requests_made += 1

    def get(self, url, timeout=None):
        """
        Sends a rate-limited GET request to the specified URL.
        Args:
            url (str): The URL to send the request to.
            timeout (float, optional): Timeout in seconds passed to requests.get. Defaults to None (no timeout).
        Returns:
            Response: The response object returned by the request.
        """
        self._acquire_slot()
        # The request itself runs outside the lock
        return requests.get(url, timeout=timeout)


# Initialize rate-limited request
rate_limited_request = RateLimitedRequest(rate_limit=30, period=1)
def fetch_show_id(show_name):
    """
    Fetch the TMDb ID for a given show name.
    Args:
        show_name (str): The name of the show.
    Returns:
        str: The TMDb ID of the first search result, or None if the request did not
        return status 200 or returned no results.
    """
    from urllib.parse import urlencode

    config = get_config(CONFIG_FILE) or {}
    tmdb_api_key = config.get("tmdb_api_key")
    # Encode the parameters so characters such as "&", "#" or "+" in the show
    # name cannot alter the query string.
    query = urlencode({"query": show_name, "api_key": tmdb_api_key})
    url = f"https://api.themoviedb.org/3/search/tv?{query}"
    # Go through the module's shared rate limiter
    response = rate_limited_request.get(url)
    if response.status_code == 200:
        results = response.json().get("results", [])
        if results:
            return str(results[0]["id"])
    return None
def fetch_season_details(show_id, season_number):
    """
    Fetch the total number of episodes for a given show and season from the TMDb API.
    Args:
        show_id (str): The ID of the show on TMDb.
        season_number (int): The season number to fetch details for.
    Returns:
        int: The total number of episodes in the season, or 0 if the request failed or
        the response body was not valid JSON.
    """
    logger.info(f"Fetching season details for Season {season_number}...")
    config = get_config(CONFIG_FILE) or {}
    tmdb_api_key = config.get("tmdb_api_key")
    url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_number}?api_key={tmdb_api_key}"
    try:
        # Go through the module's shared rate limiter
        response = rate_limited_request.get(url)
        response.raise_for_status()
        season_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON
        logger.error(f"Failed to fetch season details for Season {season_number}: {e}")
        return 0
    # .get supplies an empty list when "episodes" is absent, so a missing key
    # yields 0 rather than raising KeyError.
    return len(season_data.get("episodes", []))
def get_number_of_seasons(show_id):
    """
    Retrieves the number of seasons for a given TV show from the TMDB API.
    Args:
        show_id (int): The ID of the TV show.
    Returns:
        int: The number of seasons for the TV show, or 0 if the response has no
        "number_of_seasons" field.
    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    config = get_config(CONFIG_FILE) or {}
    tmdb_api_key = config.get("tmdb_api_key")
    url = f"https://api.themoviedb.org/3/tv/{show_id}?api_key={tmdb_api_key}"
    # Go through the module's shared rate limiter
    response = rate_limited_request.get(url)
    response.raise_for_status()
    show_data = response.json()
    num_seasons = show_data.get("number_of_seasons", 0)
    logger.info(f"Found {num_seasons} seasons")
    return num_seasons

mkv_episode_matcher/utils.py ADDED Viewed

@@ -0,0 +1,226 @@
+# utils.py
+import os
+from typing import Set
+from loguru import logger
+import re
+from mkv_episode_matcher.__main__ import CONFIG_FILE, CACHE_DIR
+from mkv_episode_matcher.config import get_config
+from mkv_episode_matcher.tmdb_client import fetch_season_details
+import requests
+from opensubtitlescom import OpenSubtitles
+import shutil
+def check_filename(filename, series_title, season_number, episode_number):
+    """
+    Check if a filename matches the expected naming convention for a series episode.
+    Args:
+        filename (str): The filename to be checked.
+        series_title (str): The title of the series.
+        season_number (int): The season number of the episode.
+        episode_number (int): The episode number of the episode.
+    Returns:
+        bool: True if the filename matches the expected naming convention, False otherwise.
+    This function checks if the given filename matches the expected naming convention for a series episode.
+    The expected naming convention is '{series_title} - S{season_number:02d}E{episode_number:02d}.mkv'.
+    If the filename matches the expected pattern, it returns True; otherwise, it returns False.
+    Example:
+        If filename = 'Example - S01E03.mkv', series_title = 'Example', season_number = 1, and episode_number = 3,
+        the function will return True because the filename matches the expected pattern.
+    """
+    pattern = re.compile(
+        f"{re.escape(series_title)} - S{season_number:02d}E{episode_number:02d}.mkv"
+    )
+    return bool(pattern.match(filename))
+def scramble_filename(original_file_path, file_number):
+    """
+    Scrambles the filename of the given file path by adding the series title and file number.
+    Args:
+        original_file_path (str): The original file path.
+        file_number (int): The file number to be added to the filename.
+    Returns:
+        None
+    """
+    logger.info(f"Scrambling {original_file_path}")
+    series_title = os.path.basename(
+        os.path.dirname(os.path.dirname(original_file_path))
+    )
+    original_file_name = os.path.basename(original_file_path)
+    extension = os.path.splitext(original_file_path)[-1]
+    new_file_name = f"{series_title} - {file_number:03d}{extension}"
+    new_file_path = os.path.join(os.path.dirname(original_file_path), new_file_name)
+    if not os.path.exists(new_file_path):
+        logger.info(f"Renaming {original_file_name} -> {new_file_name}")
+        os.rename(original_file_path, new_file_path)
+def rename_episode_file(original_file_path, season_number, episode_number):
+    """
+    Rename an episode file with a standardized naming convention.
+    Args:
+        original_file_path (str): The original file path of the episode.
+        season_number (int): The season number of the episode.
+        episode_number (int): The episode number of the episode.
+    Returns:
+        None
+    This function renames an episode file with a standardized naming convention based on the series title, season number,
+    and episode number. If a file with the intended new name already exists, it appends a numerical suffix to the filename
+    until it finds a unique name.
+    Example:
+        If original_file_path = '/path/to/episode.mkv', season_number = 1, and episode_number = 3, and the series title is 'Example',
+        the function will rename the file to 'Example - S01E03.mkv' if no file with that name already exists. If a file with that
+        name already exists, it will be renamed to 'Example - S01E03_2.mkv', and so on.
+    """
+    series_title = os.path.basename(
+        os.path.dirname(os.path.dirname(original_file_path))
+    )
+    original_file_name = os.path.basename(original_file_path)
+    extension = os.path.splitext(original_file_path)[-1]
+    new_file_name = (
+        f"{series_title} - S{season_number:02d}E{episode_number:02d}{extension}"
+    )
+    new_file_path = os.path.join(os.path.dirname(original_file_path), new_file_name)
+    # Check if the new file path already exists
+    if os.path.exists(new_file_path):
+        logger.warning(f"Filename already exists: {new_file_name}.")
+        # If the file already exists, find a unique name by appending a numerical suffix
+        suffix = 2
+        while True:
+            new_file_name = f"{series_title} - S{season_number:02d}E{episode_number:02d}_{suffix}{extension}"
+            new_file_path = os.path.join(
+                os.path.dirname(original_file_path), new_file_name
+            )
+            if not os.path.exists(new_file_path):
+                break
+            suffix += 1
+        logger.info(f"Renaming {original_file_name} -> {new_file_name}")
+        os.rename(original_file_path, new_file_path)
+    else:
+        logger.info(f"Renaming {original_file_name} -> {new_file_name}")
+        os.rename(original_file_path, new_file_path)
+def get_subtitles(show_id, seasons: Set[int]):
+    """
+    Retrieves and saves subtitles for a given TV show and seasons.
+    Args:
+        show_id (int): The ID of the TV show.
+        seasons (Set[int]): A set of season numbers for which subtitles should be retrieved.
+    Returns:
+        None
+    """
+    logger.info(f"Getting subtitles for show ID {show_id}")
+    config = get_config(CONFIG_FILE)
+    show_dir = config.get("show_dir")
+    series_name = os.path.basename(show_dir)
+    tmdb_api_key = config.get("tmdb_api_key")
+    open_subtitles_api_key = config.get("open_subtitles_api_key")
+    open_subtitles_user_agent = config.get("open_subtitles_user_agent")
+    open_subtitles_username = config.get("open_subtitles_username")
+    open_subtitles_password = config.get("open_subtitles_password")
+    if not all(
+        [
+            show_dir,
+            tmdb_api_key,
+            open_subtitles_api_key,
+            open_subtitles_user_agent,
+            open_subtitles_username,
+            open_subtitles_password,
+        ]
+    ):
+        logger.error("Missing configuration settings. Please run the setup script.")
+    try:
+        # Initialize the OpenSubtitles client
+        subtitles = OpenSubtitles(open_subtitles_user_agent, open_subtitles_api_key)
+        # Log in (retrieve auth token)
+        subtitles.login(open_subtitles_username, open_subtitles_password)
+    except Exception as e:
+        logger.error(f"Failed to log in to OpenSubtitles: {e}")
+        return
+    for season in seasons:
+        episodes = fetch_season_details(show_id, season)
+        logger.info(f"Found {episodes} episodes in Season {season}")
+        for episode in range(1, episodes + 1):
+            logger.info(f"Processing Season {season}, Episode {episode}...")
+            srt_filepath = os.path.join(
+                CACHE_DIR,
+                "data",
+                series_name,
+                f"{series_name} - S{season:02d}E{episode:02d}.srt",
+            )
+            if not os.path.exists(srt_filepath):
+                # get the episode info from TMDB
+                url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}"
+                response = requests.get(url)
+                response.raise_for_status()
+                episode_data = response.json()
+                episode_name = episode_data["name"]
+                episode_id = episode_data["id"]
+                # search for the subtitle
+                response = subtitles.search(tmdb_id=episode_id, languages="en")
+                if len(response.data) == 0:
+                    logger.warning(
+                        f"No subtitles found for {series_name} - S{season:02d}E{episode:02d}"
+                    )
+                for subtitle in response.data:
+                    subtitle_dict = subtitle.to_dict()
+                    # Remove special characters and convert to uppercase
+                    filename_clean = re.sub(
+                        r"\W+", " ", subtitle_dict["file_name"]
+                    ).upper()
+                    if f"E{episode:02d}" in filename_clean:
+                        logger.info(f"Original filename: {subtitle_dict['file_name']}")
+                        srt_file = subtitles.download_and_save(subtitle)
+                        series_name = series_name.replace(":", " -")
+                        shutil.move(srt_file, srt_filepath)
+                        logger.info(f"Subtitle saved to {srt_filepath}")
+                        break
+                    else:
+                        continue
+            else:
+                print(
+                    f"Subtitle already exists for {series_name} - S{season:02d}E{episode:02d}"
+                )
+                continue
+def cleanup_ocr_files(show_dir):
+    """
+    Clean up OCR files generated during the episode matching process.
+    Args:
+        show_dir (str): The directory containing the show files.
+    Returns:
+        None
+    This function cleans up the OCR files generated during the episode matching process.
+    It deletes the 'ocr' directory and all its contents in each season directory of the show.
+    """
+    for season_dir in os.listdir(show_dir):
+        season_dir_path = os.path.join(show_dir, season_dir)
+        ocr_dir_path = os.path.join(season_dir_path, "ocr")
+        if os.path.exists(ocr_dir_path):
+            logger.info(f"Cleaning up OCR files in {ocr_dir_path}")
+            shutil.rmtree(ocr_dir_path)

mkv_episode_matcher-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,90 @@
+Metadata-Version: 2.3
+Name: mkv-episode-matcher
+Version: 0.1.0
+Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
+Author-email: Jonathan Sakkos <jonathansakkos@protonmail.com>
+Description-Content-Type: text/markdown
+# MKV Episode Matcher
+The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
+## Quick start
+To use the MKV Episode Matcher, follow these steps:
+1. Clone this repository: `git clone https://github.com/Jsakkos/mkv-episode-matcher`
+2. Obtain an API key from TMDb (https://developers.themoviedb.org/authentication/getting-a-apikey).
+3. (Optional) Obtain an API key from Opensubtitles.com by creating an API consumer (https://www.opensubtitles.com/en/consumers).
+4. Provide a filepath to your show directory. This is the main directory that contains all of the episodes for a specific show.
+The directory and subfolders must be arranged in the following structure:
+- Show name
+  - Season 1
+  - Season 2
+  - ...
+  - Season n
+5. Call `python __main__.py` with the TMDB_API_KEY and SHOW_DIR as arguments or in environment variables from your command line:
+```
+python __main__.py --api-key `your-api-key` --show-dir /path/to/show
+```
+## How it works
+MKV Episode Matcher compares reference images from TMDb with frames from the mkv content using image hashing.
+## Caveats (WIP)
+Currently, MKV Episode Matcher is slow (several minutes per episode), CPU intensive, and error-prone.
+# Known issues
+When reading BluRay files, the following warning pops up in the terminal:
+```
+Could not find codec parameters for stream 3 (Subtitle: hdmv_pgs_subtitle (pgssub)): unspecified size
+Consider increasing the value for the 'analyzeduration' (0) and 'probesize' (5000000) options
+```
+# Contributing
+Contributions are welcome! If you would like to contribute to the MKV Episode Matcher project, please follow these steps:
+1. Fork the repository.
+2. Clone the repository.
+3. Create a new branch for your contribution.
+4. Make your changes and commit them to your branch.
+5. Push your branch to your forked repository.
+6. Open a pull request to the main repository.
+Please ensure that your code follows the project's coding conventions and standards. Additionally, provide a clear and detailed description of your changes in the pull request.
+Thank you for your contribution!
+# License
+MIT License
+Copyright (c) 2024 Jonathan Sakkos
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+# Acknowledgments
+This product uses the TMDB API but is not endorsed or certified by TMDB.
+![The Movie DB Logo](https://www.themoviedb.org/assets/2/v4/logos/v2/blue_long_2-9665a76b1ae401a510ec1e0ca40ddcb3b0cfe45f1d51b77a308fea0845885648.svg)

mkv_episode_matcher-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
+mkv_episode_matcher/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mkv_episode_matcher/__main__.py,sha256=kCsrekSaNOkrfaY8Lm-VzVzALsxcmuaEWeeyCE5deEQ,6678
+mkv_episode_matcher/config.py,sha256=2Ui0f9LUc0r6pmdRUmcopdykotHoFxjqJavLKLOzy5w,2354
+mkv_episode_matcher/episode_matcher.py,sha256=IAUDOHyMzmoqBDilA5GIXiQdomfEWR7mn4xXU5XtvsM,8904
+mkv_episode_matcher/mkv_to_srt.py,sha256=BSDgNCgrkpr451X-P0A3-Q4bENfItv2A43yp5dtB430,6468
+mkv_episode_matcher/requirements.txt,sha256=0JLuUm69lLp8anUgtW48CuULZ_lSwd-1XL3eoShVWjI,93
+mkv_episode_matcher/tmdb_client.py,sha256=3sWC0tHvsW2XAYA4ndXh3PjUFCobQRpXzykNP-Z4rAA,4170
+mkv_episode_matcher/utils.py,sha256=ZkqGV3ZNPwpTvN1dHNZb-iLwJnk4ldk6w-Znh3TPH70,9297
+mkv_episode_matcher-0.1.0.dist-info/METADATA,sha256=Z6_kFPF6S49njL8CzfhrvxMuuXH9qNueD3FyZrTHx5c,3759
+mkv_episode_matcher-0.1.0.dist-info/WHEEL,sha256=cDcbFFSNXOE-241I5PFuLkIYfR_FM7WTlPEi33njInY,105
+mkv_episode_matcher-0.1.0.dist-info/RECORD,,

mkv_episode_matcher-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.24.2
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any