PyPI - mkv-episode-matcher - Versions diffs - 0.1.2__tar.gz → 0.1.3__tar.gz - Mend

mkv-episode-matcher 0.1.2tar.gz → 0.1.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mkv-episode-matcher might be problematic. Click here for more details.

Files changed (33) hide show

mkv_episode_matcher-0.1.3/.coverage.DESKTOP-NTJ52LL.19040.XkHNEbEx ADDED Viewed

Binary file

mkv_episode_matcher-0.1.3/.coverage.DESKTOP-NTJ52LL.24340.XjsBEKWx ADDED Viewed

Binary file

mkv_episode_matcher-0.1.3/.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # Auto detect text files and perform LF normalization
2	+ * text=auto

mkv_episode_matcher-0.1.3/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,29 @@
+# Workflow "ci": installs mkdocs-material and runs `mkdocs gh-deploy` on pushes to master or main.
+name: ci
+on:
+  push:
+    branches:
+      - master
+      - main
+permissions:
+  # NOTE(review): presumably required so `mkdocs gh-deploy` can push the built site - confirm.
+  contents: write
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Configure Git Credentials
+        run: |
+          git config user.name github-actions[bot]
+          git config user.email 41898282+github-actions[bot]@users.noreply.github.com
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+      # cache_id is the ISO week number (date format %V), so the cache key below changes weekly.
+      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+      - uses: actions/cache@v4
+        with:
+          key: mkdocs-material-${{ env.cache_id }}
+          path: .cache
+          # Fall back to any earlier mkdocs-material cache when this week's key is missing.
+          restore-keys: |
+            mkdocs-material-
+      - run: pip install mkdocs-material
+      - run: mkdocs gh-deploy --force

mkv_episode_matcher-0.1.3/.gitmodules ADDED Viewed

@@ -0,0 +1,3 @@
+[submodule "mkv_episode_matcher/libraries/pgs2srt"]
+	path = mkv_episode_matcher/libraries/pgs2srt
+	url = https://github.com/Jsakkos/pgs2srt

mkv_episode_matcher-0.1.3/.vscode/settings.json ADDED Viewed

@@ -0,0 +1,11 @@
+{
+    "python.testing.unittestArgs": [
+        "-v",
+        "-s",
+        "./tests",
+        "-p",
+        "*test*.py"
+    ],
+    "python.testing.pytestEnabled": false,
+    "python.testing.unittestEnabled": true
+}

{mkv_episode_matcher-0.1.2 → mkv_episode_matcher-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mkv-episode-matcher
-Version: 0.1.2
+Version: 0.1.3
 Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
 Project-URL: Documentation, https://github.com/Jsakkos/mkv-episode-matcher#readme
 Project-URL: Issues, https://github.com/Jsakkos/mkv-episode-matcher/issues

mkv_episode_matcher-0.1.3/docs/index.md ADDED Viewed

@@ -0,0 +1,17 @@
+# Welcome to MkDocs
+For full documentation visit [mkdocs.org](https://www.mkdocs.org).
+## Commands
+* `mkdocs new [dir-name]` - Create a new project.
+* `mkdocs serve` - Start the live-reloading docs server.
+* `mkdocs build` - Build the documentation site.
+* `mkdocs -h` - Print help message and exit.
+## Project layout
+    mkdocs.yml    # The configuration file.
+    docs/
+        index.md  # The documentation homepage.
+        ...       # Other markdown pages, images and other files.

mkv_episode_matcher-0.1.3/mkdocs.yml ADDED Viewed

@@ -0,0 +1,6 @@
+site_name: MKV Episode Matcher Docs
+site_url: https://jsakkos.github.io/mkv-episode-matcher
+theme:
+  name: material
+repo_url: https://github.com/jsakkos/mkv-episode-matcher
+repo_name: jsakkos/mkv-episode-matcher

mkv_episode_matcher-0.1.3/mkv_episode_matcher/.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # Auto detect text files and perform LF normalization
2	+ * text=auto

mkv_episode_matcher-0.1.3/mkv_episode_matcher/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ version = "0.1.3"

mkv_episode_matcher-0.1.3/mkv_episode_matcher/__main__.py ADDED Viewed

@@ -0,0 +1,179 @@
+# __main__.py
+import argparse
+import os
+from loguru import logger
+from .config import get_config, set_config
+# Log the start of the application
+logger.info("Starting the application")
+# Create the log directory (relative to the current working directory) if it is missing.
+# exist_ok=True replaces the racy "check, then create" sequence.
+os.makedirs("./logs", exist_ok=True)
+# Add a new handler for stdout logs
+logger.add("./logs/file_stdout.log", format="{time} {level} {message}", level="DEBUG", rotation="10 MB")
+# Add a new handler for error logs
+logger.add("./logs/file_errors.log", level="ERROR", rotation="10 MB")
+# Per-user application directory that holds the configuration file and the cache
+CONFIG_DIR = os.path.join(os.path.expanduser("~"), ".mkv-episode-matcher")
+# Define the paths for the configuration file and cache directory
+CONFIG_FILE = os.path.join(CONFIG_DIR, "config.ini")
+CACHE_DIR = os.path.join(CONFIG_DIR, "cache")
+# Creating the cache directory also creates CONFIG_DIR, its parent, when needed
+os.makedirs(CACHE_DIR, exist_ok=True)
+def _str_to_bool(value):
+    """
+    Convert a command-line token such as "true", "0" or "no" to a boolean.
+    ``type=bool`` cannot be used with argparse because ``bool("False")`` is True.
+    Args:
+        value (str | bool): The raw argument value.
+    Returns:
+        bool: The parsed boolean.
+    Raises:
+        argparse.ArgumentTypeError: If the value is not a recognised boolean spelling.
+    """
+    if isinstance(value, bool):
+        return value
+    normalized = str(value).strip().lower()
+    if normalized in ("1", "true", "t", "yes", "y", "on"):
+        return True
+    if normalized in ("", "0", "false", "f", "no", "n", "off"):
+        return False
+    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")
+@logger.catch
+def main():
+    """
+    Entry point of the application.
+    Parses the command-line arguments, fills in missing settings from the cached
+    configuration or by prompting the user, writes the configuration and processes the show.
+    Command-line arguments:
+    --tmdb-api-key: The API key for the TMDb API. Falls back to the cached value, then to a prompt.
+    --show-dir: The main directory of the show. Falls back to the cached value, then to a prompt,
+        then to the current working directory.
+    --season: The season number to be processed. If not provided, None is passed to process_show.
+    --dry-run: Don't rename any files. May be given bare (``--dry-run``) or with a value (``--dry-run true``).
+    --get-subs: Download subtitles for the show. May be given bare or with a value.
+    --tesseract-path: The path to the tesseract executable. Falls back to the cached value, then to a prompt.
+    """
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(description="Process shows with TMDb API")
+    parser.add_argument("--tmdb-api-key", help="TMDb API key")
+    parser.add_argument("--show-dir", help="Main directory of the show")
+    parser.add_argument(
+        "--season",
+        type=int,
+        default=None,
+        nargs="?",
+        help="Specify the season number to be processed (default: None)",
+    )
+    # const=True makes a bare flag mean "enabled"; without it nargs="?" yields None for a bare flag.
+    parser.add_argument(
+        "--dry-run",
+        type=_str_to_bool,
+        default=None,
+        const=True,
+        nargs="?",
+        help="Don't rename any files (default: None)",
+    )
+    parser.add_argument(
+        "--get-subs",
+        type=_str_to_bool,
+        default=None,
+        const=True,
+        nargs="?",
+        help="Download subtitles for the show (default: None)",
+    )
+    parser.add_argument(
+        "--tesseract-path",
+        type=str,
+        default=None,
+        nargs="?",
+        help="Path to the tesseract executable (default: None)",
+    )
+    args = parser.parse_args()
+    logger.debug(f"Command-line arguments: {args}")
+    # Load the cached configuration once, up front. Every fallback below reads it,
+    # and get_config may return None or an empty mapping.
+    cached_config = get_config(CONFIG_FILE) or {}
+    # Start from the cached OpenSubtitles credentials so that a run without
+    # --get-subs does not overwrite them with empty strings.
+    open_subtitles_api_key = cached_config.get("open_subtitles_api_key") or ""
+    open_subtitles_user_agent = cached_config.get("open_subtitles_user_agent") or ""
+    open_subtitles_username = cached_config.get("open_subtitles_username") or ""
+    open_subtitles_password = cached_config.get("open_subtitles_password") or ""
+    # Command-line value first, then the cache, then a prompt
+    tmdb_api_key = args.tmdb_api_key or cached_config.get("tmdb_api_key")
+    if not tmdb_api_key:
+        tmdb_api_key = input("Enter your TMDb API key: ")
+    logger.debug(f"TMDb API Key: {tmdb_api_key}")
+    if args.get_subs:
+        logger.debug("Getting OpenSubtitles API key")
+        if not open_subtitles_api_key:
+            open_subtitles_api_key = input("Enter your OpenSubtitles API key: ")
+        if not open_subtitles_user_agent:
+            open_subtitles_user_agent = input("Enter your OpenSubtitles User Agent: ")
+        if not open_subtitles_username:
+            open_subtitles_username = input("Enter your OpenSubtitles Username: ")
+        if not open_subtitles_password:
+            open_subtitles_password = input("Enter your OpenSubtitles Password: ")
+    # Show directory: command line, then cache, then prompt, then the current working directory
+    show_dir = args.show_dir or cached_config.get("show_dir")
+    if not show_dir:
+        show_dir = input("Enter the main directory of the show:")
+    if not show_dir:
+        show_dir = os.getcwd()
+    logger.info(f"Show Directory: {show_dir}")
+    # Tesseract executable: command line, then cache, then prompt
+    tesseract_path = args.tesseract_path or cached_config.get("tesseract_path")
+    if not tesseract_path:
+        tesseract_path = input(
+            r"Enter the path to the tesseract executable: ['C:\Program Files\Tesseract-OCR\tesseract.exe']"
+        )
+    logger.debug(f"Tesseract Path: {tesseract_path}")
+    logger.debug(f"Show Directory: {show_dir}")
+    # Set the configuration
+    set_config(
+        tmdb_api_key,
+        open_subtitles_api_key,
+        open_subtitles_user_agent,
+        open_subtitles_username,
+        open_subtitles_password,
+        show_dir,
+        CONFIG_FILE,
+        tesseract_path=tesseract_path,
+    )
+    logger.info("Configuration set")
+    # Imported here rather than at module level: episode_matcher imports names from this module.
+    from .episode_matcher import process_show
+    process_show(args.season, dry_run=args.dry_run, get_subs=args.get_subs)
+    logger.info("Show processing completed")
+# Run the main function if the script is run directly
+if __name__ == "__main__":
+    main()

mkv_episode_matcher-0.1.3/mkv_episode_matcher/config.py ADDED Viewed

@@ -0,0 +1,82 @@
+# config.py
+import configparser
+import multiprocessing
+import os
+from loguru import logger
+def get_total_threads():
+    """Return the CPU count reported by ``multiprocessing.cpu_count()``."""
+    return multiprocessing.cpu_count()
+total_threads = get_total_threads()
+# Cap the worker count at 4, or lower when fewer CPUs are reported.
+MAX_THREADS = min(4, total_threads)
+logger.info(f"Total available threads: {total_threads} -> Setting max to {MAX_THREADS}")
+def set_config(
+    tmdb_api_key,
+    open_subtitles_api_key,
+    open_subtitles_user_agent,
+    open_subtitles_username,
+    open_subtitles_password,
+    show_dir,
+    file,
+    tesseract_path=None,
+):
+    """
+    Sets the configuration values and writes them to a file.
+    Missing values (None) are stored as empty strings rather than the literal text "None",
+    so readers that test ``if not value`` treat them as unset. Percent signs are doubled
+    because ConfigParser's default interpolation treats a single "%" as syntax; reading
+    the value back through ConfigParser yields the original text.
+    Args:
+        tmdb_api_key (str): The API key for TMDB (The Movie Database).
+        open_subtitles_api_key (str): The API key for OpenSubtitles.
+        open_subtitles_user_agent (str): The user agent for OpenSubtitles.
+        open_subtitles_username (str): The username for OpenSubtitles.
+        open_subtitles_password (str): The password for OpenSubtitles.
+        show_dir (str): The directory where the TV show episodes are located.
+        file (str): The path to the configuration file.
+        tesseract_path (str, optional): The path to the Tesseract OCR executable.
+    Returns:
+        None
+    """
+    def _as_value(value):
+        # Normalise to text and escape "%" for the default (basic) interpolation.
+        text = "" if value is None else str(value)
+        return text.replace("%", "%%")
+    config = configparser.ConfigParser()
+    config["Config"] = {
+        "tmdb_api_key": _as_value(tmdb_api_key),
+        "show_dir": _as_value(show_dir),
+        "max_threads": str(int(MAX_THREADS)),
+        "open_subtitles_api_key": _as_value(open_subtitles_api_key),
+        "open_subtitles_user_agent": _as_value(open_subtitles_user_agent),
+        "open_subtitles_username": _as_value(open_subtitles_username),
+        "open_subtitles_password": _as_value(open_subtitles_password),
+        "tesseract_path": _as_value(tesseract_path),
+    }
+    logger.info(
+        f"Setting config with API:{tmdb_api_key}, show_dir: {show_dir}, and max_threads: {MAX_THREADS}"
+    )
+    with open(file, "w") as configfile:
+        config.write(configfile)
+def get_config(file):
+    """
+    Read and return the configuration from the specified file.
+    Args:
+        file (str): The path to the configuration file.
+    Returns:
+        Mapping: The "Config" section of the file, or an empty dict when the file is
+        missing or has no "Config" section. The result always supports ``.get()``.
+    """
+    logger.info(f"Loading config from {file}")
+    config = configparser.ConfigParser()
+    # ConfigParser.read() silently skips files that cannot be opened, so a missing
+    # file simply leaves the parser empty.
+    config.read(file)
+    if "Config" in config:
+        return config["Config"]
+    # Previously this case returned None when the file existed, which broke callers
+    # that call .get() on the result.
+    return {}

mkv_episode_matcher-0.1.3/mkv_episode_matcher/episode_matcher.py ADDED Viewed

@@ -0,0 +1,237 @@
+# episode_matcher.py
+import os
+import re
+from loguru import logger
+from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
+from mkv_episode_matcher.config import get_config
+from mkv_episode_matcher.mkv_to_srt import convert_mkv_to_srt
+from mkv_episode_matcher.tmdb_client import fetch_show_id
+from mkv_episode_matcher.utils import check_filename, cleanup_ocr_files, get_subtitles
+# hash_data = {}
+def _season_number(season_path):
+    """Return the trailing integer of a directory name such as "Season 2", or None if there is none."""
+    try:
+        return int(os.path.basename(season_path).split()[-1])
+    except (IndexError, ValueError):
+        return None
+@logger.catch
+def process_show(season=None, dry_run=False, get_subs=False):
+    """
+    Convert the unprocessed MKV files of a show to SRT, match them against reference subtitles and rename them.
+    Args:
+        season (int, optional): The season number to process. If provided, only the "Season {season}"
+            directory is processed; otherwise every sub-directory of the show directory is. Defaults to None.
+        dry_run (bool, optional): If True, matching files are logged but not renamed. Defaults to False.
+        get_subs (bool, optional): If True, subtitles are fetched via get_subtitles first. Defaults to False.
+    """
+    config = get_config(CONFIG_FILE) or {}
+    show_dir = config.get("show_dir")
+    if not show_dir:
+        logger.error("No show directory configured")
+        return
+    # normpath drops a trailing separator, which would otherwise make basename() return "".
+    show_name = os.path.basename(os.path.normpath(show_dir))
+    logger.info(f"Processing show '{show_name}'...")
+    show_id = fetch_show_id(show_name)
+    if show_id is None:
+        logger.error(f"Could not find show '{show_name}' on TMDb.")
+        return
+    season_paths = [
+        os.path.join(show_dir, d)
+        for d in os.listdir(show_dir)
+        if os.path.isdir(os.path.join(show_dir, d))
+    ]
+    logger.info(f"Found {len(season_paths)} seasons for show '{show_name}'")
+    if get_subs:
+        # Directories whose name does not end in a number are ignored instead of raising ValueError.
+        season_numbers = {
+            number
+            for number in map(_season_number, season_paths)
+            if number is not None
+        }
+        get_subtitles(show_id, seasons=season_numbers)
+    if season is not None:
+        season_paths = [os.path.join(show_dir, f"Season {season}")]
+    converted_any = False
+    for season_path in season_paths:
+        if not os.path.isdir(season_path):
+            logger.warning(f"Season directory {season_path} does not exist, skipping")
+            continue
+        # Build a fresh list instead of removing items from a list that is being iterated.
+        mkv_files = []
+        for f in os.listdir(season_path):
+            if not f.endswith(".mkv"):
+                continue
+            # check_filename here is the one-argument function defined in this module.
+            if check_filename(f):
+                logger.info(f"Skipping {f}, already processed")
+            else:
+                mkv_files.append(os.path.join(season_path, f))
+        if mkv_files:
+            convert_mkv_to_srt(season_path, mkv_files)
+            converted_any = True
+    if not converted_any:
+        logger.info("No new files to process")
+        return
+    reference_text_dict = process_reference_srt_files(show_name)
+    srt_text_dict = process_srt_files(show_dir)
+    compare_and_rename_files(srt_text_dict, reference_text_dict, dry_run=dry_run)
+    cleanup_ocr_files(show_dir)
+def check_filename(filename):
+    """
+    Tell whether a filename already carries a season/episode tag.
+    Args:
+        filename (str): The filename to check.
+    Returns:
+        bool: True if an "S<digits>E<digits>" tag is found, False otherwise.
+    """
+    tagged = re.compile(r".*S\d+E\d+")
+    return tagged.match(filename) is not None
+def extract_srt_text(filepath):
+    """
+    Extracts the text from an SRT file.
+    Blocks are separated by a blank line. The first two lines of each block
+    (index and timestamp) are dropped, as are empty lines and <i>/</i> tags.
+    Args:
+        filepath (str): The path to the SRT file.
+    Returns:
+        list: A list of lists, where each inner list represents a block of text from the SRT file.
+              Each inner list contains the lines of text for that block. Blocks without text are omitted.
+    """
+    with open(filepath) as f:
+        content = f.read()
+    text_lines = []
+    # Split once; the previous version re-split the whole file for every block.
+    for block in content.split("\n\n"):
+        lines = [re.sub(r"</?i>", "", line) for line in block.split("\n")[2:] if line]
+        if lines:
+            text_lines.append(lines)
+    return text_lines
+def compare_text(text1, text2):
+    """
+    Count the distinct text lines that occur in both subtitle sources.
+    Args:
+        text1 (list): Blocks of text lines from the first source (list of lists).
+        text2 (list): Blocks of text lines from the second source (list of lists).
+    Returns:
+        int: Number of distinct lines present in both sources.
+    """
+    first_lines = {line for block in text1 for line in block}
+    second_lines = {line for block in text2 for line in block}
+    return len(first_lines & second_lines)
+def extract_season_episode(filename):
+    """
+    Pull the season and episode numbers out of an "S<season>E<episode>" tag.
+    Args:
+        filename (str): The filename to search.
+    Returns:
+        tuple: (season, episode) as integers, or (None, None) when no tag is found.
+    """
+    tag = re.search(r"S(\d+)E(\d+)", filename)
+    if tag is None:
+        return None, None
+    season_text, episode_text = tag.groups()
+    return int(season_text), int(episode_text)
+def process_reference_srt_files(series_name):
+    """
+    Process reference SRT files for a given series.
+    Walks CACHE_DIR/data/<series_name> and reads every ".srt" file whose name
+    contains an "S<season>E<episode>" tag. Files without such a tag are skipped.
+    Args:
+        series_name (str): The name of the series.
+    Returns:
+        dict: A dictionary containing the reference files where the keys are the MKV filenames
+              ("<series_name> - SxxEyy.mkv") and the values are the corresponding SRT texts.
+    """
+    reference_files = {}
+    reference_dir = os.path.join(CACHE_DIR, "data", series_name)
+    for dirpath, _, filenames in os.walk(reference_dir):
+        for filename in filenames:
+            if not filename.lower().endswith(".srt"):
+                continue
+            srt_file = os.path.join(dirpath, filename)
+            season, episode = extract_season_episode(filename)
+            if season is None or episode is None:
+                # Formatting None with ":02" raises TypeError, so untagged files are skipped.
+                logger.warning(f"Skipping {srt_file}, no season/episode in filename")
+                continue
+            logger.info(f"Processing {srt_file}")
+            mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
+            reference_files[mkv_filename] = extract_srt_text(srt_file)
+    return reference_files
+def process_srt_files(show_dir):
+    """
+    Collect the text of every SRT file below a directory.
+    Args:
+        show_dir (str): The directory that is walked recursively for ".srt" files.
+    Returns:
+        dict: Maps each SRT file path to the text blocks returned by extract_srt_text.
+    """
+    collected = {}
+    for folder, _, names in os.walk(show_dir):
+        for name in names:
+            if not name.lower().endswith(".srt"):
+                continue
+            path = os.path.join(folder, name)
+            logger.info(f"Processing {path}")
+            collected[path] = extract_srt_text(path)
+    return collected
+def compare_and_rename_files(srt_files, reference_files, dry_run=False):
+    """
+    Compare the srt files with the reference files and rename the matching mkv files.
+    For each srt file the references are tried in order. The first reference that shares
+    enough lines and whose target name is still free decides the new name; the search
+    for that srt file then stops.
+    Args:
+        srt_files (dict): Maps srt file paths to their text blocks.
+        reference_files (dict): Maps target mkv filenames to the reference text blocks.
+        dry_run (bool, optional): If True, the function will only log the matches without actually renaming the files. Defaults to False.
+    """
+    logger.info(
+        f"Comparing {len(srt_files)} srt files with {len(reference_files)} reference files"
+    )
+    for srt_path, srt_text in srt_files.items():
+        # The mkv is looked up in the directory above the one that contains the srt file.
+        parent_dir = os.path.dirname(os.path.dirname(srt_path))
+        # splitext swaps only the real extension, whatever its case.
+        stem = os.path.splitext(os.path.basename(srt_path))[0]
+        mkv_file = os.path.join(parent_dir, f"{stem}.mkv")
+        for reference, reference_text in reference_files.items():
+            # Require at least one shared line; 10% of a short reference truncates to 0,
+            # which would make every file "match".
+            threshold = max(1, int(len(reference_text) * 0.1))
+            matching_lines = compare_text(reference_text, srt_text)
+            if matching_lines < threshold:
+                continue
+            logger.info(f"Matching lines: {matching_lines}")
+            logger.info(f"Found matching file: {mkv_file} ->{reference}")
+            new_filename = os.path.join(parent_dir, reference)
+            if os.path.exists(new_filename):
+                logger.info(f"File {new_filename} already exists, skipping")
+                continue
+            if os.path.exists(mkv_file) and not dry_run:
+                logger.info(f"Renaming {mkv_file} to {new_filename}")
+                os.rename(mkv_file, new_filename)
+            # One srt file maps to one episode; stop after the first usable match.
+            break

mkv_episode_matcher-0.1.3/mkv_episode_matcher/mkv_to_srt.py ADDED Viewed

@@ -0,0 +1,179 @@
+import os
+import subprocess
+import sys
+# Directory that contains this module (the package directory).
+parent_dir = os.path.dirname(os.path.abspath(__file__))
+# Make the package directory importable.
+sys.path.append(parent_dir)
+# Make the bundled 'libraries' directory importable.
+sys.path.append(os.path.join(parent_dir, "libraries"))
+# Make the pgs2srt modules (imagemaker, pgsreader) importable. .gitmodules places the
+# submodule at <package>/libraries/pgs2srt, so that location is added as well as the
+# original "../libraries/pgs2srt" path, which is kept for backward compatibility.
+sys.path.append(os.path.join(parent_dir, "libraries", "pgs2srt"))
+sys.path.append(os.path.join(parent_dir, "..", "libraries", "pgs2srt"))
+import re
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime, timedelta
+import pytesseract
+from imagemaker import make_image
+from loguru import logger
+from pgsreader import PGSReader
+from PIL import Image, ImageOps
+from mkv_episode_matcher.__main__ import CONFIG_FILE
+from mkv_episode_matcher.config import get_config
+def convert_mkv_to_sup(mkv_file, output_dir):
+    """
+    Extract the first subtitle stream of an .mkv file into a .sup file using FFmpeg.
+    An existing .sup file is reused. If FFmpeg fails, the error is logged and any
+    partially written .sup file is removed so a later run retries the conversion
+    instead of skipping it as "already exists".
+    Args:
+        mkv_file (str): Path to the .mkv file.
+        output_dir (str): Path to the directory where the .sup file will be saved.
+    Returns:
+        str: Path of the .sup file (returned even when the conversion failed).
+    """
+    # Get the base name of the .mkv file without the extension
+    base_name = os.path.splitext(os.path.basename(mkv_file))[0]
+    # Construct the output .sup file path
+    sup_file = os.path.join(output_dir, f"{base_name}.sup")
+    if os.path.exists(sup_file):
+        logger.info(f"File {sup_file} already exists, skipping")
+        return sup_file
+    logger.info(f"Processing {mkv_file} to {sup_file}")
+    # "-map 0:s:0" selects the first subtitle stream; "-c copy" copies it without re-encoding.
+    ffmpeg_cmd = ["ffmpeg", "-i", mkv_file, "-map", "0:s:0", "-c", "copy", sup_file]
+    try:
+        subprocess.run(ffmpeg_cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Error converting {mkv_file}: {e}")
+        if os.path.exists(sup_file):
+            os.remove(sup_file)
+    else:
+        logger.info(f"Converted {mkv_file} to {sup_file}")
+    return sup_file
+def _pgs_timestamp(milliseconds):
+    """
+    Convert a PGS presentation timestamp to a datetime on 1970-01-01.
+    Unlike ``datetime.fromtimestamp`` plus a fixed one-hour correction, the result
+    does not depend on the local timezone. Assumes the value is in milliseconds,
+    as the original division by 1000 implies.
+    """
+    return datetime(1970, 1, 1) + timedelta(milliseconds=milliseconds)
+@logger.catch
+def perform_ocr(sup_file_path):
+    """
+    Perform OCR on a .sup file and save the extracted text to a .srt file.
+    The .srt file is written next to the .sup file. Nothing is done when it already exists.
+    Args:
+        sup_file_path (str): Path to the .sup file.
+    """
+    # Get the base name of the .sup file without the extension
+    base_name = os.path.splitext(os.path.basename(sup_file_path))[0]
+    output_dir = os.path.dirname(sup_file_path)
+    logger.info(f"Performing OCR on {sup_file_path}")
+    # Construct the output .srt file path
+    srt_file = os.path.join(output_dir, f"{base_name}.srt")
+    tesseract_lang = "eng"
+    tesseract_config = "-c tessedit_char_blacklist=[] --psm 6 --oem 1"
+    config = get_config(CONFIG_FILE)
+    tesseract_path = config.get("tesseract_path")
+    logger.debug(f"Setting Tesseract Path to {tesseract_path}")
+    pytesseract.pytesseract.tesseract_cmd = str(tesseract_path)
+    if os.path.exists(srt_file):
+        # Already converted; skip parsing the .sup file.
+        return
+    # Load a PGS/SUP file.
+    pgs = PGSReader(sup_file_path)
+    # Number of subtitles written so far
+    si = 0
+    # State of the subtitle being assembled. Initialised here so that a leading
+    # display set without an image cannot hit unbound variables.
+    start = end = text = None
+    entries = []
+    for ds in pgs.iter_displaysets():
+        if ds.has_image:
+            # Get Palette Display Segment
+            pds = ds.pds[0]
+            # Get Object Display Segment
+            ods = ds.ods[0]
+            if pds and ods:
+                # Create the bitmap image and convert it to RGBA
+                src = make_image(ods, pds).convert("RGBA")
+                # Create grayscale image with black background
+                img = Image.new("L", src.size, "BLACK")
+                # Paste the subtitle bitmap
+                img.paste(src, (0, 0), src)
+                # Invert images so the text is readable by Tesseract
+                img = ImageOps.invert(img)
+                # Parse the image with Tesseract
+                text = pytesseract.image_to_string(
+                    img, lang=tesseract_lang, config=tesseract_config
+                ).strip()
+                # Replace "|", "/" and "\" with "I".
+                # Works better than blacklisting "|" in Tesseract,
+                # which results in I becoming "!" "i" and "1"
+                text = re.sub(r"[|/\\]", "I", text)
+                text = re.sub(r"[_]", "L", text)
+                start = _pgs_timestamp(ods.presentation_timestamp)
+        else:
+            # Get Presentation Composition Segment
+            pcs = ds.pcs[0]
+            if pcs:
+                end = _pgs_timestamp(pcs.presentation_timestamp)
+                # A display set without an image ends the subtitle that is currently shown.
+                if start is not None and text:
+                    si = si + 1
+                    entries.append(
+                        str(si)
+                        + "\n"
+                        + start.strftime("%H:%M:%S,%f")[0:12]
+                        + " --> "
+                        + end.strftime("%H:%M:%S,%f")[0:12]
+                        + "\n"
+                        + text
+                        + "\n\n"
+                    )
+                    start = end = text = None
+    with open(srt_file, "w") as f:
+        f.write("".join(entries))
+    logger.info(f"Saved to: {srt_file}")
+def convert_mkv_to_srt(season_path, mkv_files):
+    """
+    Extract subtitles from MKV files and OCR them into SRT files.
+    The .sup files are created one after another in "<season_path>/ocr";
+    the OCR jobs are then submitted to a thread pool.
+    Args:
+        season_path (str): The path to the season directory.
+        mkv_files (list): List of MKV files to convert.
+    Returns:
+        None
+    """
+    logger.info(f"Converting {len(mkv_files)} files to SRT")
+    ocr_dir = os.path.join(season_path, "ocr")
+    os.makedirs(ocr_dir, exist_ok=True)
+    sup_paths = [convert_mkv_to_sup(path, ocr_dir) for path in mkv_files]
+    # Leaving the "with" block waits for all submitted OCR jobs to finish.
+    with ThreadPoolExecutor() as pool:
+        for sup_path in sup_paths:
+            pool.submit(perform_ocr, sup_path)

mkv_episode_matcher-0.1.3/mkv_episode_matcher/requirements.txt ADDED Viewed

@@ -0,0 +1,8 @@
+requests
+loguru
+pillow
+imagehash
+configparser
+tmdb_client
+pytesseract
+opensubtitlescom

mkv_episode_matcher-0.1.3/mkv_episode_matcher/tmdb_client.py ADDED Viewed

@@ -0,0 +1,134 @@
+# tmdb_client.py
+import time
+from threading import Lock
+import requests
+from loguru import logger
+from mkv_episode_matcher.__main__ import CONFIG_FILE
+from mkv_episode_matcher.config import get_config
+BASE_IMAGE_URL = "https://image.tmdb.org/t/p/original"
+class RateLimitedRequest:
+    """
+    Issues GET requests while allowing at most ``rate_limit`` of them per window.
+    Attributes:
+        rate_limit (int): Maximum number of requests allowed per period.
+        period (int): Length of a window in seconds.
+        requests_made (int): Requests counted in the current window.
+        start_time (float): ``time.time()`` value at which the current window began.
+        lock (Lock): Guards the counter and the window start.
+    """
+    def __init__(self, rate_limit=30, period=1):
+        self.rate_limit, self.period = rate_limit, period
+        self.requests_made = 0
+        self.start_time = time.time()
+        self.lock = Lock()
+    def get(self, url):
+        """
+        Send a GET request, first waiting if the current window is exhausted.
+        Args:
+            url (str): The URL to send the request to.
+        Returns:
+            Response: The response object returned by ``requests.get``.
+        """
+        with self.lock:
+            if self.requests_made >= self.rate_limit:
+                # Window is full: wait out what is left of it, then start a new one.
+                elapsed = time.time() - self.start_time
+                remaining = self.period - elapsed
+                if remaining > 0:
+                    time.sleep(remaining)
+                self.requests_made = 0
+                self.start_time = time.time()
+            self.requests_made += 1
+        # The request itself runs outside the lock.
+        return requests.get(url)
+# Shared limiter instance: 30 requests per second
+rate_limited_request = RateLimitedRequest(rate_limit=30, period=1)
+def fetch_show_id(show_name):
+    """
+    Fetch the TMDb ID for a given show name.
+    Args:
+        show_name (str): The name of the show.
+    Returns:
+        str: The TMDb ID of the first search result, or None if the request did not
+        return status 200 or produced no results.
+    """
+    config = get_config(CONFIG_FILE)
+    tmdb_api_key = config.get("tmdb_api_key")
+    # Pass the query through `params` so requests URL-encodes it. A show name that
+    # contains "&", "#" or "?" would otherwise corrupt the query string.
+    response = requests.get(
+        "https://api.themoviedb.org/3/search/tv",
+        params={"query": show_name, "api_key": tmdb_api_key},
+    )
+    if response.status_code == 200:
+        results = response.json().get("results", [])
+        if results:
+            return str(results[0]["id"])
+    return None
+def fetch_season_details(show_id, season_number):
+    """
+    Fetch the total number of episodes for a given show and season from the TMDb API.
+    Args:
+        show_id (str): The ID of the show on TMDb.
+        season_number (int): The season number to fetch details for.
+    Returns:
+        int: The total number of episodes in the season, or 0 if the request failed,
+        the response was not valid JSON, or it listed no episodes.
+    """
+    logger.info(f"Fetching season details for Season {season_number}...")
+    config = get_config(CONFIG_FILE)
+    tmdb_api_key = config.get("tmdb_api_key")
+    url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_number}?api_key={tmdb_api_key}"
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        season_data = response.json()
+    except (requests.exceptions.RequestException, ValueError) as e:
+        # ValueError covers an undecodable JSON body. The former `except KeyError`
+        # branch could never run because the key is read with .get().
+        logger.error(f"Failed to fetch season details for Season {season_number}: {e}")
+        return 0
+    return len(season_data.get("episodes", []))
+def get_number_of_seasons(show_id):
+    """
+    Look up how many seasons a TV show has according to the TMDB API.
+    Parameters:
+    - show_id (int): The ID of the TV show.
+    Returns:
+    - num_seasons (int): The "number_of_seasons" value of the response, 0 when absent.
+    Raises:
+    - requests.HTTPError: If the API answers with an error status.
+    """
+    api_key = get_config(CONFIG_FILE).get("tmdb_api_key")
+    reply = requests.get(
+        f"https://api.themoviedb.org/3/tv/{show_id}?api_key={api_key}"
+    )
+    reply.raise_for_status()
+    num_seasons = reply.json().get("number_of_seasons", 0)
+    logger.info(f"Found {num_seasons} seasons")
+    return num_seasons

mkv_episode_matcher-0.1.3/mkv_episode_matcher/utils.py ADDED Viewed

@@ -0,0 +1,228 @@
+# utils.py
+import os
+import re
+import shutil
+from typing import Set
+import requests
+from loguru import logger
+from opensubtitlescom import OpenSubtitles
+from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
+from mkv_episode_matcher.config import get_config
+from mkv_episode_matcher.tmdb_client import fetch_season_details
+def check_filename(filename, series_title, season_number, episode_number):
+    """
+    Check if a filename matches the expected naming convention for a series episode.
+    Args:
+        filename (str): The filename to be checked.
+        series_title (str): The title of the series.
+        season_number (int): The season number of the episode.
+        episode_number (int): The episode number of the episode.
+    Returns:
+        bool: True if the filename matches the expected naming convention, False otherwise.
+    The expected naming convention is '{series_title} - S{season_number:02d}E{episode_number:02d}.mkv'.
+    The whole filename must equal that name: the '.' before 'mkv' is a literal dot and
+    trailing text (for example 'Example - S01E03.mkv.part') is not accepted.
+    Example:
+        If filename = 'Example - S01E03.mkv', series_title = 'Example', season_number = 1, and episode_number = 3,
+        the function will return True because the filename matches the expected pattern.
+    """
+    expected_name = (
+        f"{series_title} - S{season_number:02d}E{episode_number:02d}.mkv"
+    )
+    # An exact comparison avoids regex pitfalls (unescaped '.', prefix-only matching).
+    return filename == expected_name
+def scramble_filename(original_file_path, file_number):
+    """
+    Give a file a placeholder name built from the series title and a file number.
+    The series title is taken from the name of the directory two levels above the
+    file, and the new name is '{series_title} - {file_number:03d}{extension}' in the
+    same directory. If a file with that name already exists, nothing is renamed.
+    Args:
+        original_file_path (str): The original file path.
+        file_number (int): The file number to be added to the filename.
+    Returns:
+        None
+    """
+    logger.info(f"Scrambling {original_file_path}")
+    parent_dir, current_name = os.path.split(original_file_path)
+    show_name = os.path.basename(os.path.dirname(parent_dir))
+    _, suffix = os.path.splitext(original_file_path)
+    scrambled_name = f"{show_name} - {file_number:03d}{suffix}"
+    scrambled_path = os.path.join(parent_dir, scrambled_name)
+    if os.path.exists(scrambled_path):
+        # The target name is already taken; leave the file untouched.
+        return
+    logger.info(f"Renaming {current_name} -> {scrambled_name}")
+    os.rename(original_file_path, scrambled_path)
+def rename_episode_file(original_file_path, season_number, episode_number):
+    """
+    Rename an episode file to '{series_title} - S{season:02d}E{episode:02d}{extension}'.
+    Args:
+        original_file_path (str): The original file path of the episode.
+        season_number (int): The season number of the episode.
+        episode_number (int): The episode number of the episode.
+    Returns:
+        None
+    The series title is the name of the directory two levels above the file, and the
+    file stays in its current directory. When the target name is already taken, a
+    warning is logged and '_2', '_3', ... is appended before the extension until an
+    unused name is found.
+    Example:
+        If original_file_path = '/path/to/episode.mkv', season_number = 1, and episode_number = 3, and the series title is 'Example',
+        the file becomes 'Example - S01E03.mkv', or 'Example - S01E03_2.mkv' if that name already exists, and so on.
+    """
+    parent_dir, current_name = os.path.split(original_file_path)
+    show_name = os.path.basename(os.path.dirname(parent_dir))
+    _, ext = os.path.splitext(original_file_path)
+    stem = f"{show_name} - S{season_number:02d}E{episode_number:02d}"
+    target_name = f"{stem}{ext}"
+    target_path = os.path.join(parent_dir, target_name)
+    if os.path.exists(target_path):
+        logger.warning(f"Filename already exists: {target_name}.")
+        # Probe numbered variants, starting at 2, until one is free.
+        counter = 2
+        target_name = f"{stem}_{counter}{ext}"
+        target_path = os.path.join(parent_dir, target_name)
+        while os.path.exists(target_path):
+            counter += 1
+            target_name = f"{stem}_{counter}{ext}"
+            target_path = os.path.join(parent_dir, target_name)
+    logger.info(f"Renaming {current_name} -> {target_name}")
+    os.rename(original_file_path, target_path)
+def get_subtitles(show_id, seasons: Set[int]):
+    """
+    Retrieves and saves subtitles for a given TV show and seasons.
+    Args:
+        show_id (int): The ID of the TV show.
+        seasons (Set[int]): A set of season numbers for which subtitles should be retrieved.
+    Returns:
+        None
+    Raises:
+        requests.RequestException: If a TMDb episode lookup fails or times out.
+    Subtitles are saved as '{series_name} - S{season:02d}E{episode:02d}.srt' under
+    CACHE_DIR/data/{series_name}, where series_name is the basename of the configured
+    show directory with ':' replaced by ' -'. Episodes whose subtitle file already
+    exists are skipped. The function returns early, after logging an error, when
+    required settings are missing or the OpenSubtitles login fails.
+    """
+    logger.info(f"Getting subtitles for show ID {show_id}")
+    config = get_config(CONFIG_FILE)
+    show_dir = config.get("show_dir")
+    tmdb_api_key = config.get("tmdb_api_key")
+    open_subtitles_api_key = config.get("open_subtitles_api_key")
+    open_subtitles_user_agent = config.get("open_subtitles_user_agent")
+    open_subtitles_username = config.get("open_subtitles_username")
+    open_subtitles_password = config.get("open_subtitles_password")
+    if not all(
+        [
+            show_dir,
+            tmdb_api_key,
+            open_subtitles_api_key,
+            open_subtitles_user_agent,
+            open_subtitles_username,
+            open_subtitles_password,
+        ]
+    ):
+        logger.error("Missing configuration settings. Please run the setup script.")
+        # Nothing below can work without these settings.
+        return
+    # Sanitize once, before any path is built, so every episode of the show uses
+    # the same directory and file-name prefix.
+    # NOTE(review): code that reads this cache should apply the same ':' -> ' -'
+    # replacement; confirm against the callers.
+    series_name = os.path.basename(show_dir).replace(":", " -")
+    try:
+        # Initialize the OpenSubtitles client
+        subtitles = OpenSubtitles(open_subtitles_user_agent, open_subtitles_api_key)
+        # Log in (retrieve auth token)
+        subtitles.login(open_subtitles_username, open_subtitles_password)
+    except Exception as e:
+        logger.error(f"Failed to log in to OpenSubtitles: {e}")
+        return
+    for season in seasons:
+        episodes = fetch_season_details(show_id, season)
+        logger.info(f"Found {episodes} episodes in Season {season}")
+        for episode in range(1, episodes + 1):
+            logger.info(f"Processing Season {season}, Episode {episode}...")
+            episode_label = f"{series_name} - S{season:02d}E{episode:02d}"
+            srt_filepath = os.path.join(
+                CACHE_DIR,
+                "data",
+                series_name,
+                f"{episode_label}.srt",
+            )
+            if os.path.exists(srt_filepath):
+                logger.info(f"Subtitle already exists for {episode_label}")
+                continue
+            # get the episode info from TMDB
+            url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}"
+            # Bound the wait so an unresponsive server cannot block the run forever.
+            response = requests.get(url, timeout=30)
+            response.raise_for_status()
+            episode_id = response.json()["id"]
+            # search for the subtitle
+            search_result = subtitles.search(tmdb_id=episode_id, languages="en")
+            if not search_result.data:
+                logger.warning(f"No subtitles found for {episode_label}")
+                continue
+            for subtitle in search_result.data:
+                subtitle_dict = subtitle.to_dict()
+                # Remove special characters and convert to uppercase
+                filename_clean = re.sub(
+                    r"\W+", " ", subtitle_dict["file_name"]
+                ).upper()
+                # Only accept a candidate whose file name mentions this episode number.
+                if f"E{episode:02d}" in filename_clean:
+                    logger.info(f"Original filename: {subtitle_dict['file_name']}")
+                    srt_file = subtitles.download_and_save(subtitle)
+                    # The per-series cache directory may not exist yet.
+                    os.makedirs(os.path.dirname(srt_filepath), exist_ok=True)
+                    shutil.move(srt_file, srt_filepath)
+                    logger.info(f"Subtitle saved to {srt_filepath}")
+                    break
+def cleanup_ocr_files(show_dir):
+    """
+    Remove the 'ocr' directories created during the episode matching process.
+    Args:
+        show_dir (str): The directory containing the show files.
+    Returns:
+        None
+    Every entry directly inside show_dir is checked for an 'ocr' child; when one
+    exists, it is deleted together with all of its contents.
+    """
+    for entry in os.listdir(show_dir):
+        ocr_path = os.path.join(show_dir, entry, "ocr")
+        if not os.path.exists(ocr_path):
+            continue
+        logger.info(f"Cleaning up OCR files in {ocr_path}")
+        shutil.rmtree(ocr_path)

{mkv_episode_matcher-0.1.2 → mkv_episode_matcher-0.1.3}/pyproject.toml RENAMED Viewed

@@ -2,11 +2,6 @@
 requires = ["hatchling"]
 build-backend = "hatchling.build"
-[tool.hatch.build.targets.sdist]
-include = [
-  "/libraries/pgs2srtkg/**/*.py",
-  "/libraries"
-]
 [project]
 name = "mkv-episode-matcher"
 dynamic = ["version"]