PyPI - mkv-episode-matcher - Versions diffs - 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

mkv-episode-matcher 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mkv-episode-matcher might be problematic. Click here for more details.

Files changed (24) hide show

mkv_episode_matcher/episode_matcher.py CHANGED Viewed

@@ -1,33 +1,33 @@
 # mkv_episode_matcher/episode_matcher.py
-from pathlib import Path
-import shutil
 import glob
 import os
-from loguru import logger
 import re
-from mkv_episode_matcher.__main__ import CONFIG_FILE, CACHE_DIR
+import shutil
+from pathlib import Path
+from loguru import logger
+from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
 from mkv_episode_matcher.config import get_config
-from mkv_episode_matcher.mkv_to_srt import convert_mkv_to_srt
+from mkv_episode_matcher.episode_identification import EpisodeMatcher
 from mkv_episode_matcher.tmdb_client import fetch_show_id
 from mkv_episode_matcher.utils import (
     check_filename,
     clean_text,
-    cleanup_ocr_files,
     get_subtitles,
-    process_reference_srt_files,
-    process_srt_files,
-    compare_and_rename_files,get_valid_seasons,rename_episode_file
+    get_valid_seasons,
+    rename_episode_file,
 )
-from mkv_episode_matcher.episode_identification import EpisodeMatcher
 def process_show(season=None, dry_run=False, get_subs=False):
-    """Process the show using streaming speech recognition with OCR fallback."""
+    """Process the show using streaming speech recognition."""
     config = get_config(CONFIG_FILE)
     show_dir = config.get("show_dir")
     show_name = clean_text(os.path.basename(show_dir))
     matcher = EpisodeMatcher(CACHE_DIR, show_name)
     # Early check for reference files
     reference_dir = Path(CACHE_DIR) / "data" / show_name
     reference_files = list(reference_dir.glob("*.srt"))
@@ -35,10 +35,10 @@ def process_show(season=None, dry_run=False, get_subs=False):
         logger.error(f"No reference subtitle files found in {reference_dir}")
         logger.info("Please download reference subtitles first")
         return
     season_paths = get_valid_seasons(show_dir)
     if not season_paths:
-        logger.warning(f"No seasons with .mkv files found")
+        logger.warning("No seasons with .mkv files found")
         return
     if season is not None:
@@ -51,55 +51,35 @@ def process_show(season=None, dry_run=False, get_subs=False):
     for season_path in season_paths:
         mkv_files = [f for f in glob.glob(os.path.join(season_path, "*.mkv"))
                     if not check_filename(f)]
         if not mkv_files:
             logger.info(f"No new files to process in {season_path}")
             continue
         season_num = int(re.search(r'Season (\d+)', season_path).group(1))
         temp_dir = Path(season_path) / "temp"
-        ocr_dir = Path(season_path) / "ocr"
         temp_dir.mkdir(exist_ok=True)
-        ocr_dir.mkdir(exist_ok=True)
         try:
             if get_subs:
                 show_id = fetch_show_id(matcher.show_name)
                 if show_id:
                     get_subtitles(show_id, seasons={season_num}, config=config)
-            unmatched_files = []
             for mkv_file in mkv_files:
                 logger.info(f"Attempting speech recognition match for {mkv_file}")
                 match = matcher.identify_episode(mkv_file, temp_dir, season_num)
                 if match:
                     new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
                     logger.info(f"Speech matched {os.path.basename(mkv_file)} to {new_name} "
                               f"(confidence: {match['confidence']:.2f})")
                     if not dry_run:
                         logger.info(f"Renaming {mkv_file} to {new_name}")
                         rename_episode_file(mkv_file, new_name)
                 else:
-                    logger.info(f"Speech recognition match failed for {mkv_file}, trying OCR")
-                    unmatched_files.append(mkv_file)
-            # OCR fallback for unmatched files
-            if unmatched_files:
-                logger.info(f"Attempting OCR matching for {len(unmatched_files)} unmatched files")
-                convert_mkv_to_srt(season_path, unmatched_files)
-                reference_text_dict = process_reference_srt_files(matcher.show_name)
-                srt_text_dict = process_srt_files(str(ocr_dir))
-                compare_and_rename_files(
-                    srt_text_dict,
-                    reference_text_dict,
-                    dry_run=dry_run,
-                )
+                    logger.info(f"Speech recognition match failed for {mkv_file}")
         finally:
             if not dry_run:
                 shutil.rmtree(temp_dir)
-                cleanup_ocr_files(show_dir)

mkv_episode_matcher/subtitle_utils.py CHANGED Viewed

@@ -1,82 +1,83 @@
-from typing import List, Optional, Union
 import os
 import re
+from typing import Optional
-def generate_subtitle_patterns(series_name: str, season: int, episode: int) -> List[str]:
+def generate_subtitle_patterns(
+    series_name: str, season: int, episode: int
+) -> list[str]:
     """
     Generate various common subtitle filename patterns.
     Args:
         series_name (str): Name of the series
         season (int): Season number
         episode (int): Episode number
     Returns:
         List[str]: List of possible subtitle filenames
     """
     patterns = [
         # Standard format: "Show Name - S01E02.srt"
         f"{series_name} - S{season:02d}E{episode:02d}.srt",
         # Season x Episode format: "Show Name - 1x02.srt"
         f"{series_name} - {season}x{episode:02d}.srt",
         # Separate season/episode: "Show Name - Season 1 Episode 02.srt"
         f"{series_name} - Season {season} Episode {episode:02d}.srt",
         # Compact format: "ShowName.S01E02.srt"
         f"{series_name.replace(' ', '')}.S{season:02d}E{episode:02d}.srt",
         # Numbered format: "Show Name 102.srt"
         f"{series_name} {season:01d}{episode:02d}.srt",
         # Dot format: "Show.Name.1x02.srt"
         f"{series_name.replace(' ', '.')}.{season}x{episode:02d}.srt",
         # Underscore format: "Show_Name_S01E02.srt"
         f"{series_name.replace(' ', '_')}_S{season:02d}E{episode:02d}.srt",
     ]
     return patterns
-def find_existing_subtitle(series_cache_dir: str, series_name: str, season: int, episode: int) -> Optional[str]:
+def find_existing_subtitle(
+    series_cache_dir: str, series_name: str, season: int, episode: int
+) -> Optional[str]:
     """
     Check for existing subtitle files in various naming formats.
     Args:
         series_cache_dir (str): Directory containing subtitle files
         series_name (str): Name of the series
         season (int): Season number
         episode (int): Episode number
     Returns:
         Optional[str]: Path to existing subtitle file if found, None otherwise
     """
     patterns = generate_subtitle_patterns(series_name, season, episode)
     for pattern in patterns:
         filepath = os.path.join(series_cache_dir, pattern)
         if os.path.exists(filepath):
             return filepath
     return None
 def sanitize_filename(filename: str) -> str:
     """
     Sanitize filename by removing/replacing invalid characters.
     Args:
         filename (str): Original filename
     Returns:
         str: Sanitized filename
     """
     # Replace problematic characters
-    filename = filename.replace(':', ' -')
-    filename = filename.replace('/', '-')
-    filename = filename.replace('\\', '-')
+    filename = filename.replace(":", " -")
+    filename = filename.replace("/", "-")
+    filename = filename.replace("\\", "-")
     # Remove any other invalid characters
-    filename = re.sub(r'[<>:"/\\|?*]', '', filename)
-    return filename.strip()
+    filename = re.sub(r'[<>:"/\\|?*]', "", filename)
+    return filename.strip()

mkv_episode_matcher/utils.py CHANGED Viewed

@@ -2,15 +2,18 @@
 import os
 import re
 import shutil
-import torch
 import requests
+import torch
 from loguru import logger
 from opensubtitlescom import OpenSubtitles
 from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
 from mkv_episode_matcher.config import get_config
+from mkv_episode_matcher.subtitle_utils import find_existing_subtitle, sanitize_filename
 from mkv_episode_matcher.tmdb_client import fetch_season_details
-from mkv_episode_matcher.subtitle_utils import find_existing_subtitle,sanitize_filename
 def get_valid_seasons(show_dir):
     """
     Get all season directories that contain MKV files.
@@ -36,13 +39,17 @@ def get_valid_seasons(show_dir):
             valid_season_paths.append(season_path)
     if not valid_season_paths:
-        logger.warning(f"No seasons with .mkv files found in show '{os.path.basename(show_dir)}'")
+        logger.warning(
+            f"No seasons with .mkv files found in show '{os.path.basename(show_dir)}'"
+        )
     else:
         logger.info(
             f"Found {len(valid_season_paths)} seasons with .mkv files in '{os.path.basename(show_dir)}'"
         )
     return valid_season_paths
 def check_filename(filename):
     """
     Check if the filename is in the correct format (S01E02).
@@ -54,7 +61,7 @@ def check_filename(filename):
         bool: True if the filename matches the expected pattern.
     """
     # Check if the filename matches the expected format
-    match = re.search(r'.*S\d+E\d+', filename)
+    match = re.search(r".*S\d+E\d+", filename)
     return bool(match)
@@ -95,11 +102,11 @@ def rename_episode_file(original_file_path, new_filename):
     """
     original_dir = os.path.dirname(original_file_path)
     new_file_path = os.path.join(original_dir, new_filename)
     # Check if new filepath already exists
     if os.path.exists(new_file_path):
         logger.warning(f"File already exists: {new_filename}")
         # Add numeric suffix if file exists
         base, ext = os.path.splitext(new_filename)
         suffix = 2
@@ -109,7 +116,7 @@ def rename_episode_file(original_file_path, new_filename):
             if not os.path.exists(new_file_path):
                 break
             suffix += 1
     try:
         os.rename(original_file_path, new_file_path)
         logger.info(f"Renamed {os.path.basename(original_file_path)} -> {new_filename}")
@@ -120,7 +127,8 @@ def rename_episode_file(original_file_path, new_filename):
     except FileExistsError as e:
         logger.error(f"Failed to rename file: {e}")
         return None
 def get_subtitles(show_id, seasons: set[int], config=None):
     """
     Retrieves and saves subtitles for a given TV show and seasons.
@@ -164,19 +172,21 @@ def get_subtitles(show_id, seasons: set[int], config=None):
         for episode in range(1, episodes + 1):
             logger.info(f"Processing Season {season}, Episode {episode}...")
             series_cache_dir = os.path.join(CACHE_DIR, "data", series_name)
             os.makedirs(series_cache_dir, exist_ok=True)
             # Check for existing subtitle in any supported format
             existing_subtitle = find_existing_subtitle(
                 series_cache_dir, series_name, season, episode
             )
             if existing_subtitle:
-                logger.info(f"Subtitle already exists: {os.path.basename(existing_subtitle)}")
+                logger.info(
+                    f"Subtitle already exists: {os.path.basename(existing_subtitle)}"
+                )
                 continue
             # Default to standard format for new downloads
             srt_filepath = os.path.join(
                 series_cache_dir,
@@ -189,7 +199,7 @@ def get_subtitles(show_id, seasons: set[int], config=None):
             response.raise_for_status()
             episode_data = response.json()
             episode_id = episode_data["id"]
             # search for the subtitle
             response = subtitles.search(tmdb_id=episode_id, languages="en")
             if len(response.data) == 0:
@@ -210,33 +220,13 @@ def get_subtitles(show_id, seasons: set[int], config=None):
                     break
-def cleanup_ocr_files(show_dir):
-    """
-    Clean up OCR files generated during the episode matching process.
-    Args:
-        show_dir (str): The directory containing the show files.
-    Returns:
-        None
-    This function cleans up the OCR files generated during the episode matching process.
-    It deletes the 'ocr' directory and all its contents in each season directory of the show.
-    """
-    for season_dir in os.listdir(show_dir):
-        season_dir_path = os.path.join(show_dir, season_dir)
-        ocr_dir_path = os.path.join(season_dir_path, "ocr")
-        if os.path.exists(ocr_dir_path):
-            logger.info(f"Cleaning up OCR files in {ocr_dir_path}")
-            shutil.rmtree(ocr_dir_path)
 def clean_text(text):
     # Remove brackets, parentheses, and their content
     cleaned_text = re.sub(r"\[.*?\]|\(.*?\)|\{.*?\}", "", text)
     # Strip leading/trailing whitespace
     return cleaned_text.strip()
 @logger.catch
 def process_reference_srt_files(series_name):
     """
@@ -249,12 +239,13 @@ def process_reference_srt_files(series_name):
         dict: A dictionary containing the reference files where the keys are the MKV filenames
               and the values are the corresponding SRT texts.
     """
-    from mkv_episode_matcher.__main__ import CACHE_DIR
     import os
+    from mkv_episode_matcher.__main__ import CACHE_DIR
     reference_files = {}
     reference_dir = os.path.join(CACHE_DIR, "data", series_name)
     for dirpath, _, filenames in os.walk(reference_dir):
         for filename in filenames:
             if filename.lower().endswith(".srt"):
@@ -264,9 +255,10 @@ def process_reference_srt_files(series_name):
                 season, episode = extract_season_episode(filename)
                 mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
                 reference_files[mkv_filename] = srt_text
     return reference_files
 def extract_srt_text(filepath):
     """
     Extracts text content from an SRT file.
@@ -280,49 +272,51 @@ def extract_srt_text(filepath):
     # Read the file content
     with open(filepath) as f:
         content = f.read()
     # Split into subtitle blocks
-    blocks = content.strip().split('\n\n')
+    blocks = content.strip().split("\n\n")
     text_lines = []
     for block in blocks:
-        lines = block.split('\n')
+        lines = block.split("\n")
         if len(lines) < 3:
             continue
         # Skip index and timestamp, get all remaining lines as text
-        text = ' '.join(lines[2:])
+        text = " ".join(lines[2:])
         # Remove stage directions and tags
-        text = re.sub(r'\[.*?\]|\<.*?\>', '', text)
+        text = re.sub(r"\[.*?\]|\<.*?\>", "", text)
         if text:
             text_lines.append(text)
     return text_lines
 def extract_season_episode(filename):
     """
     Extract season and episode numbers from filename with support for multiple formats.
     Args:
         filename (str): Filename to parse
     Returns:
         tuple: (season_number, episode_number)
     """
     # List of patterns to try
     patterns = [
-        r'S(\d+)E(\d+)',          # S01E01
-        r'(\d+)x(\d+)',           # 1x01 or 01x01
-        r'Season\s*(\d+).*?(\d+)' # Season 1 - 01
+        r"S(\d+)E(\d+)",  # S01E01
+        r"(\d+)x(\d+)",  # 1x01 or 01x01
+        r"Season\s*(\d+).*?(\d+)",  # Season 1 - 01
     ]
     for pattern in patterns:
         match = re.search(pattern, filename, re.IGNORECASE)
         if match:
             return int(match.group(1)), int(match.group(2))
     return None, None
 def process_srt_files(show_dir):
     """
     Process all SRT files in the given directory and its subdirectories.
@@ -342,6 +336,8 @@ def process_srt_files(show_dir):
                 srt_text = extract_srt_text(srt_file)
                 srt_files[srt_file] = srt_text
     return srt_files
 def compare_and_rename_files(srt_files, reference_files, dry_run=False):
     """
     Compare the srt files with the reference files and rename the matching mkv files.
@@ -372,6 +368,7 @@ def compare_and_rename_files(srt_files, reference_files, dry_run=False):
                     logger.info(f"Renaming {mkv_file} to {new_filename}")
                     rename_episode_file(mkv_file, new_filename)
 def compare_text(text1, text2):
     """
     Compare two lists of text lines and return the number of matching lines.
@@ -391,9 +388,12 @@ def compare_text(text1, text2):
     matching_lines = set(flat_text1).intersection(flat_text2)
     return len(matching_lines)
 def check_gpu_support():
-    logger.info('Checking GPU support...')
+    logger.info("Checking GPU support...")
     if torch.cuda.is_available():
         logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
     else:
-        logger.warning("CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.")
+        logger.warning(
+            "CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support."
+        )

{mkv_episode_matcher-0.5.0.dist-info → mkv_episode_matcher-0.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mkv-episode-matcher
-Version: 0.5.0
+Version: 0.6.0
 Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
 Home-page: https://github.com/Jsakkos/mkv-episode-matcher
 Author: Jonathan Sakkos
@@ -21,7 +21,6 @@ Requires-Dist: ffmpeg>=1.4
 Requires-Dist: loguru>=0.7.2
 Requires-Dist: openai-whisper>=20240930
 Requires-Dist: opensubtitlescom>=0.1.5
-Requires-Dist: pytesseract>=0.3.13
 Requires-Dist: rapidfuzz>=3.10.1
 Requires-Dist: requests>=2.32.3
 Requires-Dist: tmdb-client>=0.0.1
@@ -29,14 +28,6 @@ Requires-Dist: torch>=2.5.1
 Requires-Dist: torchaudio>=2.5.1
 Requires-Dist: torchvision>=0.20.1
 Requires-Dist: wave>=0.0.2
-Provides-Extra: cpu
-Requires-Dist: torch>=2.5.1; extra == "cpu"
-Requires-Dist: torchvision>=0.20.1; extra == "cpu"
-Requires-Dist: torchaudio>=2.5.1; extra == "cpu"
-Provides-Extra: cu124
-Requires-Dist: torch>=2.5.1; extra == "cu124"
-Requires-Dist: torchvision>=0.20.1; extra == "cu124"
-Requires-Dist: torchaudio>=2.5.1; extra == "cu124"
 # MKV Episode Matcher
@@ -48,7 +39,7 @@ Requires-Dist: torchaudio>=2.5.1; extra == "cu124"
 [![GitHub last commit](https://img.shields.io/github/last-commit/Jsakkos/mkv-episode-matcher)](https://github.com/Jsakkos/mkv-episode-matcher/commits/main)
 [![GitHub issues](https://img.shields.io/github/issues/Jsakkos/mkv-episode-matcher)](https://github.com/Jsakkos/mkv-episode-matcher/issues)
 [![Tests](https://github.com/Jsakkos/mkv-episode-matcher/actions/workflows/tests.yml/badge.svg)](https://github.com/Jsakkos/mkv-episode-matcher/actions/workflows/tests.yml)
-[![codecov](https://codecov.io/gh/Jsakkos/mkv-episode-matcher/branch/main/graph/badge.svg)](https://codecov.io/gh/Jsakkos/mkv-episode-matcher)
+[![codecov](https://codecov.io/gh/Jsakkos/mkv-episode-matcher/branch/main/graph/badge.svg)](https://codecov.io/gh/Jsakkos/mkv-episode-matcher/)
 Automatically match and rename your MKV TV episodes using The Movie Database (TMDb).
@@ -56,7 +47,7 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
 - 🎯 **Automatic Episode Matching**: Uses TMDb to accurately identify episodes
 - 📝 **Subtitle Extraction**: Extracts subtitles from MKV files
-- 🔍 **OCR Support**: Handles image-based subtitles
+- 🔊 **Speech Recognition**: Uses Whisper for accurate episode identification
 - 🚀 **Multi-threaded**: Fast processing of multiple files
 - ⬇️ **Subtitle Downloads**: Integration with OpenSubtitles
 - ✨ **Bulk Processing**: Handle entire seasons at once
@@ -66,7 +57,6 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
 - Python 3.9 or higher
 - [FFmpeg](https://ffmpeg.org/download.html) installed and available in system PATH
-- [Tesseract OCR](https://github.com/UB-Mannheim/tesseract/wiki) installed (required for image-based subtitle processing)
 - TMDb API key (optional, for subtitle downloads)
 - OpenSubtitles account (optional, for subtitle downloads)
@@ -135,3 +125,7 @@ Distributed under the MIT License. See `LICENSE` for more information.
 ## Documentation
 Full documentation is available at [https://jsakkos.github.io/mkv-episode-matcher/](https://jsakkos.github.io/mkv-episode-matcher/)
+## Changelog
+See [CHANGELOG.md](CHANGELOG.md) for a detailed list of changes.

mkv_episode_matcher-0.6.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
+mkv_episode_matcher/__init__.py,sha256=u3yZcpuK0ICeUjxYKePvW-zS61E5ss5q2AvqnSHuz9E,240
+mkv_episode_matcher/__main__.py,sha256=-iRYoAfut3eDfV29UvobJvCKmYTpsOn8qM49QBFnMUM,5735
+mkv_episode_matcher/config.py,sha256=EcJJjkekQ7oWtarUkufCYON_QWbQvq55-zMqCTOqSa4,2265
+mkv_episode_matcher/episode_identification.py,sha256=rWhUzeNE5_uqsLcRuw_B6g7k3ud9Oa1oKgvXrBA-Jsc,12457
+mkv_episode_matcher/episode_matcher.py,sha256=Yqos1hImF_QIZ8cV0IlemUxhpHwvwBn-mg89N9NDq9U,3126
+mkv_episode_matcher/subtitle_utils.py,sha256=Hz9b4CKPV07YKTY4dcN3WbvdbvH-S3J4zcb9CiyvPlE,2551
+mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
+mkv_episode_matcher/utils.py,sha256=1-RwYn1w_YQFp4KxTmYbCSQEieK-mnToVIS34EVAZLw,13837
+mkv_episode_matcher-0.6.0.dist-info/METADATA,sha256=LBtoWNzGS5Exd0H5q6fP5MdBSsMPOieYMOQ5uQoBZ64,5193
+mkv_episode_matcher-0.6.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+mkv_episode_matcher-0.6.0.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
+mkv_episode_matcher-0.6.0.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
+mkv_episode_matcher-0.6.0.dist-info/RECORD,,

{mkv_episode_matcher-0.5.0.dist-info → mkv_episode_matcher-0.6.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.0)
+Generator: setuptools (75.8.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

mkv_episode_matcher/libraries/pgs2srt/.gitignore DELETED Viewed

@@ -1,2 +0,0 @@
-__pycache__/
-.DS_Store

mkv-episode-matcher 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

Potentially problematic release.

mkv-episode-matcher 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl