PyPI - mkv-episode-matcher - Versions diffs - 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

mkv-episode-matcher 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mkv-episode-matcher might be problematic. Click here for more details.

Files changed (24)

mkv_episode_matcher/episode_matcher.py (CHANGED)

@@ -1,105 +1,152 @@
 # mkv_episode_matcher/episode_matcher.py
-from pathlib import Path
-import shutil
 import glob
 import os
-from loguru import logger
 import re
-from mkv_episode_matcher.__main__ import CONFIG_FILE, CACHE_DIR
+import shutil
+from pathlib import Path
+from loguru import logger
+from rich.console import Console
+from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn
+from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
 from mkv_episode_matcher.config import get_config
-from mkv_episode_matcher.mkv_to_srt import convert_mkv_to_srt
+from mkv_episode_matcher.episode_identification import EpisodeMatcher
 from mkv_episode_matcher.tmdb_client import fetch_show_id
 from mkv_episode_matcher.utils import (
     check_filename,
     clean_text,
-    cleanup_ocr_files,
     get_subtitles,
-    process_reference_srt_files,
-    process_srt_files,
-    compare_and_rename_files,get_valid_seasons,rename_episode_file
+    get_valid_seasons,
+    rename_episode_file,
 )
-from mkv_episode_matcher.episode_identification import EpisodeMatcher
-def process_show(season=None, dry_run=False, get_subs=False):
-    """Process the show using streaming speech recognition with OCR fallback."""
+# Initialize Rich console
+console = Console()
+def process_show(season=None, dry_run=False, get_subs=False, verbose=False, confidence=0.6):
+    """
+    Process the show using streaming speech recognition with improved UI feedback.
+    Args:
+        season (int, optional): Season number to process. Defaults to None (all seasons).
+        dry_run (bool): If True, only simulate actions without making changes.
+        get_subs (bool): If True, download subtitles for the show.
+        verbose (bool): If True, display more detailed progress information.
+        confidence (float): Confidence threshold for episode matching (0.0-1.0).
+    """
     config = get_config(CONFIG_FILE)
     show_dir = config.get("show_dir")
     show_name = clean_text(os.path.basename(show_dir))
-    matcher = EpisodeMatcher(CACHE_DIR, show_name)
+    matcher = EpisodeMatcher(CACHE_DIR, show_name, min_confidence=confidence)
     # Early check for reference files
     reference_dir = Path(CACHE_DIR) / "data" / show_name
     reference_files = list(reference_dir.glob("*.srt"))
     if (not get_subs) and (not reference_files):
-        logger.error(f"No reference subtitle files found in {reference_dir}")
-        logger.info("Please download reference subtitles first")
+        console.print(
+            f"[bold yellow]Warning:[/bold yellow] No reference subtitle files found in {reference_dir}"
+        )
+        console.print("[cyan]Tip:[/cyan] Use --get-subs to download reference subtitles")
         return
     season_paths = get_valid_seasons(show_dir)
     if not season_paths:
-        logger.warning(f"No seasons with .mkv files found")
+        console.print("[bold red]Error:[/bold red] No seasons with .mkv files found")
         return
     if season is not None:
         season_path = os.path.join(show_dir, f"Season {season}")
         if season_path not in season_paths:
-            logger.warning(f"Season {season} has no .mkv files to process")
+            console.print(f"[bold red]Error:[/bold red] Season {season} has no .mkv files to process")
             return
         season_paths = [season_path]
+    total_processed = 0
+    total_matched = 0
     for season_path in season_paths:
-        mkv_files = [f for f in glob.glob(os.path.join(season_path, "*.mkv"))
-                    if not check_filename(f)]
+        mkv_files = [
+            f for f in glob.glob(os.path.join(season_path, "*.mkv"))
+            if not check_filename(f)
+        ]
         if not mkv_files:
-            logger.info(f"No new files to process in {season_path}")
+            season_num = os.path.basename(season_path).replace("Season ", "")
+            console.print(f"[dim]No new files to process in Season {season_num}[/dim]")
             continue
         season_num = int(re.search(r'Season (\d+)', season_path).group(1))
         temp_dir = Path(season_path) / "temp"
-        ocr_dir = Path(season_path) / "ocr"
         temp_dir.mkdir(exist_ok=True)
-        ocr_dir.mkdir(exist_ok=True)
         try:
             if get_subs:
                 show_id = fetch_show_id(matcher.show_name)
                 if show_id:
+                    console.print(f"[bold cyan]Downloading subtitles for Season {season_num}...[/bold cyan]")
                     get_subtitles(show_id, seasons={season_num}, config=config)
-            unmatched_files = []
-            for mkv_file in mkv_files:
-                logger.info(f"Attempting speech recognition match for {mkv_file}")
-                match = matcher.identify_episode(mkv_file, temp_dir, season_num)
-                if match:
-                    new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
-                    logger.info(f"Speech matched {os.path.basename(mkv_file)} to {new_name} "
-                              f"(confidence: {match['confidence']:.2f})")
-                    if not dry_run:
-                        logger.info(f"Renaming {mkv_file} to {new_name}")
-                        rename_episode_file(mkv_file, new_name)
                 else:
-                    logger.info(f"Speech recognition match failed for {mkv_file}, trying OCR")
-                    unmatched_files.append(mkv_file)
+                    console.print("[bold red]Error:[/bold red] Could not find show ID. Skipping subtitle download.")
-            # OCR fallback for unmatched files
-            if unmatched_files:
-                logger.info(f"Attempting OCR matching for {len(unmatched_files)} unmatched files")
-                convert_mkv_to_srt(season_path, unmatched_files)
+            console.print(f"[bold cyan]Processing {len(mkv_files)} files in Season {season_num}...[/bold cyan]")
+            # Process files with a progress bar
+            with Progress(
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+                TimeElapsedColumn(),
+                console=console,
+            ) as progress:
+                task = progress.add_task(f"[cyan]Matching Season {season_num}[/cyan]", total=len(mkv_files))
-                reference_text_dict = process_reference_srt_files(matcher.show_name)
-                srt_text_dict = process_srt_files(str(ocr_dir))
-                compare_and_rename_files(
-                    srt_text_dict,
-                    reference_text_dict,
-                    dry_run=dry_run,
-                )
+                for mkv_file in mkv_files:
+                    file_basename = os.path.basename(mkv_file)
+                    progress.update(task, description=f"[cyan]Processing[/cyan] {file_basename}")
+                    if verbose:
+                        console.print(f"  Analyzing {file_basename}...")
+                    total_processed += 1
+                    match = matcher.identify_episode(mkv_file, temp_dir, season_num)
+                    if match:
+                        total_matched += 1
+                        new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
+                        confidence_color = "green" if match['confidence'] > 0.8 else "yellow"
+                        if verbose or dry_run:
+                            console.print(
+                                f"  Match: [bold]{file_basename}[/bold] → [bold cyan]{new_name}[/bold cyan] "
+                                f"(confidence: [{confidence_color}]{match['confidence']:.2f}[/{confidence_color}])"
+                            )
+                        if not dry_run:
+                            rename_episode_file(mkv_file, new_name)
+                    else:
+                        if verbose:
+                            console.print(f"  [yellow]No match found for {file_basename}[/yellow]")
+                    progress.advance(task)
         finally:
-            if not dry_run:
+            if not dry_run and temp_dir.exists():
                 shutil.rmtree(temp_dir)
-                cleanup_ocr_files(show_dir)
+    # Summary
+    console.print()
+    if total_processed == 0:
+        console.print("[yellow]No files needed processing[/yellow]")
+    else:
+        console.print(f"[bold]Summary:[/bold] Processed {total_processed} files")
+        console.print(f"[bold green]Successfully matched:[/bold green] {total_matched} files")
+        if total_matched < total_processed:
+            console.print(f"[bold yellow]Unmatched:[/bold yellow] {total_processed - total_matched} files")
+            console.print(
+                "[cyan]Tip:[/cyan] Try downloading subtitles with --get-subs or "
+                "check that your files are named consistently"
+            )

mkv_episode_matcher/subtitle_utils.py (CHANGED)

@@ -1,82 +1,83 @@
-from typing import List, Optional, Union
 import os
 import re
+from typing import Optional
-def generate_subtitle_patterns(series_name: str, season: int, episode: int) -> List[str]:
+def generate_subtitle_patterns(
+    series_name: str, season: int, episode: int
+) -> list[str]:
     """
     Generate various common subtitle filename patterns.
     Args:
         series_name (str): Name of the series
         season (int): Season number
         episode (int): Episode number
     Returns:
         List[str]: List of possible subtitle filenames
     """
     patterns = [
         # Standard format: "Show Name - S01E02.srt"
         f"{series_name} - S{season:02d}E{episode:02d}.srt",
         # Season x Episode format: "Show Name - 1x02.srt"
         f"{series_name} - {season}x{episode:02d}.srt",
         # Separate season/episode: "Show Name - Season 1 Episode 02.srt"
         f"{series_name} - Season {season} Episode {episode:02d}.srt",
         # Compact format: "ShowName.S01E02.srt"
         f"{series_name.replace(' ', '')}.S{season:02d}E{episode:02d}.srt",
         # Numbered format: "Show Name 102.srt"
         f"{series_name} {season:01d}{episode:02d}.srt",
         # Dot format: "Show.Name.1x02.srt"
         f"{series_name.replace(' ', '.')}.{season}x{episode:02d}.srt",
         # Underscore format: "Show_Name_S01E02.srt"
         f"{series_name.replace(' ', '_')}_S{season:02d}E{episode:02d}.srt",
     ]
     return patterns
-def find_existing_subtitle(series_cache_dir: str, series_name: str, season: int, episode: int) -> Optional[str]:
+def find_existing_subtitle(
+    series_cache_dir: str, series_name: str, season: int, episode: int
+) -> Optional[str]:
     """
     Check for existing subtitle files in various naming formats.
     Args:
         series_cache_dir (str): Directory containing subtitle files
         series_name (str): Name of the series
         season (int): Season number
         episode (int): Episode number
     Returns:
         Optional[str]: Path to existing subtitle file if found, None otherwise
     """
     patterns = generate_subtitle_patterns(series_name, season, episode)
     for pattern in patterns:
         filepath = os.path.join(series_cache_dir, pattern)
         if os.path.exists(filepath):
             return filepath
     return None
 def sanitize_filename(filename: str) -> str:
     """
     Sanitize filename by removing/replacing invalid characters.
     Args:
         filename (str): Original filename
     Returns:
         str: Sanitized filename
     """
     # Replace problematic characters
-    filename = filename.replace(':', ' -')
-    filename = filename.replace('/', '-')
-    filename = filename.replace('\\', '-')
+    filename = filename.replace(":", " -")
+    filename = filename.replace("/", "-")
+    filename = filename.replace("\\", "-")
     # Remove any other invalid characters
-    filename = re.sub(r'[<>:"/\\|?*]', '', filename)
-    return filename.strip()
+    filename = re.sub(r'[<>:"/\\|?*]', "", filename)
+    return filename.strip()

mkv_episode_matcher/utils.py (CHANGED)

@@ -2,15 +2,20 @@
 import os
 import re
 import shutil
-import torch
 import requests
+import torch
 from loguru import logger
 from opensubtitlescom import OpenSubtitles
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
 from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
 from mkv_episode_matcher.config import get_config
+from mkv_episode_matcher.subtitle_utils import find_existing_subtitle, sanitize_filename
 from mkv_episode_matcher.tmdb_client import fetch_season_details
-from mkv_episode_matcher.subtitle_utils import find_existing_subtitle,sanitize_filename
+console = Console()
 def get_valid_seasons(show_dir):
     """
     Get all season directories that contain MKV files.
@@ -36,13 +41,17 @@ def get_valid_seasons(show_dir):
             valid_season_paths.append(season_path)
     if not valid_season_paths:
-        logger.warning(f"No seasons with .mkv files found in show '{os.path.basename(show_dir)}'")
+        logger.warning(
+            f"No seasons with .mkv files found in show '{os.path.basename(show_dir)}'"
+        )
     else:
         logger.info(
             f"Found {len(valid_season_paths)} seasons with .mkv files in '{os.path.basename(show_dir)}'"
         )
     return valid_season_paths
 def check_filename(filename):
     """
     Check if the filename is in the correct format (S01E02).
@@ -54,7 +63,7 @@ def check_filename(filename):
         bool: True if the filename matches the expected pattern.
     """
     # Check if the filename matches the expected format
-    match = re.search(r'.*S\d+E\d+', filename)
+    match = re.search(r".*S\d+E\d+", filename)
     return bool(match)
@@ -95,11 +104,11 @@ def rename_episode_file(original_file_path, new_filename):
     """
     original_dir = os.path.dirname(original_file_path)
     new_file_path = os.path.join(original_dir, new_filename)
     # Check if new filepath already exists
     if os.path.exists(new_file_path):
         logger.warning(f"File already exists: {new_filename}")
         # Add numeric suffix if file exists
         base, ext = os.path.splitext(new_filename)
         suffix = 2
@@ -109,7 +118,7 @@ def rename_episode_file(original_file_path, new_filename):
             if not os.path.exists(new_file_path):
                 break
             suffix += 1
     try:
         os.rename(original_file_path, new_file_path)
         logger.info(f"Renamed {os.path.basename(original_file_path)} -> {new_filename}")
@@ -120,7 +129,8 @@ def rename_episode_file(original_file_path, new_filename):
     except FileExistsError as e:
         logger.error(f"Failed to rename file: {e}")
         return None
 def get_subtitles(show_id, seasons: set[int], config=None):
     """
     Retrieves and saves subtitles for a given TV show and seasons.
@@ -164,19 +174,21 @@ def get_subtitles(show_id, seasons: set[int], config=None):
         for episode in range(1, episodes + 1):
             logger.info(f"Processing Season {season}, Episode {episode}...")
             series_cache_dir = os.path.join(CACHE_DIR, "data", series_name)
             os.makedirs(series_cache_dir, exist_ok=True)
             # Check for existing subtitle in any supported format
             existing_subtitle = find_existing_subtitle(
                 series_cache_dir, series_name, season, episode
             )
             if existing_subtitle:
-                logger.info(f"Subtitle already exists: {os.path.basename(existing_subtitle)}")
+                logger.info(
+                    f"Subtitle already exists: {os.path.basename(existing_subtitle)}"
+                )
                 continue
             # Default to standard format for new downloads
             srt_filepath = os.path.join(
                 series_cache_dir,
@@ -189,7 +201,7 @@ def get_subtitles(show_id, seasons: set[int], config=None):
             response.raise_for_status()
             episode_data = response.json()
             episode_id = episode_data["id"]
             # search for the subtitle
             response = subtitles.search(tmdb_id=episode_id, languages="en")
             if len(response.data) == 0:
@@ -210,33 +222,13 @@ def get_subtitles(show_id, seasons: set[int], config=None):
                     break
-def cleanup_ocr_files(show_dir):
-    """
-    Clean up OCR files generated during the episode matching process.
-    Args:
-        show_dir (str): The directory containing the show files.
-    Returns:
-        None
-    This function cleans up the OCR files generated during the episode matching process.
-    It deletes the 'ocr' directory and all its contents in each season directory of the show.
-    """
-    for season_dir in os.listdir(show_dir):
-        season_dir_path = os.path.join(show_dir, season_dir)
-        ocr_dir_path = os.path.join(season_dir_path, "ocr")
-        if os.path.exists(ocr_dir_path):
-            logger.info(f"Cleaning up OCR files in {ocr_dir_path}")
-            shutil.rmtree(ocr_dir_path)
 def clean_text(text):
     # Remove brackets, parentheses, and their content
     cleaned_text = re.sub(r"\[.*?\]|\(.*?\)|\{.*?\}", "", text)
     # Strip leading/trailing whitespace
     return cleaned_text.strip()
 @logger.catch
 def process_reference_srt_files(series_name):
     """
@@ -249,12 +241,13 @@ def process_reference_srt_files(series_name):
         dict: A dictionary containing the reference files where the keys are the MKV filenames
               and the values are the corresponding SRT texts.
     """
-    from mkv_episode_matcher.__main__ import CACHE_DIR
     import os
+    from mkv_episode_matcher.__main__ import CACHE_DIR
     reference_files = {}
     reference_dir = os.path.join(CACHE_DIR, "data", series_name)
     for dirpath, _, filenames in os.walk(reference_dir):
         for filename in filenames:
             if filename.lower().endswith(".srt"):
@@ -264,9 +257,10 @@ def process_reference_srt_files(series_name):
                 season, episode = extract_season_episode(filename)
                 mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
                 reference_files[mkv_filename] = srt_text
     return reference_files
 def extract_srt_text(filepath):
     """
     Extracts text content from an SRT file.
@@ -280,49 +274,51 @@ def extract_srt_text(filepath):
     # Read the file content
     with open(filepath) as f:
         content = f.read()
     # Split into subtitle blocks
-    blocks = content.strip().split('\n\n')
+    blocks = content.strip().split("\n\n")
     text_lines = []
     for block in blocks:
-        lines = block.split('\n')
+        lines = block.split("\n")
         if len(lines) < 3:
             continue
         # Skip index and timestamp, get all remaining lines as text
-        text = ' '.join(lines[2:])
+        text = " ".join(lines[2:])
         # Remove stage directions and tags
-        text = re.sub(r'\[.*?\]|\<.*?\>', '', text)
+        text = re.sub(r"\[.*?\]|\<.*?\>", "", text)
         if text:
             text_lines.append(text)
     return text_lines
 def extract_season_episode(filename):
     """
     Extract season and episode numbers from filename with support for multiple formats.
     Args:
         filename (str): Filename to parse
     Returns:
         tuple: (season_number, episode_number)
     """
     # List of patterns to try
     patterns = [
-        r'S(\d+)E(\d+)',          # S01E01
-        r'(\d+)x(\d+)',           # 1x01 or 01x01
-        r'Season\s*(\d+).*?(\d+)' # Season 1 - 01
+        r"S(\d+)E(\d+)",  # S01E01
+        r"(\d+)x(\d+)",  # 1x01 or 01x01
+        r"Season\s*(\d+).*?(\d+)",  # Season 1 - 01
     ]
     for pattern in patterns:
         match = re.search(pattern, filename, re.IGNORECASE)
         if match:
             return int(match.group(1)), int(match.group(2))
     return None, None
 def process_srt_files(show_dir):
     """
     Process all SRT files in the given directory and its subdirectories.
@@ -342,6 +338,8 @@ def process_srt_files(show_dir):
                 srt_text = extract_srt_text(srt_file)
                 srt_files[srt_file] = srt_text
     return srt_files
 def compare_and_rename_files(srt_files, reference_files, dry_run=False):
     """
     Compare the srt files with the reference files and rename the matching mkv files.
@@ -372,6 +370,7 @@ def compare_and_rename_files(srt_files, reference_files, dry_run=False):
                     logger.info(f"Renaming {mkv_file} to {new_filename}")
                     rename_episode_file(mkv_file, new_filename)
 def compare_text(text1, text2):
     """
     Compare two lists of text lines and return the number of matching lines.
@@ -391,9 +390,27 @@ def compare_text(text1, text2):
     matching_lines = set(flat_text1).intersection(flat_text2)
     return len(matching_lines)
 def check_gpu_support():
-    logger.info('Checking GPU support...')
+    logger.info("Checking GPU support...")
+    console.print("[bold]Checking GPU support...[/bold]")
     if torch.cuda.is_available():
         logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
+        console.print(
+        Panel.fit(
+                f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}",
+                title="GPU Support",
+                border_style="magenta",
+            )
+        )
     else:
-        logger.warning("CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.")
+        logger.warning(
+            "CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support."
+        )
+        console.print(
+        Panel.fit(
+            "CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.",
+            title="GPU Support",
+            border_style="red",
+        )
+        )

mkv-episode-matcher 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

Potentially problematic release.

mkv-episode-matcher 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl