mkv-episode-matcher 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher/__init__.py +2 -2
- mkv_episode_matcher/__main__.py +222 -76
- mkv_episode_matcher/config.py +0 -3
- mkv_episode_matcher/episode_identification.py +164 -124
- mkv_episode_matcher/episode_matcher.py +102 -55
- mkv_episode_matcher/subtitle_utils.py +26 -25
- mkv_episode_matcher/utils.py +74 -57
- {mkv_episode_matcher-0.5.0.dist-info → mkv_episode_matcher-0.7.0.dist-info}/METADATA +10 -13
- mkv_episode_matcher-0.7.0.dist-info/RECORD +14 -0
- {mkv_episode_matcher-0.5.0.dist-info → mkv_episode_matcher-0.7.0.dist-info}/WHEEL +1 -1
- mkv_episode_matcher/libraries/pgs2srt/.gitignore +0 -2
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +0 -321
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +0 -16700
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +0 -260
- mkv_episode_matcher/libraries/pgs2srt/README.md +0 -26
- mkv_episode_matcher/libraries/pgs2srt/__init__.py +0 -0
- mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +0 -89
- mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +0 -150
- mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +0 -225
- mkv_episode_matcher/libraries/pgs2srt/requirements.txt +0 -4
- mkv_episode_matcher/mkv_to_srt.py +0 -302
- mkv_episode_matcher-0.5.0.dist-info/RECORD +0 -25
- {mkv_episode_matcher-0.5.0.dist-info → mkv_episode_matcher-0.7.0.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.5.0.dist-info → mkv_episode_matcher-0.7.0.dist-info}/top_level.txt +0 -0
|
@@ -1,105 +1,152 @@
|
|
|
1
1
|
# mkv_episode_matcher/episode_matcher.py
|
|
2
2
|
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
import shutil
|
|
5
3
|
import glob
|
|
6
4
|
import os
|
|
7
|
-
from loguru import logger
|
|
8
5
|
import re
|
|
9
|
-
|
|
6
|
+
import shutil
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from loguru import logger
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn
|
|
12
|
+
|
|
13
|
+
from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
|
|
10
14
|
from mkv_episode_matcher.config import get_config
|
|
11
|
-
from mkv_episode_matcher.
|
|
15
|
+
from mkv_episode_matcher.episode_identification import EpisodeMatcher
|
|
12
16
|
from mkv_episode_matcher.tmdb_client import fetch_show_id
|
|
13
17
|
from mkv_episode_matcher.utils import (
|
|
14
18
|
check_filename,
|
|
15
19
|
clean_text,
|
|
16
|
-
cleanup_ocr_files,
|
|
17
20
|
get_subtitles,
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
compare_and_rename_files,get_valid_seasons,rename_episode_file
|
|
21
|
+
get_valid_seasons,
|
|
22
|
+
rename_episode_file,
|
|
21
23
|
)
|
|
22
|
-
from mkv_episode_matcher.episode_identification import EpisodeMatcher
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
# Initialize Rich console
|
|
26
|
+
console = Console()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def process_show(season=None, dry_run=False, get_subs=False, verbose=False, confidence=0.6):
|
|
30
|
+
"""
|
|
31
|
+
Process the show using streaming speech recognition with improved UI feedback.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
season (int, optional): Season number to process. Defaults to None (all seasons).
|
|
35
|
+
dry_run (bool): If True, only simulate actions without making changes.
|
|
36
|
+
get_subs (bool): If True, download subtitles for the show.
|
|
37
|
+
verbose (bool): If True, display more detailed progress information.
|
|
38
|
+
confidence (float): Confidence threshold for episode matching (0.0-1.0).
|
|
39
|
+
"""
|
|
26
40
|
config = get_config(CONFIG_FILE)
|
|
27
41
|
show_dir = config.get("show_dir")
|
|
28
42
|
show_name = clean_text(os.path.basename(show_dir))
|
|
29
|
-
matcher = EpisodeMatcher(CACHE_DIR, show_name)
|
|
30
|
-
|
|
43
|
+
matcher = EpisodeMatcher(CACHE_DIR, show_name, min_confidence=confidence)
|
|
44
|
+
|
|
31
45
|
# Early check for reference files
|
|
32
46
|
reference_dir = Path(CACHE_DIR) / "data" / show_name
|
|
33
47
|
reference_files = list(reference_dir.glob("*.srt"))
|
|
34
48
|
if (not get_subs) and (not reference_files):
|
|
35
|
-
|
|
36
|
-
|
|
49
|
+
console.print(
|
|
50
|
+
f"[bold yellow]Warning:[/bold yellow] No reference subtitle files found in {reference_dir}"
|
|
51
|
+
)
|
|
52
|
+
console.print("[cyan]Tip:[/cyan] Use --get-subs to download reference subtitles")
|
|
37
53
|
return
|
|
38
|
-
|
|
54
|
+
|
|
39
55
|
season_paths = get_valid_seasons(show_dir)
|
|
40
56
|
if not season_paths:
|
|
41
|
-
|
|
57
|
+
console.print("[bold red]Error:[/bold red] No seasons with .mkv files found")
|
|
42
58
|
return
|
|
43
59
|
|
|
44
60
|
if season is not None:
|
|
45
61
|
season_path = os.path.join(show_dir, f"Season {season}")
|
|
46
62
|
if season_path not in season_paths:
|
|
47
|
-
|
|
63
|
+
console.print(f"[bold red]Error:[/bold red] Season {season} has no .mkv files to process")
|
|
48
64
|
return
|
|
49
65
|
season_paths = [season_path]
|
|
50
66
|
|
|
67
|
+
total_processed = 0
|
|
68
|
+
total_matched = 0
|
|
69
|
+
|
|
51
70
|
for season_path in season_paths:
|
|
52
|
-
mkv_files = [
|
|
53
|
-
|
|
54
|
-
|
|
71
|
+
mkv_files = [
|
|
72
|
+
f for f in glob.glob(os.path.join(season_path, "*.mkv"))
|
|
73
|
+
if not check_filename(f)
|
|
74
|
+
]
|
|
75
|
+
|
|
55
76
|
if not mkv_files:
|
|
56
|
-
|
|
77
|
+
season_num = os.path.basename(season_path).replace("Season ", "")
|
|
78
|
+
console.print(f"[dim]No new files to process in Season {season_num}[/dim]")
|
|
57
79
|
continue
|
|
58
80
|
|
|
59
81
|
season_num = int(re.search(r'Season (\d+)', season_path).group(1))
|
|
60
82
|
temp_dir = Path(season_path) / "temp"
|
|
61
|
-
ocr_dir = Path(season_path) / "ocr"
|
|
62
83
|
temp_dir.mkdir(exist_ok=True)
|
|
63
|
-
ocr_dir.mkdir(exist_ok=True)
|
|
64
84
|
|
|
65
85
|
try:
|
|
66
86
|
if get_subs:
|
|
67
87
|
show_id = fetch_show_id(matcher.show_name)
|
|
68
88
|
if show_id:
|
|
89
|
+
console.print(f"[bold cyan]Downloading subtitles for Season {season_num}...[/bold cyan]")
|
|
69
90
|
get_subtitles(show_id, seasons={season_num}, config=config)
|
|
70
|
-
|
|
71
|
-
unmatched_files = []
|
|
72
|
-
for mkv_file in mkv_files:
|
|
73
|
-
logger.info(f"Attempting speech recognition match for {mkv_file}")
|
|
74
|
-
match = matcher.identify_episode(mkv_file, temp_dir, season_num)
|
|
75
|
-
|
|
76
|
-
if match:
|
|
77
|
-
new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
|
|
78
|
-
logger.info(f"Speech matched {os.path.basename(mkv_file)} to {new_name} "
|
|
79
|
-
f"(confidence: {match['confidence']:.2f})")
|
|
80
|
-
|
|
81
|
-
if not dry_run:
|
|
82
|
-
logger.info(f"Renaming {mkv_file} to {new_name}")
|
|
83
|
-
rename_episode_file(mkv_file, new_name)
|
|
84
91
|
else:
|
|
85
|
-
|
|
86
|
-
unmatched_files.append(mkv_file)
|
|
92
|
+
console.print("[bold red]Error:[/bold red] Could not find show ID. Skipping subtitle download.")
|
|
87
93
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
94
|
+
console.print(f"[bold cyan]Processing {len(mkv_files)} files in Season {season_num}...[/bold cyan]")
|
|
95
|
+
|
|
96
|
+
# Process files with a progress bar
|
|
97
|
+
with Progress(
|
|
98
|
+
TextColumn("[progress.description]{task.description}"),
|
|
99
|
+
BarColumn(),
|
|
100
|
+
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
101
|
+
TimeElapsedColumn(),
|
|
102
|
+
console=console,
|
|
103
|
+
) as progress:
|
|
104
|
+
task = progress.add_task(f"[cyan]Matching Season {season_num}[/cyan]", total=len(mkv_files))
|
|
92
105
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
106
|
+
for mkv_file in mkv_files:
|
|
107
|
+
file_basename = os.path.basename(mkv_file)
|
|
108
|
+
progress.update(task, description=f"[cyan]Processing[/cyan] {file_basename}")
|
|
109
|
+
|
|
110
|
+
if verbose:
|
|
111
|
+
console.print(f" Analyzing {file_basename}...")
|
|
112
|
+
|
|
113
|
+
total_processed += 1
|
|
114
|
+
match = matcher.identify_episode(mkv_file, temp_dir, season_num)
|
|
115
|
+
|
|
116
|
+
if match:
|
|
117
|
+
total_matched += 1
|
|
118
|
+
new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
|
|
119
|
+
|
|
120
|
+
confidence_color = "green" if match['confidence'] > 0.8 else "yellow"
|
|
121
|
+
|
|
122
|
+
if verbose or dry_run:
|
|
123
|
+
console.print(
|
|
124
|
+
f" Match: [bold]{file_basename}[/bold] → [bold cyan]{new_name}[/bold cyan] "
|
|
125
|
+
f"(confidence: [{confidence_color}]{match['confidence']:.2f}[/{confidence_color}])"
|
|
126
|
+
)
|
|
101
127
|
|
|
128
|
+
if not dry_run:
|
|
129
|
+
rename_episode_file(mkv_file, new_name)
|
|
130
|
+
else:
|
|
131
|
+
if verbose:
|
|
132
|
+
console.print(f" [yellow]No match found for {file_basename}[/yellow]")
|
|
133
|
+
|
|
134
|
+
progress.advance(task)
|
|
102
135
|
finally:
|
|
103
|
-
if not dry_run:
|
|
136
|
+
if not dry_run and temp_dir.exists():
|
|
104
137
|
shutil.rmtree(temp_dir)
|
|
105
|
-
|
|
138
|
+
|
|
139
|
+
# Summary
|
|
140
|
+
console.print()
|
|
141
|
+
if total_processed == 0:
|
|
142
|
+
console.print("[yellow]No files needed processing[/yellow]")
|
|
143
|
+
else:
|
|
144
|
+
console.print(f"[bold]Summary:[/bold] Processed {total_processed} files")
|
|
145
|
+
console.print(f"[bold green]Successfully matched:[/bold green] {total_matched} files")
|
|
146
|
+
|
|
147
|
+
if total_matched < total_processed:
|
|
148
|
+
console.print(f"[bold yellow]Unmatched:[/bold yellow] {total_processed - total_matched} files")
|
|
149
|
+
console.print(
|
|
150
|
+
"[cyan]Tip:[/cyan] Try downloading subtitles with --get-subs or "
|
|
151
|
+
"check that your files are named consistently"
|
|
152
|
+
)
|
|
@@ -1,82 +1,83 @@
|
|
|
1
|
-
from typing import List, Optional, Union
|
|
2
1
|
import os
|
|
3
2
|
import re
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
def generate_subtitle_patterns(
|
|
7
|
+
series_name: str, season: int, episode: int
|
|
8
|
+
) -> list[str]:
|
|
6
9
|
"""
|
|
7
10
|
Generate various common subtitle filename patterns.
|
|
8
|
-
|
|
11
|
+
|
|
9
12
|
Args:
|
|
10
13
|
series_name (str): Name of the series
|
|
11
14
|
season (int): Season number
|
|
12
15
|
episode (int): Episode number
|
|
13
|
-
|
|
16
|
+
|
|
14
17
|
Returns:
|
|
15
18
|
List[str]: List of possible subtitle filenames
|
|
16
19
|
"""
|
|
17
20
|
patterns = [
|
|
18
21
|
# Standard format: "Show Name - S01E02.srt"
|
|
19
22
|
f"{series_name} - S{season:02d}E{episode:02d}.srt",
|
|
20
|
-
|
|
21
23
|
# Season x Episode format: "Show Name - 1x02.srt"
|
|
22
24
|
f"{series_name} - {season}x{episode:02d}.srt",
|
|
23
|
-
|
|
24
25
|
# Separate season/episode: "Show Name - Season 1 Episode 02.srt"
|
|
25
26
|
f"{series_name} - Season {season} Episode {episode:02d}.srt",
|
|
26
|
-
|
|
27
27
|
# Compact format: "ShowName.S01E02.srt"
|
|
28
28
|
f"{series_name.replace(' ', '')}.S{season:02d}E{episode:02d}.srt",
|
|
29
|
-
|
|
30
29
|
# Numbered format: "Show Name 102.srt"
|
|
31
30
|
f"{series_name} {season:01d}{episode:02d}.srt",
|
|
32
|
-
|
|
33
31
|
# Dot format: "Show.Name.1x02.srt"
|
|
34
32
|
f"{series_name.replace(' ', '.')}.{season}x{episode:02d}.srt",
|
|
35
|
-
|
|
36
33
|
# Underscore format: "Show_Name_S01E02.srt"
|
|
37
34
|
f"{series_name.replace(' ', '_')}_S{season:02d}E{episode:02d}.srt",
|
|
38
35
|
]
|
|
39
|
-
|
|
36
|
+
|
|
40
37
|
return patterns
|
|
41
38
|
|
|
42
|
-
|
|
39
|
+
|
|
40
|
+
def find_existing_subtitle(
|
|
41
|
+
series_cache_dir: str, series_name: str, season: int, episode: int
|
|
42
|
+
) -> Optional[str]:
|
|
43
43
|
"""
|
|
44
44
|
Check for existing subtitle files in various naming formats.
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
Args:
|
|
47
47
|
series_cache_dir (str): Directory containing subtitle files
|
|
48
48
|
series_name (str): Name of the series
|
|
49
49
|
season (int): Season number
|
|
50
50
|
episode (int): Episode number
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
Returns:
|
|
53
53
|
Optional[str]: Path to existing subtitle file if found, None otherwise
|
|
54
54
|
"""
|
|
55
55
|
patterns = generate_subtitle_patterns(series_name, season, episode)
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
for pattern in patterns:
|
|
58
58
|
filepath = os.path.join(series_cache_dir, pattern)
|
|
59
59
|
if os.path.exists(filepath):
|
|
60
60
|
return filepath
|
|
61
|
-
|
|
61
|
+
|
|
62
62
|
return None
|
|
63
63
|
|
|
64
|
+
|
|
64
65
|
def sanitize_filename(filename: str) -> str:
|
|
65
66
|
"""
|
|
66
67
|
Sanitize filename by removing/replacing invalid characters.
|
|
67
|
-
|
|
68
|
+
|
|
68
69
|
Args:
|
|
69
70
|
filename (str): Original filename
|
|
70
|
-
|
|
71
|
+
|
|
71
72
|
Returns:
|
|
72
73
|
str: Sanitized filename
|
|
73
74
|
"""
|
|
74
75
|
# Replace problematic characters
|
|
75
|
-
filename = filename.replace(
|
|
76
|
-
filename = filename.replace(
|
|
77
|
-
filename = filename.replace(
|
|
78
|
-
|
|
76
|
+
filename = filename.replace(":", " -")
|
|
77
|
+
filename = filename.replace("/", "-")
|
|
78
|
+
filename = filename.replace("\\", "-")
|
|
79
|
+
|
|
79
80
|
# Remove any other invalid characters
|
|
80
|
-
filename = re.sub(r'[<>:"/\\|?*]',
|
|
81
|
-
|
|
82
|
-
return filename.strip()
|
|
81
|
+
filename = re.sub(r'[<>:"/\\|?*]', "", filename)
|
|
82
|
+
|
|
83
|
+
return filename.strip()
|
mkv_episode_matcher/utils.py
CHANGED
|
@@ -2,15 +2,20 @@
|
|
|
2
2
|
import os
|
|
3
3
|
import re
|
|
4
4
|
import shutil
|
|
5
|
-
|
|
5
|
+
|
|
6
6
|
import requests
|
|
7
|
+
import torch
|
|
7
8
|
from loguru import logger
|
|
8
9
|
from opensubtitlescom import OpenSubtitles
|
|
9
|
-
|
|
10
|
+
from rich.console import Console
|
|
11
|
+
from rich.panel import Panel
|
|
12
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
10
13
|
from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
|
|
11
14
|
from mkv_episode_matcher.config import get_config
|
|
15
|
+
from mkv_episode_matcher.subtitle_utils import find_existing_subtitle, sanitize_filename
|
|
12
16
|
from mkv_episode_matcher.tmdb_client import fetch_season_details
|
|
13
|
-
|
|
17
|
+
|
|
18
|
+
console = Console()
|
|
14
19
|
def get_valid_seasons(show_dir):
|
|
15
20
|
"""
|
|
16
21
|
Get all season directories that contain MKV files.
|
|
@@ -36,13 +41,17 @@ def get_valid_seasons(show_dir):
|
|
|
36
41
|
valid_season_paths.append(season_path)
|
|
37
42
|
|
|
38
43
|
if not valid_season_paths:
|
|
39
|
-
logger.warning(
|
|
44
|
+
logger.warning(
|
|
45
|
+
f"No seasons with .mkv files found in show '{os.path.basename(show_dir)}'"
|
|
46
|
+
)
|
|
40
47
|
else:
|
|
41
48
|
logger.info(
|
|
42
49
|
f"Found {len(valid_season_paths)} seasons with .mkv files in '{os.path.basename(show_dir)}'"
|
|
43
50
|
)
|
|
44
51
|
|
|
45
52
|
return valid_season_paths
|
|
53
|
+
|
|
54
|
+
|
|
46
55
|
def check_filename(filename):
|
|
47
56
|
"""
|
|
48
57
|
Check if the filename is in the correct format (S01E02).
|
|
@@ -54,7 +63,7 @@ def check_filename(filename):
|
|
|
54
63
|
bool: True if the filename matches the expected pattern.
|
|
55
64
|
"""
|
|
56
65
|
# Check if the filename matches the expected format
|
|
57
|
-
match = re.search(r
|
|
66
|
+
match = re.search(r".*S\d+E\d+", filename)
|
|
58
67
|
return bool(match)
|
|
59
68
|
|
|
60
69
|
|
|
@@ -95,11 +104,11 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
95
104
|
"""
|
|
96
105
|
original_dir = os.path.dirname(original_file_path)
|
|
97
106
|
new_file_path = os.path.join(original_dir, new_filename)
|
|
98
|
-
|
|
107
|
+
|
|
99
108
|
# Check if new filepath already exists
|
|
100
109
|
if os.path.exists(new_file_path):
|
|
101
110
|
logger.warning(f"File already exists: {new_filename}")
|
|
102
|
-
|
|
111
|
+
|
|
103
112
|
# Add numeric suffix if file exists
|
|
104
113
|
base, ext = os.path.splitext(new_filename)
|
|
105
114
|
suffix = 2
|
|
@@ -109,7 +118,7 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
109
118
|
if not os.path.exists(new_file_path):
|
|
110
119
|
break
|
|
111
120
|
suffix += 1
|
|
112
|
-
|
|
121
|
+
|
|
113
122
|
try:
|
|
114
123
|
os.rename(original_file_path, new_file_path)
|
|
115
124
|
logger.info(f"Renamed {os.path.basename(original_file_path)} -> {new_filename}")
|
|
@@ -120,7 +129,8 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
120
129
|
except FileExistsError as e:
|
|
121
130
|
logger.error(f"Failed to rename file: {e}")
|
|
122
131
|
return None
|
|
123
|
-
|
|
132
|
+
|
|
133
|
+
|
|
124
134
|
def get_subtitles(show_id, seasons: set[int], config=None):
|
|
125
135
|
"""
|
|
126
136
|
Retrieves and saves subtitles for a given TV show and seasons.
|
|
@@ -164,19 +174,21 @@ def get_subtitles(show_id, seasons: set[int], config=None):
|
|
|
164
174
|
|
|
165
175
|
for episode in range(1, episodes + 1):
|
|
166
176
|
logger.info(f"Processing Season {season}, Episode {episode}...")
|
|
167
|
-
|
|
177
|
+
|
|
168
178
|
series_cache_dir = os.path.join(CACHE_DIR, "data", series_name)
|
|
169
179
|
os.makedirs(series_cache_dir, exist_ok=True)
|
|
170
|
-
|
|
180
|
+
|
|
171
181
|
# Check for existing subtitle in any supported format
|
|
172
182
|
existing_subtitle = find_existing_subtitle(
|
|
173
183
|
series_cache_dir, series_name, season, episode
|
|
174
184
|
)
|
|
175
|
-
|
|
185
|
+
|
|
176
186
|
if existing_subtitle:
|
|
177
|
-
logger.info(
|
|
187
|
+
logger.info(
|
|
188
|
+
f"Subtitle already exists: {os.path.basename(existing_subtitle)}"
|
|
189
|
+
)
|
|
178
190
|
continue
|
|
179
|
-
|
|
191
|
+
|
|
180
192
|
# Default to standard format for new downloads
|
|
181
193
|
srt_filepath = os.path.join(
|
|
182
194
|
series_cache_dir,
|
|
@@ -189,7 +201,7 @@ def get_subtitles(show_id, seasons: set[int], config=None):
|
|
|
189
201
|
response.raise_for_status()
|
|
190
202
|
episode_data = response.json()
|
|
191
203
|
episode_id = episode_data["id"]
|
|
192
|
-
|
|
204
|
+
|
|
193
205
|
# search for the subtitle
|
|
194
206
|
response = subtitles.search(tmdb_id=episode_id, languages="en")
|
|
195
207
|
if len(response.data) == 0:
|
|
@@ -210,33 +222,13 @@ def get_subtitles(show_id, seasons: set[int], config=None):
|
|
|
210
222
|
break
|
|
211
223
|
|
|
212
224
|
|
|
213
|
-
def cleanup_ocr_files(show_dir):
|
|
214
|
-
"""
|
|
215
|
-
Clean up OCR files generated during the episode matching process.
|
|
216
|
-
|
|
217
|
-
Args:
|
|
218
|
-
show_dir (str): The directory containing the show files.
|
|
219
|
-
|
|
220
|
-
Returns:
|
|
221
|
-
None
|
|
222
|
-
|
|
223
|
-
This function cleans up the OCR files generated during the episode matching process.
|
|
224
|
-
It deletes the 'ocr' directory and all its contents in each season directory of the show.
|
|
225
|
-
"""
|
|
226
|
-
for season_dir in os.listdir(show_dir):
|
|
227
|
-
season_dir_path = os.path.join(show_dir, season_dir)
|
|
228
|
-
ocr_dir_path = os.path.join(season_dir_path, "ocr")
|
|
229
|
-
if os.path.exists(ocr_dir_path):
|
|
230
|
-
logger.info(f"Cleaning up OCR files in {ocr_dir_path}")
|
|
231
|
-
shutil.rmtree(ocr_dir_path)
|
|
232
|
-
|
|
233
|
-
|
|
234
225
|
def clean_text(text):
|
|
235
226
|
# Remove brackets, parentheses, and their content
|
|
236
227
|
cleaned_text = re.sub(r"\[.*?\]|\(.*?\)|\{.*?\}", "", text)
|
|
237
228
|
# Strip leading/trailing whitespace
|
|
238
229
|
return cleaned_text.strip()
|
|
239
230
|
|
|
231
|
+
|
|
240
232
|
@logger.catch
|
|
241
233
|
def process_reference_srt_files(series_name):
|
|
242
234
|
"""
|
|
@@ -249,12 +241,13 @@ def process_reference_srt_files(series_name):
|
|
|
249
241
|
dict: A dictionary containing the reference files where the keys are the MKV filenames
|
|
250
242
|
and the values are the corresponding SRT texts.
|
|
251
243
|
"""
|
|
252
|
-
from mkv_episode_matcher.__main__ import CACHE_DIR
|
|
253
244
|
import os
|
|
254
|
-
|
|
245
|
+
|
|
246
|
+
from mkv_episode_matcher.__main__ import CACHE_DIR
|
|
247
|
+
|
|
255
248
|
reference_files = {}
|
|
256
249
|
reference_dir = os.path.join(CACHE_DIR, "data", series_name)
|
|
257
|
-
|
|
250
|
+
|
|
258
251
|
for dirpath, _, filenames in os.walk(reference_dir):
|
|
259
252
|
for filename in filenames:
|
|
260
253
|
if filename.lower().endswith(".srt"):
|
|
@@ -264,9 +257,10 @@ def process_reference_srt_files(series_name):
|
|
|
264
257
|
season, episode = extract_season_episode(filename)
|
|
265
258
|
mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
|
|
266
259
|
reference_files[mkv_filename] = srt_text
|
|
267
|
-
|
|
260
|
+
|
|
268
261
|
return reference_files
|
|
269
262
|
|
|
263
|
+
|
|
270
264
|
def extract_srt_text(filepath):
|
|
271
265
|
"""
|
|
272
266
|
Extracts text content from an SRT file.
|
|
@@ -280,49 +274,51 @@ def extract_srt_text(filepath):
|
|
|
280
274
|
# Read the file content
|
|
281
275
|
with open(filepath) as f:
|
|
282
276
|
content = f.read()
|
|
283
|
-
|
|
277
|
+
|
|
284
278
|
# Split into subtitle blocks
|
|
285
|
-
blocks = content.strip().split(
|
|
286
|
-
|
|
279
|
+
blocks = content.strip().split("\n\n")
|
|
280
|
+
|
|
287
281
|
text_lines = []
|
|
288
282
|
for block in blocks:
|
|
289
|
-
lines = block.split(
|
|
283
|
+
lines = block.split("\n")
|
|
290
284
|
if len(lines) < 3:
|
|
291
285
|
continue
|
|
292
|
-
|
|
286
|
+
|
|
293
287
|
# Skip index and timestamp, get all remaining lines as text
|
|
294
|
-
text =
|
|
288
|
+
text = " ".join(lines[2:])
|
|
295
289
|
# Remove stage directions and tags
|
|
296
|
-
text = re.sub(r
|
|
290
|
+
text = re.sub(r"\[.*?\]|\<.*?\>", "", text)
|
|
297
291
|
if text:
|
|
298
292
|
text_lines.append(text)
|
|
299
|
-
|
|
293
|
+
|
|
300
294
|
return text_lines
|
|
301
295
|
|
|
296
|
+
|
|
302
297
|
def extract_season_episode(filename):
|
|
303
298
|
"""
|
|
304
299
|
Extract season and episode numbers from filename with support for multiple formats.
|
|
305
|
-
|
|
300
|
+
|
|
306
301
|
Args:
|
|
307
302
|
filename (str): Filename to parse
|
|
308
|
-
|
|
303
|
+
|
|
309
304
|
Returns:
|
|
310
305
|
tuple: (season_number, episode_number)
|
|
311
306
|
"""
|
|
312
307
|
# List of patterns to try
|
|
313
308
|
patterns = [
|
|
314
|
-
r
|
|
315
|
-
r
|
|
316
|
-
r
|
|
309
|
+
r"S(\d+)E(\d+)", # S01E01
|
|
310
|
+
r"(\d+)x(\d+)", # 1x01 or 01x01
|
|
311
|
+
r"Season\s*(\d+).*?(\d+)", # Season 1 - 01
|
|
317
312
|
]
|
|
318
|
-
|
|
313
|
+
|
|
319
314
|
for pattern in patterns:
|
|
320
315
|
match = re.search(pattern, filename, re.IGNORECASE)
|
|
321
316
|
if match:
|
|
322
317
|
return int(match.group(1)), int(match.group(2))
|
|
323
|
-
|
|
318
|
+
|
|
324
319
|
return None, None
|
|
325
320
|
|
|
321
|
+
|
|
326
322
|
def process_srt_files(show_dir):
|
|
327
323
|
"""
|
|
328
324
|
Process all SRT files in the given directory and its subdirectories.
|
|
@@ -342,6 +338,8 @@ def process_srt_files(show_dir):
|
|
|
342
338
|
srt_text = extract_srt_text(srt_file)
|
|
343
339
|
srt_files[srt_file] = srt_text
|
|
344
340
|
return srt_files
|
|
341
|
+
|
|
342
|
+
|
|
345
343
|
def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
346
344
|
"""
|
|
347
345
|
Compare the srt files with the reference files and rename the matching mkv files.
|
|
@@ -372,6 +370,7 @@ def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
|
372
370
|
logger.info(f"Renaming {mkv_file} to {new_filename}")
|
|
373
371
|
rename_episode_file(mkv_file, new_filename)
|
|
374
372
|
|
|
373
|
+
|
|
375
374
|
def compare_text(text1, text2):
|
|
376
375
|
"""
|
|
377
376
|
Compare two lists of text lines and return the number of matching lines.
|
|
@@ -391,9 +390,27 @@ def compare_text(text1, text2):
|
|
|
391
390
|
matching_lines = set(flat_text1).intersection(flat_text2)
|
|
392
391
|
return len(matching_lines)
|
|
393
392
|
|
|
393
|
+
|
|
394
394
|
def check_gpu_support():
|
|
395
|
-
logger.info(
|
|
395
|
+
logger.info("Checking GPU support...")
|
|
396
|
+
console.print("[bold]Checking GPU support...[/bold]")
|
|
396
397
|
if torch.cuda.is_available():
|
|
397
398
|
logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
|
|
399
|
+
console.print(
|
|
400
|
+
Panel.fit(
|
|
401
|
+
f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}",
|
|
402
|
+
title="GPU Support",
|
|
403
|
+
border_style="magenta",
|
|
404
|
+
)
|
|
405
|
+
)
|
|
398
406
|
else:
|
|
399
|
-
logger.warning(
|
|
407
|
+
logger.warning(
|
|
408
|
+
"CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support."
|
|
409
|
+
)
|
|
410
|
+
console.print(
|
|
411
|
+
Panel.fit(
|
|
412
|
+
"CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.",
|
|
413
|
+
title="GPU Support",
|
|
414
|
+
border_style="red",
|
|
415
|
+
)
|
|
416
|
+
)
|