mkv-episode-matcher 0.4.5__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher/__init__.py +2 -2
- mkv_episode_matcher/__main__.py +14 -29
- mkv_episode_matcher/config.py +0 -3
- mkv_episode_matcher/episode_identification.py +222 -136
- mkv_episode_matcher/episode_matcher.py +19 -42
- mkv_episode_matcher/subtitle_utils.py +26 -25
- mkv_episode_matcher/utils.py +61 -54
- {mkv_episode_matcher-0.4.5.dist-info → mkv_episode_matcher-0.6.0.dist-info}/METADATA +7 -13
- mkv_episode_matcher-0.6.0.dist-info/RECORD +14 -0
- {mkv_episode_matcher-0.4.5.dist-info → mkv_episode_matcher-0.6.0.dist-info}/WHEEL +1 -1
- mkv_episode_matcher/libraries/pgs2srt/.gitignore +0 -2
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +0 -321
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +0 -16700
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +0 -260
- mkv_episode_matcher/libraries/pgs2srt/README.md +0 -26
- mkv_episode_matcher/libraries/pgs2srt/__init__.py +0 -0
- mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +0 -89
- mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +0 -150
- mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +0 -225
- mkv_episode_matcher/libraries/pgs2srt/requirements.txt +0 -4
- mkv_episode_matcher/mkv_to_srt.py +0 -302
- mkv_episode_matcher/speech_to_text.py +0 -96
- mkv_episode_matcher-0.4.5.dist-info/RECORD +0 -26
- {mkv_episode_matcher-0.4.5.dist-info → mkv_episode_matcher-0.6.0.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.4.5.dist-info → mkv_episode_matcher-0.6.0.dist-info}/top_level.txt +0 -0
|
@@ -1,34 +1,33 @@
|
|
|
1
1
|
# mkv_episode_matcher/episode_matcher.py
|
|
2
2
|
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
import shutil
|
|
5
3
|
import glob
|
|
6
4
|
import os
|
|
7
|
-
from loguru import logger
|
|
8
5
|
import re
|
|
9
|
-
|
|
6
|
+
import shutil
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from loguru import logger
|
|
10
|
+
|
|
11
|
+
from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
|
|
10
12
|
from mkv_episode_matcher.config import get_config
|
|
11
|
-
from mkv_episode_matcher.
|
|
13
|
+
from mkv_episode_matcher.episode_identification import EpisodeMatcher
|
|
12
14
|
from mkv_episode_matcher.tmdb_client import fetch_show_id
|
|
13
15
|
from mkv_episode_matcher.utils import (
|
|
14
16
|
check_filename,
|
|
15
17
|
clean_text,
|
|
16
|
-
cleanup_ocr_files,
|
|
17
18
|
get_subtitles,
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
compare_and_rename_files,get_valid_seasons,rename_episode_file
|
|
19
|
+
get_valid_seasons,
|
|
20
|
+
rename_episode_file,
|
|
21
21
|
)
|
|
22
|
-
|
|
23
|
-
from mkv_episode_matcher.episode_identification import EpisodeMatcher
|
|
22
|
+
|
|
24
23
|
|
|
25
24
|
def process_show(season=None, dry_run=False, get_subs=False):
|
|
26
|
-
"""Process the show using streaming speech recognition
|
|
25
|
+
"""Process the show using streaming speech recognition."""
|
|
27
26
|
config = get_config(CONFIG_FILE)
|
|
28
27
|
show_dir = config.get("show_dir")
|
|
29
28
|
show_name = clean_text(os.path.basename(show_dir))
|
|
30
29
|
matcher = EpisodeMatcher(CACHE_DIR, show_name)
|
|
31
|
-
|
|
30
|
+
|
|
32
31
|
# Early check for reference files
|
|
33
32
|
reference_dir = Path(CACHE_DIR) / "data" / show_name
|
|
34
33
|
reference_files = list(reference_dir.glob("*.srt"))
|
|
@@ -36,10 +35,10 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
36
35
|
logger.error(f"No reference subtitle files found in {reference_dir}")
|
|
37
36
|
logger.info("Please download reference subtitles first")
|
|
38
37
|
return
|
|
39
|
-
|
|
38
|
+
|
|
40
39
|
season_paths = get_valid_seasons(show_dir)
|
|
41
40
|
if not season_paths:
|
|
42
|
-
logger.warning(
|
|
41
|
+
logger.warning("No seasons with .mkv files found")
|
|
43
42
|
return
|
|
44
43
|
|
|
45
44
|
if season is not None:
|
|
@@ -52,57 +51,35 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
52
51
|
for season_path in season_paths:
|
|
53
52
|
mkv_files = [f for f in glob.glob(os.path.join(season_path, "*.mkv"))
|
|
54
53
|
if not check_filename(f)]
|
|
55
|
-
|
|
54
|
+
|
|
56
55
|
if not mkv_files:
|
|
57
56
|
logger.info(f"No new files to process in {season_path}")
|
|
58
57
|
continue
|
|
59
58
|
|
|
60
59
|
season_num = int(re.search(r'Season (\d+)', season_path).group(1))
|
|
61
60
|
temp_dir = Path(season_path) / "temp"
|
|
62
|
-
ocr_dir = Path(season_path) / "ocr"
|
|
63
61
|
temp_dir.mkdir(exist_ok=True)
|
|
64
|
-
ocr_dir.mkdir(exist_ok=True)
|
|
65
62
|
|
|
66
63
|
try:
|
|
67
64
|
if get_subs:
|
|
68
65
|
show_id = fetch_show_id(matcher.show_name)
|
|
69
66
|
if show_id:
|
|
70
67
|
get_subtitles(show_id, seasons={season_num}, config=config)
|
|
71
|
-
|
|
72
|
-
unmatched_files = []
|
|
68
|
+
|
|
73
69
|
for mkv_file in mkv_files:
|
|
74
70
|
logger.info(f"Attempting speech recognition match for {mkv_file}")
|
|
75
71
|
match = matcher.identify_episode(mkv_file, temp_dir, season_num)
|
|
76
|
-
|
|
72
|
+
|
|
77
73
|
if match:
|
|
78
74
|
new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
|
|
79
|
-
new_path = os.path.join(season_path, new_name)
|
|
80
|
-
|
|
81
75
|
logger.info(f"Speech matched {os.path.basename(mkv_file)} to {new_name} "
|
|
82
76
|
f"(confidence: {match['confidence']:.2f})")
|
|
83
|
-
|
|
77
|
+
|
|
84
78
|
if not dry_run:
|
|
85
79
|
logger.info(f"Renaming {mkv_file} to {new_name}")
|
|
86
80
|
rename_episode_file(mkv_file, new_name)
|
|
87
81
|
else:
|
|
88
|
-
logger.info(f"Speech recognition match failed for {mkv_file}
|
|
89
|
-
unmatched_files.append(mkv_file)
|
|
90
|
-
|
|
91
|
-
# OCR fallback for unmatched files
|
|
92
|
-
if unmatched_files:
|
|
93
|
-
logger.info(f"Attempting OCR matching for {len(unmatched_files)} unmatched files")
|
|
94
|
-
convert_mkv_to_srt(season_path, unmatched_files)
|
|
95
|
-
|
|
96
|
-
reference_text_dict = process_reference_srt_files(matcher.show_name)
|
|
97
|
-
srt_text_dict = process_srt_files(str(ocr_dir))
|
|
98
|
-
|
|
99
|
-
compare_and_rename_files(
|
|
100
|
-
srt_text_dict,
|
|
101
|
-
reference_text_dict,
|
|
102
|
-
dry_run=dry_run,
|
|
103
|
-
)
|
|
104
|
-
|
|
82
|
+
logger.info(f"Speech recognition match failed for {mkv_file}")
|
|
105
83
|
finally:
|
|
106
84
|
if not dry_run:
|
|
107
85
|
shutil.rmtree(temp_dir)
|
|
108
|
-
cleanup_ocr_files(show_dir)
|
|
@@ -1,82 +1,83 @@
|
|
|
1
|
-
from typing import List, Optional, Union
|
|
2
1
|
import os
|
|
3
2
|
import re
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
def generate_subtitle_patterns(
|
|
7
|
+
series_name: str, season: int, episode: int
|
|
8
|
+
) -> list[str]:
|
|
6
9
|
"""
|
|
7
10
|
Generate various common subtitle filename patterns.
|
|
8
|
-
|
|
11
|
+
|
|
9
12
|
Args:
|
|
10
13
|
series_name (str): Name of the series
|
|
11
14
|
season (int): Season number
|
|
12
15
|
episode (int): Episode number
|
|
13
|
-
|
|
16
|
+
|
|
14
17
|
Returns:
|
|
15
18
|
List[str]: List of possible subtitle filenames
|
|
16
19
|
"""
|
|
17
20
|
patterns = [
|
|
18
21
|
# Standard format: "Show Name - S01E02.srt"
|
|
19
22
|
f"{series_name} - S{season:02d}E{episode:02d}.srt",
|
|
20
|
-
|
|
21
23
|
# Season x Episode format: "Show Name - 1x02.srt"
|
|
22
24
|
f"{series_name} - {season}x{episode:02d}.srt",
|
|
23
|
-
|
|
24
25
|
# Separate season/episode: "Show Name - Season 1 Episode 02.srt"
|
|
25
26
|
f"{series_name} - Season {season} Episode {episode:02d}.srt",
|
|
26
|
-
|
|
27
27
|
# Compact format: "ShowName.S01E02.srt"
|
|
28
28
|
f"{series_name.replace(' ', '')}.S{season:02d}E{episode:02d}.srt",
|
|
29
|
-
|
|
30
29
|
# Numbered format: "Show Name 102.srt"
|
|
31
30
|
f"{series_name} {season:01d}{episode:02d}.srt",
|
|
32
|
-
|
|
33
31
|
# Dot format: "Show.Name.1x02.srt"
|
|
34
32
|
f"{series_name.replace(' ', '.')}.{season}x{episode:02d}.srt",
|
|
35
|
-
|
|
36
33
|
# Underscore format: "Show_Name_S01E02.srt"
|
|
37
34
|
f"{series_name.replace(' ', '_')}_S{season:02d}E{episode:02d}.srt",
|
|
38
35
|
]
|
|
39
|
-
|
|
36
|
+
|
|
40
37
|
return patterns
|
|
41
38
|
|
|
42
|
-
|
|
39
|
+
|
|
40
|
+
def find_existing_subtitle(
|
|
41
|
+
series_cache_dir: str, series_name: str, season: int, episode: int
|
|
42
|
+
) -> Optional[str]:
|
|
43
43
|
"""
|
|
44
44
|
Check for existing subtitle files in various naming formats.
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
Args:
|
|
47
47
|
series_cache_dir (str): Directory containing subtitle files
|
|
48
48
|
series_name (str): Name of the series
|
|
49
49
|
season (int): Season number
|
|
50
50
|
episode (int): Episode number
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
Returns:
|
|
53
53
|
Optional[str]: Path to existing subtitle file if found, None otherwise
|
|
54
54
|
"""
|
|
55
55
|
patterns = generate_subtitle_patterns(series_name, season, episode)
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
for pattern in patterns:
|
|
58
58
|
filepath = os.path.join(series_cache_dir, pattern)
|
|
59
59
|
if os.path.exists(filepath):
|
|
60
60
|
return filepath
|
|
61
|
-
|
|
61
|
+
|
|
62
62
|
return None
|
|
63
63
|
|
|
64
|
+
|
|
64
65
|
def sanitize_filename(filename: str) -> str:
|
|
65
66
|
"""
|
|
66
67
|
Sanitize filename by removing/replacing invalid characters.
|
|
67
|
-
|
|
68
|
+
|
|
68
69
|
Args:
|
|
69
70
|
filename (str): Original filename
|
|
70
|
-
|
|
71
|
+
|
|
71
72
|
Returns:
|
|
72
73
|
str: Sanitized filename
|
|
73
74
|
"""
|
|
74
75
|
# Replace problematic characters
|
|
75
|
-
filename = filename.replace(
|
|
76
|
-
filename = filename.replace(
|
|
77
|
-
filename = filename.replace(
|
|
78
|
-
|
|
76
|
+
filename = filename.replace(":", " -")
|
|
77
|
+
filename = filename.replace("/", "-")
|
|
78
|
+
filename = filename.replace("\\", "-")
|
|
79
|
+
|
|
79
80
|
# Remove any other invalid characters
|
|
80
|
-
filename = re.sub(r'[<>:"/\\|?*]',
|
|
81
|
-
|
|
82
|
-
return filename.strip()
|
|
81
|
+
filename = re.sub(r'[<>:"/\\|?*]', "", filename)
|
|
82
|
+
|
|
83
|
+
return filename.strip()
|
mkv_episode_matcher/utils.py
CHANGED
|
@@ -4,13 +4,16 @@ import re
|
|
|
4
4
|
import shutil
|
|
5
5
|
|
|
6
6
|
import requests
|
|
7
|
+
import torch
|
|
7
8
|
from loguru import logger
|
|
8
9
|
from opensubtitlescom import OpenSubtitles
|
|
9
10
|
|
|
10
11
|
from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
|
|
11
12
|
from mkv_episode_matcher.config import get_config
|
|
13
|
+
from mkv_episode_matcher.subtitle_utils import find_existing_subtitle, sanitize_filename
|
|
12
14
|
from mkv_episode_matcher.tmdb_client import fetch_season_details
|
|
13
|
-
|
|
15
|
+
|
|
16
|
+
|
|
14
17
|
def get_valid_seasons(show_dir):
|
|
15
18
|
"""
|
|
16
19
|
Get all season directories that contain MKV files.
|
|
@@ -36,13 +39,17 @@ def get_valid_seasons(show_dir):
|
|
|
36
39
|
valid_season_paths.append(season_path)
|
|
37
40
|
|
|
38
41
|
if not valid_season_paths:
|
|
39
|
-
logger.warning(
|
|
42
|
+
logger.warning(
|
|
43
|
+
f"No seasons with .mkv files found in show '{os.path.basename(show_dir)}'"
|
|
44
|
+
)
|
|
40
45
|
else:
|
|
41
46
|
logger.info(
|
|
42
47
|
f"Found {len(valid_season_paths)} seasons with .mkv files in '{os.path.basename(show_dir)}'"
|
|
43
48
|
)
|
|
44
49
|
|
|
45
50
|
return valid_season_paths
|
|
51
|
+
|
|
52
|
+
|
|
46
53
|
def check_filename(filename):
|
|
47
54
|
"""
|
|
48
55
|
Check if the filename is in the correct format (S01E02).
|
|
@@ -54,7 +61,7 @@ def check_filename(filename):
|
|
|
54
61
|
bool: True if the filename matches the expected pattern.
|
|
55
62
|
"""
|
|
56
63
|
# Check if the filename matches the expected format
|
|
57
|
-
match = re.search(r
|
|
64
|
+
match = re.search(r".*S\d+E\d+", filename)
|
|
58
65
|
return bool(match)
|
|
59
66
|
|
|
60
67
|
|
|
@@ -95,11 +102,11 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
95
102
|
"""
|
|
96
103
|
original_dir = os.path.dirname(original_file_path)
|
|
97
104
|
new_file_path = os.path.join(original_dir, new_filename)
|
|
98
|
-
|
|
105
|
+
|
|
99
106
|
# Check if new filepath already exists
|
|
100
107
|
if os.path.exists(new_file_path):
|
|
101
108
|
logger.warning(f"File already exists: {new_filename}")
|
|
102
|
-
|
|
109
|
+
|
|
103
110
|
# Add numeric suffix if file exists
|
|
104
111
|
base, ext = os.path.splitext(new_filename)
|
|
105
112
|
suffix = 2
|
|
@@ -109,7 +116,7 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
109
116
|
if not os.path.exists(new_file_path):
|
|
110
117
|
break
|
|
111
118
|
suffix += 1
|
|
112
|
-
|
|
119
|
+
|
|
113
120
|
try:
|
|
114
121
|
os.rename(original_file_path, new_file_path)
|
|
115
122
|
logger.info(f"Renamed {os.path.basename(original_file_path)} -> {new_filename}")
|
|
@@ -120,7 +127,8 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
120
127
|
except FileExistsError as e:
|
|
121
128
|
logger.error(f"Failed to rename file: {e}")
|
|
122
129
|
return None
|
|
123
|
-
|
|
130
|
+
|
|
131
|
+
|
|
124
132
|
def get_subtitles(show_id, seasons: set[int], config=None):
|
|
125
133
|
"""
|
|
126
134
|
Retrieves and saves subtitles for a given TV show and seasons.
|
|
@@ -164,19 +172,21 @@ def get_subtitles(show_id, seasons: set[int], config=None):
|
|
|
164
172
|
|
|
165
173
|
for episode in range(1, episodes + 1):
|
|
166
174
|
logger.info(f"Processing Season {season}, Episode {episode}...")
|
|
167
|
-
|
|
175
|
+
|
|
168
176
|
series_cache_dir = os.path.join(CACHE_DIR, "data", series_name)
|
|
169
177
|
os.makedirs(series_cache_dir, exist_ok=True)
|
|
170
|
-
|
|
178
|
+
|
|
171
179
|
# Check for existing subtitle in any supported format
|
|
172
180
|
existing_subtitle = find_existing_subtitle(
|
|
173
181
|
series_cache_dir, series_name, season, episode
|
|
174
182
|
)
|
|
175
|
-
|
|
183
|
+
|
|
176
184
|
if existing_subtitle:
|
|
177
|
-
logger.info(
|
|
185
|
+
logger.info(
|
|
186
|
+
f"Subtitle already exists: {os.path.basename(existing_subtitle)}"
|
|
187
|
+
)
|
|
178
188
|
continue
|
|
179
|
-
|
|
189
|
+
|
|
180
190
|
# Default to standard format for new downloads
|
|
181
191
|
srt_filepath = os.path.join(
|
|
182
192
|
series_cache_dir,
|
|
@@ -189,7 +199,7 @@ def get_subtitles(show_id, seasons: set[int], config=None):
|
|
|
189
199
|
response.raise_for_status()
|
|
190
200
|
episode_data = response.json()
|
|
191
201
|
episode_id = episode_data["id"]
|
|
192
|
-
|
|
202
|
+
|
|
193
203
|
# search for the subtitle
|
|
194
204
|
response = subtitles.search(tmdb_id=episode_id, languages="en")
|
|
195
205
|
if len(response.data) == 0:
|
|
@@ -210,33 +220,13 @@ def get_subtitles(show_id, seasons: set[int], config=None):
|
|
|
210
220
|
break
|
|
211
221
|
|
|
212
222
|
|
|
213
|
-
def cleanup_ocr_files(show_dir):
|
|
214
|
-
"""
|
|
215
|
-
Clean up OCR files generated during the episode matching process.
|
|
216
|
-
|
|
217
|
-
Args:
|
|
218
|
-
show_dir (str): The directory containing the show files.
|
|
219
|
-
|
|
220
|
-
Returns:
|
|
221
|
-
None
|
|
222
|
-
|
|
223
|
-
This function cleans up the OCR files generated during the episode matching process.
|
|
224
|
-
It deletes the 'ocr' directory and all its contents in each season directory of the show.
|
|
225
|
-
"""
|
|
226
|
-
for season_dir in os.listdir(show_dir):
|
|
227
|
-
season_dir_path = os.path.join(show_dir, season_dir)
|
|
228
|
-
ocr_dir_path = os.path.join(season_dir_path, "ocr")
|
|
229
|
-
if os.path.exists(ocr_dir_path):
|
|
230
|
-
logger.info(f"Cleaning up OCR files in {ocr_dir_path}")
|
|
231
|
-
shutil.rmtree(ocr_dir_path)
|
|
232
|
-
|
|
233
|
-
|
|
234
223
|
def clean_text(text):
|
|
235
224
|
# Remove brackets, parentheses, and their content
|
|
236
225
|
cleaned_text = re.sub(r"\[.*?\]|\(.*?\)|\{.*?\}", "", text)
|
|
237
226
|
# Strip leading/trailing whitespace
|
|
238
227
|
return cleaned_text.strip()
|
|
239
228
|
|
|
229
|
+
|
|
240
230
|
@logger.catch
|
|
241
231
|
def process_reference_srt_files(series_name):
|
|
242
232
|
"""
|
|
@@ -249,12 +239,13 @@ def process_reference_srt_files(series_name):
|
|
|
249
239
|
dict: A dictionary containing the reference files where the keys are the MKV filenames
|
|
250
240
|
and the values are the corresponding SRT texts.
|
|
251
241
|
"""
|
|
252
|
-
from mkv_episode_matcher.__main__ import CACHE_DIR
|
|
253
242
|
import os
|
|
254
|
-
|
|
243
|
+
|
|
244
|
+
from mkv_episode_matcher.__main__ import CACHE_DIR
|
|
245
|
+
|
|
255
246
|
reference_files = {}
|
|
256
247
|
reference_dir = os.path.join(CACHE_DIR, "data", series_name)
|
|
257
|
-
|
|
248
|
+
|
|
258
249
|
for dirpath, _, filenames in os.walk(reference_dir):
|
|
259
250
|
for filename in filenames:
|
|
260
251
|
if filename.lower().endswith(".srt"):
|
|
@@ -264,9 +255,10 @@ def process_reference_srt_files(series_name):
|
|
|
264
255
|
season, episode = extract_season_episode(filename)
|
|
265
256
|
mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
|
|
266
257
|
reference_files[mkv_filename] = srt_text
|
|
267
|
-
|
|
258
|
+
|
|
268
259
|
return reference_files
|
|
269
260
|
|
|
261
|
+
|
|
270
262
|
def extract_srt_text(filepath):
|
|
271
263
|
"""
|
|
272
264
|
Extracts text content from an SRT file.
|
|
@@ -280,49 +272,51 @@ def extract_srt_text(filepath):
|
|
|
280
272
|
# Read the file content
|
|
281
273
|
with open(filepath) as f:
|
|
282
274
|
content = f.read()
|
|
283
|
-
|
|
275
|
+
|
|
284
276
|
# Split into subtitle blocks
|
|
285
|
-
blocks = content.strip().split(
|
|
286
|
-
|
|
277
|
+
blocks = content.strip().split("\n\n")
|
|
278
|
+
|
|
287
279
|
text_lines = []
|
|
288
280
|
for block in blocks:
|
|
289
|
-
lines = block.split(
|
|
281
|
+
lines = block.split("\n")
|
|
290
282
|
if len(lines) < 3:
|
|
291
283
|
continue
|
|
292
|
-
|
|
284
|
+
|
|
293
285
|
# Skip index and timestamp, get all remaining lines as text
|
|
294
|
-
text =
|
|
286
|
+
text = " ".join(lines[2:])
|
|
295
287
|
# Remove stage directions and tags
|
|
296
|
-
text = re.sub(r
|
|
288
|
+
text = re.sub(r"\[.*?\]|\<.*?\>", "", text)
|
|
297
289
|
if text:
|
|
298
290
|
text_lines.append(text)
|
|
299
|
-
|
|
291
|
+
|
|
300
292
|
return text_lines
|
|
301
293
|
|
|
294
|
+
|
|
302
295
|
def extract_season_episode(filename):
|
|
303
296
|
"""
|
|
304
297
|
Extract season and episode numbers from filename with support for multiple formats.
|
|
305
|
-
|
|
298
|
+
|
|
306
299
|
Args:
|
|
307
300
|
filename (str): Filename to parse
|
|
308
|
-
|
|
301
|
+
|
|
309
302
|
Returns:
|
|
310
303
|
tuple: (season_number, episode_number)
|
|
311
304
|
"""
|
|
312
305
|
# List of patterns to try
|
|
313
306
|
patterns = [
|
|
314
|
-
r
|
|
315
|
-
r
|
|
316
|
-
r
|
|
307
|
+
r"S(\d+)E(\d+)", # S01E01
|
|
308
|
+
r"(\d+)x(\d+)", # 1x01 or 01x01
|
|
309
|
+
r"Season\s*(\d+).*?(\d+)", # Season 1 - 01
|
|
317
310
|
]
|
|
318
|
-
|
|
311
|
+
|
|
319
312
|
for pattern in patterns:
|
|
320
313
|
match = re.search(pattern, filename, re.IGNORECASE)
|
|
321
314
|
if match:
|
|
322
315
|
return int(match.group(1)), int(match.group(2))
|
|
323
|
-
|
|
316
|
+
|
|
324
317
|
return None, None
|
|
325
318
|
|
|
319
|
+
|
|
326
320
|
def process_srt_files(show_dir):
|
|
327
321
|
"""
|
|
328
322
|
Process all SRT files in the given directory and its subdirectories.
|
|
@@ -342,6 +336,8 @@ def process_srt_files(show_dir):
|
|
|
342
336
|
srt_text = extract_srt_text(srt_file)
|
|
343
337
|
srt_files[srt_file] = srt_text
|
|
344
338
|
return srt_files
|
|
339
|
+
|
|
340
|
+
|
|
345
341
|
def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
346
342
|
"""
|
|
347
343
|
Compare the srt files with the reference files and rename the matching mkv files.
|
|
@@ -372,6 +368,7 @@ def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
|
372
368
|
logger.info(f"Renaming {mkv_file} to {new_filename}")
|
|
373
369
|
rename_episode_file(mkv_file, new_filename)
|
|
374
370
|
|
|
371
|
+
|
|
375
372
|
def compare_text(text1, text2):
|
|
376
373
|
"""
|
|
377
374
|
Compare two lists of text lines and return the number of matching lines.
|
|
@@ -389,4 +386,14 @@ def compare_text(text1, text2):
|
|
|
389
386
|
|
|
390
387
|
# Compare the two lists of text lines
|
|
391
388
|
matching_lines = set(flat_text1).intersection(flat_text2)
|
|
392
|
-
return len(matching_lines)
|
|
389
|
+
return len(matching_lines)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def check_gpu_support():
|
|
393
|
+
logger.info("Checking GPU support...")
|
|
394
|
+
if torch.cuda.is_available():
|
|
395
|
+
logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
|
|
396
|
+
else:
|
|
397
|
+
logger.warning(
|
|
398
|
+
"CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support."
|
|
399
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: mkv-episode-matcher
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
|
|
5
5
|
Home-page: https://github.com/Jsakkos/mkv-episode-matcher
|
|
6
6
|
Author: Jonathan Sakkos
|
|
@@ -21,7 +21,6 @@ Requires-Dist: ffmpeg>=1.4
|
|
|
21
21
|
Requires-Dist: loguru>=0.7.2
|
|
22
22
|
Requires-Dist: openai-whisper>=20240930
|
|
23
23
|
Requires-Dist: opensubtitlescom>=0.1.5
|
|
24
|
-
Requires-Dist: pytesseract>=0.3.13
|
|
25
24
|
Requires-Dist: rapidfuzz>=3.10.1
|
|
26
25
|
Requires-Dist: requests>=2.32.3
|
|
27
26
|
Requires-Dist: tmdb-client>=0.0.1
|
|
@@ -29,14 +28,6 @@ Requires-Dist: torch>=2.5.1
|
|
|
29
28
|
Requires-Dist: torchaudio>=2.5.1
|
|
30
29
|
Requires-Dist: torchvision>=0.20.1
|
|
31
30
|
Requires-Dist: wave>=0.0.2
|
|
32
|
-
Provides-Extra: cpu
|
|
33
|
-
Requires-Dist: torch>=2.5.1; extra == "cpu"
|
|
34
|
-
Requires-Dist: torchvision>=0.20.1; extra == "cpu"
|
|
35
|
-
Requires-Dist: torchaudio>=2.5.1; extra == "cpu"
|
|
36
|
-
Provides-Extra: cu124
|
|
37
|
-
Requires-Dist: torch>=2.5.1; extra == "cu124"
|
|
38
|
-
Requires-Dist: torchvision>=0.20.1; extra == "cu124"
|
|
39
|
-
Requires-Dist: torchaudio>=2.5.1; extra == "cu124"
|
|
40
31
|
|
|
41
32
|
# MKV Episode Matcher
|
|
42
33
|
|
|
@@ -48,7 +39,7 @@ Requires-Dist: torchaudio>=2.5.1; extra == "cu124"
|
|
|
48
39
|
[GitHub last commit](https://github.com/Jsakkos/mkv-episode-matcher/commits/main)
|
|
49
40
|
[GitHub issues](https://github.com/Jsakkos/mkv-episode-matcher/issues)
|
|
50
41
|
[Tests](https://github.com/Jsakkos/mkv-episode-matcher/actions/workflows/tests.yml)
|
|
51
|
-
[codecov](https://codecov.io/gh/Jsakkos/mkv-episode-matcher)
|
|
42
|
+
[codecov](https://codecov.io/gh/Jsakkos/mkv-episode-matcher/)
|
|
52
43
|
|
|
53
44
|
Automatically match and rename your MKV TV episodes using The Movie Database (TMDb).
|
|
54
45
|
|
|
@@ -56,7 +47,7 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
|
|
|
56
47
|
|
|
57
48
|
- 🎯 **Automatic Episode Matching**: Uses TMDb to accurately identify episodes
|
|
58
49
|
- 📝 **Subtitle Extraction**: Extracts subtitles from MKV files
|
|
59
|
-
-
|
|
50
|
+
- 🔊 **Speech Recognition**: Uses Whisper for accurate episode identification
|
|
60
51
|
- 🚀 **Multi-threaded**: Fast processing of multiple files
|
|
61
52
|
- ⬇️ **Subtitle Downloads**: Integration with OpenSubtitles
|
|
62
53
|
- ✨ **Bulk Processing**: Handle entire seasons at once
|
|
@@ -66,7 +57,6 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
|
|
|
66
57
|
|
|
67
58
|
- Python 3.9 or higher
|
|
68
59
|
- [FFmpeg](https://ffmpeg.org/download.html) installed and available in system PATH
|
|
69
|
-
- [Tesseract OCR](https://github.com/UB-Mannheim/tesseract/wiki) installed (required for image-based subtitle processing)
|
|
70
60
|
- TMDb API key (optional, for subtitle downloads)
|
|
71
61
|
- OpenSubtitles account (optional, for subtitle downloads)
|
|
72
62
|
|
|
@@ -135,3 +125,7 @@ Distributed under the MIT License. See `LICENSE` for more information.
|
|
|
135
125
|
## Documentation
|
|
136
126
|
|
|
137
127
|
Full documentation is available at [https://jsakkos.github.io/mkv-episode-matcher/](https://jsakkos.github.io/mkv-episode-matcher/)
|
|
128
|
+
|
|
129
|
+
## Changelog
|
|
130
|
+
|
|
131
|
+
See [CHANGELOG.md](CHANGELOG.md) for a detailed list of changes.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
|
|
2
|
+
mkv_episode_matcher/__init__.py,sha256=u3yZcpuK0ICeUjxYKePvW-zS61E5ss5q2AvqnSHuz9E,240
|
|
3
|
+
mkv_episode_matcher/__main__.py,sha256=-iRYoAfut3eDfV29UvobJvCKmYTpsOn8qM49QBFnMUM,5735
|
|
4
|
+
mkv_episode_matcher/config.py,sha256=EcJJjkekQ7oWtarUkufCYON_QWbQvq55-zMqCTOqSa4,2265
|
|
5
|
+
mkv_episode_matcher/episode_identification.py,sha256=rWhUzeNE5_uqsLcRuw_B6g7k3ud9Oa1oKgvXrBA-Jsc,12457
|
|
6
|
+
mkv_episode_matcher/episode_matcher.py,sha256=Yqos1hImF_QIZ8cV0IlemUxhpHwvwBn-mg89N9NDq9U,3126
|
|
7
|
+
mkv_episode_matcher/subtitle_utils.py,sha256=Hz9b4CKPV07YKTY4dcN3WbvdbvH-S3J4zcb9CiyvPlE,2551
|
|
8
|
+
mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
|
|
9
|
+
mkv_episode_matcher/utils.py,sha256=1-RwYn1w_YQFp4KxTmYbCSQEieK-mnToVIS34EVAZLw,13837
|
|
10
|
+
mkv_episode_matcher-0.6.0.dist-info/METADATA,sha256=LBtoWNzGS5Exd0H5q6fP5MdBSsMPOieYMOQ5uQoBZ64,5193
|
|
11
|
+
mkv_episode_matcher-0.6.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
12
|
+
mkv_episode_matcher-0.6.0.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
|
|
13
|
+
mkv_episode_matcher-0.6.0.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
|
|
14
|
+
mkv_episode_matcher-0.6.0.dist-info/RECORD,,
|