mkv-episode-matcher 0.3.3__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mkv_episode_matcher/__init__.py +8 -0
- mkv_episode_matcher/__main__.py +2 -177
- mkv_episode_matcher/asr_models.py +506 -0
- mkv_episode_matcher/cli.py +558 -0
- mkv_episode_matcher/core/config_manager.py +100 -0
- mkv_episode_matcher/core/engine.py +577 -0
- mkv_episode_matcher/core/matcher.py +214 -0
- mkv_episode_matcher/core/models.py +91 -0
- mkv_episode_matcher/core/providers/asr.py +85 -0
- mkv_episode_matcher/core/providers/subtitles.py +341 -0
- mkv_episode_matcher/core/utils.py +148 -0
- mkv_episode_matcher/episode_identification.py +550 -118
- mkv_episode_matcher/subtitle_utils.py +82 -0
- mkv_episode_matcher/tmdb_client.py +56 -14
- mkv_episode_matcher/ui/flet_app.py +708 -0
- mkv_episode_matcher/utils.py +262 -139
- mkv_episode_matcher-1.0.0.dist-info/METADATA +242 -0
- mkv_episode_matcher-1.0.0.dist-info/RECORD +23 -0
- {mkv_episode_matcher-0.3.3.dist-info → mkv_episode_matcher-1.0.0.dist-info}/WHEEL +1 -1
- mkv_episode_matcher-1.0.0.dist-info/licenses/LICENSE +21 -0
- mkv_episode_matcher/config.py +0 -82
- mkv_episode_matcher/episode_matcher.py +0 -100
- mkv_episode_matcher/libraries/pgs2srt/.gitignore +0 -2
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +0 -321
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +0 -16700
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +0 -260
- mkv_episode_matcher/libraries/pgs2srt/README.md +0 -26
- mkv_episode_matcher/libraries/pgs2srt/__init__.py +0 -0
- mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +0 -89
- mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +0 -150
- mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +0 -225
- mkv_episode_matcher/libraries/pgs2srt/requirements.txt +0 -4
- mkv_episode_matcher/mkv_to_srt.py +0 -302
- mkv_episode_matcher/speech_to_text.py +0 -90
- mkv_episode_matcher-0.3.3.dist-info/METADATA +0 -125
- mkv_episode_matcher-0.3.3.dist-info/RECORD +0 -25
- {mkv_episode_matcher-0.3.3.dist-info → mkv_episode_matcher-1.0.0.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.3.3.dist-info → mkv_episode_matcher-1.0.0.dist-info}/top_level.txt +0 -0
mkv_episode_matcher/utils.py
CHANGED
|
@@ -2,15 +2,55 @@
|
|
|
2
2
|
import os
|
|
3
3
|
import re
|
|
4
4
|
import shutil
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
|
|
6
7
|
import requests
|
|
8
|
+
import torch
|
|
7
9
|
from loguru import logger
|
|
8
10
|
from opensubtitlescom import OpenSubtitles
|
|
11
|
+
from opensubtitlescom.exceptions import OpenSubtitlesException
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.panel import Panel
|
|
9
14
|
|
|
10
|
-
from mkv_episode_matcher.
|
|
11
|
-
from mkv_episode_matcher.
|
|
15
|
+
from mkv_episode_matcher.core.config_manager import get_config_manager
|
|
16
|
+
from mkv_episode_matcher.subtitle_utils import find_existing_subtitle, sanitize_filename
|
|
12
17
|
from mkv_episode_matcher.tmdb_client import fetch_season_details
|
|
13
18
|
|
|
19
|
+
console = Console()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def normalize_path(path_str):
    """
    Normalize a path string to handle cross-platform path issues.

    Surrounding whitespace and quote characters are removed, any run of
    trailing slashes/backslashes is dropped, and the result is wrapped in a
    ``pathlib.Path``. A path that consists only of separators (a filesystem
    root) is kept as a root instead of collapsing to the current directory.

    Args:
        path_str (str or os.PathLike): The path to normalize

    Returns:
        pathlib.Path: A normalized Path object
    """
    # os.fspath passes str through and unwraps Path / any other os.PathLike.
    raw = os.fspath(path_str)

    # Quotes are stripped from each end independently (not only as a matched
    # pair): a Windows shell turns  "C:\Shows\"  into  C:\Shows"  with a lone
    # trailing quote, and that stray quote must go too.
    raw = raw.strip().strip('"').strip("'")

    # Drop every trailing separator, in any mix of "/" and "\".
    trimmed = raw.rstrip("/\\")

    if not trimmed:
        # Empty input, or nothing but separators. Keep one leading separator so
        # "/" stays the root; an empty string still yields Path(".").
        return Path(raw[:1])

    if os.name == "nt":
        # A bare drive letter ("C:") is drive-relative on Windows. If the
        # caller wrote "C:\" they meant the drive root, so restore it.
        if len(trimmed) == 2 and trimmed[1] == ":" and len(raw) > 2:
            return Path(trimmed + "\\")
        return Path(trimmed)

    # Non-Windows host: a drive-letter path with backslashes cannot be resolved
    # here, so only its last component (the directory/file name) is kept.
    if "\\" in trimmed and ":" in trimmed[:2]:
        return Path(trimmed.rsplit("\\", 1)[-1])

    return Path(trimmed)
|
|
52
|
+
|
|
53
|
+
|
|
14
54
|
def get_valid_seasons(show_dir):
|
|
15
55
|
"""
|
|
16
56
|
Get all season directories that contain MKV files.
|
|
@@ -22,39 +62,44 @@ def get_valid_seasons(show_dir):
|
|
|
22
62
|
list: List of paths to valid season directories
|
|
23
63
|
"""
|
|
24
64
|
# Get all season directories
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
for d in os.listdir(show_dir)
|
|
28
|
-
if os.path.isdir(os.path.join(show_dir, d))
|
|
29
|
-
]
|
|
65
|
+
show_path = normalize_path(show_dir)
|
|
66
|
+
season_paths = [str(show_path / d.name) for d in show_path.iterdir() if d.is_dir()]
|
|
30
67
|
|
|
31
68
|
# Filter seasons to only include those with .mkv files
|
|
32
69
|
valid_season_paths = []
|
|
33
70
|
for season_path in season_paths:
|
|
34
|
-
|
|
71
|
+
season_path_obj = Path(season_path)
|
|
72
|
+
mkv_files = [f for f in season_path_obj.iterdir() if f.name.endswith(".mkv")]
|
|
35
73
|
if mkv_files:
|
|
36
74
|
valid_season_paths.append(season_path)
|
|
37
75
|
|
|
38
76
|
if not valid_season_paths:
|
|
39
|
-
logger.warning(
|
|
77
|
+
logger.warning(
|
|
78
|
+
f"No seasons with .mkv files found in show '{normalize_path(show_dir).name}'"
|
|
79
|
+
)
|
|
40
80
|
else:
|
|
41
81
|
logger.info(
|
|
42
|
-
f"Found {len(valid_season_paths)} seasons with .mkv files in '{
|
|
82
|
+
f"Found {len(valid_season_paths)} seasons with .mkv files in '{normalize_path(show_dir).name}'"
|
|
43
83
|
)
|
|
44
84
|
|
|
45
85
|
return valid_season_paths
|
|
86
|
+
|
|
87
|
+
|
|
46
88
|
def check_filename(filename):
    """
    Check if the filename is in the correct format (S01E02).

    The match is case-sensitive and may occur anywhere in the name.

    Args:
        filename (str or os.PathLike): The filename to check.

    Returns:
        bool: True if the filename matches the expected pattern.
    """
    # os.fspath handles str, pathlib.Path and every other os.PathLike
    # (e.g. PurePath), not only concrete Path instances.
    name = os.fspath(filename)
    # re.search already scans the whole string, so no leading ".*" is needed.
    return re.search(r"S\d+E\d+", name) is not None
|
|
59
104
|
|
|
60
105
|
|
|
@@ -70,16 +115,14 @@ def scramble_filename(original_file_path, file_number):
|
|
|
70
115
|
None
|
|
71
116
|
"""
|
|
72
117
|
logger.info(f"Scrambling {original_file_path}")
|
|
73
|
-
series_title =
|
|
74
|
-
|
|
75
|
-
)
|
|
76
|
-
original_file_name = os.path.basename(original_file_path)
|
|
77
|
-
extension = os.path.splitext(original_file_path)[-1]
|
|
118
|
+
series_title = normalize_path(original_file_path).parent.parent.name
|
|
119
|
+
original_file_name = Path(original_file_path).name
|
|
120
|
+
extension = Path(original_file_path).suffix
|
|
78
121
|
new_file_name = f"{series_title} - {file_number:03d}{extension}"
|
|
79
|
-
new_file_path =
|
|
80
|
-
if not
|
|
122
|
+
new_file_path = Path(original_file_path).parent / new_file_name
|
|
123
|
+
if not new_file_path.exists():
|
|
81
124
|
logger.info(f"Renaming {original_file_name} -> {new_file_name}")
|
|
82
|
-
|
|
125
|
+
Path(original_file_path).rename(new_file_path)
|
|
83
126
|
|
|
84
127
|
|
|
85
128
|
def rename_episode_file(original_file_path, new_filename):
|
|
@@ -87,32 +130,32 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
87
130
|
Rename an episode file with a standardized naming convention.
|
|
88
131
|
|
|
89
132
|
Args:
|
|
90
|
-
original_file_path (str): The original file path of the episode.
|
|
91
|
-
new_filename (str): The new filename including season/episode info.
|
|
133
|
+
original_file_path (str or Path): The original file path of the episode.
|
|
134
|
+
new_filename (str or Path): The new filename including season/episode info.
|
|
92
135
|
|
|
93
136
|
Returns:
|
|
94
|
-
|
|
137
|
+
Path: Path to the renamed file, or None if rename failed.
|
|
95
138
|
"""
|
|
96
|
-
original_dir =
|
|
97
|
-
new_file_path =
|
|
98
|
-
|
|
139
|
+
original_dir = Path(original_file_path).parent
|
|
140
|
+
new_file_path = original_dir / new_filename
|
|
141
|
+
|
|
99
142
|
# Check if new filepath already exists
|
|
100
|
-
if
|
|
143
|
+
if new_file_path.exists():
|
|
101
144
|
logger.warning(f"File already exists: {new_filename}")
|
|
102
|
-
|
|
145
|
+
|
|
103
146
|
# Add numeric suffix if file exists
|
|
104
|
-
base, ext =
|
|
147
|
+
base, ext = Path(new_filename).stem, Path(new_filename).suffix
|
|
105
148
|
suffix = 2
|
|
106
149
|
while True:
|
|
107
150
|
new_filename = f"{base}_{suffix}{ext}"
|
|
108
|
-
new_file_path =
|
|
109
|
-
if not
|
|
151
|
+
new_file_path = original_dir / new_filename
|
|
152
|
+
if not new_file_path.exists():
|
|
110
153
|
break
|
|
111
154
|
suffix += 1
|
|
112
|
-
|
|
155
|
+
|
|
113
156
|
try:
|
|
114
|
-
|
|
115
|
-
logger.info(f"Renamed {
|
|
157
|
+
Path(original_file_path).rename(new_file_path)
|
|
158
|
+
logger.info(f"Renamed {Path(original_file_path).name} -> {new_filename}")
|
|
116
159
|
return new_file_path
|
|
117
160
|
except OSError as e:
|
|
118
161
|
logger.error(f"Failed to rename file: {e}")
|
|
@@ -120,28 +163,28 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
120
163
|
except FileExistsError as e:
|
|
121
164
|
logger.error(f"Failed to rename file: {e}")
|
|
122
165
|
return None
|
|
123
|
-
|
|
124
|
-
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_subtitles(show_id, seasons: set[int], config=None, max_retries=3):
|
|
125
169
|
"""
|
|
126
170
|
Retrieves and saves subtitles for a given TV show and seasons.
|
|
127
171
|
|
|
128
172
|
Args:
|
|
129
173
|
show_id (int): The ID of the TV show.
|
|
130
174
|
seasons (Set[int]): A set of season numbers for which subtitles should be retrieved.
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
None
|
|
175
|
+
config (Config object, optional): Preloaded configuration.
|
|
176
|
+
max_retries (int, optional): Number of times to retry subtitle download on OpenSubtitlesException. Defaults to 3.
|
|
134
177
|
"""
|
|
178
|
+
if config is None:
|
|
179
|
+
config = get_config_manager().load()
|
|
180
|
+
show_dir = config.show_dir
|
|
181
|
+
series_name = sanitize_filename(normalize_path(show_dir).name)
|
|
182
|
+
tmdb_api_key = config.tmdb_api_key
|
|
183
|
+
open_subtitles_api_key = config.open_subtitles_api_key
|
|
184
|
+
open_subtitles_user_agent = config.open_subtitles_user_agent
|
|
185
|
+
open_subtitles_username = config.open_subtitles_username
|
|
186
|
+
open_subtitles_password = config.open_subtitles_password
|
|
135
187
|
|
|
136
|
-
logger.info(f"Getting subtitles for show ID {show_id}")
|
|
137
|
-
config = get_config(CONFIG_FILE)
|
|
138
|
-
show_dir = config.get("show_dir")
|
|
139
|
-
series_name = os.path.basename(show_dir)
|
|
140
|
-
tmdb_api_key = config.get("tmdb_api_key")
|
|
141
|
-
open_subtitles_api_key = config.get("open_subtitles_api_key")
|
|
142
|
-
open_subtitles_user_agent = config.get("open_subtitles_user_agent")
|
|
143
|
-
open_subtitles_username = config.get("open_subtitles_username")
|
|
144
|
-
open_subtitles_password = config.get("open_subtitles_password")
|
|
145
188
|
if not all([
|
|
146
189
|
show_dir,
|
|
147
190
|
tmdb_api_key,
|
|
@@ -151,92 +194,134 @@ def get_subtitles(show_id, seasons: set[int]):
|
|
|
151
194
|
open_subtitles_password,
|
|
152
195
|
]):
|
|
153
196
|
logger.error("Missing configuration settings. Please run the setup script.")
|
|
197
|
+
return
|
|
198
|
+
|
|
154
199
|
try:
|
|
155
|
-
# Initialize the OpenSubtitles client
|
|
156
200
|
subtitles = OpenSubtitles(open_subtitles_user_agent, open_subtitles_api_key)
|
|
157
|
-
|
|
158
|
-
# Log in (retrieve auth token)
|
|
159
201
|
subtitles.login(open_subtitles_username, open_subtitles_password)
|
|
160
202
|
except Exception as e:
|
|
161
203
|
logger.error(f"Failed to log in to OpenSubtitles: {e}")
|
|
162
204
|
return
|
|
205
|
+
|
|
163
206
|
for season in seasons:
|
|
164
207
|
episodes = fetch_season_details(show_id, season)
|
|
165
208
|
logger.info(f"Found {episodes} episodes in Season {season}")
|
|
166
209
|
|
|
167
210
|
for episode in range(1, episodes + 1):
|
|
168
211
|
logger.info(f"Processing Season {season}, Episode {episode}...")
|
|
169
|
-
|
|
212
|
+
|
|
213
|
+
series_cache_dir = config.cache_dir / "data" / series_name
|
|
170
214
|
os.makedirs(series_cache_dir, exist_ok=True)
|
|
171
|
-
srt_filepath = os.path.join(
|
|
172
|
-
series_cache_dir,
|
|
173
|
-
f"{series_name} - S{season:02d}E{episode:02d}.srt",
|
|
174
|
-
)
|
|
175
|
-
if not os.path.exists(srt_filepath):
|
|
176
|
-
# get the episode info from TMDB
|
|
177
|
-
url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}"
|
|
178
|
-
response = requests.get(url)
|
|
179
|
-
response.raise_for_status()
|
|
180
|
-
episode_data = response.json()
|
|
181
|
-
episode_data["name"]
|
|
182
|
-
episode_id = episode_data["id"]
|
|
183
|
-
# search for the subtitle
|
|
184
|
-
response = subtitles.search(tmdb_id=episode_id, languages="en")
|
|
185
|
-
if len(response.data) == 0:
|
|
186
|
-
logger.warning(
|
|
187
|
-
f"No subtitles found for {series_name} - S{season:02d}E{episode:02d}"
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
for subtitle in response.data:
|
|
191
|
-
subtitle_dict = subtitle.to_dict()
|
|
192
|
-
# Remove special characters and convert to uppercase
|
|
193
|
-
filename_clean = re.sub(
|
|
194
|
-
r"\W+", " ", subtitle_dict["file_name"]
|
|
195
|
-
).upper()
|
|
196
|
-
if f"E{episode:02d}" in filename_clean:
|
|
197
|
-
logger.info(f"Original filename: {subtitle_dict['file_name']}")
|
|
198
|
-
srt_file = subtitles.download_and_save(subtitle)
|
|
199
|
-
series_name = series_name.replace(":", " -")
|
|
200
|
-
shutil.move(srt_file, srt_filepath)
|
|
201
|
-
logger.info(f"Subtitle saved to {srt_filepath}")
|
|
202
|
-
break
|
|
203
|
-
else:
|
|
204
|
-
continue
|
|
205
|
-
else:
|
|
206
|
-
logger.info(
|
|
207
|
-
f"Subtitle already exists for {series_name} - S{season:02d}E{episode:02d}"
|
|
208
|
-
)
|
|
209
|
-
continue
|
|
210
215
|
|
|
216
|
+
# Check for existing subtitle in any supported format
|
|
217
|
+
existing_subtitle = find_existing_subtitle(
|
|
218
|
+
series_cache_dir, series_name, season, episode
|
|
219
|
+
)
|
|
211
220
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
221
|
+
if existing_subtitle:
|
|
222
|
+
logger.info(f"Subtitle already exists: {Path(existing_subtitle).name}")
|
|
223
|
+
continue
|
|
215
224
|
|
|
216
|
-
|
|
217
|
-
|
|
225
|
+
# Default to standard format for new downloads
|
|
226
|
+
srt_filepath = str(
|
|
227
|
+
series_cache_dir / f"{series_name} - S{season:02d}E{episode:02d}.srt"
|
|
228
|
+
)
|
|
218
229
|
|
|
219
|
-
|
|
220
|
-
|
|
230
|
+
# get the episode info from TMDB
|
|
231
|
+
url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}"
|
|
232
|
+
response = requests.get(url)
|
|
233
|
+
response.raise_for_status()
|
|
234
|
+
episode_data = response.json()
|
|
235
|
+
episode_id = episode_data["id"]
|
|
236
|
+
|
|
237
|
+
# search for the subtitle
|
|
238
|
+
response = subtitles.search(tmdb_id=episode_id, languages="en")
|
|
239
|
+
if len(response.data) == 0:
|
|
240
|
+
logger.warning(
|
|
241
|
+
f"No subtitles found for {series_name} - S{season:02d}E{episode:02d}"
|
|
242
|
+
)
|
|
243
|
+
continue
|
|
221
244
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
245
|
+
for subtitle in response.data:
|
|
246
|
+
subtitle_dict = subtitle.to_dict()
|
|
247
|
+
# Remove special characters and convert to uppercase
|
|
248
|
+
filename_clean = re.sub(
|
|
249
|
+
r"\\W+", " ", subtitle_dict["file_name"]
|
|
250
|
+
).upper()
|
|
251
|
+
if f"E{episode:02d}" in filename_clean:
|
|
252
|
+
logger.info(f"Original filename: {subtitle_dict['file_name']}")
|
|
253
|
+
retry_count = 0
|
|
254
|
+
while retry_count < max_retries:
|
|
255
|
+
try:
|
|
256
|
+
srt_file = subtitles.download_and_save(subtitle)
|
|
257
|
+
shutil.move(srt_file, srt_filepath)
|
|
258
|
+
logger.info(f"Subtitle saved to {srt_filepath}")
|
|
259
|
+
break
|
|
260
|
+
except OpenSubtitlesException as e:
|
|
261
|
+
retry_count += 1
|
|
262
|
+
logger.error(
|
|
263
|
+
f"OpenSubtitlesException (attempt {retry_count}): {e}"
|
|
264
|
+
)
|
|
265
|
+
console.print(
|
|
266
|
+
f"[red]OpenSubtitlesException (attempt {retry_count}): {e}[/red]"
|
|
267
|
+
)
|
|
268
|
+
if retry_count >= max_retries:
|
|
269
|
+
user_input = input(
|
|
270
|
+
"Would you like to continue matching? (y/n): "
|
|
271
|
+
)
|
|
272
|
+
if user_input.strip().lower() != "y":
|
|
273
|
+
logger.info(
|
|
274
|
+
"User chose to stop matching due to the error."
|
|
275
|
+
)
|
|
276
|
+
return
|
|
277
|
+
else:
|
|
278
|
+
logger.info(
|
|
279
|
+
"User chose to continue matching despite the error."
|
|
280
|
+
)
|
|
281
|
+
break
|
|
282
|
+
except Exception as e:
|
|
283
|
+
logger.error(f"Failed to download and save subtitle: {e}")
|
|
284
|
+
console.print(
|
|
285
|
+
f"[red]Failed to download and save subtitle: {e}[/red]"
|
|
286
|
+
)
|
|
287
|
+
user_input = input(
|
|
288
|
+
"Would you like to continue matching despite the error? (y/n): "
|
|
289
|
+
)
|
|
290
|
+
if user_input.strip().lower() != "y":
|
|
291
|
+
logger.info(
|
|
292
|
+
"User chose to stop matching due to the error."
|
|
293
|
+
)
|
|
294
|
+
return
|
|
295
|
+
else:
|
|
296
|
+
logger.info(
|
|
297
|
+
"User chose to continue matching despite the error."
|
|
298
|
+
)
|
|
299
|
+
break
|
|
300
|
+
else:
|
|
301
|
+
continue
|
|
302
|
+
break
|
|
231
303
|
|
|
232
304
|
|
|
233
305
|
def clean_text(text):
    """
    Strip bracketed annotations from a title while keeping release years.

    Content in ``[...]`` and ``{...}`` is removed, as is content in ``(...)``
    unless the parentheses hold exactly four digits (a year). Runs of
    whitespace are then collapsed and the result is trimmed.

    Args:
        text (str): The text to clean.

    Returns:
        str: The cleaned text.
    """
    # Remove brackets and curly braces with their content
    cleaned = re.sub(r"\[.*?\]|\{.*?\}", "", text)

    # Protect "(YYYY)" groups behind an *indexed* token. A single shared
    # placeholder would be restored positionally, so a year deleted together
    # with an enclosing parenthetical would shift every later year by one.
    years = []

    def _stash_year(match):
        years.append(match.group(1))
        return f"\x00{len(years) - 1}\x00"

    cleaned = re.sub(r"\((\d{4})\)", _stash_year, cleaned)

    # Remove all remaining parentheses content
    cleaned = re.sub(r"\([^)]*\)", "", cleaned)

    def _restore_year(match):
        index = int(match.group(1))
        # Leave anything that is not one of our own tokens untouched.
        return f"({years[index]})" if index < len(years) else match.group(0)

    cleaned = re.sub(r"\x00(\d+)\x00", _restore_year, cleaned)

    # Normalize multiple spaces to single spaces, then trim the ends
    return re.sub(r"\s+", " ", cleaned).strip()
|
|
238
322
|
|
|
239
323
|
|
|
324
|
+
@logger.catch
|
|
240
325
|
def process_reference_srt_files(series_name):
|
|
241
326
|
"""
|
|
242
327
|
Process reference SRT files for a given series.
|
|
@@ -248,24 +333,24 @@ def process_reference_srt_files(series_name):
|
|
|
248
333
|
dict: A dictionary containing the reference files where the keys are the MKV filenames
|
|
249
334
|
and the values are the corresponding SRT texts.
|
|
250
335
|
"""
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
336
|
+
config = get_config_manager().load()
|
|
337
|
+
|
|
254
338
|
reference_files = {}
|
|
255
|
-
reference_dir =
|
|
256
|
-
|
|
339
|
+
reference_dir = config.cache_dir / "data" / series_name
|
|
340
|
+
|
|
257
341
|
for dirpath, _, filenames in os.walk(reference_dir):
|
|
258
342
|
for filename in filenames:
|
|
259
343
|
if filename.lower().endswith(".srt"):
|
|
260
|
-
srt_file =
|
|
344
|
+
srt_file = Path(dirpath) / filename
|
|
261
345
|
logger.info(f"Processing {srt_file}")
|
|
262
346
|
srt_text = extract_srt_text(srt_file)
|
|
263
347
|
season, episode = extract_season_episode(filename)
|
|
264
348
|
mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
|
|
265
349
|
reference_files[mkv_filename] = srt_text
|
|
266
|
-
|
|
350
|
+
|
|
267
351
|
return reference_files
|
|
268
352
|
|
|
353
|
+
|
|
269
354
|
def extract_srt_text(filepath):
|
|
270
355
|
"""
|
|
271
356
|
Extracts text content from an SRT file.
|
|
@@ -279,39 +364,51 @@ def extract_srt_text(filepath):
|
|
|
279
364
|
# Read the file content
|
|
280
365
|
with open(filepath) as f:
|
|
281
366
|
content = f.read()
|
|
282
|
-
|
|
367
|
+
|
|
283
368
|
# Split into subtitle blocks
|
|
284
|
-
blocks = content.strip().split(
|
|
285
|
-
|
|
369
|
+
blocks = content.strip().split("\n\n")
|
|
370
|
+
|
|
286
371
|
text_lines = []
|
|
287
372
|
for block in blocks:
|
|
288
|
-
lines = block.split(
|
|
373
|
+
lines = block.split("\n")
|
|
289
374
|
if len(lines) < 3:
|
|
290
375
|
continue
|
|
291
|
-
|
|
376
|
+
|
|
292
377
|
# Skip index and timestamp, get all remaining lines as text
|
|
293
|
-
text =
|
|
378
|
+
text = " ".join(lines[2:])
|
|
294
379
|
# Remove stage directions and tags
|
|
295
|
-
text = re.sub(r
|
|
380
|
+
text = re.sub(r"\[.*?\]|\<.*?\>", "", text)
|
|
296
381
|
if text:
|
|
297
382
|
text_lines.append(text)
|
|
298
|
-
|
|
383
|
+
|
|
299
384
|
return text_lines
|
|
300
385
|
|
|
386
|
+
|
|
301
387
|
def extract_season_episode(filename):
    """
    Extract season and episode numbers from filename with support for multiple formats.

    The patterns are tried in order, case-insensitively, and the first one
    that matches anywhere in the name wins.

    Args:
        filename (str or os.PathLike): Filename to parse

    Returns:
        tuple: (season_number, episode_number), or (None, None) when no
            pattern matches
    """
    # Accept Path objects as well as strings, like check_filename does;
    # re.search raises TypeError on a Path.
    name = os.fspath(filename)

    # Ordered from most to least specific
    patterns = (
        r"S(\d+)E(\d+)",  # S01E01
        r"(\d+)x(\d+)",  # 1x01 or 01x01
        r"Season\s*(\d+).*?(\d+)",  # Season 1 - 01
    )

    for pattern in patterns:
        match = re.search(pattern, name, re.IGNORECASE)
        if match:
            season, episode = match.groups()
            return int(season), int(episode)

    return None, None
|
|
410
|
+
|
|
411
|
+
|
|
315
412
|
def process_srt_files(show_dir):
|
|
316
413
|
"""
|
|
317
414
|
Process all SRT files in the given directory and its subdirectories.
|
|
@@ -326,11 +423,13 @@ def process_srt_files(show_dir):
|
|
|
326
423
|
for dirpath, _, filenames in os.walk(show_dir):
|
|
327
424
|
for filename in filenames:
|
|
328
425
|
if filename.lower().endswith(".srt"):
|
|
329
|
-
srt_file =
|
|
426
|
+
srt_file = Path(dirpath) / filename
|
|
330
427
|
logger.info(f"Processing {srt_file}")
|
|
331
428
|
srt_text = extract_srt_text(srt_file)
|
|
332
429
|
srt_files[srt_file] = srt_text
|
|
333
430
|
return srt_files
|
|
431
|
+
|
|
432
|
+
|
|
334
433
|
def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
335
434
|
"""
|
|
336
435
|
Compare the srt files with the reference files and rename the matching mkv files.
|
|
@@ -344,22 +443,21 @@ def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
|
344
443
|
f"Comparing {len(srt_files)} srt files with {len(reference_files)} reference files"
|
|
345
444
|
)
|
|
346
445
|
for srt_text in srt_files.keys():
|
|
347
|
-
parent_dir =
|
|
446
|
+
parent_dir = Path(srt_text).parent.parent
|
|
348
447
|
for reference in reference_files.keys():
|
|
349
448
|
_season, _episode = extract_season_episode(reference)
|
|
350
|
-
mkv_file =
|
|
351
|
-
parent_dir, os.path.basename(srt_text).replace(".srt", ".mkv")
|
|
352
|
-
)
|
|
449
|
+
mkv_file = str(parent_dir / Path(srt_text).name.replace(".srt", ".mkv"))
|
|
353
450
|
matching_lines = compare_text(
|
|
354
451
|
reference_files[reference], srt_files[srt_text]
|
|
355
452
|
)
|
|
356
453
|
if matching_lines >= int(len(reference_files[reference]) * 0.1):
|
|
357
454
|
logger.info(f"Matching lines: {matching_lines}")
|
|
358
455
|
logger.info(f"Found matching file: {mkv_file} ->{reference}")
|
|
359
|
-
new_filename =
|
|
456
|
+
new_filename = parent_dir / reference
|
|
360
457
|
if not dry_run:
|
|
361
|
-
logger.info(f"Renaming {mkv_file} to {new_filename}")
|
|
362
|
-
rename_episode_file(mkv_file,
|
|
458
|
+
logger.info(f"Renaming {mkv_file} to {str(new_filename)}")
|
|
459
|
+
rename_episode_file(mkv_file, reference)
|
|
460
|
+
|
|
363
461
|
|
|
364
462
|
def compare_text(text1, text2):
|
|
365
463
|
"""
|
|
@@ -378,4 +476,29 @@ def compare_text(text1, text2):
|
|
|
378
476
|
|
|
379
477
|
# Compare the two lists of text lines
|
|
380
478
|
matching_lines = set(flat_text1).intersection(flat_text2)
|
|
381
|
-
return len(matching_lines)
|
|
479
|
+
return len(matching_lines)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def check_gpu_support():
    """
    Report whether PyTorch can see a CUDA device.

    The result is written to the logger and shown as a panel on the
    module-level console. Nothing is returned.
    """
    logger.info("Checking GPU support...")
    console.print("[bold]Checking GPU support...[/bold]")

    # Guard clause: handle the CPU-only case first.
    if not torch.cuda.is_available():
        notice = "CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support."
        logger.warning(notice)
        console.print(Panel.fit(notice, title="GPU Support", border_style="red"))
        return

    # Device index 0 is the one reported, as in the log line.
    notice = f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}"
    logger.info(notice)
    console.print(Panel.fit(notice, title="GPU Support", border_style="magenta"))
|