mkv-episode-matcher 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher/__main__.py +8 -4
- mkv_episode_matcher/episode_matcher.py +39 -26
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +38 -12
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +16644 -193
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +125 -80
- mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +7 -5
- mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +49 -20
- mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +53 -49
- mkv_episode_matcher/mkv_to_srt.py +150 -22
- mkv_episode_matcher/utils.py +16 -20
- mkv_episode_matcher-0.2.0.dist-info/METADATA +117 -0
- mkv_episode_matcher-0.2.0.dist-info/RECORD +23 -0
- mkv_episode_matcher/notebooks/get_subtitles_test.ipynb +0 -252
- mkv_episode_matcher/notebooks/whisper.ipynb +0 -122
- mkv_episode_matcher-0.1.13.dist-info/METADATA +0 -113
- mkv_episode_matcher-0.1.13.dist-info/RECORD +0 -25
- {mkv_episode_matcher-0.1.13.dist-info → mkv_episode_matcher-0.2.0.dist-info}/WHEEL +0 -0
- {mkv_episode_matcher-0.1.13.dist-info → mkv_episode_matcher-0.2.0.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.1.13.dist-info → mkv_episode_matcher-0.2.0.dist-info}/top_level.txt +0 -0
mkv_episode_matcher/__main__.py
CHANGED
|
@@ -10,7 +10,6 @@ from mkv_episode_matcher.config import get_config, set_config
|
|
|
10
10
|
logger.info("Starting the application")
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
# Check if the configuration directory exists, if not create it
|
|
15
14
|
if not os.path.exists(os.path.join(os.path.expanduser("~"), ".mkv-episode-matcher")):
|
|
16
15
|
os.makedirs(os.path.join(os.path.expanduser("~"), ".mkv-episode-matcher"))
|
|
@@ -31,10 +30,16 @@ if not os.path.exists(log_dir):
|
|
|
31
30
|
os.mkdir(log_dir)
|
|
32
31
|
|
|
33
32
|
# Add a new handler for stdout logs
|
|
34
|
-
logger.add(
|
|
33
|
+
logger.add(
|
|
34
|
+
os.path.join(log_dir, "stdout.log"),
|
|
35
|
+
format="{time} {level} {message}",
|
|
36
|
+
level="DEBUG",
|
|
37
|
+
rotation="10 MB",
|
|
38
|
+
)
|
|
35
39
|
|
|
36
40
|
# Add a new handler for error logs
|
|
37
|
-
logger.add(os.path.join(log_dir,"stderr.log"), level="ERROR", rotation="10 MB")
|
|
41
|
+
logger.add(os.path.join(log_dir, "stderr.log"), level="ERROR", rotation="10 MB")
|
|
42
|
+
|
|
38
43
|
|
|
39
44
|
@logger.catch
|
|
40
45
|
def main():
|
|
@@ -55,7 +60,6 @@ def main():
|
|
|
55
60
|
The function logs its progress to two separate log files: one for standard output and one for errors.
|
|
56
61
|
"""
|
|
57
62
|
|
|
58
|
-
|
|
59
63
|
# Parse command-line arguments
|
|
60
64
|
parser = argparse.ArgumentParser(description="Process shows with TMDb API")
|
|
61
65
|
parser.add_argument("--tmdb-api-key", help="TMDb API key")
|
|
mkv_episode_matcher/episode_matcher.py
CHANGED
|
@@ -8,7 +8,12 @@ from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
|
|
|
8
8
|
from mkv_episode_matcher.config import get_config
|
|
9
9
|
from mkv_episode_matcher.mkv_to_srt import convert_mkv_to_srt
|
|
10
10
|
from mkv_episode_matcher.tmdb_client import fetch_show_id
|
|
11
|
-
from mkv_episode_matcher.utils import
|
|
11
|
+
from mkv_episode_matcher.utils import (
|
|
12
|
+
check_filename,
|
|
13
|
+
clean_text,
|
|
14
|
+
cleanup_ocr_files,
|
|
15
|
+
get_subtitles,
|
|
16
|
+
)
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
# hash_data = {}
|
|
@@ -25,7 +30,7 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
25
30
|
show_dir = config.get("show_dir")
|
|
26
31
|
show_name = clean_text(os.path.basename(show_dir))
|
|
27
32
|
logger.info(f"Processing show '{show_name}'...")
|
|
28
|
-
|
|
33
|
+
|
|
29
34
|
show_id = fetch_show_id(show_name)
|
|
30
35
|
if show_id is None:
|
|
31
36
|
logger.error(f"Could not find show '{os.path.basename(show_dir)}' on TMDb.")
|
|
@@ -41,10 +46,7 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
41
46
|
# Filter seasons to only include those with .mkv files
|
|
42
47
|
valid_season_paths = []
|
|
43
48
|
for season_path in season_paths:
|
|
44
|
-
mkv_files = [
|
|
45
|
-
f for f in os.listdir(season_path)
|
|
46
|
-
if f.endswith(".mkv")
|
|
47
|
-
]
|
|
49
|
+
mkv_files = [f for f in os.listdir(season_path) if f.endswith(".mkv")]
|
|
48
50
|
if mkv_files:
|
|
49
51
|
valid_season_paths.append(season_path)
|
|
50
52
|
|
|
@@ -58,7 +60,7 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
58
60
|
|
|
59
61
|
# Extract season numbers from valid paths
|
|
60
62
|
seasons_to_process = [
|
|
61
|
-
int(os.path.basename(season_path).split()[-1])
|
|
63
|
+
int(os.path.basename(season_path).split()[-1])
|
|
62
64
|
for season_path in valid_season_paths
|
|
63
65
|
]
|
|
64
66
|
|
|
@@ -71,34 +73,43 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
71
73
|
if season_path not in valid_season_paths:
|
|
72
74
|
logger.warning(f"Season {season} has no .mkv files to process")
|
|
73
75
|
return
|
|
74
|
-
|
|
76
|
+
|
|
77
|
+
season_paths_to_process = [season_path]
|
|
78
|
+
else:
|
|
79
|
+
# Process all valid seasons
|
|
80
|
+
season_paths_to_process = valid_season_paths
|
|
81
|
+
|
|
82
|
+
# Process each season
|
|
83
|
+
for season_path in season_paths_to_process:
|
|
84
|
+
logger.info(f"Processing season path: {season_path}")
|
|
75
85
|
mkv_files = [
|
|
76
86
|
os.path.join(season_path, f)
|
|
77
87
|
for f in os.listdir(season_path)
|
|
78
88
|
if f.endswith(".mkv")
|
|
79
89
|
]
|
|
80
|
-
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
90
|
+
|
|
91
|
+
# Filter out files that have already been processed
|
|
92
|
+
unprocessed_files = []
|
|
93
|
+
for f in mkv_files:
|
|
94
|
+
if check_filename(f):
|
|
95
|
+
logger.info(f"Skipping {f}, already processed")
|
|
96
|
+
else:
|
|
97
|
+
unprocessed_files.append(f)
|
|
98
|
+
|
|
99
|
+
if not unprocessed_files:
|
|
100
|
+
logger.info(f"No new files to process in {season_path}")
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
logger.info(f"Processing {len(unprocessed_files)} files in {season_path}")
|
|
104
|
+
convert_mkv_to_srt(season_path, unprocessed_files)
|
|
105
|
+
|
|
106
|
+
# Process reference and SRT files after all seasons are converted
|
|
97
107
|
reference_text_dict = process_reference_srt_files(show_name)
|
|
98
108
|
srt_text_dict = process_srt_files(show_dir)
|
|
99
109
|
compare_and_rename_files(srt_text_dict, reference_text_dict, dry_run=dry_run)
|
|
100
110
|
cleanup_ocr_files(show_dir)
|
|
101
111
|
|
|
112
|
+
|
|
102
113
|
def check_filename(filename):
|
|
103
114
|
"""
|
|
104
115
|
Check if the filename is in the correct format.
|
|
@@ -112,6 +123,8 @@ def check_filename(filename):
|
|
|
112
123
|
# Check if the filename matches the expected format
|
|
113
124
|
match = re.match(r".*S\d+E\d+", filename)
|
|
114
125
|
return bool(match)
|
|
126
|
+
|
|
127
|
+
|
|
115
128
|
def extract_srt_text(filepath):
|
|
116
129
|
"""
|
|
117
130
|
Extracts the text from an SRT file.
|
|
@@ -242,7 +255,7 @@ def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
|
242
255
|
for srt_text in srt_files.keys():
|
|
243
256
|
parent_dir = os.path.dirname(os.path.dirname(srt_text))
|
|
244
257
|
for reference in reference_files.keys():
|
|
245
|
-
|
|
258
|
+
_season, _episode = extract_season_episode(reference)
|
|
246
259
|
mkv_file = os.path.join(
|
|
247
260
|
parent_dir, os.path.basename(srt_text).replace(".srt", ".mkv")
|
|
248
261
|
)
|
|
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py
CHANGED
|
@@ -51,6 +51,7 @@ class Processor:
|
|
|
51
51
|
"""
|
|
52
52
|
Processor base class
|
|
53
53
|
"""
|
|
54
|
+
|
|
54
55
|
name = None
|
|
55
56
|
parent = None
|
|
56
57
|
supported = None
|
|
@@ -69,7 +70,7 @@ class Processor:
|
|
|
69
70
|
return content
|
|
70
71
|
|
|
71
72
|
def __repr__(self):
|
|
72
|
-
return "Processor
|
|
73
|
+
return f"Processor <{self.__class__.__name__} {self.info}>"
|
|
73
74
|
|
|
74
75
|
def __str__(self):
|
|
75
76
|
return repr(self)
|
|
@@ -82,10 +83,13 @@ class ReProcessor(Processor):
|
|
|
82
83
|
"""
|
|
83
84
|
Regex processor
|
|
84
85
|
"""
|
|
86
|
+
|
|
85
87
|
pattern = None
|
|
86
88
|
replace_with = None
|
|
87
89
|
|
|
88
|
-
def __init__(
|
|
90
|
+
def __init__(
|
|
91
|
+
self, pattern, replace_with, name=None, supported=None, entry=False, **kwargs
|
|
92
|
+
):
|
|
89
93
|
super(ReProcessor, self).__init__(name=name, supported=supported)
|
|
90
94
|
self.pattern = pattern
|
|
91
95
|
self.replace_with = replace_with
|
|
@@ -116,6 +120,7 @@ class MultipleWordReProcessor(ReProcessor):
|
|
|
116
120
|
}
|
|
117
121
|
replaces found key in pattern with the corresponding value in data
|
|
118
122
|
"""
|
|
123
|
+
|
|
119
124
|
def __init__(self, snr_dict, name=None, parent=None, supported=None, **kwargs):
|
|
120
125
|
super(ReProcessor, self).__init__(name=name, supported=supported)
|
|
121
126
|
self.snr_dict = snr_dict
|
|
@@ -124,7 +129,9 @@ class MultipleWordReProcessor(ReProcessor):
|
|
|
124
129
|
if not self.snr_dict["data"]:
|
|
125
130
|
return content
|
|
126
131
|
|
|
127
|
-
return self.snr_dict["pattern"].sub(
|
|
132
|
+
return self.snr_dict["pattern"].sub(
|
|
133
|
+
lambda x: self.snr_dict["data"][x.group(0)], content
|
|
134
|
+
)
|
|
128
135
|
|
|
129
136
|
|
|
130
137
|
class EmptyEntryError(Exception):
|
|
@@ -151,7 +158,9 @@ class SubtitleModification:
|
|
|
151
158
|
def __init__(self):
|
|
152
159
|
return
|
|
153
160
|
|
|
154
|
-
def _process(
|
|
161
|
+
def _process(
|
|
162
|
+
self, content, processors, debug=False, parent=None, index=None, **kwargs
|
|
163
|
+
):
|
|
155
164
|
if not content:
|
|
156
165
|
return
|
|
157
166
|
|
|
@@ -184,13 +193,19 @@ class SubtitleModification:
|
|
|
184
193
|
return new_content
|
|
185
194
|
|
|
186
195
|
def pre_process(self, content, debug=False, parent=None, **kwargs):
|
|
187
|
-
return self._process(
|
|
196
|
+
return self._process(
|
|
197
|
+
content, self.pre_processors, debug=debug, parent=parent, **kwargs
|
|
198
|
+
)
|
|
188
199
|
|
|
189
200
|
def process(self, content, debug=False, parent=None, **kwargs):
|
|
190
|
-
return self._process(
|
|
201
|
+
return self._process(
|
|
202
|
+
content, self.processors, debug=debug, parent=parent, **kwargs
|
|
203
|
+
)
|
|
191
204
|
|
|
192
205
|
def post_process(self, content, debug=False, parent=None, **kwargs):
|
|
193
|
-
return self._process(
|
|
206
|
+
return self._process(
|
|
207
|
+
content, self.post_processors, debug=debug, parent=parent, **kwargs
|
|
208
|
+
)
|
|
194
209
|
|
|
195
210
|
def modify(self, content, debug=False, parent=None, procs=None, **kwargs):
|
|
196
211
|
if not content:
|
|
@@ -200,15 +215,22 @@ class SubtitleModification:
|
|
|
200
215
|
for method in procs or ("pre_process", "process", "post_process"):
|
|
201
216
|
if not new_content:
|
|
202
217
|
return
|
|
203
|
-
new_content = self._process(
|
|
204
|
-
|
|
218
|
+
new_content = self._process(
|
|
219
|
+
new_content,
|
|
220
|
+
getattr(self, f"{method}ors"),
|
|
221
|
+
debug=debug,
|
|
222
|
+
parent=parent,
|
|
223
|
+
**kwargs,
|
|
224
|
+
)
|
|
205
225
|
|
|
206
226
|
return new_content
|
|
207
227
|
|
|
208
228
|
@classmethod
|
|
209
229
|
def get_signature(cls, **kwargs):
|
|
210
|
-
string_args = ",".join([
|
|
211
|
-
|
|
230
|
+
string_args = ",".join([
|
|
231
|
+
f"{key}={value}" for key, value in kwargs.items()
|
|
232
|
+
])
|
|
233
|
+
return f"{cls.identifier}({string_args})"
|
|
212
234
|
|
|
213
235
|
@classmethod
|
|
214
236
|
def merge_args(cls, args1, args2):
|
|
@@ -224,7 +246,9 @@ class StringProcessor(Processor):
|
|
|
224
246
|
String replacement processor base
|
|
225
247
|
"""
|
|
226
248
|
|
|
227
|
-
def __init__(
|
|
249
|
+
def __init__(
|
|
250
|
+
self, search, replace, name=None, parent=None, supported=None, **kwargs
|
|
251
|
+
):
|
|
228
252
|
super(StringProcessor, self).__init__(name=name, supported=supported)
|
|
229
253
|
self.search = search
|
|
230
254
|
self.replace = replace
|
|
@@ -243,6 +267,7 @@ class MultipleLineProcessor(Processor):
|
|
|
243
267
|
"data": {"old_value": "new_value"}
|
|
244
268
|
}
|
|
245
269
|
"""
|
|
270
|
+
|
|
246
271
|
def __init__(self, snr_dict, name=None, parent=None, supported=None, **kwargs):
|
|
247
272
|
super(MultipleLineProcessor, self).__init__(name=name, supported=supported)
|
|
248
273
|
self.snr_dict = snr_dict
|
|
@@ -286,6 +311,7 @@ class MultipleWordProcessor(MultipleLineProcessor):
|
|
|
286
311
|
"data": {"old_value": "new_value"}
|
|
287
312
|
}
|
|
288
313
|
"""
|
|
314
|
+
|
|
289
315
|
def process(self, content, debug=False, **kwargs):
|
|
290
316
|
words = content.split(" ")
|
|
291
317
|
new_words = []
|