mkv-episode-matcher 0.7.2__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mkv-episode-matcher might be problematic.
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.coverage +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/PKG-INFO +1 -1
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/episode_identification.py +154 -76
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher.egg-info/PKG-INFO +1 -1
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/setup.cfg +1 -1
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.gitattributes +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.github/funding.yml +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.github/workflows/documentation.yml +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.github/workflows/python-publish.yml +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.github/workflows/tests.yml +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.gitignore +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.gitmodules +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.python-version +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.vscode/settings.json +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/CHANGELOG.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/README.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/docs/api/index.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/docs/changelog.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/docs/cli.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/docs/configuration.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/docs/installation.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/docs/quickstart.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/docs/tips.md +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkdocs.yml +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/.gitattributes +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/__init__.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/__main__.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/config.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/episode_matcher.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/subtitle_utils.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/tmdb_client.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/utils.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher.egg-info/SOURCES.txt +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher.egg-info/dependency_links.txt +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher.egg-info/entry_points.txt +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher.egg-info/requires.txt +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher.egg-info/top_level.txt +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/pyproject.toml +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/setup.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/tests/__init__.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/tests/test_main.py +0 -0
- {mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/uv.lock +0 -0
{mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/.coverage
RENAMED
Binary file
{mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mkv-episode-matcher
-Version: 0.7.2
+Version: 0.8.1
 Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
 Home-page: https://github.com/Jsakkos/mkv-episode-matcher
 Author: Jonathan Sakkos
{mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher/episode_identification.py
RENAMED
@@ -10,19 +10,56 @@ import torch
 import whisper
 from loguru import logger
 from rapidfuzz import fuzz
-from utils import extract_season_episode
+from mkv_episode_matcher.utils import extract_season_episode
+from functools import lru_cache
 
 console = Console()
 
+class SubtitleCache:
+    """Cache for storing parsed subtitle data to avoid repeated loading and parsing."""
+
+    def __init__(self):
+        self.subtitles = {}  # {file_path: parsed_content}
+        self.chunk_cache = {}  # {(file_path, chunk_idx): text}
+
+    def get_subtitle_content(self, srt_file):
+        """Get the full content of a subtitle file, loading it only once."""
+        srt_file = str(srt_file)
+        if srt_file not in self.subtitles:
+            reader = SubtitleReader()
+            self.subtitles[srt_file] = reader.read_srt_file(srt_file)
+        return self.subtitles[srt_file]
+
+    def get_chunk(self, srt_file, chunk_idx, chunk_start, chunk_end):
+        """Get a specific time chunk from a subtitle file, with caching."""
+        srt_file = str(srt_file)
+        cache_key = (srt_file, chunk_idx)
+
+        if cache_key not in self.chunk_cache:
+            content = self.get_subtitle_content(srt_file)
+            reader = SubtitleReader()
+            text_lines = reader.extract_subtitle_chunk(content, chunk_start, chunk_end)
+            self.chunk_cache[cache_key] = " ".join(text_lines)
+
+        return self.chunk_cache[cache_key]
+
+
 class EpisodeMatcher:
     def __init__(self, cache_dir, show_name, min_confidence=0.6):
         self.cache_dir = Path(cache_dir)
         self.min_confidence = min_confidence
         self.show_name = show_name
         self.chunk_duration = 30
+        self.skip_initial_duration = 300
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
         self.temp_dir.mkdir(exist_ok=True)
+        # Initialize subtitle cache
+        self.subtitle_cache = SubtitleCache()
+        # Cache for extracted audio chunks
+        self.audio_chunks = {}
+        # Store reference files to avoid repeated glob operations
+        self.reference_files_cache = {}
 
     def clean_text(self, text):
         text = text.lower().strip()
@@ -39,7 +76,12 @@ class EpisodeMatcher:
         ) / 100.0
 
     def extract_audio_chunk(self, mkv_file, start_time):
-        """Extract a chunk of audio from MKV file."""
+        """Extract a chunk of audio from MKV file with caching."""
+        cache_key = (str(mkv_file), start_time)
+
+        if cache_key in self.audio_chunks:
+            return self.audio_chunks[cache_key]
+
         chunk_path = self.temp_dir / f"chunk_{start_time}.wav"
         if not chunk_path.exists():
             cmd = [
@@ -59,14 +101,18 @@ class EpisodeMatcher:
                 "16000",
                 "-ac",
                 "1",
+                "-y",  # Overwrite output files without asking
                 str(chunk_path),
             ]
             subprocess.run(cmd, capture_output=True)
-        return str(chunk_path)
+
+        chunk_path_str = str(chunk_path)
+        self.audio_chunks[cache_key] = chunk_path_str
+        return chunk_path_str
 
     def load_reference_chunk(self, srt_file, chunk_idx):
         """
-        Load reference subtitles for a specific time chunk with
+        Load reference subtitles for a specific time chunk with caching.
 
         Args:
            srt_file (str or Path): Path to the SRT file
@@ -75,28 +121,57 @@ class EpisodeMatcher:
         Returns:
             str: Combined text from the subtitle chunk
         """
-        chunk_start = chunk_idx * self.chunk_duration
-        chunk_end = chunk_start + self.chunk_duration
-
         try:
-            #
-
-
-
-
-
-
-            return " ".join(text_lines)
-
+            # Apply the same offset as in _try_match_with_model
+            chunk_start = self.skip_initial_duration + (chunk_idx * self.chunk_duration)
+            chunk_end = chunk_start + self.chunk_duration
+
+            return self.subtitle_cache.get_chunk(srt_file, chunk_idx, chunk_start, chunk_end)
+
        except Exception as e:
             logger.error(f"Error loading reference chunk from {srt_file}: {e}")
             return ""
 
+    def get_reference_files(self, season_number):
+        """Get reference subtitle files with caching."""
+        cache_key = (self.show_name, season_number)
+        logger.debug(f"Reference cache key: {cache_key}")
+
+        if cache_key in self.reference_files_cache:
+            logger.debug("Returning cached reference files")
+            return self.reference_files_cache[cache_key]
+
+        reference_dir = self.cache_dir / "data" / self.show_name
+        patterns = [
+            f"S{season_number:02d}E",
+            f"S{season_number}E",
+            f"{season_number:02d}x",
+            f"{season_number}x",
+        ]
+
+        reference_files = []
+        for _pattern in patterns:
+            files = [
+                f
+                for f in reference_dir.glob("*.srt")
+                if any(
+                    re.search(f"{p}\\d+", f.name, re.IGNORECASE) for p in patterns
+                )
+            ]
+            reference_files.extend(files)
+
+        # Remove duplicates while preserving order
+        reference_files = list(dict.fromkeys(reference_files))
+        logger.debug(f"Found {len(reference_files)} reference files for season {season_number}")
+        self.reference_files_cache[cache_key] = reference_files
+        return reference_files
+
     def _try_match_with_model(
         self, video_file, model_name, max_duration, reference_files
     ):
         """
-        Attempt to match using specified model, checking multiple
+        Attempt to match using specified model, checking multiple chunks starting from skip_initial_duration
+        and continuing up to max_duration.
 
         Args:
             video_file: Path to the video file
@@ -107,18 +182,30 @@ class EpisodeMatcher:
         # Use cached model
         model = get_whisper_model(model_name, self.device)
 
-        # Calculate number of chunks to check
-        num_chunks = max_duration // self.chunk_duration
+        # Calculate number of chunks to check
+        num_chunks = min(max_duration // self.chunk_duration, 10)  # Limit to 10 chunks for initial check
+
+        # Pre-load all reference chunks for the chunks we'll check
+        for chunk_idx in range(num_chunks):
+            for ref_file in reference_files:
+                self.load_reference_chunk(ref_file, chunk_idx)
 
         for chunk_idx in range(num_chunks):
-            start_time = chunk_idx * self.chunk_duration
+            # Start at self.skip_initial_duration and check subsequent chunks
+            start_time = self.skip_initial_duration + (chunk_idx * self.chunk_duration)
             logger.debug(f"Trying {model_name} model at {start_time} seconds")
 
             audio_path = self.extract_audio_chunk(video_file, start_time)
+            logger.debug(f"Extracted audio chunk: {audio_path}")
 
             result = model.transcribe(audio_path, task="transcribe", language="en")
 
+
             chunk_text = result["text"]
+            logger.debug(f"Transcription result: {chunk_text} ({len(chunk_text)} characters)")
+            if len(chunk_text) < 10:
+                logger.debug(f"Transcription result too short: {chunk_text} ({len(chunk_text)} characters)")
+                continue
             best_confidence = 0
             best_match = None
 
@@ -128,14 +215,14 @@ class EpisodeMatcher:
                 confidence = self.chunk_score(chunk_text, ref_text)
 
                 if confidence > best_confidence:
-
+                    logger.debug(f"New best confidence: {confidence} for {ref_file}")
                     best_confidence = confidence
                     best_match = Path(ref_file)
 
             if confidence > self.min_confidence:
                 print(f"Matched with {best_match} (confidence: {best_confidence:.2f})")
                 try:
-                    season,episode = extract_season_episode(best_match.stem)
+                    season, episode = extract_season_episode(best_match.stem)
                 except Exception as e:
                     print(f"Error extracting season/episode: {e}")
                     continue
@@ -157,54 +244,22 @@ class EpisodeMatcher:
     def identify_episode(self, video_file, temp_dir, season_number):
         """Progressive episode identification with faster initial attempt."""
         try:
-            # Get reference files first
-            reference_dir = self.cache_dir / "data" / self.show_name
-            patterns = [
-                f"S{season_number:02d}E",
-                f"S{season_number}E",
-                f"{season_number:02d}x",
-                f"{season_number}x",
-            ]
-
-            reference_files = []
-            # TODO Figure our why patterns is not being used
-            for _pattern in patterns:
-                files = [
-                    f
-                    for f in reference_dir.glob("*.srt")
-                    if any(
-                        re.search(f"{p}\\d+", f.name, re.IGNORECASE) for p in patterns
-                    )
-                ]
-                reference_files.extend(files)
-
-            reference_files = list(dict.fromkeys(reference_files))
+            # Get reference files first with caching
+            reference_files = self.get_reference_files(season_number)
 
             if not reference_files:
                 logger.error(f"No reference files found for season {season_number}")
                 return None
-            duration = float(
-                subprocess.check_output([
-                    "ffprobe",
-                    "-v",
-                    "error",
-                    "-show_entries",
-                    "format=duration",
-                    "-of",
-                    "default=noprint_wrappers=1:nokey=1",
-                    video_file,
-                ]).decode()
-            )
+
+            # Cache video duration
+            duration = get_video_duration(video_file)
 
-            duration = int(np.ceil(duration))
             # Try with tiny model first (fastest)
             logger.info("Attempting match with tiny model...")
             match = self._try_match_with_model(
-                video_file, "tiny", duration, reference_files
+                video_file, "tiny.en", min(duration, 300), reference_files  # Limit to first 5 minutes
             )
-            if (
-                match and match["confidence"] > 0.65
-            ):  # Slightly lower threshold for tiny
+            if match and match["confidence"] > 0.65:  # Slightly lower threshold for tiny
                 logger.info(
                     f"Successfully matched with tiny model at {match['matched_at']}s (confidence: {match['confidence']:.2f})"
                 )
@@ -212,10 +267,10 @@ class EpisodeMatcher:
 
             # If no match, try base model
             logger.info(
-                "No match
+                "No match with tiny model, extending base model search to 5 minutes..."
             )
             match = self._try_match_with_model(
-                video_file, "base", duration, reference_files
+                video_file, "base.en", min(duration, 300), reference_files  # Limit to first 5 minutes
             )
             if match:
                 logger.info(
@@ -227,12 +282,30 @@ class EpisodeMatcher:
             return None
 
         finally:
-            # Cleanup temp files
-            for
+            # Cleanup temp files - keep this limited to only files we know we created
+            for chunk_info in self.audio_chunks.values():
                 try:
-
+                    Path(chunk_info).unlink(missing_ok=True)
                 except Exception as e:
-                    logger.warning(f"Failed to delete temp file {
+                    logger.warning(f"Failed to delete temp file {chunk_info}: {e}")
+
+
+@lru_cache(maxsize=100)
+def get_video_duration(video_file):
+    """Get video duration with caching."""
+    duration = float(
+        subprocess.check_output([
+            "ffprobe",
+            "-v",
+            "error",
+            "-show_entries",
+            "format=duration",
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",
+            video_file,
+        ]).decode()
+    )
+    return int(np.ceil(duration))
 
 
 def detect_file_encoding(file_path):
@@ -247,7 +320,7 @@ def detect_file_encoding(file_path):
     """
     try:
         with open(file_path, "rb") as f:
-            raw_data = f.read()
+            raw_data = f.read(min(1024 * 1024, Path(file_path).stat().st_size))  # Read up to 1MB
         result = chardet.detect(raw_data)
         encoding = result["encoding"]
         confidence = result["confidence"]
@@ -261,6 +334,7 @@ def detect_file_encoding(file_path):
         return "utf-8"
 
 
+@lru_cache(maxsize=100)
 def read_file_with_fallback(file_path, encodings=None):
     """
     Read a file trying multiple encodings in order of preference.
@@ -344,12 +418,16 @@ class SubtitleReader:
 
         try:
             timestamp = lines[1]
-
-
-            end_stamp =
-
-
-
+            time_parts = timestamp.split(" --> ")
+            start_stamp = time_parts[0].strip()
+            end_stamp = time_parts[1].strip()
+
+            subtitle_start = SubtitleReader.parse_timestamp(start_stamp)
+            subtitle_end = SubtitleReader.parse_timestamp(end_stamp)
+
+            # Check if this subtitle overlaps with our chunk
+            if subtitle_end >= start_time and subtitle_start <= end_time:
+                text = " ".join(lines[2:])
                 text_lines.append(text)
 
         except (IndexError, ValueError) as e:
@@ -359,9 +437,9 @@ class SubtitleReader:
         return text_lines
 
 
+# Global whisper model cache with better cache key
 _whisper_models = {}
 
-
 def get_whisper_model(model_name="tiny", device=None):
     """Cache whisper models to avoid reloading."""
     global _whisper_models
@@ -373,4 +451,4 @@ def get_whisper_model(model_name="tiny", device=None):
         _whisper_models[key] = whisper.load_model(model_name, device=device)
         logger.info(f"Loaded {model_name} model on {device}")
 
-    return _whisper_models[key]
+    return _whisper_models[key]
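The 0.8.1 changes to episode_identification.py are almost entirely about caching and search limits: parsed subtitle files, per-chunk subtitle text, extracted audio chunks, reference-file lookups, ffprobe durations (get_video_duration behind lru_cache), and loaded Whisper models are all reused instead of being recomputed, and matching now samples 30-second chunks starting 300 seconds into the file (skip_initial_duration) with the English-only tiny.en/base.en models. Below is a minimal usage sketch of the updated matcher, not an official example from the package docs; the cache directory, show name, and MKV path are placeholder assumptions, and running it still needs the project's real dependencies (ffmpeg/ffprobe, torch, openai-whisper) plus reference .srt files under <cache_dir>/data/<show name>/.

# Hypothetical usage sketch of the 0.8.1 EpisodeMatcher caching flow (paths and names are placeholders).
import tempfile
from pathlib import Path

from mkv_episode_matcher.episode_identification import EpisodeMatcher

# get_reference_files() looks for season subtitles at <cache_dir>/data/<show_name>/*.srt
matcher = EpisodeMatcher(
    cache_dir=Path.home() / ".mkv-episode-matcher",  # assumed cache location
    show_name="Example Show",                        # placeholder show name
    min_confidence=0.6,
)

with tempfile.TemporaryDirectory() as temp_dir:
    # identify_episode() tries the tiny.en Whisper model on 30 s chunks starting at
    # skip_initial_duration (300 s), then falls back to base.en; subtitle chunks,
    # audio chunks, reference files, and video durations are all cached for reuse.
    match = matcher.identify_episode(
        "/path/to/Example.Show.S01E03.mkv",  # placeholder MKV path
        temp_dir,
        season_number=1,
    )

if match:
    # The diff shows the match dict carrying at least "confidence" and "matched_at".
    print(match["confidence"], match["matched_at"])

Because get_whisper_model() keeps loaded models in a module-level cache and get_video_duration()/read_file_with_fallback() are wrapped in functools.lru_cache, calling identify_episode() on several files from the same season within one process should avoid reloading the model and re-reading the reference subtitles.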
{mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/mkv_episode_matcher.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mkv-episode-matcher
-Version: 0.7.2
+Version: 0.8.1
 Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
 Home-page: https://github.com/Jsakkos/mkv-episode-matcher
 Author: Jonathan Sakkos
{mkv_episode_matcher-0.7.2 → mkv_episode_matcher-0.8.1}/setup.cfg
RENAMED
@@ -1,6 +1,6 @@
 [metadata]
 name = mkv_episode_matcher
-version = 0.7.2
+version = 0.8.1
 author = Jonathan Sakkos
 author_email = jonathansakkos@gmail.com
 description = The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
All remaining files listed above are renamed from mkv_episode_matcher-0.7.2/ to mkv_episode_matcher-0.8.1/ with no content changes (the per-file view shows "File without changes" for each of them).