mkv-episode-matcher 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher/episode_identification.py +20 -8
- {mkv_episode_matcher-0.8.0.dist-info → mkv_episode_matcher-0.8.1.dist-info}/METADATA +1 -1
- {mkv_episode_matcher-0.8.0.dist-info → mkv_episode_matcher-0.8.1.dist-info}/RECORD +6 -6
- {mkv_episode_matcher-0.8.0.dist-info → mkv_episode_matcher-0.8.1.dist-info}/WHEEL +1 -1
- {mkv_episode_matcher-0.8.0.dist-info → mkv_episode_matcher-0.8.1.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.8.0.dist-info → mkv_episode_matcher-0.8.1.dist-info}/top_level.txt +0 -0
|
@@ -50,6 +50,7 @@ class EpisodeMatcher:
|
|
|
50
50
|
self.min_confidence = min_confidence
|
|
51
51
|
self.show_name = show_name
|
|
52
52
|
self.chunk_duration = 30
|
|
53
|
+
self.skip_initial_duration = 300
|
|
53
54
|
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
54
55
|
self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
|
|
55
56
|
self.temp_dir.mkdir(exist_ok=True)
|
|
@@ -121,7 +122,8 @@ class EpisodeMatcher:
|
|
|
121
122
|
str: Combined text from the subtitle chunk
|
|
122
123
|
"""
|
|
123
124
|
try:
|
|
124
|
-
|
|
125
|
+
# Apply the same offset as in _try_match_with_model
|
|
126
|
+
chunk_start = self.skip_initial_duration + (chunk_idx * self.chunk_duration)
|
|
125
127
|
chunk_end = chunk_start + self.chunk_duration
|
|
126
128
|
|
|
127
129
|
return self.subtitle_cache.get_chunk(srt_file, chunk_idx, chunk_start, chunk_end)
|
|
@@ -133,8 +135,10 @@ class EpisodeMatcher:
|
|
|
133
135
|
def get_reference_files(self, season_number):
|
|
134
136
|
"""Get reference subtitle files with caching."""
|
|
135
137
|
cache_key = (self.show_name, season_number)
|
|
138
|
+
logger.debug(f"Reference cache key: {cache_key}")
|
|
136
139
|
|
|
137
140
|
if cache_key in self.reference_files_cache:
|
|
141
|
+
logger.debug("Returning cached reference files")
|
|
138
142
|
return self.reference_files_cache[cache_key]
|
|
139
143
|
|
|
140
144
|
reference_dir = self.cache_dir / "data" / self.show_name
|
|
@@ -158,7 +162,7 @@ class EpisodeMatcher:
|
|
|
158
162
|
|
|
159
163
|
# Remove duplicates while preserving order
|
|
160
164
|
reference_files = list(dict.fromkeys(reference_files))
|
|
161
|
-
|
|
165
|
+
logger.debug(f"Found {len(reference_files)} reference files for season {season_number}")
|
|
162
166
|
self.reference_files_cache[cache_key] = reference_files
|
|
163
167
|
return reference_files
|
|
164
168
|
|
|
@@ -166,7 +170,8 @@ class EpisodeMatcher:
|
|
|
166
170
|
self, video_file, model_name, max_duration, reference_files
|
|
167
171
|
):
|
|
168
172
|
"""
|
|
169
|
-
Attempt to match using specified model, checking multiple
|
|
173
|
+
Attempt to match using specified model, checking multiple chunks starting from skip_initial_duration
|
|
174
|
+
and continuing up to max_duration.
|
|
170
175
|
|
|
171
176
|
Args:
|
|
172
177
|
video_file: Path to the video file
|
|
@@ -177,7 +182,7 @@ class EpisodeMatcher:
|
|
|
177
182
|
# Use cached model
|
|
178
183
|
model = get_whisper_model(model_name, self.device)
|
|
179
184
|
|
|
180
|
-
# Calculate number of chunks to check
|
|
185
|
+
# Calculate number of chunks to check
|
|
181
186
|
num_chunks = min(max_duration // self.chunk_duration, 10) # Limit to 10 chunks for initial check
|
|
182
187
|
|
|
183
188
|
# Pre-load all reference chunks for the chunks we'll check
|
|
@@ -186,14 +191,21 @@ class EpisodeMatcher:
|
|
|
186
191
|
self.load_reference_chunk(ref_file, chunk_idx)
|
|
187
192
|
|
|
188
193
|
for chunk_idx in range(num_chunks):
|
|
189
|
-
|
|
194
|
+
# Start at self.skip_initial_duration and check subsequent chunks
|
|
195
|
+
start_time = self.skip_initial_duration + (chunk_idx * self.chunk_duration)
|
|
190
196
|
logger.debug(f"Trying {model_name} model at {start_time} seconds")
|
|
191
197
|
|
|
192
198
|
audio_path = self.extract_audio_chunk(video_file, start_time)
|
|
199
|
+
logger.debug(f"Extracted audio chunk: {audio_path}")
|
|
193
200
|
|
|
194
201
|
result = model.transcribe(audio_path, task="transcribe", language="en")
|
|
195
202
|
|
|
203
|
+
|
|
196
204
|
chunk_text = result["text"]
|
|
205
|
+
logger.debug(f"Transcription result: {chunk_text} ({len(chunk_text)} characters)")
|
|
206
|
+
if len(chunk_text) < 10:
|
|
207
|
+
logger.debug(f"Transcription result too short: {chunk_text} ({len(chunk_text)} characters)")
|
|
208
|
+
continue
|
|
197
209
|
best_confidence = 0
|
|
198
210
|
best_match = None
|
|
199
211
|
|
|
@@ -245,7 +257,7 @@ class EpisodeMatcher:
|
|
|
245
257
|
# Try with tiny model first (fastest)
|
|
246
258
|
logger.info("Attempting match with tiny model...")
|
|
247
259
|
match = self._try_match_with_model(
|
|
248
|
-
video_file, "tiny", min(duration, 300), reference_files # Limit to first 5 minutes
|
|
260
|
+
video_file, "tiny.en", min(duration, 300), reference_files # Limit to first 5 minutes
|
|
249
261
|
)
|
|
250
262
|
if match and match["confidence"] > 0.65: # Slightly lower threshold for tiny
|
|
251
263
|
logger.info(
|
|
@@ -255,10 +267,10 @@ class EpisodeMatcher:
|
|
|
255
267
|
|
|
256
268
|
# If no match, try base model
|
|
257
269
|
logger.info(
|
|
258
|
-
"No match with tiny model, extending base model search to
|
|
270
|
+
"No match with tiny model, extending base model search to 5 minutes..."
|
|
259
271
|
)
|
|
260
272
|
match = self._try_match_with_model(
|
|
261
|
-
video_file, "base", min(duration,
|
|
273
|
+
video_file, "base.en", min(duration, 300), reference_files # Limit to first 5 minutes
|
|
262
274
|
)
|
|
263
275
|
if match:
|
|
264
276
|
logger.info(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkv-episode-matcher
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
|
|
5
5
|
Home-page: https://github.com/Jsakkos/mkv-episode-matcher
|
|
6
6
|
Author: Jonathan Sakkos
|
|
@@ -2,13 +2,13 @@ mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb
|
|
|
2
2
|
mkv_episode_matcher/__init__.py,sha256=u3yZcpuK0ICeUjxYKePvW-zS61E5ss5q2AvqnSHuz9E,240
|
|
3
3
|
mkv_episode_matcher/__main__.py,sha256=O3GQk5R9BFuA-QNlqfBgDSS7G_W8IGSxiV8CFUbcaLc,10059
|
|
4
4
|
mkv_episode_matcher/config.py,sha256=EcJJjkekQ7oWtarUkufCYON_QWbQvq55-zMqCTOqSa4,2265
|
|
5
|
-
mkv_episode_matcher/episode_identification.py,sha256=
|
|
5
|
+
mkv_episode_matcher/episode_identification.py,sha256=xH5HIa6oC4nXhlqzdqQn1XYQFNUrnbUVlW-R9RsBHq4,16745
|
|
6
6
|
mkv_episode_matcher/episode_matcher.py,sha256=SxAbnXuTJITD1o0WohE9heE3Fm9zW_w0Nq3GzqtcIpQ,6329
|
|
7
7
|
mkv_episode_matcher/subtitle_utils.py,sha256=Hz9b4CKPV07YKTY4dcN3WbvdbvH-S3J4zcb9CiyvPlE,2551
|
|
8
8
|
mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
|
|
9
9
|
mkv_episode_matcher/utils.py,sha256=modXMLmt2fpny8liXwqe4ylxnwwfg_98OLOacv5izps,14501
|
|
10
|
-
mkv_episode_matcher-0.8.
|
|
11
|
-
mkv_episode_matcher-0.8.
|
|
12
|
-
mkv_episode_matcher-0.8.0.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
|
|
13
|
-
mkv_episode_matcher-0.8.0.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
|
|
14
|
-
mkv_episode_matcher-0.8.0.dist-info/RECORD,,
|
|
10
|
+
mkv_episode_matcher-0.8.1.dist-info/METADATA,sha256=JpSdL1OU5UwQb6aPARqV9YzQWtoEdmoJZkmw_7FcUwM,5384
|
|
11
|
+
mkv_episode_matcher-0.8.1.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
12
|
+
mkv_episode_matcher-0.8.1.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
|
|
13
|
+
mkv_episode_matcher-0.8.1.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
|
|
14
|
+
mkv_episode_matcher-0.8.1.dist-info/RECORD,,
|
{mkv_episode_matcher-0.8.0.dist-info → mkv_episode_matcher-0.8.1.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
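{mkv_episode_matcher-0.8.0.dist-info → mkv_episode_matcher-0.8.1.dist-info}/top_level.txt
RENAMED
|
File without changes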
|