mkv-episode-matcher 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mkv-episode-matcher might be problematic. Click here for more details.

@@ -50,6 +50,7 @@ class EpisodeMatcher:
50
50
  self.min_confidence = min_confidence
51
51
  self.show_name = show_name
52
52
  self.chunk_duration = 30
53
+ self.skip_initial_duration = 300
53
54
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
54
55
  self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
55
56
  self.temp_dir.mkdir(exist_ok=True)
@@ -121,7 +122,8 @@ class EpisodeMatcher:
121
122
  str: Combined text from the subtitle chunk
122
123
  """
123
124
  try:
124
- chunk_start = chunk_idx * self.chunk_duration
125
+ # Apply the same offset as in _try_match_with_model
126
+ chunk_start = self.skip_initial_duration + (chunk_idx * self.chunk_duration)
125
127
  chunk_end = chunk_start + self.chunk_duration
126
128
 
127
129
  return self.subtitle_cache.get_chunk(srt_file, chunk_idx, chunk_start, chunk_end)
@@ -133,8 +135,10 @@ class EpisodeMatcher:
133
135
  def get_reference_files(self, season_number):
134
136
  """Get reference subtitle files with caching."""
135
137
  cache_key = (self.show_name, season_number)
138
+ logger.debug(f"Reference cache key: {cache_key}")
136
139
 
137
140
  if cache_key in self.reference_files_cache:
141
+ logger.debug("Returning cached reference files")
138
142
  return self.reference_files_cache[cache_key]
139
143
 
140
144
  reference_dir = self.cache_dir / "data" / self.show_name
@@ -158,7 +162,7 @@ class EpisodeMatcher:
158
162
 
159
163
  # Remove duplicates while preserving order
160
164
  reference_files = list(dict.fromkeys(reference_files))
161
-
165
+ logger.debug(f"Found {len(reference_files)} reference files for season {season_number}")
162
166
  self.reference_files_cache[cache_key] = reference_files
163
167
  return reference_files
164
168
 
@@ -166,7 +170,8 @@ class EpisodeMatcher:
166
170
  self, video_file, model_name, max_duration, reference_files
167
171
  ):
168
172
  """
169
- Attempt to match using specified model, checking multiple 30-second chunks up to max_duration.
173
+ Attempt to match using specified model, checking multiple chunks starting from skip_initial_duration
174
+ and continuing up to max_duration.
170
175
 
171
176
  Args:
172
177
  video_file: Path to the video file
@@ -177,7 +182,7 @@ class EpisodeMatcher:
177
182
  # Use cached model
178
183
  model = get_whisper_model(model_name, self.device)
179
184
 
180
- # Calculate number of chunks to check (30 seconds each)
185
+ # Calculate number of chunks to check
181
186
  num_chunks = min(max_duration // self.chunk_duration, 10) # Limit to 10 chunks for initial check
182
187
 
183
188
  # Pre-load all reference chunks for the chunks we'll check
@@ -186,14 +191,21 @@ class EpisodeMatcher:
186
191
  self.load_reference_chunk(ref_file, chunk_idx)
187
192
 
188
193
  for chunk_idx in range(num_chunks):
189
- start_time = chunk_idx * self.chunk_duration
194
+ # Start at self.skip_initial_duration and check subsequent chunks
195
+ start_time = self.skip_initial_duration + (chunk_idx * self.chunk_duration)
190
196
  logger.debug(f"Trying {model_name} model at {start_time} seconds")
191
197
 
192
198
  audio_path = self.extract_audio_chunk(video_file, start_time)
199
+ logger.debug(f"Extracted audio chunk: {audio_path}")
193
200
 
194
201
  result = model.transcribe(audio_path, task="transcribe", language="en")
195
202
 
203
+
196
204
  chunk_text = result["text"]
205
+ logger.debug(f"Transcription result: {chunk_text} ({len(chunk_text)} characters)")
206
+ if len(chunk_text) < 10:
207
+ logger.debug(f"Transcription result too short: {chunk_text} ({len(chunk_text)} characters)")
208
+ continue
197
209
  best_confidence = 0
198
210
  best_match = None
199
211
 
@@ -245,7 +257,7 @@ class EpisodeMatcher:
245
257
  # Try with tiny model first (fastest)
246
258
  logger.info("Attempting match with tiny model...")
247
259
  match = self._try_match_with_model(
248
- video_file, "tiny", min(duration, 300), reference_files # Limit to first 5 minutes
260
+ video_file, "tiny.en", min(duration, 300), reference_files # Limit to first 5 minutes
249
261
  )
250
262
  if match and match["confidence"] > 0.65: # Slightly lower threshold for tiny
251
263
  logger.info(
@@ -255,10 +267,10 @@ class EpisodeMatcher:
255
267
 
256
268
  # If no match, try base model
257
269
  logger.info(
258
- "No match with tiny model, extending base model search to 10 minutes..."
270
+ "No match with tiny model, extending base model search to 5 minutes..."
259
271
  )
260
272
  match = self._try_match_with_model(
261
- video_file, "base", min(duration, 600), reference_files # Limit to first 10 minutes
273
+ video_file, "base.en", min(duration, 300), reference_files # Limit to first 5 minutes
262
274
  )
263
275
  if match:
264
276
  logger.info(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkv-episode-matcher
3
- Version: 0.8.0
3
+ Version: 0.8.1
4
4
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
5
5
  Home-page: https://github.com/Jsakkos/mkv-episode-matcher
6
6
  Author: Jonathan Sakkos
@@ -2,13 +2,13 @@ mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb
2
2
  mkv_episode_matcher/__init__.py,sha256=u3yZcpuK0ICeUjxYKePvW-zS61E5ss5q2AvqnSHuz9E,240
3
3
  mkv_episode_matcher/__main__.py,sha256=O3GQk5R9BFuA-QNlqfBgDSS7G_W8IGSxiV8CFUbcaLc,10059
4
4
  mkv_episode_matcher/config.py,sha256=EcJJjkekQ7oWtarUkufCYON_QWbQvq55-zMqCTOqSa4,2265
5
- mkv_episode_matcher/episode_identification.py,sha256=IMB1m3-oY4Z31XIWCFjpdXDENwmKMgzjctl3CilthJ4,15926
5
+ mkv_episode_matcher/episode_identification.py,sha256=xH5HIa6oC4nXhlqzdqQn1XYQFNUrnbUVlW-R9RsBHq4,16745
6
6
  mkv_episode_matcher/episode_matcher.py,sha256=SxAbnXuTJITD1o0WohE9heE3Fm9zW_w0Nq3GzqtcIpQ,6329
7
7
  mkv_episode_matcher/subtitle_utils.py,sha256=Hz9b4CKPV07YKTY4dcN3WbvdbvH-S3J4zcb9CiyvPlE,2551
8
8
  mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
9
9
  mkv_episode_matcher/utils.py,sha256=modXMLmt2fpny8liXwqe4ylxnwwfg_98OLOacv5izps,14501
10
- mkv_episode_matcher-0.8.0.dist-info/METADATA,sha256=TcH5g5UfyJop2ZV_tWShEm4O28EkVGLlcpOXbG74mjI,5384
11
- mkv_episode_matcher-0.8.0.dist-info/WHEEL,sha256=7ciDxtlje1X8OhobNuGgi1t-ACdFSelPnSmDPrtlobY,91
12
- mkv_episode_matcher-0.8.0.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
13
- mkv_episode_matcher-0.8.0.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
14
- mkv_episode_matcher-0.8.0.dist-info/RECORD,,
10
+ mkv_episode_matcher-0.8.1.dist-info/METADATA,sha256=JpSdL1OU5UwQb6aPARqV9YzQWtoEdmoJZkmw_7FcUwM,5384
11
+ mkv_episode_matcher-0.8.1.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
12
+ mkv_episode_matcher-0.8.1.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
13
+ mkv_episode_matcher-0.8.1.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
14
+ mkv_episode_matcher-0.8.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.2.0)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5