mkv-episode-matcher: version 0.9.2 → version 0.9.3 (py3-none-any wheel)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mkv-episode-matcher might be problematic. Click here for more details.

@@ -7,7 +7,6 @@ from typing import Optional
7
7
  from loguru import logger
8
8
  from rich.console import Console
9
9
  from rich.panel import Panel
10
- from rich.progress import Progress, SpinnerColumn, TextColumn
11
10
  from rich.prompt import Confirm, Prompt
12
11
 
13
12
  from mkv_episode_matcher import __version__
@@ -62,15 +61,17 @@ def print_welcome_message():
62
61
  console.print()
63
62
 
64
63
 
65
- def confirm_api_key(config_value: Optional[str], key_name: str, description: str) -> str:
64
+ def confirm_api_key(
65
+ config_value: Optional[str], key_name: str, description: str
66
+ ) -> str:
66
67
  """
67
68
  Confirm if the user wants to use an existing API key or enter a new one.
68
-
69
+
69
70
  Args:
70
71
  config_value: The current value from the config
71
72
  key_name: The name of the key
72
73
  description: Description of the key for user information
73
-
74
+
74
75
  Returns:
75
76
  The API key to use
76
77
  """
@@ -79,7 +80,7 @@ def confirm_api_key(config_value: Optional[str], key_name: str, description: str
79
80
  console.print(f"Current value: [green]{mask_api_key(config_value)}[/green]")
80
81
  if Confirm.ask("Use existing key?", default=True):
81
82
  return config_value
82
-
83
+
83
84
  return Prompt.ask(f"Enter your {key_name}")
84
85
 
85
86
 
@@ -95,10 +96,10 @@ def mask_api_key(key: str) -> str:
95
96
  def select_season(seasons):
96
97
  """
97
98
  Allow user to select a season from a list.
98
-
99
+
99
100
  Args:
100
101
  seasons: List of available seasons
101
-
102
+
102
103
  Returns:
103
104
  Selected season number or None for all seasons
104
105
  """
@@ -106,21 +107,51 @@ def select_season(seasons):
106
107
  for i, season in enumerate(seasons, 1):
107
108
  season_num = Path(season).name.replace("Season ", "")
108
109
  console.print(f" {i}. Season {season_num}")
109
-
110
- console.print(f" 0. All Seasons")
111
-
110
+
111
+ console.print(" 0. All Seasons")
112
+
112
113
  choice = Prompt.ask(
113
114
  "Select a season number (0 for all)",
114
115
  choices=[str(i) for i in range(len(seasons) + 1)],
115
- default="0"
116
+ default="0",
116
117
  )
117
-
118
+
118
119
  if int(choice) == 0:
119
120
  return None
120
-
121
+
121
122
  selected_season = seasons[int(choice) - 1]
122
123
  return int(Path(selected_season).name.replace("Season ", ""))
123
124
 
125
+ def onboarding(config_path):
126
+ """Prompt user for all required config values, showing existing as defaults."""
127
+ config = get_config(config_path) if config_path.exists() else {}
128
+
129
+ def ask_with_default(prompt_text, key, description, secret=False):
130
+ current = config.get(key)
131
+ if current:
132
+ console.print(f"[cyan]{key}:[/cyan] {description}")
133
+ console.print(f"Current value: [green]{mask_api_key(current) if secret else current}[/green]")
134
+ if Confirm.ask("Use existing value?", default=True):
135
+ return current
136
+ return Prompt.ask(f"Enter your {key}", default=current or "")
137
+
138
+ tmdb_api_key = ask_with_default("TMDb API key", "tmdb_api_key", "Used to lookup show and episode information. To get your API key, create an account at https://www.themoviedb.org/ and follow the instructions at https://developer.themoviedb.org/docs/getting-started", secret=True)
139
+ open_subtitles_username = ask_with_default("OpenSubtitles Username", "open_subtitles_username", "Account username for OpenSubtitles. To create an account, visit https://www.opensubtitles.com/ then click 'Register'")
140
+ open_subtitles_password = ask_with_default("OpenSubtitles Password", "open_subtitles_password", "Account password for OpenSubtitles", secret=True)
141
+ open_subtitles_user_agent = ask_with_default("OpenSubtitles Consumer Name", "open_subtitles_user_agent", "Required for subtitle downloads. Go to https://www.opensubtitles.com/en/consumers, click 'New Consumer', give it a name, then click 'Save'")
142
+ open_subtitles_api_key = ask_with_default("OpenSubtitles API key", "open_subtitles_api_key", "Required for subtitle downloads. Enter the API key linked with the OpenSubtitles Consumer that you created in the previous step.", secret=True)
143
+ show_dir = ask_with_default("Show Directory", "show_dir", "Main directory of the show")
144
+
145
+ set_config(
146
+ tmdb_api_key,
147
+ open_subtitles_api_key,
148
+ open_subtitles_user_agent,
149
+ open_subtitles_username,
150
+ open_subtitles_password,
151
+ show_dir,
152
+ config_path,
153
+ )
154
+ console.print("[bold green]Onboarding complete! Configuration saved.[/bold green]")
124
155
 
125
156
  @logger.catch
126
157
  def main():
@@ -165,7 +196,8 @@ def main():
165
196
  help="Check if GPU is available for faster processing",
166
197
  )
167
198
  parser.add_argument(
168
- "--verbose", "-v",
199
+ "--verbose",
200
+ "-v",
169
201
  action="store_true",
170
202
  help="Enable verbose output",
171
203
  )
@@ -175,22 +207,30 @@ def main():
175
207
  default=0.7,
176
208
  help="Set confidence threshold for episode matching (0.0-1.0)",
177
209
  )
178
-
210
+ parser.add_argument(
211
+ "--onboard",
212
+ action="store_true",
213
+ help="Run onboarding to set up configuration",
214
+ )
179
215
  args = parser.parse_args()
180
216
  if args.verbose:
181
217
  console.print("[bold cyan]Command-line Arguments[/bold cyan]")
182
218
  console.print(args)
183
219
  if args.check_gpu:
184
220
  from mkv_episode_matcher.utils import check_gpu_support
221
+
185
222
  with console.status("[bold green]Checking GPU support..."):
186
223
  check_gpu_support()
187
224
  return
188
225
 
189
-
190
226
  logger.debug(f"Command-line arguments: {args}")
191
-
192
- # Load configuration once
193
- config = get_config(CONFIG_FILE)
227
+ # Onboarding: run if --onboard or config file missing
228
+ if args.onboard or not CONFIG_FILE.exists():
229
+ onboarding(CONFIG_FILE)
230
+ # Reload config after onboarding
231
+ config = get_config(CONFIG_FILE)
232
+ else:
233
+ config = get_config(CONFIG_FILE)
194
234
 
195
235
  # Get TMDb API key
196
236
  tmdb_api_key = args.tmdb_api_key or config.get("tmdb_api_key")
@@ -202,49 +242,49 @@ def main():
202
242
 
203
243
  if args.get_subs:
204
244
  console.print("[bold cyan]Subtitle Download Configuration[/bold cyan]")
205
-
245
+
206
246
  tmdb_api_key = confirm_api_key(
207
- tmdb_api_key,
208
- "TMDb API key",
209
- "Used to lookup show and episode information"
247
+ tmdb_api_key, "TMDb API key", "Used to lookup show and episode information"
210
248
  )
211
-
249
+
212
250
  open_subtitles_api_key = confirm_api_key(
213
251
  open_subtitles_api_key,
214
252
  "OpenSubtitles API key",
215
- "Required for subtitle downloads"
253
+ "Required for subtitle downloads",
216
254
  )
217
-
255
+
218
256
  open_subtitles_user_agent = confirm_api_key(
219
257
  open_subtitles_user_agent,
220
258
  "OpenSubtitles User Agent",
221
- "Required for subtitle downloads"
259
+ "Required for subtitle downloads",
222
260
  )
223
-
261
+
224
262
  open_subtitles_username = confirm_api_key(
225
263
  open_subtitles_username,
226
264
  "OpenSubtitles Username",
227
- "Account username for OpenSubtitles"
265
+ "Account username for OpenSubtitles",
228
266
  )
229
-
267
+
230
268
  open_subtitles_password = confirm_api_key(
231
269
  open_subtitles_password,
232
270
  "OpenSubtitles Password",
233
- "Account password for OpenSubtitles"
271
+ "Account password for OpenSubtitles",
234
272
  )
235
273
 
236
274
  # Use config for show directory
237
275
  show_dir = args.show_dir or config.get("show_dir")
238
276
  if not show_dir:
239
277
  show_dir = Prompt.ask("Enter the main directory of the show")
240
-
278
+
241
279
  logger.info(f"Show Directory: {show_dir}")
242
280
  if not Path(show_dir).exists():
243
- console.print(f"[bold red]Error:[/bold red] Show directory '{show_dir}' does not exist.")
281
+ console.print(
282
+ f"[bold red]Error:[/bold red] Show directory '{show_dir}' does not exist."
283
+ )
244
284
  return
245
-
285
+
246
286
  if not show_dir:
247
- show_dir = os.getcwd()
287
+ show_dir = Path.cwd()
248
288
  console.print(f"Using current directory: [cyan]{show_dir}[/cyan]")
249
289
 
250
290
  logger.debug(f"Show Directory: {show_dir}")
@@ -274,25 +314,27 @@ def main():
274
314
  border_style="yellow",
275
315
  )
276
316
  )
277
-
317
+
278
318
  seasons = get_valid_seasons(show_dir)
279
319
  if not seasons:
280
- console.print("[bold red]Error:[/bold red] No seasons with .mkv files found in the show directory.")
320
+ console.print(
321
+ "[bold red]Error:[/bold red] No seasons with .mkv files found in the show directory."
322
+ )
281
323
  return
282
-
324
+
283
325
  # If season wasn't specified and there are multiple seasons, let user choose
284
326
  selected_season = args.season
285
327
  if selected_season is None and len(seasons) > 1:
286
328
  selected_season = select_season(seasons)
287
-
329
+
288
330
  # Show what's going to happen
289
331
  show_name = Path(show_dir).name
290
332
  season_text = f"Season {selected_season}" if selected_season else "all seasons"
291
-
333
+
292
334
  console.print(
293
335
  f"[bold green]Processing[/bold green] [cyan]{show_name}[/cyan], {season_text}"
294
336
  )
295
-
337
+
296
338
  # # Setup progress spinner
297
339
  # with Progress(
298
340
  # TextColumn("[bold green]Processing...[/bold green]"),
@@ -300,15 +342,15 @@ def main():
300
342
  # ) as progress:
301
343
  # task = progress.add_task("", total=None)
302
344
  process_show(
303
- selected_season,
304
- dry_run=args.dry_run,
305
- get_subs=args.get_subs,
345
+ selected_season,
346
+ dry_run=args.dry_run,
347
+ get_subs=args.get_subs,
306
348
  verbose=args.verbose,
307
- confidence=args.confidence
349
+ confidence=args.confidence,
308
350
  )
309
-
351
+
310
352
  console.print("[bold green]✓[/bold green] Processing completed successfully!")
311
-
353
+
312
354
  # Show where logs are stored
313
355
  console.print(f"\n[dim]Logs available at: {log_dir}[/dim]")
314
356
 
@@ -323,4 +365,4 @@ if __name__ == "__main__":
323
365
  except Exception as e:
324
366
  console.print(f"\n[bold red]Error:[/bold red] {str(e)}")
325
367
  logger.exception("Unhandled exception")
326
- sys.exit(1)
368
+ sys.exit(1)
@@ -1,27 +1,30 @@
1
1
  import re
2
2
  import subprocess
3
3
  import tempfile
4
+ from functools import lru_cache
4
5
  from pathlib import Path
5
- from rich import print
6
- from rich.console import Console
6
+
7
7
  import chardet
8
8
  import numpy as np
9
9
  import torch
10
10
  import whisper
11
11
  from loguru import logger
12
12
  from rapidfuzz import fuzz
13
+ from rich import print
14
+ from rich.console import Console
15
+
13
16
  from mkv_episode_matcher.utils import extract_season_episode
14
- from functools import lru_cache
15
17
 
16
18
  console = Console()
17
19
 
20
+
18
21
  class SubtitleCache:
19
22
  """Cache for storing parsed subtitle data to avoid repeated loading and parsing."""
20
-
23
+
21
24
  def __init__(self):
22
25
  self.subtitles = {} # {file_path: parsed_content}
23
26
  self.chunk_cache = {} # {(file_path, chunk_idx): text}
24
-
27
+
25
28
  def get_subtitle_content(self, srt_file):
26
29
  """Get the full content of a subtitle file, loading it only once."""
27
30
  srt_file = str(srt_file)
@@ -29,18 +32,18 @@ class SubtitleCache:
29
32
  reader = SubtitleReader()
30
33
  self.subtitles[srt_file] = reader.read_srt_file(srt_file)
31
34
  return self.subtitles[srt_file]
32
-
35
+
33
36
  def get_chunk(self, srt_file, chunk_idx, chunk_start, chunk_end):
34
37
  """Get a specific time chunk from a subtitle file, with caching."""
35
38
  srt_file = str(srt_file)
36
39
  cache_key = (srt_file, chunk_idx)
37
-
40
+
38
41
  if cache_key not in self.chunk_cache:
39
42
  content = self.get_subtitle_content(srt_file)
40
43
  reader = SubtitleReader()
41
44
  text_lines = reader.extract_subtitle_chunk(content, chunk_start, chunk_end)
42
45
  self.chunk_cache[cache_key] = " ".join(text_lines)
43
-
46
+
44
47
  return self.chunk_cache[cache_key]
45
48
 
46
49
 
@@ -78,10 +81,10 @@ class EpisodeMatcher:
78
81
  def extract_audio_chunk(self, mkv_file, start_time):
79
82
  """Extract a chunk of audio from MKV file with caching."""
80
83
  cache_key = (str(mkv_file), start_time)
81
-
84
+
82
85
  if cache_key in self.audio_chunks:
83
86
  return self.audio_chunks[cache_key]
84
-
87
+
85
88
  chunk_path = self.temp_dir / f"chunk_{start_time}.wav"
86
89
  if not chunk_path.exists():
87
90
  cmd = [
@@ -105,7 +108,7 @@ class EpisodeMatcher:
105
108
  str(chunk_path),
106
109
  ]
107
110
  subprocess.run(cmd, capture_output=True)
108
-
111
+
109
112
  chunk_path_str = str(chunk_path)
110
113
  self.audio_chunks[cache_key] = chunk_path_str
111
114
  return chunk_path_str
@@ -125,9 +128,11 @@ class EpisodeMatcher:
125
128
  # Apply the same offset as in _try_match_with_model
126
129
  chunk_start = self.skip_initial_duration + (chunk_idx * self.chunk_duration)
127
130
  chunk_end = chunk_start + self.chunk_duration
128
-
129
- return self.subtitle_cache.get_chunk(srt_file, chunk_idx, chunk_start, chunk_end)
130
-
131
+
132
+ return self.subtitle_cache.get_chunk(
133
+ srt_file, chunk_idx, chunk_start, chunk_end
134
+ )
135
+
131
136
  except Exception as e:
132
137
  logger.error(f"Error loading reference chunk from {srt_file}: {e}")
133
138
  return ""
@@ -136,11 +141,11 @@ class EpisodeMatcher:
136
141
  """Get reference subtitle files with caching."""
137
142
  cache_key = (self.show_name, season_number)
138
143
  logger.debug(f"Reference cache key: {cache_key}")
139
-
144
+
140
145
  if cache_key in self.reference_files_cache:
141
146
  logger.debug("Returning cached reference files")
142
147
  return self.reference_files_cache[cache_key]
143
-
148
+
144
149
  reference_dir = self.cache_dir / "data" / self.show_name
145
150
  patterns = [
146
151
  f"S{season_number:02d}E",
@@ -154,15 +159,15 @@ class EpisodeMatcher:
154
159
  files = [
155
160
  f
156
161
  for f in reference_dir.glob("*.srt")
157
- if any(
158
- re.search(f"{p}\\d+", f.name, re.IGNORECASE) for p in patterns
159
- )
162
+ if any(re.search(f"{p}\\d+", f.name, re.IGNORECASE) for p in patterns)
160
163
  ]
161
164
  reference_files.extend(files)
162
165
 
163
166
  # Remove duplicates while preserving order
164
167
  reference_files = list(dict.fromkeys(reference_files))
165
- logger.debug(f"Found {len(reference_files)} reference files for season {season_number}")
168
+ logger.debug(
169
+ f"Found {len(reference_files)} reference files for season {season_number}"
170
+ )
166
171
  self.reference_files_cache[cache_key] = reference_files
167
172
  return reference_files
168
173
 
@@ -183,7 +188,9 @@ class EpisodeMatcher:
183
188
  model = get_whisper_model(model_name, self.device)
184
189
 
185
190
  # Calculate number of chunks to check
186
- num_chunks = min(max_duration // self.chunk_duration, 10) # Limit to 10 chunks for initial check
191
+ num_chunks = min(
192
+ max_duration // self.chunk_duration, 10
193
+ ) # Limit to 10 chunks for initial check
187
194
 
188
195
  # Pre-load all reference chunks for the chunks we'll check
189
196
  for chunk_idx in range(num_chunks):
@@ -200,11 +207,14 @@ class EpisodeMatcher:
200
207
 
201
208
  result = model.transcribe(audio_path, task="transcribe", language="en")
202
209
 
203
-
204
210
  chunk_text = result["text"]
205
- logger.debug(f"Transcription result: {chunk_text} ({len(chunk_text)} characters)")
211
+ logger.debug(
212
+ f"Transcription result: {chunk_text} ({len(chunk_text)} characters)"
213
+ )
206
214
  if len(chunk_text) < 10:
207
- logger.debug(f"Transcription result too short: {chunk_text} ({len(chunk_text)} characters)")
215
+ logger.debug(
216
+ f"Transcription result too short: {chunk_text} ({len(chunk_text)} characters)"
217
+ )
208
218
  continue
209
219
  best_confidence = 0
210
220
  best_match = None
@@ -220,13 +230,17 @@ class EpisodeMatcher:
220
230
  best_match = Path(ref_file)
221
231
 
222
232
  if confidence > self.min_confidence:
223
- print(f"Matched with {best_match} (confidence: {best_confidence:.2f})")
233
+ print(
234
+ f"Matched with {best_match} (confidence: {best_confidence:.2f})"
235
+ )
224
236
  try:
225
237
  season, episode = extract_season_episode(best_match.stem)
226
238
  except Exception as e:
227
239
  print(f"Error extracting season/episode: {e}")
228
240
  continue
229
- print(f"Season: {season}, Episode: {episode} (confidence: {best_confidence:.2f})")
241
+ print(
242
+ f"Season: {season}, Episode: {episode} (confidence: {best_confidence:.2f})"
243
+ )
230
244
  if season and episode:
231
245
  return {
232
246
  "season": season,
@@ -250,16 +264,21 @@ class EpisodeMatcher:
250
264
  if not reference_files:
251
265
  logger.error(f"No reference files found for season {season_number}")
252
266
  return None
253
-
267
+
254
268
  # Cache video duration
255
269
  duration = get_video_duration(video_file)
256
270
 
257
271
  # Try with tiny model first (fastest)
258
272
  logger.info("Attempting match with tiny model...")
259
273
  match = self._try_match_with_model(
260
- video_file, "tiny.en", min(duration, 300), reference_files # Limit to first 5 minutes
274
+ video_file,
275
+ "tiny.en",
276
+ min(duration, 300),
277
+ reference_files, # Limit to first 5 minutes
261
278
  )
262
- if match and match["confidence"] > 0.65: # Slightly lower threshold for tiny
279
+ if (
280
+ match and match["confidence"] > 0.65
281
+ ): # Slightly lower threshold for tiny
263
282
  logger.info(
264
283
  f"Successfully matched with tiny model at {match['matched_at']}s (confidence: {match['confidence']:.2f})"
265
284
  )
@@ -270,7 +289,10 @@ class EpisodeMatcher:
270
289
  "No match with tiny model, extending base model search to 5 minutes..."
271
290
  )
272
291
  match = self._try_match_with_model(
273
- video_file, "base.en", min(duration, 300), reference_files # Limit to first 5 minutes
292
+ video_file,
293
+ "base.en",
294
+ min(duration, 300),
295
+ reference_files, # Limit to first 5 minutes
274
296
  )
275
297
  if match:
276
298
  logger.info(
@@ -320,7 +342,9 @@ def detect_file_encoding(file_path):
320
342
  """
321
343
  try:
322
344
  with open(file_path, "rb") as f:
323
- raw_data = f.read(min(1024 * 1024, Path(file_path).stat().st_size)) # Read up to 1MB
345
+ raw_data = f.read(
346
+ min(1024 * 1024, Path(file_path).stat().st_size)
347
+ ) # Read up to 1MB
324
348
  result = chardet.detect(raw_data)
325
349
  encoding = result["encoding"]
326
350
  confidence = result["confidence"]
@@ -421,10 +445,10 @@ class SubtitleReader:
421
445
  time_parts = timestamp.split(" --> ")
422
446
  start_stamp = time_parts[0].strip()
423
447
  end_stamp = time_parts[1].strip()
424
-
448
+
425
449
  subtitle_start = SubtitleReader.parse_timestamp(start_stamp)
426
450
  subtitle_end = SubtitleReader.parse_timestamp(end_stamp)
427
-
451
+
428
452
  # Check if this subtitle overlaps with our chunk
429
453
  if subtitle_end >= start_time and subtitle_start <= end_time:
430
454
  text = " ".join(lines[2:])
@@ -440,6 +464,7 @@ class SubtitleReader:
440
464
  # Global whisper model cache with better cache key
441
465
  _whisper_models = {}
442
466
 
467
+
443
468
  def get_whisper_model(model_name="tiny", device=None):
444
469
  """Cache whisper models to avoid reloading."""
445
470
  global _whisper_models
@@ -451,4 +476,4 @@ def get_whisper_model(model_name="tiny", device=None):
451
476
  _whisper_models[key] = whisper.load_model(model_name, device=device)
452
477
  logger.info(f"Loaded {model_name} model on {device}")
453
478
 
454
- return _whisper_models[key]
479
+ return _whisper_models[key]
@@ -4,9 +4,8 @@ import re
4
4
  import shutil
5
5
  from pathlib import Path
6
6
 
7
- from loguru import logger
8
7
  from rich.console import Console
9
- from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn
8
+ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
10
9
 
11
10
  from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
12
11
  from mkv_episode_matcher.config import get_config
@@ -25,10 +24,12 @@ from mkv_episode_matcher.utils import (
25
24
  console = Console()
26
25
 
27
26
 
28
- def process_show(season=None, dry_run=False, get_subs=False, verbose=False, confidence=0.6):
27
+ def process_show(
28
+ season=None, dry_run=False, get_subs=False, verbose=False, confidence=0.6
29
+ ):
29
30
  """
30
31
  Process the show using streaming speech recognition with improved UI feedback.
31
-
32
+
32
33
  Args:
33
34
  season (int, optional): Season number to process. Defaults to None (all seasons).
34
35
  dry_run (bool): If True, only simulate actions without making changes.
@@ -48,7 +49,9 @@ def process_show(season=None, dry_run=False, get_subs=False, verbose=False, conf
48
49
  console.print(
49
50
  f"[bold yellow]Warning:[/bold yellow] No reference subtitle files found in {reference_dir}"
50
51
  )
51
- console.print("[cyan]Tip:[/cyan] Use --get-subs to download reference subtitles")
52
+ console.print(
53
+ "[cyan]Tip:[/cyan] Use --get-subs to download reference subtitles"
54
+ )
52
55
  return
53
56
 
54
57
  season_paths = get_valid_seasons(show_dir)
@@ -59,7 +62,9 @@ def process_show(season=None, dry_run=False, get_subs=False, verbose=False, conf
59
62
  if season is not None:
60
63
  season_path = str(Path(show_dir) / f"Season {season}")
61
64
  if season_path not in season_paths:
62
- console.print(f"[bold red]Error:[/bold red] Season {season} has no .mkv files to process")
65
+ console.print(
66
+ f"[bold red]Error:[/bold red] Season {season} has no .mkv files to process"
67
+ )
63
68
  return
64
69
  season_paths = [season_path]
65
70
 
@@ -68,8 +73,7 @@ def process_show(season=None, dry_run=False, get_subs=False, verbose=False, conf
68
73
 
69
74
  for season_path in season_paths:
70
75
  mkv_files = [
71
- f for f in Path(season_path).glob("*.mkv")
72
- if not check_filename(f)
76
+ f for f in Path(season_path).glob("*.mkv") if not check_filename(f)
73
77
  ]
74
78
 
75
79
  if not mkv_files:
@@ -77,7 +81,7 @@ def process_show(season=None, dry_run=False, get_subs=False, verbose=False, conf
77
81
  console.print(f"[dim]No new files to process in Season {season_num}[/dim]")
78
82
  continue
79
83
 
80
- season_num = int(re.search(r'Season (\d+)', season_path).group(1))
84
+ season_num = int(re.search(r"Season (\d+)", season_path).group(1))
81
85
  temp_dir = Path(season_path) / "temp"
82
86
  temp_dir.mkdir(exist_ok=True)
83
87
 
@@ -85,13 +89,19 @@ def process_show(season=None, dry_run=False, get_subs=False, verbose=False, conf
85
89
  if get_subs:
86
90
  show_id = fetch_show_id(matcher.show_name)
87
91
  if show_id:
88
- console.print(f"[bold cyan]Downloading subtitles for Season {season_num}...[/bold cyan]")
92
+ console.print(
93
+ f"[bold cyan]Downloading subtitles for Season {season_num}...[/bold cyan]"
94
+ )
89
95
  get_subtitles(show_id, seasons={season_num}, config=config)
90
96
  else:
91
- console.print("[bold red]Error:[/bold red] Could not find show ID. Skipping subtitle download.")
97
+ console.print(
98
+ "[bold red]Error:[/bold red] Could not find show ID. Skipping subtitle download."
99
+ )
100
+
101
+ console.print(
102
+ f"[bold cyan]Processing {len(mkv_files)} files in Season {season_num}...[/bold cyan]"
103
+ )
92
104
 
93
- console.print(f"[bold cyan]Processing {len(mkv_files)} files in Season {season_num}...[/bold cyan]")
94
-
95
105
  # Process files with a progress bar
96
106
  with Progress(
97
107
  TextColumn("[progress.description]{task.description}"),
@@ -100,24 +110,30 @@ def process_show(season=None, dry_run=False, get_subs=False, verbose=False, conf
100
110
  TimeElapsedColumn(),
101
111
  console=console,
102
112
  ) as progress:
103
- task = progress.add_task(f"[cyan]Matching Season {season_num}[/cyan]", total=len(mkv_files))
104
-
113
+ task = progress.add_task(
114
+ f"[cyan]Matching Season {season_num}[/cyan]", total=len(mkv_files)
115
+ )
116
+
105
117
  for mkv_file in mkv_files:
106
118
  file_basename = Path(mkv_file).name
107
- progress.update(task, description=f"[cyan]Processing[/cyan] {file_basename}")
108
-
119
+ progress.update(
120
+ task, description=f"[cyan]Processing[/cyan] {file_basename}"
121
+ )
122
+
109
123
  if verbose:
110
124
  console.print(f" Analyzing {file_basename}...")
111
-
125
+
112
126
  total_processed += 1
113
127
  match = matcher.identify_episode(mkv_file, temp_dir, season_num)
114
128
 
115
129
  if match:
116
130
  total_matched += 1
117
131
  new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
118
-
119
- confidence_color = "green" if match['confidence'] > 0.8 else "yellow"
120
-
132
+
133
+ confidence_color = (
134
+ "green" if match["confidence"] > 0.8 else "yellow"
135
+ )
136
+
121
137
  if verbose or dry_run:
122
138
  console.print(
123
139
  f" Match: [bold]{file_basename}[/bold] → [bold cyan]{new_name}[/bold cyan] "
@@ -128,24 +144,30 @@ def process_show(season=None, dry_run=False, get_subs=False, verbose=False, conf
128
144
  rename_episode_file(mkv_file, new_name)
129
145
  else:
130
146
  if verbose:
131
- console.print(f" [yellow]No match found for {file_basename}[/yellow]")
132
-
147
+ console.print(
148
+ f" [yellow]No match found for {file_basename}[/yellow]"
149
+ )
150
+
133
151
  progress.advance(task)
134
152
  finally:
135
153
  if not dry_run and temp_dir.exists():
136
154
  shutil.rmtree(temp_dir)
137
-
155
+
138
156
  # Summary
139
157
  console.print()
140
158
  if total_processed == 0:
141
159
  console.print("[yellow]No files needed processing[/yellow]")
142
160
  else:
143
161
  console.print(f"[bold]Summary:[/bold] Processed {total_processed} files")
144
- console.print(f"[bold green]Successfully matched:[/bold green] {total_matched} files")
145
-
162
+ console.print(
163
+ f"[bold green]Successfully matched:[/bold green] {total_matched} files"
164
+ )
165
+
146
166
  if total_matched < total_processed:
147
- console.print(f"[bold yellow]Unmatched:[/bold yellow] {total_processed - total_matched} files")
167
+ console.print(
168
+ f"[bold yellow]Unmatched:[/bold yellow] {total_processed - total_matched} files"
169
+ )
148
170
  console.print(
149
171
  "[cyan]Tip:[/cyan] Try downloading subtitles with --get-subs or "
150
172
  "check that your files are named consistently"
151
- )
173
+ )
@@ -1,7 +1,7 @@
1
1
  # utils.py
2
+ import os
2
3
  import re
3
4
  import shutil
4
- import os
5
5
  from pathlib import Path
6
6
 
7
7
  import requests
@@ -11,7 +11,7 @@ from opensubtitlescom import OpenSubtitles
11
11
  from opensubtitlescom.exceptions import OpenSubtitlesException
12
12
  from rich.console import Console
13
13
  from rich.panel import Panel
14
- from rich.progress import Progress, SpinnerColumn, TextColumn
14
+
15
15
  from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
16
16
  from mkv_episode_matcher.config import get_config
17
17
  from mkv_episode_matcher.subtitle_utils import find_existing_subtitle, sanitize_filename
@@ -24,29 +24,30 @@ def normalize_path(path_str):
24
24
  """
25
25
  Normalize a path string to handle cross-platform path issues.
26
26
  Properly handles trailing slashes and backslashes in both Windows and Unix paths.
27
-
27
+
28
28
  Args:
29
29
  path_str (str): The path string to normalize
30
-
30
+
31
31
  Returns:
32
32
  pathlib.Path: A normalized Path object
33
33
  """
34
34
  # Convert to string if it's a Path object
35
35
  if isinstance(path_str, Path):
36
36
  path_str = str(path_str)
37
-
37
+
38
38
  # Remove trailing slashes or backslashes
39
- path_str = path_str.rstrip('/').rstrip('\\')
40
-
39
+ path_str = path_str.rstrip("/").rstrip("\\")
40
+
41
41
  # Handle Windows paths on non-Windows platforms
42
- if os.name != 'nt' and '\\' in path_str and ':' in path_str[:2]:
42
+ if os.name != "nt" and "\\" in path_str and ":" in path_str[:2]:
43
43
  # This looks like a Windows path on a non-Windows system
44
44
  # Extract the last component which should be the directory/file name
45
- components = path_str.split('\\')
45
+ components = path_str.split("\\")
46
46
  return Path(components[-1])
47
-
47
+
48
48
  return Path(path_str)
49
49
 
50
+
50
51
  def get_valid_seasons(show_dir):
51
52
  """
52
53
  Get all season directories that contain MKV files.
@@ -59,11 +60,7 @@ def get_valid_seasons(show_dir):
59
60
  """
60
61
  # Get all season directories
61
62
  show_path = normalize_path(show_dir)
62
- season_paths = [
63
- str(show_path / d.name)
64
- for d in show_path.iterdir()
65
- if d.is_dir()
66
- ]
63
+ season_paths = [str(show_path / d.name) for d in show_path.iterdir() if d.is_dir()]
67
64
 
68
65
  # Filter seasons to only include those with .mkv files
69
66
  valid_season_paths = []
@@ -219,13 +216,13 @@ def get_subtitles(show_id, seasons: set[int], config=None, max_retries=3):
219
216
  )
220
217
 
221
218
  if existing_subtitle:
222
- logger.info(
223
- f"Subtitle already exists: {Path(existing_subtitle).name}"
224
- )
219
+ logger.info(f"Subtitle already exists: {Path(existing_subtitle).name}")
225
220
  continue
226
221
 
227
222
  # Default to standard format for new downloads
228
- srt_filepath = str(series_cache_dir / f"{series_name} - S{season:02d}E{episode:02d}.srt")
223
+ srt_filepath = str(
224
+ series_cache_dir / f"{series_name} - S{season:02d}E{episode:02d}.srt"
225
+ )
229
226
 
230
227
  # get the episode info from TMDB
231
228
  url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}"
@@ -245,7 +242,9 @@ def get_subtitles(show_id, seasons: set[int], config=None, max_retries=3):
245
242
  for subtitle in response.data:
246
243
  subtitle_dict = subtitle.to_dict()
247
244
  # Remove special characters and convert to uppercase
248
- filename_clean = re.sub(r"\\W+", " ", subtitle_dict["file_name"]).upper()
245
+ filename_clean = re.sub(
246
+ r"\\W+", " ", subtitle_dict["file_name"]
247
+ ).upper()
249
248
  if f"E{episode:02d}" in filename_clean:
250
249
  logger.info(f"Original filename: {subtitle_dict['file_name']}")
251
250
  retry_count = 0
@@ -257,25 +256,43 @@ def get_subtitles(show_id, seasons: set[int], config=None, max_retries=3):
257
256
  break
258
257
  except OpenSubtitlesException as e:
259
258
  retry_count += 1
260
- logger.error(f"OpenSubtitlesException (attempt {retry_count}): {e}")
261
- console.print(f"[red]OpenSubtitlesException (attempt {retry_count}): {e}[/red]")
259
+ logger.error(
260
+ f"OpenSubtitlesException (attempt {retry_count}): {e}"
261
+ )
262
+ console.print(
263
+ f"[red]OpenSubtitlesException (attempt {retry_count}): {e}[/red]"
264
+ )
262
265
  if retry_count >= max_retries:
263
- user_input = input("Would you like to continue matching? (y/n): ")
264
- if user_input.strip().lower() != 'y':
265
- logger.info("User chose to stop matching due to the error.")
266
+ user_input = input(
267
+ "Would you like to continue matching? (y/n): "
268
+ )
269
+ if user_input.strip().lower() != "y":
270
+ logger.info(
271
+ "User chose to stop matching due to the error."
272
+ )
266
273
  return
267
274
  else:
268
- logger.info("User chose to continue matching despite the error.")
275
+ logger.info(
276
+ "User chose to continue matching despite the error."
277
+ )
269
278
  break
270
279
  except Exception as e:
271
280
  logger.error(f"Failed to download and save subtitle: {e}")
272
- console.print(f"[red]Failed to download and save subtitle: {e}[/red]")
273
- user_input = input("Would you like to continue matching despite the error? (y/n): ")
274
- if user_input.strip().lower() != 'y':
275
- logger.info("User chose to stop matching due to the error.")
281
+ console.print(
282
+ f"[red]Failed to download and save subtitle: {e}[/red]"
283
+ )
284
+ user_input = input(
285
+ "Would you like to continue matching despite the error? (y/n): "
286
+ )
287
+ if user_input.strip().lower() != "y":
288
+ logger.info(
289
+ "User chose to stop matching due to the error."
290
+ )
276
291
  return
277
292
  else:
278
- logger.info("User chose to continue matching despite the error.")
293
+ logger.info(
294
+ "User chose to continue matching despite the error."
295
+ )
279
296
  break
280
297
  else:
281
298
  continue
@@ -453,7 +470,7 @@ def check_gpu_support():
453
470
  if torch.cuda.is_available():
454
471
  logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
455
472
  console.print(
456
- Panel.fit(
473
+ Panel.fit(
457
474
  f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}",
458
475
  title="GPU Support",
459
476
  border_style="magenta",
@@ -464,9 +481,9 @@ def check_gpu_support():
464
481
  "CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support."
465
482
  )
466
483
  console.print(
467
- Panel.fit(
468
- "CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.",
469
- title="GPU Support",
470
- border_style="red",
484
+ Panel.fit(
485
+ "CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.",
486
+ title="GPU Support",
487
+ border_style="red",
488
+ )
471
489
  )
472
- )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkv-episode-matcher
3
- Version: 0.9.2
3
+ Version: 0.9.3
4
4
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
5
5
  Home-page: https://github.com/Jsakkos/mkv-episode-matcher
6
6
  Author: Jonathan Sakkos
@@ -45,15 +45,11 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
45
45
 
46
46
  ## Features
47
47
 
48
- - 🎯 **Automatic Episode Matching**: Uses TMDb to accurately identify episodes
49
- - 🎨 **Rich User Interface**: Color-coded output and progress indicators
50
- - 📝 **Subtitle Extraction**: Extracts subtitles from MKV files
51
- - 🔊 **Speech Recognition**: Uses Whisper for accurate episode identification
52
- - 🚀 **Multi-threaded**: Fast processing of multiple files
48
+ - 🎯 **Automatic Episode Matching**: Uses TMDb and OpenSubtitles to accurately identify episodes
49
+ - 🔊 **Speech Recognition**: Uses OpenAI Whisper for accurate episode identification
53
50
  - ⬇️ **Subtitle Downloads**: Integration with OpenSubtitles
54
51
  - ✨ **Bulk Processing**: Handle entire seasons at once
55
52
  - 🧪 **Dry Run Mode**: Test changes before applying
56
- - 🎮 **Interactive Mode**: User-friendly season selection and configuration
57
53
 
58
54
  ## Prerequisites
59
55
 
@@ -66,15 +62,41 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
66
62
 
67
63
  1. Install the package:
68
64
  ```bash
69
- pip install mkv-episode-matcher
65
+ pip install -U mkv-episode-matcher
70
66
  ```
71
- 2. Download .srt subtitles files to ~/.mkv-episode-matcher/cache/data/Show Name/
67
+ 2. Run onboarding to set up your configuration (first-time users or to update credentials):
68
+ ```bash
69
+ mkv-match --onboard
70
+ ```
71
+ - You will be prompted for:
72
+ - TMDb API key (for episode matching)
73
+ - OpenSubtitles API key, Consumer Name, Username, and Password (for subtitle downloads)
74
+ - Show Directory (main directory of your show)
75
+ - If a config value already exists, you can accept the default or enter a new value.
72
76
 
73
- 3. Run on your show directory:
77
+ 3.
78
+ a. If you set up the TMDb and OpenSubtitles credentials above, automatically fetch subtitles with the `--get-subs` flag.
79
+ b. Alternatively, manually download .srt subtitle files to ~/.mkv-episode-matcher/cache/data/Show Name/
80
+
81
+ 4. Run on your show directory:
74
82
  ```bash
75
83
  mkv-match --show-dir "path/to/your/show"
76
84
  ```
77
85
 
86
+ ## Onboarding & Configuration
87
+
88
+ The onboarding process will prompt you for all required configuration values if you run with `--onboard` or if no config file exists. You can re-run onboarding at any time to update your credentials or show directory.
89
+
90
+ **Required information:**
91
+ - TMDb API key (for episode matching)
92
+ - OpenSubtitles API key (for subtitle downloads)
93
+ - OpenSubtitles Consumer Name (for subtitle downloads)
94
+ - OpenSubtitles Username (for subtitle downloads)
95
+ - OpenSubtitles Password (for subtitle downloads)
96
+ - Show Directory (main directory of your show)
97
+
98
+ If a value already exists, it will be shown as the default and you can accept it or enter a new value.
99
+
78
100
  ## Directory Structure
79
101
 
80
102
  MKV Episode Matcher expects your TV shows to be organized as follows:
@@ -0,0 +1,14 @@
1
+ mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
2
+ mkv_episode_matcher/__init__.py,sha256=u3yZcpuK0ICeUjxYKePvW-zS61E5ss5q2AvqnSHuz9E,240
3
+ mkv_episode_matcher/__main__.py,sha256=iYuO2xWt1Xf_MXxJd_XKCrEgpicawE8LlZWtIq9dk90,12380
4
+ mkv_episode_matcher/config.py,sha256=KuKxvKuOrmpCZ80mjykT6oZeD3uArsq6XPioMBrAxuU,2279
5
+ mkv_episode_matcher/episode_identification.py,sha256=YwkH3cmd79I6zFh4nIuv19Kh4N9vXcP8HnZlmUQTA7c,16953
6
+ mkv_episode_matcher/episode_matcher.py,sha256=r6A9K4g4a8yU5aJ42n2yWuVgu5azBRE9FY3kDcdfe3w,6545
7
+ mkv_episode_matcher/subtitle_utils.py,sha256=z4eYTMAoI8BVzdCNeqHu-9mkhwG8RzxE5BbNjWUJwCg,2552
8
+ mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
9
+ mkv_episode_matcher/utils.py,sha256=Q9XFA-EIbMAsUKRtK5z23G1zgJKtxTcD-TkaHQKYIK8,17956
10
+ mkv_episode_matcher-0.9.3.dist-info/METADATA,sha256=lF6Y5I3N17CWo2KkfJelo2cF7syZWtJGwm0_aQ_ZFk0,6362
11
+ mkv_episode_matcher-0.9.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ mkv_episode_matcher-0.9.3.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
13
+ mkv_episode_matcher-0.9.3.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
14
+ mkv_episode_matcher-0.9.3.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
2
- mkv_episode_matcher/__init__.py,sha256=u3yZcpuK0ICeUjxYKePvW-zS61E5ss5q2AvqnSHuz9E,240
3
- mkv_episode_matcher/__main__.py,sha256=tIx_lahBMvwIGC_LHYHvCP7ILNIFGyyn0Go2gyaVA-0,10006
4
- mkv_episode_matcher/config.py,sha256=KuKxvKuOrmpCZ80mjykT6oZeD3uArsq6XPioMBrAxuU,2279
5
- mkv_episode_matcher/episode_identification.py,sha256=xH5HIa6oC4nXhlqzdqQn1XYQFNUrnbUVlW-R9RsBHq4,16745
6
- mkv_episode_matcher/episode_matcher.py,sha256=OHtBZd3HnLpANe7HgSvcAQIZjilWgKHVOqfju557NyA,6300
7
- mkv_episode_matcher/subtitle_utils.py,sha256=z4eYTMAoI8BVzdCNeqHu-9mkhwG8RzxE5BbNjWUJwCg,2552
8
- mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
9
- mkv_episode_matcher/utils.py,sha256=N2je7Pc1EaTYHL-lwTPjCThUzpvKfo62QesBJwGaKx8,17405
10
- mkv_episode_matcher-0.9.2.dist-info/METADATA,sha256=uHnJI9fMFFNDxeKp35E8TPtkmfrrckbuf4Dt17mdnSA,5357
11
- mkv_episode_matcher-0.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- mkv_episode_matcher-0.9.2.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
13
- mkv_episode_matcher-0.9.2.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
14
- mkv_episode_matcher-0.9.2.dist-info/RECORD,,