mkv-episode-matcher 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mkv-episode-matcher might be problematic. Click here for more details.

@@ -108,75 +108,53 @@ def main():
108
108
  )
109
109
  args = parser.parse_args()
110
110
  if args.check_gpu:
111
- from mkv_episode_matcher.speech_to_text import check_gpu_support
111
+ from mkv_episode_matcher.utils import check_gpu_support
112
112
  check_gpu_support()
113
113
  return
114
114
  logger.debug(f"Command-line arguments: {args}")
115
- open_subtitles_api_key = ""
116
- open_subtitles_user_agent = ""
117
- open_subtitles_username = ""
118
- open_subtitles_password = ""
119
- # Check if API key is provided via command-line argument
120
- tmdb_api_key = args.tmdb_api_key
121
-
122
- # If API key is not provided, try to get it from the cache
123
- if not tmdb_api_key:
124
- cached_config = get_config(CONFIG_FILE)
125
- if cached_config:
126
- tmdb_api_key = cached_config.get("tmdb_api_key")
127
115
 
128
- # If API key is still not available, prompt the user to input it
116
+ # Load configuration once
117
+ config = get_config(CONFIG_FILE)
118
+
119
+ # Get TMDb API key
120
+ tmdb_api_key = args.tmdb_api_key or config.get("tmdb_api_key")
129
121
  if not tmdb_api_key:
130
122
  tmdb_api_key = input("Enter your TMDb API key: ")
131
- # Cache the API key
132
-
133
123
  logger.debug(f"TMDb API Key: {tmdb_api_key}")
124
+
134
125
  logger.debug("Getting OpenSubtitles API key")
135
- cached_config = get_config(CONFIG_FILE)
136
- try:
137
- open_subtitles_api_key = cached_config.get("open_subtitles_api_key")
138
- open_subtitles_user_agent = cached_config.get("open_subtitles_user_agent")
139
- open_subtitles_username = cached_config.get("open_subtitles_username")
140
- open_subtitles_password = cached_config.get("open_subtitles_password")
141
- except:
142
- pass
126
+ open_subtitles_api_key = config.get("open_subtitles_api_key")
127
+ open_subtitles_user_agent = config.get("open_subtitles_user_agent")
128
+ open_subtitles_username = config.get("open_subtitles_username")
129
+ open_subtitles_password = config.get("open_subtitles_password")
130
+
143
131
  if args.get_subs:
144
132
  if not open_subtitles_api_key:
145
133
  open_subtitles_api_key = input("Enter your OpenSubtitles API key: ")
146
-
147
134
  if not open_subtitles_user_agent:
148
135
  open_subtitles_user_agent = input("Enter your OpenSubtitles User Agent: ")
149
-
150
136
  if not open_subtitles_username:
151
137
  open_subtitles_username = input("Enter your OpenSubtitles Username: ")
152
-
153
138
  if not open_subtitles_password:
154
139
  open_subtitles_password = input("Enter your OpenSubtitles Password: ")
155
-
156
- # If show directory is provided via command-line argument, use it
157
- show_dir = args.show_dir
140
+
141
+ # Use config for show directory and tesseract path
142
+ show_dir = args.show_dir or config.get("show_dir")
143
+ if not show_dir:
144
+ show_dir = input("Enter the main directory of the show:")
145
+ logger.info(f"Show Directory: {show_dir}")
158
146
  if not show_dir:
159
- show_dir = cached_config.get("show_dir")
160
- if not show_dir:
161
- # If show directory is not provided, prompt the user to input it
162
- show_dir = input("Enter the main directory of the show:")
163
- logger.info(f"Show Directory: {show_dir}")
164
- # if the user does not provide a show directory, make the default show directory the current working directory
165
- if not show_dir:
166
- show_dir = os.getcwd()
147
+ show_dir = os.getcwd()
148
+
167
149
  if not args.tesseract_path:
168
- tesseract_path = cached_config.get("tesseract_path")
169
-
150
+ tesseract_path = config.get("tesseract_path")
170
151
  if not tesseract_path:
171
- tesseract_path = input(
172
- r"Enter the path to the tesseract executable: ['C:\Program Files\Tesseract-OCR\tesseract.exe']"
173
- )
174
-
152
+ tesseract_path = input(r"Enter the path to the tesseract executable: ['C:\Program Files\Tesseract-OCR\tesseract.exe']")
175
153
  else:
176
154
  tesseract_path = args.tesseract_path
177
155
  logger.debug(f"Teesseract Path: {tesseract_path}")
178
156
  logger.debug(f"Show Directory: {show_dir}")
179
-
157
+
180
158
  # Set the configuration
181
159
  set_config(
182
160
  tmdb_api_key,
@@ -18,7 +18,7 @@ class EpisodeMatcher:
18
18
  self.cache_dir = Path(cache_dir)
19
19
  self.min_confidence = min_confidence
20
20
  self.show_name = show_name
21
- self.chunk_duration = 300 # 5 minutes
21
+ self.chunk_duration = 30
22
22
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
23
23
  self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
24
24
  self.temp_dir.mkdir(exist_ok=True)
@@ -44,7 +44,9 @@ class EpisodeMatcher:
44
44
  '-ss', str(start_time),
45
45
  '-t', str(self.chunk_duration),
46
46
  '-i', mkv_file,
47
- '-vn',
47
+ '-vn', # Disable video
48
+ '-sn', # Disable subtitles
49
+ '-dn', # Disable data streams
48
50
  '-acodec', 'pcm_s16le',
49
51
  '-ar', '16000',
50
52
  '-ac', '1',
@@ -80,31 +82,73 @@ class EpisodeMatcher:
80
82
  except Exception as e:
81
83
  logger.error(f"Error loading reference chunk from {srt_file}: {e}")
82
84
  return ''
83
-
84
- def identify_episode(self, video_file, temp_dir, season_number):
85
- try:
86
- # Get video duration
87
- duration = float(subprocess.check_output([
88
- 'ffprobe', '-v', 'error',
89
- '-show_entries', 'format=duration',
90
- '-of', 'default=noprint_wrappers=1:nokey=1',
91
- video_file
92
- ]).decode())
85
+ def _try_match_with_model(self, video_file, model_name, max_duration, reference_files):
86
+ """
87
+ Attempt to match using specified model, checking multiple 30-second chunks up to max_duration.
88
+
89
+ Args:
90
+ video_file: Path to the video file
91
+ model_name: Name of the Whisper model to use
92
+ max_duration: Maximum duration in seconds to check
93
+ reference_files: List of reference subtitle files
94
+ """
95
+ # Use cached model
96
+ model = get_whisper_model(model_name, self.device)
97
+
98
+ # Calculate number of chunks to check (30 seconds each)
99
+ num_chunks = max_duration // self.chunk_duration
100
+
101
+ for chunk_idx in range(num_chunks):
102
+ start_time = chunk_idx * self.chunk_duration
103
+ logger.debug(f"Trying {model_name} model at {start_time} seconds")
93
104
 
94
- total_chunks = int(np.ceil(duration / self.chunk_duration))
105
+ audio_path = self.extract_audio_chunk(video_file, start_time)
95
106
 
96
- # Load Whisper model
97
- model = whisper.load_model("base", device=self.device)
107
+ result = model.transcribe(
108
+ audio_path,
109
+ task="transcribe",
110
+ language="en"
111
+ )
98
112
 
99
- # Get season-specific reference files using multiple patterns
100
- reference_dir = self.cache_dir / "data" / self.show_name
113
+ chunk_text = result["text"]
114
+ best_confidence = 0
115
+ best_match = None
101
116
 
102
- # Create season patterns for different formats
117
+ # Compare with reference chunks
118
+ for ref_file in reference_files:
119
+ ref_text = self.load_reference_chunk(ref_file, chunk_idx)
120
+ confidence = self.chunk_score(chunk_text, ref_text)
121
+
122
+ if confidence > best_confidence:
123
+ best_confidence = confidence
124
+ best_match = ref_file
125
+
126
+ if confidence > self.min_confidence:
127
+ season_ep = re.search(r'S(\d+)E(\d+)', best_match.stem)
128
+ if season_ep:
129
+ season, episode = map(int, season_ep.groups())
130
+ return {
131
+ 'season': season,
132
+ 'episode': episode,
133
+ 'confidence': best_confidence,
134
+ 'reference_file': str(best_match),
135
+ 'matched_at': start_time
136
+ }
137
+
138
+ logger.debug(f"No match found at {start_time} seconds (best confidence: {best_confidence:.2f})")
139
+
140
+ return None
141
+
142
+ def identify_episode(self, video_file, temp_dir, season_number):
143
+ """Progressive episode identification with faster initial attempt."""
144
+ try:
145
+ # Get reference files first
146
+ reference_dir = self.cache_dir / "data" / self.show_name
103
147
  patterns = [
104
- f"S{season_number:02d}E", # S01E01
105
- f"S{season_number}E", # S1E01
106
- f"{season_number:02d}x", # 01x01
107
- f"{season_number}x", # 1x01
148
+ f"S{season_number:02d}E",
149
+ f"S{season_number}E",
150
+ f"{season_number:02d}x",
151
+ f"{season_number}x",
108
152
  ]
109
153
 
110
154
  reference_files = []
@@ -114,55 +158,43 @@ class EpisodeMatcher:
114
158
  for p in patterns)]
115
159
  reference_files.extend(files)
116
160
 
117
- # Remove duplicates while preserving order
118
161
  reference_files = list(dict.fromkeys(reference_files))
119
162
 
120
163
  if not reference_files:
121
164
  logger.error(f"No reference files found for season {season_number}")
122
165
  return None
123
-
124
- # Process chunks until match found
125
- for chunk_idx in range(min(3, total_chunks)): # Only try first 3 chunks
126
- start_time = chunk_idx * self.chunk_duration
127
- audio_path = self.extract_audio_chunk(video_file, start_time)
128
-
129
- # Transcribe chunk
130
- result = model.transcribe(
131
- audio_path,
132
- task="transcribe",
133
- language="en"
134
- )
135
-
136
- chunk_text = result["text"]
137
- best_confidence = 0
138
- best_match = None
139
-
140
- # Compare with reference chunks
141
- for ref_file in reference_files:
142
- ref_text = self.load_reference_chunk(ref_file, chunk_idx)
143
- confidence = self.chunk_score(chunk_text, ref_text)
144
-
145
- if confidence > best_confidence:
146
- best_confidence = confidence
147
- best_match = ref_file
148
-
149
- if confidence > self.min_confidence:
150
- season_ep = re.search(r'S(\d+)E(\d+)', best_match.stem)
151
- if season_ep:
152
- season, episode = map(int, season_ep.groups())
153
- return {
154
- 'season': season,
155
- 'episode': episode,
156
- 'confidence': best_confidence,
157
- 'reference_file': str(best_match),
158
- }
166
+
167
+ # Try with tiny model first (fastest) - check first 2 minutes
168
+ logger.info("Attempting match with tiny model (first 2 minutes)...")
169
+ match = self._try_match_with_model(video_file, "tiny", 120, reference_files)
170
+ if match and match['confidence'] > 0.65: # Slightly lower threshold for tiny
171
+ logger.info(f"Successfully matched with tiny model at {match['matched_at']}s (confidence: {match['confidence']:.2f})")
172
+ return match
173
+
174
+ # If unsuccessful with tiny, try base model on first 3 minutes
175
+ logger.info("Tiny model match failed, trying base model (first 3 minutes)...")
176
+ match = self._try_match_with_model(video_file, "base", 180, reference_files)
177
+ if match and match['confidence'] > self.min_confidence:
178
+ logger.info(f"Successfully matched with base model at {match['matched_at']}s (confidence: {match['confidence']:.2f})")
179
+ return match
159
180
 
181
+ # If still no match, try base model on up to 10 minutes
182
+ logger.info("No match in first 3 minutes, extending base model search to 10 minutes...")
183
+ match = self._try_match_with_model(video_file, "base", 600, reference_files)
184
+ if match:
185
+ logger.info(f"Successfully matched with base model at {match['matched_at']}s (confidence: {match['confidence']:.2f})")
186
+ return match
187
+
188
+ logger.info("Speech recognition match failed")
160
189
  return None
161
190
 
162
191
  finally:
163
192
  # Cleanup temp files
164
193
  for file in self.temp_dir.glob("chunk_*.wav"):
165
- file.unlink()
194
+ try:
195
+ file.unlink()
196
+ except Exception as e:
197
+ logger.warning(f"Failed to delete temp file {file}: {e}")
166
198
 
167
199
  def detect_file_encoding(file_path):
168
200
  """
@@ -279,4 +311,19 @@ class SubtitleReader:
279
311
  logger.warning(f"Error parsing subtitle block: {e}")
280
312
  continue
281
313
 
282
- return text_lines
314
+ return text_lines
315
+
316
+ _whisper_models = {}
317
+
318
+ def get_whisper_model(model_name="tiny", device=None):
319
+ """Cache whisper models to avoid reloading."""
320
+ global _whisper_models
321
+ if device is None:
322
+ device = "cuda" if torch.cuda.is_available() else "cpu"
323
+
324
+ key = f"{model_name}_{device}"
325
+ if key not in _whisper_models:
326
+ _whisper_models[key] = whisper.load_model(model_name, device=device)
327
+ logger.info(f"Loaded {model_name} model on {device}")
328
+
329
+ return _whisper_models[key]
@@ -19,7 +19,6 @@ from mkv_episode_matcher.utils import (
19
19
  process_srt_files,
20
20
  compare_and_rename_files,get_valid_seasons,rename_episode_file
21
21
  )
22
- from mkv_episode_matcher.speech_to_text import process_speech_to_text
23
22
  from mkv_episode_matcher.episode_identification import EpisodeMatcher
24
23
 
25
24
  def process_show(season=None, dry_run=False, get_subs=False):
@@ -32,7 +31,7 @@ def process_show(season=None, dry_run=False, get_subs=False):
32
31
  # Early check for reference files
33
32
  reference_dir = Path(CACHE_DIR) / "data" / show_name
34
33
  reference_files = list(reference_dir.glob("*.srt"))
35
- if not reference_files:
34
+ if (not get_subs) and (not reference_files):
36
35
  logger.error(f"No reference subtitle files found in {reference_dir}")
37
36
  logger.info("Please download reference subtitles first")
38
37
  return
@@ -67,7 +66,7 @@ def process_show(season=None, dry_run=False, get_subs=False):
67
66
  if get_subs:
68
67
  show_id = fetch_show_id(matcher.show_name)
69
68
  if show_id:
70
- get_subtitles(show_id, seasons={season_num})
69
+ get_subtitles(show_id, seasons={season_num}, config=config)
71
70
 
72
71
  unmatched_files = []
73
72
  for mkv_file in mkv_files:
@@ -76,8 +75,6 @@ def process_show(season=None, dry_run=False, get_subs=False):
76
75
 
77
76
  if match:
78
77
  new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
79
- new_path = os.path.join(season_path, new_name)
80
-
81
78
  logger.info(f"Speech matched {os.path.basename(mkv_file)} to {new_name} "
82
79
  f"(confidence: {match['confidence']:.2f})")
83
80
 
@@ -105,4 +102,4 @@ def process_show(season=None, dry_run=False, get_subs=False):
105
102
  finally:
106
103
  if not dry_run:
107
104
  shutil.rmtree(temp_dir)
108
- cleanup_ocr_files(show_dir)
105
+ cleanup_ocr_files(show_dir)
@@ -2,7 +2,7 @@
2
2
  import os
3
3
  import re
4
4
  import shutil
5
-
5
+ import torch
6
6
  import requests
7
7
  from loguru import logger
8
8
  from opensubtitlescom import OpenSubtitles
@@ -121,16 +121,17 @@ def rename_episode_file(original_file_path, new_filename):
121
121
  logger.error(f"Failed to rename file: {e}")
122
122
  return None
123
123
 
124
- def get_subtitles(show_id, seasons: set[int]):
124
+ def get_subtitles(show_id, seasons: set[int], config=None):
125
125
  """
126
126
  Retrieves and saves subtitles for a given TV show and seasons.
127
127
 
128
128
  Args:
129
129
  show_id (int): The ID of the TV show.
130
130
  seasons (Set[int]): A set of season numbers for which subtitles should be retrieved.
131
+ config (Config object, optional): Preloaded configuration.
131
132
  """
132
- logger.info(f"Getting subtitles for show ID {show_id}")
133
- config = get_config(CONFIG_FILE)
133
+ if config is None:
134
+ config = get_config(CONFIG_FILE)
134
135
  show_dir = config.get("show_dir")
135
136
  series_name = sanitize_filename(os.path.basename(show_dir))
136
137
  tmdb_api_key = config.get("tmdb_api_key")
@@ -388,4 +389,11 @@ def compare_text(text1, text2):
388
389
 
389
390
  # Compare the two lists of text lines
390
391
  matching_lines = set(flat_text1).intersection(flat_text2)
391
- return len(matching_lines)
392
+ return len(matching_lines)
393
+
394
+ def check_gpu_support():
395
+ logger.info('Checking GPU support...')
396
+ if torch.cuda.is_available():
397
+ logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
398
+ else:
399
+ logger.warning("CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mkv-episode-matcher
3
- Version: 0.4.1
3
+ Version: 0.5.0
4
4
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
5
5
  Home-page: https://github.com/Jsakkos/mkv-episode-matcher
6
6
  Author: Jonathan Sakkos
@@ -1,14 +1,13 @@
1
1
  mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
2
2
  mkv_episode_matcher/__init__.py,sha256=aNlpgTo1kHVrBcR3SH6wRmCgKu8KjNTki1ZvFfAud6s,240
3
- mkv_episode_matcher/__main__.py,sha256=0ZwFXw30744lZ8HIMzUN4F2abpTRCT4glBCM9XQdD3k,7328
3
+ mkv_episode_matcher/__main__.py,sha256=swYnLA2T8hvYMuNmK-EVRPBYzUdMxLP7pb8vxLuAnmc,6508
4
4
  mkv_episode_matcher/config.py,sha256=zDDKBcsDt5fME9BRqiTi7yWKeast1pZh36BNYMvIBYM,2419
5
- mkv_episode_matcher/episode_identification.py,sha256=_6M1UJkq1RGfmLI32u9dNOVvgp5Vf2MjqW2MTx0Gl8E,10329
6
- mkv_episode_matcher/episode_matcher.py,sha256=vunYpHQxyXo3l88BUScXa7_kMYMCV1pXpQxaLa-plZA,4325
5
+ mkv_episode_matcher/episode_identification.py,sha256=jpDWvb16YAHNUzn9fuiHNJ_TB9EYmNg1ahdp361zSf4,12671
6
+ mkv_episode_matcher/episode_matcher.py,sha256=BjPdPQwEHJWx_EOqj_AjKTsEFumdWHGNh7ERP-gfJ2g,4204
7
7
  mkv_episode_matcher/mkv_to_srt.py,sha256=4yxBHRVhgVby0UtQ2aTXGuoQpid8pkgjMIaHU6GCdzc,10857
8
- mkv_episode_matcher/speech_to_text.py,sha256=wVDrFFR7oASGMyq5cfOWmInEIeU9b3MPCLs9EyJrOMw,3128
9
8
  mkv_episode_matcher/subtitle_utils.py,sha256=rYSbd393pKYQW0w4sXgals02WFGqMYYYkQHDbEkWF8c,2666
10
9
  mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
11
- mkv_episode_matcher/utils.py,sha256=VASbougN3rb2iu40iZWkGjKIbahW713TOrFBo_TR9wo,14269
10
+ mkv_episode_matcher/utils.py,sha256=bw2-cQsA4tdL9E1HNVTBuCkjXWDYR1And_1k2_BqdMg,14651
12
11
  mkv_episode_matcher/libraries/pgs2srt/.gitignore,sha256=mt3uxWYZaFurMw_yGE258gWhtGKPVR7e3Ll4ALJpyj4,23
13
12
  mkv_episode_matcher/libraries/pgs2srt/README.md,sha256=olb25G17tj0kxPgp_LcH5I2QWXjgP1m8JFyjYRGz4UU,1374
14
13
  mkv_episode_matcher/libraries/pgs2srt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,8 +18,8 @@ mkv_episode_matcher/libraries/pgs2srt/requirements.txt,sha256=sg87dqWw_qpbwciw-M
19
18
  mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py,sha256=geT1LXdVd8yED9zoJ9K1XfP2JzGcM7u1SslHYrJI09o,10061
20
19
  mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py,sha256=GKtVy_Lxv-z27mkRG8pJF2znKWXwZTot7jL6kN-zIxM,10503
21
20
  mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py,sha256=AlJHUYXl85J95OzGRik-AHVfzDd7Q8BJCvD4Nr8kRIk,938598
22
- mkv_episode_matcher-0.4.1.dist-info/METADATA,sha256=IWKnIxGcG_W1EEtPfYasyMCfyZL7-NL_ScYfNl0HVlg,5579
23
- mkv_episode_matcher-0.4.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
24
- mkv_episode_matcher-0.4.1.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
25
- mkv_episode_matcher-0.4.1.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
26
- mkv_episode_matcher-0.4.1.dist-info/RECORD,,
21
+ mkv_episode_matcher-0.5.0.dist-info/METADATA,sha256=3U2-ciHxqaP2BvtZ-awK5siPcCE9nFjopBID493NgBs,5579
22
+ mkv_episode_matcher-0.5.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
23
+ mkv_episode_matcher-0.5.0.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
24
+ mkv_episode_matcher-0.5.0.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
25
+ mkv_episode_matcher-0.5.0.dist-info/RECORD,,
@@ -1,96 +0,0 @@
1
- # mkv_episode_matcher/speech_to_text.py
2
-
3
- import os
4
- import subprocess
5
- from pathlib import Path
6
- import whisper
7
- import torch
8
- from loguru import logger
9
-
10
- def process_speech_to_text(mkv_file, output_dir):
11
- """
12
- Convert MKV file to transcript using Whisper.
13
-
14
- Args:
15
- mkv_file (str): Path to MKV file
16
- output_dir (str): Directory to save transcript files
17
- """
18
- # Extract audio if not already done
19
- wav_file = extract_audio(mkv_file, output_dir)
20
- if not wav_file:
21
- return None
22
-
23
- # Load model
24
- device = "cuda" if torch.cuda.is_available() else "cpu"
25
- if device == "cuda":
26
- logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
27
- else:
28
- logger.info("CUDA not available. Using CPU.")
29
-
30
- model = whisper.load_model("base", device=device)
31
-
32
- # Generate transcript
33
- segments_file = os.path.join(output_dir, f"{Path(mkv_file).stem}.segments.json")
34
- if not os.path.exists(segments_file):
35
- try:
36
- result = model.transcribe(
37
- wav_file,
38
- task="transcribe",
39
- language="en",
40
- )
41
-
42
- # Save segments
43
- import json
44
- with open(segments_file, 'w', encoding='utf-8') as f:
45
- json.dump(result["segments"], f, indent=2)
46
-
47
- logger.info(f"Transcript saved to {segments_file}")
48
-
49
- except Exception as e:
50
- logger.error(f"Error during transcription: {e}")
51
- return None
52
- else:
53
- logger.info(f"Using existing transcript: {segments_file}")
54
-
55
- return segments_file
56
-
57
- def extract_audio(mkv_file, output_dir):
58
- """
59
- Extract audio from MKV file using FFmpeg.
60
-
61
- Args:
62
- mkv_file (str): Path to MKV file
63
- output_dir (str): Directory to save WAV file
64
-
65
- Returns:
66
- str: Path to extracted WAV file
67
- """
68
- wav_file = os.path.join(output_dir, f"{Path(mkv_file).stem}.wav")
69
-
70
- if not os.path.exists(wav_file):
71
- logger.info(f"Extracting audio from {mkv_file}")
72
- try:
73
- cmd = [
74
- 'ffmpeg',
75
- '-i', mkv_file,
76
- '-vn', # Disable video
77
- '-acodec', 'pcm_s16le', # Convert to PCM format
78
- '-ar', '16000', # Set sample rate to 16kHz
79
- '-ac', '1', # Convert to mono
80
- wav_file
81
- ]
82
- subprocess.run(cmd, check=True, capture_output=True)
83
- logger.info(f"Audio extracted to {wav_file}")
84
- except subprocess.CalledProcessError as e:
85
- logger.error(f"Error extracting audio: {e}")
86
- return None
87
- else:
88
- logger.info(f"Audio file {wav_file} already exists, skipping extraction")
89
-
90
- return wav_file
91
- def check_gpu_support():
92
- logger.info('Checking GPU support...')
93
- if torch.cuda.is_available():
94
- logger.info(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
95
- else:
96
- logger.warning("CUDA not available. Using CPU. Refer to https://pytorch.org/get-started/locally/ for GPU support.")