GameSentenceMiner 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,359 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import shutil
5
+ from dataclasses import dataclass, field
6
+ from logging.handlers import RotatingFileHandler
7
+ from os.path import expanduser
8
+ from typing import List, Dict
9
+
10
+ import toml
11
+ from dataclasses_json import dataclass_json
12
+
13
+
14
# Voice-activity-detection model selectors (see VAD.selected_vad_model / backup_vad_model).
OFF = 'OFF'
VOSK = 'VOSK'
SILERO = 'SILERO'
WHISPER = 'WHISPER'

# Vosk model identifiers (VOSK_BASE is the default for VAD.vosk_url).
VOSK_BASE = 'BASE'
VOSK_SMALL = 'SMALL'

# Whisper model identifiers (WHISPER_BASE is the default for VAD.whisper_model).
WHISPER_TINY = 'tiny'
WHISPER_BASE = 'base'
WHISPER_SMALL = 'small'
WHISPER_MEDIUM = 'medium'
WHISPER_LARGE = 'large'
# Misspelled historical name, kept as an alias so existing imports keep working.
WHSIPER_LARGE = WHISPER_LARGE

# Presumably log-level names; they are not referenced in the visible code.
INFO = 'INFO'
DEBUG = 'DEBUG'

# Name of the profile created by Config.new() and used for converted legacy configs.
DEFAULT_CONFIG = 'Default'

# Compared against profile names by Config.has_config_for_current_game().
current_game = ''
34
+
35
+
36
@dataclass_json
@dataclass
class General:
    """General settings section of a profile (see ProfileConfig.general)."""

    # Whether the websocket is used, and the address it is reached at.
    use_websocket: bool = True
    websocket_uri: str = "localhost:6677"
    open_config_on_startup: bool = False
42
+
43
+
44
@dataclass_json
@dataclass
class Paths:
    """Filesystem locations and clean-up flags for a profile (see ProfileConfig.paths)."""

    # Defaults are expanded once, when the class body is evaluated.
    folder_to_watch: str = expanduser("~/Videos/OBS")
    audio_destination: str = expanduser("~/Videos/OBS/Audio/")
    screenshot_destination: str = expanduser("~/Videos/OBS/SS/")

    # Flags naming which intermediate artefacts should be removed.
    remove_video: bool = True
    remove_audio: bool = False
    remove_screenshot: bool = False
53
+
54
+
55
@dataclass_json
@dataclass
class Anki:
    """Anki-related settings for a profile (see ProfileConfig.anki)."""

    update_anki: bool = True
    url: str = "http://127.0.0.1:8765"

    # Names of the note fields.
    sentence_field: str = "Sentence"
    sentence_audio_field: str = "SentenceAudio"
    picture_field: str = "Picture"
    word_field: str = "Word"
    previous_sentence_field: str = ""

    # Container fields default to None and are replaced with fresh empty
    # containers in __post_init__, so instances never share a mutable default.
    custom_tags: List[str] = None
    tags_to_check: List[str] = None
    add_game_tag: bool = True
    polling_rate: int = 200
    overwrite_audio: bool = False
    overwrite_picture: bool = True
    anki_custom_fields: Dict[str, str] = None

    def __post_init__(self):
        """Replace any container field left as None with a new empty container."""
        empty_factories = (
            ("custom_tags", list),
            ("anki_custom_fields", dict),
            ("tags_to_check", list),
        )
        for attr_name, make_empty in empty_factories:
            if getattr(self, attr_name) is None:
                setattr(self, attr_name, make_empty())
80
+
81
+
82
@dataclass_json
@dataclass
class Features:
    """Feature toggles for a profile (see ProfileConfig.features).

    get_config() and reload_config() refuse to run when both
    ``backfill_audio`` and ``full_auto`` are enabled.
    """

    full_auto: bool = True
    notify_on_update: bool = True
    open_anki_edit: bool = False
    backfill_audio: bool = False
89
+
90
+
91
@dataclass_json
@dataclass
class Screenshot:
    """Screenshot settings for a profile (see ProfileConfig.screenshot)."""

    # NOTE(review): these three are annotated ``str`` but default to ints.
    # Left untouched because changing either side could alter how values are
    # (de)serialised and how truthiness checks on width/height behave.
    width: str = 0
    height: str = 0
    quality: str = 85

    extension: str = "webp"
    custom_ffmpeg_settings: str = ""
    screenshot_hotkey_updates_anki: bool = False
    seconds_after_line: int = 1
101
+
102
+
103
@dataclass_json
@dataclass
class Audio:
    """Audio settings for a profile (see ProfileConfig.audio)."""

    extension: str = "opus"

    # Offsets are floats; presumably seconds — confirm against the trimming code.
    beginning_offset: float = 0.0
    end_offset: float = 0.5

    ffmpeg_reencode_options: str = ""
    external_tool: str = ""
    anki_media_collection: str = ""
112
+
113
+
114
@dataclass_json
@dataclass
class OBS:
    """OBS connection and behaviour settings for a profile (see ProfileConfig.obs)."""

    enabled: bool = True

    # Connection details.
    host: str = "localhost"
    port: int = 4455
    password: str = "your_password"

    start_buffer: bool = True
    get_game_from_scene: bool = True
    minimum_replay_size: int = 0
124
+
125
+
126
@dataclass_json
@dataclass
class Hotkeys:
    """Hotkey bindings for a profile (see ProfileConfig.hotkeys)."""

    reset_line: str = "f5"
    take_screenshot: str = "f6"
131
+
132
+
133
@dataclass_json
@dataclass
class VAD:
    """Voice-activity-detection settings for a profile (see ProfileConfig.vad)."""

    whisper_model: str = WHISPER_BASE
    do_vad_postprocessing: bool = True
    # Despite the "url" name, the default is the VOSK_BASE model identifier.
    vosk_url: str = VOSK_BASE

    # Primary model, and the one configured as its backup.
    selected_vad_model: str = SILERO
    backup_vad_model: str = OFF

    trim_beginning: bool = False
142
+
143
+
144
@dataclass_json
@dataclass
class ProfileConfig:
    """All settings belonging to one named profile, grouped by section."""

    name: str = 'Default'
    general: General = field(default_factory=General)
    paths: Paths = field(default_factory=Paths)
    anki: Anki = field(default_factory=Anki)
    features: Features = field(default_factory=Features)
    screenshot: Screenshot = field(default_factory=Screenshot)
    audio: Audio = field(default_factory=Audio)
    obs: OBS = field(default_factory=OBS)
    hotkeys: Hotkeys = field(default_factory=Hotkeys)
    vad: VAD = field(default_factory=VAD)

    # This is just for legacy support
    def load_from_toml(self, file_path: str):
        """Populate this profile from a legacy TOML file and write ``config.json``.

        Every value missing from the TOML file keeps its current value on
        this instance. A section missing altogether is treated as empty,
        so older files lacking newer sections can still be migrated.

        Args:
            file_path: Path of the legacy TOML configuration file.

        Returns:
            This instance, after it has been updated.
        """
        with open(file_path, 'r') as f:
            config_data = toml.load(f)

        # (target object, attribute, legacy TOML section, legacy TOML key)
        legacy_settings = [
            (self.paths, 'folder_to_watch', 'paths', 'folder_to_watch'),
            (self.paths, 'audio_destination', 'paths', 'audio_destination'),
            (self.paths, 'screenshot_destination', 'paths', 'screenshot_destination'),

            (self.anki, 'url', 'anki', 'url'),
            (self.anki, 'sentence_field', 'anki', 'sentence_field'),
            (self.anki, 'sentence_audio_field', 'anki', 'sentence_audio_field'),
            (self.anki, 'word_field', 'anki', 'word_field'),
            (self.anki, 'picture_field', 'anki', 'picture_field'),
            (self.anki, 'custom_tags', 'anki', 'custom_tags'),
            (self.anki, 'add_game_tag', 'anki', 'add_game_tag'),
            (self.anki, 'polling_rate', 'anki', 'polling_rate'),
            (self.anki, 'overwrite_audio', 'anki_overwrites', 'overwrite_audio'),
            (self.anki, 'overwrite_picture', 'anki_overwrites', 'overwrite_picture'),

            # NOTE(review): full_auto is read from 'do_vosk_postprocessing', the
            # same key used for vad.do_vad_postprocessing below. This looks like
            # a copy/paste slip, but the intended legacy key cannot be confirmed
            # from this file, so the mapping is preserved.
            (self.features, 'full_auto', 'features', 'do_vosk_postprocessing'),
            (self.features, 'notify_on_update', 'features', 'notify_on_update'),
            (self.features, 'open_anki_edit', 'features', 'open_anki_edit'),
            (self.features, 'backfill_audio', 'features', 'backfill_audio'),

            (self.screenshot, 'width', 'screenshot', 'width'),
            (self.screenshot, 'height', 'screenshot', 'height'),
            (self.screenshot, 'quality', 'screenshot', 'quality'),
            (self.screenshot, 'extension', 'screenshot', 'extension'),
            (self.screenshot, 'custom_ffmpeg_settings', 'screenshot', 'custom_ffmpeg_settings'),

            (self.audio, 'extension', 'audio', 'extension'),
            (self.audio, 'beginning_offset', 'audio', 'beginning_offset'),
            (self.audio, 'end_offset', 'audio', 'end_offset'),
            (self.audio, 'ffmpeg_reencode_options', 'audio', 'ffmpeg_reencode_options'),

            (self.vad, 'whisper_model', 'vosk', 'whisper_model'),
            (self.vad, 'vosk_url', 'vosk', 'url'),
            (self.vad, 'do_vad_postprocessing', 'features', 'do_vosk_postprocessing'),
            (self.vad, 'trim_beginning', 'audio', 'vosk_trim_beginning'),

            (self.obs, 'enabled', 'obs', 'enabled'),
            (self.obs, 'host', 'obs', 'host'),
            (self.obs, 'port', 'obs', 'port'),
            (self.obs, 'password', 'obs', 'password'),

            (self.general, 'use_websocket', 'websocket', 'enabled'),
            (self.general, 'websocket_uri', 'websocket', 'uri'),

            (self.hotkeys, 'reset_line', 'hotkeys', 'reset_line'),
            (self.hotkeys, 'take_screenshot', 'hotkeys', 'take_screenshot'),
        ]

        for target, attr, section, key in legacy_settings:
            current_value = getattr(target, attr)
            # A missing section behaves like an empty one instead of raising KeyError.
            setattr(target, attr, config_data.get(section, {}).get(key, current_value))

        # Path settings may contain "~", so expand them after loading.
        for attr in ('folder_to_watch', 'audio_destination', 'screenshot_destination'):
            setattr(self.paths, attr, expanduser(getattr(self.paths, attr)))

        self.anki.anki_custom_fields = config_data.get('anki_custom_fields', {})

        # Persist the converted settings as JSON in the current working directory.
        with open('config.json', 'w') as f:
            f.write(self.to_json(indent=4))
            print(
                'config.json successfully generated from previous settings. config.toml will no longer be used.')

        return self
225
+
226
@dataclass_json
@dataclass
class Config:
    """Top-level configuration: a set of named profiles plus the active one."""

    configs: Dict[str, ProfileConfig] = field(default_factory=dict)
    current_profile: str = DEFAULT_CONFIG

    @classmethod
    def new(cls):
        """Build a configuration holding one default profile, selected as current."""
        return cls(configs={DEFAULT_CONFIG: ProfileConfig()}, current_profile=DEFAULT_CONFIG)

    def get_config(self) -> ProfileConfig:
        """Return the profile named by ``current_profile`` (KeyError if absent)."""
        active_name = self.current_profile
        return self.configs[active_name]

    def set_config_for_profile(self, profile: str, config: ProfileConfig):
        """Store ``config`` under ``profile``, renaming the config to match."""
        config.name = profile
        self.configs[profile] = config

    def has_config_for_current_game(self):
        """Tell whether a profile exists whose name equals the module-level ``current_game``."""
        return current_game in self.configs

    def get_all_profile_names(self):
        """Return the profile names as a new list."""
        return list(self.configs)
249
+
250
+
251
# Package logger. Its own level is DEBUG so nothing is filtered before the
# handlers apply their individual thresholds.
logger = logging.getLogger("GameSentenceMiner")
logger.setLevel(logging.DEBUG)

# One format shared by both handlers.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Console shows INFO and above; the rotating file keeps DEBUG and above.
console_handler = logging.StreamHandler()
file_handler = RotatingFileHandler("gamesentenceminer.log", maxBytes=10_000_000, backupCount=2, encoding='utf-8')

# Underscore-prefixed loop names are skipped by ``from module import *``.
for _handler, _level in ((console_handler, logging.INFO), (file_handler, logging.DEBUG)):
    _handler.setLevel(_level)
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

# NOTE(review): the file name 'get_config().json' looks like the residue of a
# search-and-replace; nothing visible here reads CONFIG_FILE — confirm before changing.
CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'get_config().json')
temp_directory = ''
275
+
276
def get_app_directory():
    """Return the application's data directory, creating it if necessary.

    The directory is ``<base>/GameSentenceMiner`` where ``<base>`` is the
    ``APPDATA`` environment variable when it is set. ``APPDATA`` is normally
    absent outside Windows, so the fallback is ``XDG_CONFIG_HOME`` and then
    ``~/.config``; without a fallback ``os.path.join`` would be handed
    ``None`` and raise ``TypeError``.

    Returns:
        The path of the (now existing) application directory.
    """
    base_dir = os.getenv('APPDATA')
    if not base_dir:
        base_dir = os.getenv('XDG_CONFIG_HOME') or os.path.join(os.path.expanduser('~'), '.config')
    config_dir = os.path.join(base_dir, 'GameSentenceMiner')
    os.makedirs(config_dir, exist_ok=True)  # Create the directory if it doesn't exist
    return config_dir
281
+
282
def get_config_path():
    """Return the path of ``config.json`` inside the application directory."""
    app_dir = get_app_directory()
    return os.path.join(app_dir, 'config.json')
284
+
285
+
286
def load_config():
    """Load the configuration, migrating older formats when needed.

    Sources are tried in this order:

    1. ``config.json`` in the application directory. A ``config.json`` in the
       current working directory is copied there first if the application
       directory has none. A file without a ``current_profile`` key is
       treated as a single profile, wrapped in a ``Config`` and written back.
    2. A legacy ``config.toml`` in the current working directory.
    3. A brand-new default configuration, which is written to disk.

    Returns:
        The loaded ``Config``, or ``None`` when the JSON file cannot be parsed.
    """
    config_path = get_config_path()

    if os.path.exists('config.json') and not os.path.exists(config_path):
        shutil.copy('config.json', config_path)

    if os.path.exists(config_path):
        try:
            with open(config_path, 'r') as file:
                config_file = json.load(file)

            if "current_profile" in config_file:
                return Config.from_dict(config_file)

            # The data parsed above is reused; the file is not read a second time.
            print("Loading Profile-less Config, Converting to new Config!")
            config = ProfileConfig.from_dict(config_file)
            new_config = Config(configs={DEFAULT_CONFIG: config}, current_profile=DEFAULT_CONFIG)

            print(new_config)

            with open(config_path, 'w') as file:
                json.dump(new_config.to_dict(), file, indent=4)
            return new_config
        except json.JSONDecodeError as e:
            print(f"Error parsing config.json: {e}")
            return None

    if os.path.exists('config.toml'):
        config = ProfileConfig().load_from_toml('config.toml')
        return Config({DEFAULT_CONFIG: config}, current_profile=DEFAULT_CONFIG)

    config = Config.new()
    with open(config_path, 'w') as file:
        json.dump(config.to_dict(), file, indent=4)
    return config
323
+
324
+
325
# Module-wide cached Config. It starts as None and is assigned by
# get_config() (on first use) and by reload_config().
config_instance: Config = None
326
+
327
+
328
def get_config():
    """Return the active profile's configuration, loading it on first use.

    The loaded ``Config`` is cached in the module-level ``config_instance``.

    Returns:
        The ``ProfileConfig`` of the currently selected profile.

    Raises:
        SystemExit: With status 1 when both ``backfill_audio`` and
            ``full_auto`` are enabled, since they cannot be combined.
    """
    global config_instance
    if config_instance is None:
        config_instance = load_config()
    config = config_instance.get_config()

    if config.features.backfill_audio and config.features.full_auto:
        print("Cannot have backfill_audio and obs_full_auto_mode turned on at the same time!")
        # Raise SystemExit directly: the exit() helper is installed by the
        # ``site`` module for interactive use and is not always available.
        raise SystemExit(1)

    return config
340
+
341
+
342
def reload_config():
    """Re-read the configuration from disk and replace the cached instance.

    Raises:
        SystemExit: With status 1 when both ``backfill_audio`` and
            ``full_auto`` are enabled, since they cannot be combined.
    """
    global config_instance
    config_instance = load_config()
    config = config_instance.get_config()

    if config.features.backfill_audio and config.features.full_auto:
        print("Cannot have backfill_audio and obs_full_auto_mode turned on at the same time!")
        # Raise SystemExit directly: the exit() helper is installed by the
        # ``site`` module for interactive use and is not always available.
        raise SystemExit(1)
350
+
351
def get_master_config():
    """Return the cached top-level Config, or None if nothing has been loaded yet."""
    master = config_instance
    return master
353
+
354
def switch_profile_and_save(profile_name):
    """Make ``profile_name`` the active profile and persist the change.

    The configuration is written to the path returned by ``get_config_path()``,
    the same file ``load_config()`` reads. Writing to ``config.json`` in the
    working directory, as before, meant the switch was not picked up on the
    next load.

    Args:
        profile_name: Name of the profile to activate.

    Returns:
        The ``ProfileConfig`` of the newly selected profile.
    """
    config_instance.current_profile = profile_name
    with open(get_config_path(), 'w') as file:
        json.dump(config_instance.to_dict(), file, indent=4)
    return config_instance.get_config()
@@ -0,0 +1,297 @@
1
+ import tempfile
2
+ import time
3
+
4
+ from . import obs
5
+ from . import util
6
+ from . import configuration
7
+ from .configuration import *
8
+ from .util import *
9
+
10
# Common ffmpeg argv prefix that the ffmpeg commands in this module start from.
ffmpeg_base_command_list = ["ffmpeg", "-hide_banner", "-loglevel", "error", '-nostdin']
11
+
12
+
13
def get_screenshot(video_file, time_from_end):
    """Extract a single frame from ``video_file`` and save it as an image.

    Args:
        video_file: Path of the video to read.
        time_from_end: Offset from the end of the video at which to grab the
            frame; a falsy value means 1 before the end.

    Returns:
        The path of the image ffmpeg was asked to write.
    """
    config = get_config()
    seek_from_end = -time_from_end if time_from_end else -1
    output_image = make_unique_file_name(
        config.paths.screenshot_destination + obs.get_current_game(sanitize=True) + f".{config.screenshot.extension}")

    # "-sseof" seeks relative to the end of the input; "-vframes 1" keeps one frame.
    ffmpeg_command = ffmpeg_base_command_list + [
        "-sseof", f"{seek_from_end}",
        "-i", f"{video_file}",
        "-vframes", "1"
    ]

    if config.screenshot.custom_ffmpeg_settings:
        ffmpeg_command.extend(config.screenshot.custom_ffmpeg_settings.replace("\"", "").split(" "))
    else:
        # quality defaults to an int; every argv entry must be a string.
        ffmpeg_command.extend(["-compression_level", "6", "-q:v", str(config.screenshot.quality)])

    if config.screenshot.width or config.screenshot.height:
        # -1 lets ffmpeg derive the missing dimension from the aspect ratio.
        ffmpeg_command.extend(
            ["-vf", f"scale={config.screenshot.width or -1}:{config.screenshot.height or -1}"])

    ffmpeg_command.append(f"{output_image}")

    logger.debug(f"FFMPEG SS Command: {ffmpeg_command}")

    subprocess.run(ffmpeg_command)

    logger.info(f"Screenshot saved to: {output_image}")

    return output_image
43
+
44
+
45
def get_screenshot_time(video_path, line_time):
    """Compute how far from the end of the video the screenshot should be taken.

    The result is derived from the gap between the video's modification time
    and ``line_time``, minus the configured ``seconds_after_line``.

    Args:
        video_path: Path of the video file.
        line_time: Time of the text line; subtracting it from the file
            modification time must yield an object with ``total_seconds()``.

    Returns:
        The computed offset from the end of the video.

    Raises:
        ValueError: If the offset is negative or exceeds the span between
            the line and the end of the video.
    """
    file_length = get_video_duration(video_path)
    modified_at = get_file_modification_time(video_path)

    # Position of the line measured from the start of the video.
    line_position = file_length - (modified_at - line_time).total_seconds()
    span_to_end = file_length - line_position

    time_from_end = span_to_end - get_config().screenshot.seconds_after_line

    if time_from_end < 0 or time_from_end > span_to_end:
        raise ValueError("Calculated screenshot time is out of bounds for trimmed video.")

    return time_from_end
58
+
59
+
60
def process_image(image_file):
    """Convert ``image_file`` with ffmpeg using the screenshot settings.

    Args:
        image_file: Path of the image to convert.

    Returns:
        The path of the image ffmpeg was asked to write.
    """
    config = get_config()
    output_image = make_unique_file_name(
        config.paths.screenshot_destination + obs.get_current_game(sanitize=True) + f".{config.screenshot.extension}")

    ffmpeg_command = ffmpeg_base_command_list + [
        "-i", image_file
    ]

    if config.screenshot.custom_ffmpeg_settings:
        # Quotes are stripped as in the other ffmpeg helpers: no shell is
        # involved, so ffmpeg would receive them as literal characters.
        ffmpeg_command.extend(config.screenshot.custom_ffmpeg_settings.replace("\"", "").split(" "))
    else:
        # quality defaults to an int; every argv entry must be a string, both
        # for subprocess and for the " ".join() used when logging below.
        ffmpeg_command.extend(["-compression_level", "6", "-q:v", str(config.screenshot.quality)])

    if config.screenshot.width or config.screenshot.height:
        # -1 lets ffmpeg derive the missing dimension from the aspect ratio.
        ffmpeg_command.extend(
            ["-vf", f"scale={config.screenshot.width or -1}:{config.screenshot.height or -1}"])

    ffmpeg_command.append(output_image)
    logger.debug(ffmpeg_command)
    logger.debug(" ".join(ffmpeg_command))

    subprocess.run(ffmpeg_command)

    logger.info(f"Processed image saved to: {output_image}")

    return output_image
87
+
88
+
89
def get_audio_codec(video_path):
    """Ask ffprobe for the codec name of the first audio stream.

    Args:
        video_path: Path of the media file to inspect.

    Returns:
        The codec name, or ``None`` when ffprobe's output cannot be parsed
        or contains no stream entry.
    """
    probe_args = ["ffprobe", "-v", "error"]
    probe_args += ["-select_streams", "a:0"]
    probe_args += ["-show_entries", "stream=codec_name"]
    probe_args += ["-of", "json"]
    probe_args.append(video_path)

    logger.debug(" ".join(probe_args))
    completed = subprocess.run(probe_args, capture_output=True, text=True)

    try:
        return json.loads(completed.stdout)['streams'][0]['codec_name']
    except (json.JSONDecodeError, KeyError, IndexError):
        logger.error("Failed to get codec information. Re-encoding Anyways")
        return None
111
+
112
+
113
def get_audio_and_trim(video_path, line_time, next_line_time):
    """Pull the audio out of a video, then trim it around the given line.

    The audio is stream-copied when the source codec name equals the
    configured extension and re-encoded otherwise. Trimming is delegated to
    ``trim_audio_based_on_last_line``.

    Args:
        video_path: Path of the source video.
        line_time: Time of the line the audio belongs to.
        next_line_time: Time of the following line, forwarded to the trimmer.

    Returns:
        Whatever ``trim_audio_based_on_last_line`` returns.

    Raises:
        KeyError: If re-encoding is needed and the configured extension has
            no entry in the encoder table.
    """
    # Configured extension -> ffmpeg encoder used when re-encoding.
    encoders = {
        'opus': 'opus',
        'mp3': 'libmp3lame',
        'ogg': 'libvorbis',
        'aac': 'aac',
        'm4a': 'aac',
    }

    source_codec = get_audio_codec(video_path)

    if source_codec != get_config().audio.extension:
        codec_command = ["-c:a", f"{encoders[get_config().audio.extension]}"]
        logger.info(f"Re-encoding {source_codec} to {get_config().audio.extension}")
    else:
        codec_command = ['-c:a', 'copy']
        logger.info(f"Extracting {get_config().audio.extension} from video")

    # Only the generated name is kept; the temporary file object is discarded.
    untrimmed_audio = tempfile.NamedTemporaryFile(dir=configuration.temp_directory,
                                                  suffix=f"_untrimmed.{get_config().audio.extension}").name

    command = ffmpeg_base_command_list + ["-i", video_path, "-map", "0:a"]
    command = command + codec_command + [untrimmed_audio]

    logger.debug(" ".join(command))

    subprocess.run(command)

    return trim_audio_based_on_last_line(untrimmed_audio, video_path, line_time, next_line_time)
148
+
149
+
150
def get_video_duration(file_path):
    """Return the duration ffprobe reports for ``file_path``, as a float of seconds."""
    probe_args = ["ffprobe", "-v", "error"]
    probe_args += ["-show_entries", "format=duration"]
    probe_args += ["-of", "json", file_path]

    completed = subprocess.run(probe_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    format_info = json.loads(completed.stdout)["format"]
    return float(format_info["duration"])
161
+
162
+
163
def trim_audio_based_on_last_line(untrimmed_audio, video_path, line_time, next_line):
    """Cut the start (and optionally the end) of the audio to match a line.

    The start point is the line's position in the video plus the configured
    ``beginning_offset``. When ``next_line`` is later than ``line_time`` the
    audio is also cut one second after the next line's position.

    Args:
        untrimmed_audio: Path of the full-length audio file.
        video_path: Path of the video the audio came from.
        line_time: Time of the line.
        next_line: Time of the following line, or a falsy value.

    Returns:
        The path of the trimmed audio, or ``untrimmed_audio`` unchanged when
        the start point falls outside the video.
    """
    def as_timestamp(value):
        # HH:MM:SS.mmm, the form handed to ffmpeg's -ss / -to options.
        hrs, rest = divmod(value, 3600)
        mins, secs = divmod(rest, 60)
        return "{:02}:{:02}:{:06.3f}".format(int(hrs), int(mins), secs)

    # Only the generated name is kept; the temporary file object is discarded.
    trimmed_audio = tempfile.NamedTemporaryFile(dir=configuration.temp_directory,
                                                suffix=f".{get_config().audio.extension}").name
    modified_at = get_file_modification_time(video_path)
    file_length = get_video_duration(video_path)
    time_delta = modified_at - line_time
    total_seconds = file_length - time_delta.total_seconds() + get_config().audio.beginning_offset

    if total_seconds < 0 or total_seconds >= file_length:
        logger.info(f"0 seconds trimmed off of beginning")
        return untrimmed_audio

    start_trim_time = as_timestamp(total_seconds)

    ffmpeg_command = ffmpeg_base_command_list + ["-i", untrimmed_audio, "-ss", start_trim_time]

    if next_line and next_line > line_time:
        end_total_seconds = total_seconds + (next_line - line_time).total_seconds() + 1
        end_trim_time = as_timestamp(end_total_seconds)
        ffmpeg_command.extend(['-to', end_trim_time])
        logger.info(
            f"Looks like Clipboard/Websocket was modified before the script knew about the anki card! Trimming end of video to {end_trim_time}")

    # Stream copy avoids re-encoding.
    ffmpeg_command.extend(["-c", "copy", trimmed_audio])

    logger.debug(" ".join(ffmpeg_command))
    subprocess.run(ffmpeg_command)

    logger.info(f"{total_seconds} trimmed off of beginning")

    logger.info(f"Audio trimmed and saved to {trimmed_audio}")
    return trimmed_audio
203
+
204
+
205
def reencode_file_with_user_config(input_file, final_output_audio, user_ffmpeg_options):
    """Re-encode ``input_file`` with user-supplied ffmpeg options.

    ffmpeg writes to a sibling temporary file; only on success is that file
    moved over ``final_output_audio``.

    Args:
        input_file: Path of the audio to re-encode.
        final_output_audio: Path that receives the re-encoded audio.
        user_ffmpeg_options: Space-separated ffmpeg options; double quotes
            are removed before splitting.
    """
    logger.info(f"Re-encode running with settings: {user_ffmpeg_options}")
    temp_file = create_temp_file_with_same_name(input_file)

    user_args = user_ffmpeg_options.replace("\"", "").split(" ")
    command = ffmpeg_base_command_list + ["-i", input_file, "-map", "0:a"] + user_args + [temp_file]

    logger.debug(" ".join(command))
    completed = subprocess.run(command)

    if completed.returncode == 0:
        replace_file_with_retry(temp_file, final_output_audio)
        return

    logger.error("Re-encode failed, using original audio")
223
+
224
+
225
def create_temp_file_with_same_name(input_file: str):
    """Return ``input_file`` with ``_temp`` inserted before its extension.

    The extension is found with ``os.path.splitext``, so dots in directory
    names or earlier in the file name are left alone, and a path without an
    extension simply gets ``_temp`` appended.

    Args:
        input_file: Path to derive the temporary name from.

    Returns:
        The derived path; no file is created.
    """
    import os  # local import keeps this helper independent of star-imported names

    stem, extension = os.path.splitext(input_file)
    return f"{stem}_temp{extension}"
228
+
229
+
230
def replace_file_with_retry(temp_file, input_file, retries=5, delay=1):
    """Move ``temp_file`` over ``input_file``, retrying on ``OSError``.

    Args:
        temp_file: Path of the file to move.
        input_file: Destination path that gets replaced.
        retries: Maximum number of attempts.
        delay: Seconds to sleep between attempts.

    Raises:
        OSError: Re-raised from the final attempt when it fails too.
    """
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            os.replace(temp_file, input_file)
            logger.info(f'Re-encode Finished!')
            return
        except OSError as e:
            if attempt >= final_attempt:
                logger.error(f"Failed to replace the file after {retries} attempts. Error: {e}")
                raise
            logger.warning(f"Attempt {attempt + 1}: File still in use. Retrying in {delay} seconds...")
            time.sleep(delay)
243
+
244
+
245
def trim_audio_by_end_time(input_audio, end_time, output_audio):
    """Stream-copy ``input_audio`` up to ``end_time`` into ``output_audio``."""
    command = list(ffmpeg_base_command_list)
    command += ["-i", input_audio]
    command += ["-to", str(end_time)]
    command += ["-c", "copy", output_audio]

    logger.debug(" ".join(command))
    subprocess.run(command)
254
+
255
+
256
def convert_audio_to_wav(input_audio, output_wav):
    """Convert ``input_audio`` to a 16 kHz mono file at ``output_wav``.

    The ``afftdn`` audio filter is always applied; ``dialoguenhance`` is
    added as well unless ``util.is_linux()`` is true.
    """
    audio_filter = "afftdn" if util.is_linux() else "afftdn,dialoguenhance"

    command = list(ffmpeg_base_command_list)
    command += ["-i", input_audio]
    command += ["-ar", "16000", "-ac", "1"]
    command += ["-af", audio_filter, output_wav]

    logger.debug(" ".join(command))
    subprocess.run(command)
266
+
267
+
268
+ # Trim the audio using FFmpeg based on detected speech timestamps
269
def trim_audio(input_audio, start_time, end_time, output_audio):
    """Stream-copy the ``start_time``..``end_time`` range of ``input_audio``.

    The start is only trimmed when ``vad.trim_beginning`` is enabled and
    ``start_time`` is positive; the end is always trimmed.

    Args:
        input_audio: Path of the source audio.
        start_time: Start of the range, formatted to two decimals.
        end_time: End of the range, formatted to two decimals.
        output_audio: Path ffmpeg writes to.
    """
    # -ss / -to are placed before -i, so they apply to the input.
    seek_args = []
    if get_config().vad.trim_beginning and start_time > 0:
        seek_args = ['-ss', f"{start_time:.2f}"]

    command = ffmpeg_base_command_list + seek_args + [
        '-to', f"{end_time:.2f}",
        '-i', input_audio,
        '-c', 'copy',
        output_audio,
    ]

    logger.debug(" ".join(command))

    subprocess.run(command)
285
+
286
+
287
def is_video_big_enough(file_path, min_size_kb=250):
    """Tell whether the file at ``file_path`` is at least ``min_size_kb`` KB.

    Args:
        file_path: Path of the file to measure.
        min_size_kb: Minimum size in kilobytes (1 KB = 1024 bytes).

    Returns:
        ``True`` when the file is large enough. ``False`` when it is too
        small, missing, or its size cannot be read; failures are logged.
    """
    try:
        size_in_kb = os.path.getsize(file_path) / 1024
        return size_in_kb >= min_size_kb
    except FileNotFoundError:
        logger.error("File not found!")
        return False
    except Exception as e:
        logger.error(f"Error: {e}")
        return False