GameSentenceMiner 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/__init__.py +0 -0
- GameSentenceMiner/anki.py +265 -0
- GameSentenceMiner/config_gui.py +803 -0
- GameSentenceMiner/configuration.py +359 -0
- GameSentenceMiner/ffmpeg.py +297 -0
- GameSentenceMiner/gametext.py +128 -0
- GameSentenceMiner/gsm.py +385 -0
- GameSentenceMiner/model.py +84 -0
- GameSentenceMiner/notification.py +69 -0
- GameSentenceMiner/obs.py +128 -0
- GameSentenceMiner/util.py +136 -0
- GameSentenceMiner/vad/__init__.py +0 -0
- GameSentenceMiner/vad/silero_trim.py +43 -0
- GameSentenceMiner/vad/vosk_helper.py +152 -0
- GameSentenceMiner/vad/whisper_helper.py +98 -0
- GameSentenceMiner-2.0.0.dist-info/METADATA +346 -0
- GameSentenceMiner-2.0.0.dist-info/RECORD +20 -0
- GameSentenceMiner-2.0.0.dist-info/WHEEL +5 -0
- GameSentenceMiner-2.0.0.dist-info/entry_points.txt +2 -0
- GameSentenceMiner-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,359 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
import os
|
4
|
+
import shutil
|
5
|
+
from dataclasses import dataclass, field
|
6
|
+
from logging.handlers import RotatingFileHandler
|
7
|
+
from os.path import expanduser
|
8
|
+
from typing import List, Dict
|
9
|
+
|
10
|
+
import toml
|
11
|
+
from dataclasses_json import dataclass_json
|
12
|
+
|
13
|
+
|
14
|
+
# Identifiers for the selectable voice-activity-detection backends
# (used as defaults for VAD.selected_vad_model / VAD.backup_vad_model).
OFF = "OFF"
VOSK = "VOSK"
SILERO = "SILERO"
WHISPER = "WHISPER"

# Vosk model selectors (VOSK_BASE is the default of VAD.vosk_url).
VOSK_BASE = "BASE"
VOSK_SMALL = "SMALL"

# Whisper model names (WHISPER_BASE is the default of VAD.whisper_model).
WHISPER_TINY = "tiny"
WHISPER_BASE = "base"
WHISPER_SMALL = "small"
WHISPER_MEDIUM = "medium"
# NOTE(review): the name is misspelled ("WHSIPER"); kept as-is because other
# modules may import it under this spelling.
WHSIPER_LARGE = "large"

# Log level names.
INFO = "INFO"
DEBUG = "DEBUG"

# Name of the profile created by Config.new().
DEFAULT_CONFIG = "Default"

# Read by Config.has_config_for_current_game(); presumably assigned by other
# modules at runtime -- TODO confirm.
current_game = ""
|
34
|
+
|
35
|
+
|
36
|
+
@dataclass_json
@dataclass
class General:
    """General settings of a profile (JSON-serialisable via dataclass_json)."""

    # Toggle and address for the websocket text source; how they are consumed
    # is outside this file.
    use_websocket: bool = True
    websocket_uri: str = "localhost:6677"
    # Presumably opens the settings window when the app starts -- TODO confirm.
    open_config_on_startup: bool = False
|
42
|
+
|
43
|
+
|
44
|
+
@dataclass_json
@dataclass
class Paths:
    """Filesystem locations and clean-up flags of a profile.

    The default directories are resolved with ``expanduser`` once, when the
    class body is evaluated.
    """

    folder_to_watch: str = expanduser("~/Videos/OBS")
    # The destination defaults end with a separator; the ffmpeg helpers build
    # file names by plain string concatenation onto screenshot_destination.
    audio_destination: str = expanduser("~/Videos/OBS/Audio/")
    screenshot_destination: str = expanduser("~/Videos/OBS/SS/")
    remove_video: bool = True
    remove_audio: bool = False
    remove_screenshot: bool = False
|
53
|
+
|
54
|
+
|
55
|
+
@dataclass_json
@dataclass
class Anki:
    """Anki-related settings of a profile.

    The list/dict fields default to ``None`` and are replaced with fresh empty
    containers in ``__post_init__`` so that instances never share one mutable
    default object.
    """

    update_anki: bool = True
    url: str = "http://127.0.0.1:8765"
    sentence_field: str = "Sentence"
    sentence_audio_field: str = "SentenceAudio"
    picture_field: str = "Picture"
    word_field: str = "Word"
    previous_sentence_field: str = ""
    custom_tags: List[str] = None  # filled in by __post_init__
    tags_to_check: List[str] = None  # filled in by __post_init__
    add_game_tag: bool = True
    polling_rate: int = 200
    overwrite_audio: bool = False
    overwrite_picture: bool = True
    anki_custom_fields: Dict[str, str] = None  # filled in by __post_init__

    def __post_init__(self):
        """Replace every ``None`` container placeholder with an empty container."""
        self.custom_tags = [] if self.custom_tags is None else self.custom_tags
        self.anki_custom_fields = {} if self.anki_custom_fields is None else self.anki_custom_fields
        self.tags_to_check = [] if self.tags_to_check is None else self.tags_to_check
|
80
|
+
|
81
|
+
|
82
|
+
@dataclass_json
@dataclass
class Features:
    """Feature toggles of a profile."""

    # get_config()/reload_config() exit the program when full_auto and
    # backfill_audio are both enabled.
    full_auto: bool = True
    notify_on_update: bool = True
    open_anki_edit: bool = False
    backfill_audio: bool = False
|
89
|
+
|
90
|
+
|
91
|
+
@dataclass_json
@dataclass
class Screenshot:
    """Screenshot settings of a profile."""

    # NOTE(review): width/height/quality are annotated ``str`` but default to
    # ints. The values end up in an ffmpeg argument list, which must contain
    # strings -- confirm which type is intended before changing either side.
    width: str = 0
    height: str = 0
    quality: str = 85
    extension: str = "webp"
    # When non-empty, replaces the default compression/quality ffmpeg options.
    custom_ffmpeg_settings: str = ""
    screenshot_hotkey_updates_anki: bool = False
    # Subtracted from the computed seek-from-end time in get_screenshot_time().
    seconds_after_line: int = 1
|
101
|
+
|
102
|
+
|
103
|
+
@dataclass_json
@dataclass
class Audio:
    """Audio extraction settings of a profile."""

    # Output container/extension; also compared against the source codec name
    # to decide between stream copy and re-encode.
    extension: str = "opus"
    # Added to the computed trim start (seconds).
    beginning_offset: float = 0.0
    end_offset: float = 0.5
    ffmpeg_reencode_options: str = ""
    external_tool: str = ""
    anki_media_collection: str = ""
|
112
|
+
|
113
|
+
|
114
|
+
@dataclass_json
@dataclass
class OBS:
    """OBS connection settings of a profile."""

    enabled: bool = True
    host: str = "localhost"
    port: int = 4455
    # Placeholder default; the real password comes from the user's config.
    password: str = "your_password"
    start_buffer: bool = True
    get_game_from_scene: bool = True
    minimum_replay_size: int = 0
|
124
|
+
|
125
|
+
|
126
|
+
@dataclass_json
@dataclass
class Hotkeys:
    """Key names bound to the application's hotkey actions."""

    reset_line: str = "f5"
    take_screenshot: str = "f6"
|
131
|
+
|
132
|
+
|
133
|
+
@dataclass_json
@dataclass
class VAD:
    """Voice-activity-detection settings of a profile."""

    whisper_model: str = WHISPER_BASE
    do_vad_postprocessing: bool = True
    # Holds a model selector (VOSK_BASE) by default despite the "url" name.
    vosk_url: str = VOSK_BASE
    selected_vad_model: str = SILERO
    backup_vad_model: str = OFF
    # trim_audio() only applies its start offset when this is enabled.
    trim_beginning: bool = False
|
142
|
+
|
143
|
+
|
144
|
+
@dataclass_json
@dataclass
class ProfileConfig:
    """All setting groups that make up one named profile."""
    name: str = 'Default'
    general: General = field(default_factory=General)
    paths: Paths = field(default_factory=Paths)
    anki: Anki = field(default_factory=Anki)
    features: Features = field(default_factory=Features)
    screenshot: Screenshot = field(default_factory=Screenshot)
    audio: Audio = field(default_factory=Audio)
    obs: OBS = field(default_factory=OBS)
    hotkeys: Hotkeys = field(default_factory=Hotkeys)
    vad: VAD = field(default_factory=VAD)

    # This is just for legacy support
    def load_from_toml(self, file_path: str):
        """Populate this profile from a legacy TOML file and write ``config.json``.

        Every value found in the TOML file overrides the corresponding
        attribute; anything absent (a single key or a whole ``[section]``)
        keeps its current value. The resulting profile is then serialised to
        ``config.json`` in the current working directory.

        Args:
            file_path: Path of the legacy ``config.toml``.

        Returns:
            This instance, to allow ``ProfileConfig().load_from_toml(...)``.
        """
        with open(file_path, 'r') as f:
            config_data = toml.load(f)

        def section(name):
            # A missing [section] simply means "no overrides" instead of KeyError.
            return config_data.get(name, {})

        def apply(target, table, keys):
            # keys maps attribute name on `target` -> key inside the TOML table.
            for attr, key in keys.items():
                setattr(target, attr, table.get(key, getattr(target, attr)))

        paths = section('paths')
        for attr in ('folder_to_watch', 'audio_destination', 'screenshot_destination'):
            setattr(self.paths, attr, expanduser(paths.get(attr, getattr(self.paths, attr))))

        apply(self.anki, section('anki'), {
            'url': 'url',
            'sentence_field': 'sentence_field',
            'sentence_audio_field': 'sentence_audio_field',
            'word_field': 'word_field',
            'picture_field': 'picture_field',
            'custom_tags': 'custom_tags',
            'add_game_tag': 'add_game_tag',
            'polling_rate': 'polling_rate',
        })
        apply(self.anki, section('anki_overwrites'), {
            'overwrite_audio': 'overwrite_audio',
            'overwrite_picture': 'overwrite_picture',
        })

        apply(self.features, section('features'), {
            # NOTE(review): full_auto is read from 'do_vosk_postprocessing', the
            # same key that feeds vad.do_vad_postprocessing below. This looks
            # like a copy/paste slip, but the intended legacy key is not visible
            # here, so the mapping is preserved -- TODO confirm.
            'full_auto': 'do_vosk_postprocessing',
            'notify_on_update': 'notify_on_update',
            'open_anki_edit': 'open_anki_edit',
            'backfill_audio': 'backfill_audio',
        })

        apply(self.screenshot, section('screenshot'), {
            'width': 'width',
            'height': 'height',
            'quality': 'quality',
            'extension': 'extension',
            'custom_ffmpeg_settings': 'custom_ffmpeg_settings',
        })

        apply(self.audio, section('audio'), {
            'extension': 'extension',
            'beginning_offset': 'beginning_offset',
            'end_offset': 'end_offset',
            'ffmpeg_reencode_options': 'ffmpeg_reencode_options',
        })

        # The VAD settings were spread over three legacy sections.
        apply(self.vad, section('vosk'), {
            'whisper_model': 'whisper_model',
            'vosk_url': 'url',
        })
        apply(self.vad, section('features'), {
            'do_vad_postprocessing': 'do_vosk_postprocessing',
        })
        apply(self.vad, section('audio'), {
            'trim_beginning': 'vosk_trim_beginning',
        })

        apply(self.obs, section('obs'), {
            'enabled': 'enabled',
            'host': 'host',
            'port': 'port',
            'password': 'password',
        })

        apply(self.general, section('websocket'), {
            'use_websocket': 'enabled',
            'websocket_uri': 'uri',
        })

        apply(self.hotkeys, section('hotkeys'), {
            'reset_line': 'reset_line',
            'take_screenshot': 'take_screenshot',
        })

        self.anki.anki_custom_fields = config_data.get('anki_custom_fields', {})

        with open('config.json', 'w') as f:
            f.write(self.to_json(indent=4))
            print(
                'config.json successfully generated from previous settings. config.toml will no longer be used.')

        return self
|
225
|
+
|
226
|
+
@dataclass_json
@dataclass
class Config:
    """Master configuration: all profiles keyed by name plus the active profile name."""
    configs: Dict[str, ProfileConfig] = field(default_factory=dict)
    current_profile: str = DEFAULT_CONFIG

    @classmethod
    def new(cls):
        """Build a configuration holding a single default profile."""
        return cls(configs={DEFAULT_CONFIG: ProfileConfig()}, current_profile=DEFAULT_CONFIG)

    def get_config(self) -> ProfileConfig:
        """Return the profile named by ``current_profile`` (KeyError if absent)."""
        active_name = self.current_profile
        return self.configs[active_name]

    def set_config_for_profile(self, profile: str, config: ProfileConfig):
        """Store ``config`` under ``profile``, renaming the config to match."""
        config.name = profile
        self.configs[profile] = config

    def has_config_for_current_game(self):
        """Tell whether a profile exists whose name equals the module-level ``current_game``."""
        return current_game in self.configs

    def get_all_profile_names(self):
        """Return the profile names as a new list."""
        return list(self.configs)
|
249
|
+
|
250
|
+
|
251
|
+
# Package logger. The logger itself lets everything through (DEBUG); each
# handler then applies its own threshold.
logger = logging.getLogger("GameSentenceMiner")
logger.setLevel(logging.DEBUG)

# One format shared by both handlers.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Console output: INFO and above.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# Rotating log file in the working directory: DEBUG and above, 10 MB per
# file, two backups.
file_handler = RotatingFileHandler("gamesentenceminer.log", maxBytes=10_000_000, backupCount=2, encoding='utf-8')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# NOTE(review): the file name 'get_config().json' looks like the residue of a
# search-and-replace; nothing in this module reads CONFIG_FILE -- confirm
# before relying on it.
CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'get_config().json')
# Directory handed to tempfile by the ffmpeg helpers; empty by default and
# presumably assigned by another module at startup -- TODO confirm.
temp_directory = ''
|
275
|
+
|
276
|
+
def get_app_directory():
    """Return the per-user ``GameSentenceMiner`` directory, creating it if needed.

    The base directory is ``%APPDATA%`` when that variable is set. When it is
    not (it is normally only defined on Windows), ``$XDG_CONFIG_HOME`` is used,
    and finally ``~/.config``. Previously an unset ``APPDATA`` made
    ``os.path.join`` raise ``TypeError``.

    Returns:
        Path of the existing application directory.
    """
    base_dir = (
        os.getenv('APPDATA')
        or os.getenv('XDG_CONFIG_HOME')
        or os.path.join(os.path.expanduser('~'), '.config')
    )
    config_dir = os.path.join(base_dir, 'GameSentenceMiner')
    os.makedirs(config_dir, exist_ok=True)  # Create the directory if it doesn't exist
    return config_dir
|
281
|
+
|
282
|
+
def get_config_path():
    """Return the path of ``config.json`` inside the application directory."""
    app_dir = get_app_directory()
    return os.path.join(app_dir, 'config.json')
|
284
|
+
|
285
|
+
|
286
|
+
def load_config():
    """Load the master configuration, migrating older formats when found.

    Resolution order:
      1. A ``config.json`` in the working directory is copied to the
         application directory if none exists there yet.
      2. If the application-directory ``config.json`` exists it is parsed.
         A file without ``current_profile`` is a profile-less config; it is
         wrapped into a ``Config`` with a single default profile and written
         back in the new format.
      3. Otherwise a legacy ``config.toml`` in the working directory is
         converted.
      4. Otherwise a fresh default configuration is created and saved.

    Returns:
        The loaded ``Config``, or ``None`` when ``config.json`` is not valid
        JSON.
    """
    config_path = get_config_path()

    if os.path.exists('config.json') and not os.path.exists(config_path):
        shutil.copy('config.json', config_path)

    if os.path.exists(config_path):
        # Parse once; the parsed data serves both the current and the
        # profile-less format.
        try:
            with open(config_path, 'r') as file:
                config_file = json.load(file)
        except json.JSONDecodeError as e:
            print(f"Error parsing config.json: {e}")
            return None

        if "current_profile" in config_file:
            return Config.from_dict(config_file)

        print("Loading Profile-less Config, Converting to new Config!")
        config = ProfileConfig.from_dict(config_file)
        new_config = Config(configs={DEFAULT_CONFIG: config}, current_profile=DEFAULT_CONFIG)
        # The converted config is deliberately not echoed to stdout: it
        # contains the OBS password.
        with open(config_path, 'w') as file:
            json.dump(new_config.to_dict(), file, indent=4)
        return new_config

    if os.path.exists('config.toml'):
        config = ProfileConfig().load_from_toml('config.toml')
        return Config({DEFAULT_CONFIG: config}, current_profile=DEFAULT_CONFIG)

    config = Config.new()
    with open(config_path, 'w') as file:
        json.dump(config.to_dict(), file, indent=4)
    return config
|
323
|
+
|
324
|
+
|
325
|
+
# Master configuration, loaded on first use by get_config().
config_instance: Config = None


def get_config():
    """Return the active profile, loading the master configuration on first use.

    Exits the process with status 1 when the active profile enables both
    ``backfill_audio`` and ``full_auto``.
    """
    global config_instance
    if config_instance is None:
        config_instance = load_config()

    active_profile = config_instance.get_config()
    features = active_profile.features

    if features.backfill_audio and features.full_auto:
        print("Cannot have backfill_audio and obs_full_auto_mode turned on at the same time!")
        exit(1)

    return config_instance.get_config()
|
340
|
+
|
341
|
+
|
342
|
+
def reload_config():
    """Re-read the master configuration from disk and re-validate the active profile.

    Exits the process with status 1 when the active profile enables both
    ``backfill_audio`` and ``full_auto``.
    """
    global config_instance
    config_instance = load_config()

    features = config_instance.get_config().features
    if features.backfill_audio and features.full_auto:
        print("Cannot have backfill_audio and obs_full_auto_mode turned on at the same time!")
        exit(1)
|
350
|
+
|
351
|
+
def get_master_config():
    """Return the module-level master configuration object as it currently is.

    No loading is triggered here, so the result is whatever
    ``config_instance`` holds at the time of the call.
    """
    master = config_instance
    return master
|
353
|
+
|
354
|
+
def switch_profile_and_save(profile_name):
    """Make ``profile_name`` the active profile and persist the choice.

    The master configuration is written to the path returned by
    ``get_config_path()``, the same file ``load_config()`` reads. It was
    previously written to ``config.json`` in the working directory, which
    ``load_config()`` ignores once the application-directory file exists, so
    the switch was lost on the next load.

    Args:
        profile_name: Name of the profile to activate.

    Returns:
        The newly active profile (KeyError if no such profile exists).
    """
    global config_instance
    config_instance.current_profile = profile_name
    with open(get_config_path(), 'w') as file:
        json.dump(config_instance.to_dict(), file, indent=4)
    return config_instance.get_config()
|
@@ -0,0 +1,297 @@
|
|
1
|
+
import tempfile
|
2
|
+
import time
|
3
|
+
|
4
|
+
from . import obs
|
5
|
+
from . import util
|
6
|
+
from . import configuration
|
7
|
+
from .configuration import *
|
8
|
+
from .util import *
|
9
|
+
|
10
|
+
# Leading arguments shared by every ffmpeg invocation in this module:
# no banner, errors only, and never read from stdin.
ffmpeg_base_command_list = [
    "ffmpeg",
    "-hide_banner",
    "-loglevel", "error",
    "-nostdin",
]
|
11
|
+
|
12
|
+
|
13
|
+
def get_screenshot(video_file, time_from_end):
    """Extract a single frame from ``video_file`` into the screenshot folder.

    Args:
        video_file: Path of the source video.
        time_from_end: Seconds before the end of the video to capture; a
            falsy value captures one second before the end.

    Returns:
        Path of the image ffmpeg was asked to write. A failing ffmpeg run is
        logged as an error; the path is returned either way.
    """
    time_from_end_to_capture = -time_from_end if time_from_end else -1
    screenshot_cfg = get_config().screenshot
    output_image = make_unique_file_name(
        get_config().paths.screenshot_destination + obs.get_current_game(sanitize=True) + f".{screenshot_cfg.extension}")

    # FFmpeg command to extract one frame, seeking relative to the end of the video
    ffmpeg_command = ffmpeg_base_command_list + [
        "-sseof", f"{time_from_end_to_capture}",
        "-i", f"{video_file}",
        "-vframes", "1"  # Extract only one frame
    ]

    if screenshot_cfg.custom_ffmpeg_settings:
        # split() without an argument drops the empty strings that repeated
        # spaces would otherwise inject as bogus ffmpeg arguments.
        ffmpeg_command.extend(screenshot_cfg.custom_ffmpeg_settings.replace("\"", "").split())
    else:
        # quality may be an int (the dataclass default is 85); every argv
        # entry must be a string.
        ffmpeg_command.extend(["-compression_level", "6", "-q:v", str(screenshot_cfg.quality)])

    if screenshot_cfg.width or screenshot_cfg.height:
        ffmpeg_command.extend(
            ["-vf", f"scale={screenshot_cfg.width or -1}:{screenshot_cfg.height or -1}"])

    ffmpeg_command.append(f"{output_image}")

    logger.debug(f"FFMPEG SS Command: {ffmpeg_command}")

    # Run the command
    result = subprocess.run(ffmpeg_command)

    if result.returncode != 0:
        logger.error(f"FFmpeg failed to create screenshot (exit code {result.returncode}): {output_image}")
    else:
        logger.info(f"Screenshot saved to: {output_image}")

    return output_image
|
43
|
+
|
44
|
+
|
45
|
+
def get_screenshot_time(video_path, line_time):
    """Compute how many seconds before the end of the video the screenshot should be taken.

    Args:
        video_path: Path of the video file.
        line_time: Timestamp of the text line (must support subtraction from
            the file modification time and yield a timedelta).

    Returns:
        Seconds from the end of the video.

    Raises:
        ValueError: If the computed value is negative or larger than the gap
            between the line and the end of the video.
    """
    file_length = get_video_duration(video_path)
    file_mod_time = get_file_modification_time(video_path)

    seconds_since_line = (file_mod_time - line_time).total_seconds()
    line_position = file_length - seconds_since_line
    gap_to_end = file_length - line_position

    time_from_end = gap_to_end - get_config().screenshot.seconds_after_line

    # NOTE(review): the upper bound compares against the line-to-end gap, not
    # the video length, so it only trips for a negative seconds_after_line --
    # confirm this is intended.
    if time_from_end < 0 or time_from_end > gap_to_end:
        raise ValueError("Calculated screenshot time is out of bounds for trimmed video.")

    return time_from_end
|
58
|
+
|
59
|
+
|
60
|
+
def process_image(image_file):
    """Convert ``image_file`` into the configured screenshot format via ffmpeg.

    Args:
        image_file: Path of the source image.

    Returns:
        Path of the image ffmpeg was asked to write. A failing ffmpeg run is
        logged as an error; the path is returned either way.
    """
    screenshot_cfg = get_config().screenshot
    output_image = make_unique_file_name(
        get_config().paths.screenshot_destination + obs.get_current_game(sanitize=True) + f".{screenshot_cfg.extension}")

    # FFmpeg command to process the input image
    ffmpeg_command = ffmpeg_base_command_list + [
        "-i", image_file
    ]

    if screenshot_cfg.custom_ffmpeg_settings:
        # Same handling as get_screenshot(): strip literal quotes and split on
        # any whitespace so repeated spaces cannot yield empty arguments.
        ffmpeg_command.extend(screenshot_cfg.custom_ffmpeg_settings.replace("\"", "").split())
    else:
        # quality may be an int (the dataclass default is 85); every argv
        # entry must be a string.
        ffmpeg_command.extend(["-compression_level", "6", "-q:v", str(screenshot_cfg.quality)])

    if screenshot_cfg.width or screenshot_cfg.height:
        ffmpeg_command.extend(
            ["-vf", f"scale={screenshot_cfg.width or -1}:{screenshot_cfg.height or -1}"])

    ffmpeg_command.append(output_image)
    logger.debug(ffmpeg_command)
    # str() keeps the debug line from raising if any argument is not a str.
    logger.debug(" ".join(str(arg) for arg in ffmpeg_command))
    # Run the command
    result = subprocess.run(ffmpeg_command)

    if result.returncode != 0:
        logger.error(f"FFmpeg failed to process image (exit code {result.returncode}): {output_image}")
    else:
        logger.info(f"Processed image saved to: {output_image}")

    return output_image
|
87
|
+
|
88
|
+
|
89
|
+
def get_audio_codec(video_path):
    """Ask ffprobe for the codec name of the first audio stream.

    Args:
        video_path: Path of the media file to inspect.

    Returns:
        The codec name reported by ffprobe, or ``None`` when its output is
        not valid JSON or lacks the expected stream entry.
    """
    probe_command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "a:0",
        "-show_entries", "stream=codec_name",
        "-of", "json",
        video_path,
    ]
    logger.debug(" ".join(probe_command))

    # Capture ffprobe's JSON report as text.
    completed = subprocess.run(probe_command, capture_output=True, text=True)

    try:
        return json.loads(completed.stdout)['streams'][0]['codec_name']
    except (json.JSONDecodeError, KeyError, IndexError):
        logger.error("Failed to get codec information. Re-encoding Anyways")
        return None
|
111
|
+
|
112
|
+
|
113
|
+
def get_audio_and_trim(video_path, line_time, next_line_time):
    """Extract the audio track of ``video_path`` and trim it to the spoken line.

    The audio stream is copied when its codec name equals the configured
    extension and re-encoded otherwise. For an extension without a known
    encoder, no ``-c:a`` option is passed and ffmpeg chooses one from the
    output file name (previously this raised ``KeyError``).

    Args:
        video_path: Path of the source video.
        line_time: Timestamp of the text line.
        next_line_time: Timestamp of the following line, or a falsy value.

    Returns:
        Path of the audio file produced by ``trim_audio_based_on_last_line``.
    """
    supported_formats = {
        'opus': 'opus',
        'mp3': 'libmp3lame',
        'ogg': 'libvorbis',
        'aac': 'aac',
        'm4a': 'aac',
    }

    extension = get_config().audio.extension
    codec = get_audio_codec(video_path)

    if codec == extension:
        codec_command = ['-c:a', 'copy']
        logger.info(f"Extracting {extension} from video")
    else:
        encoder = supported_formats.get(extension)
        codec_command = ["-c:a", encoder] if encoder else []
        logger.info(f"Re-encoding {codec} to {extension}")

    # Only the generated name is kept; the NamedTemporaryFile object itself is
    # discarded right away and ffmpeg writes to that path.
    untrimmed_audio = tempfile.NamedTemporaryFile(dir=configuration.temp_directory,
                                                  suffix=f"_untrimmed.{extension}").name

    # FFmpeg command to extract OR re-encode the audio
    command = ffmpeg_base_command_list + [
        "-i", video_path,
        "-map", "0:a"] + codec_command + [
        untrimmed_audio
    ]

    logger.debug(" ".join(command))

    subprocess.run(command)

    return trim_audio_based_on_last_line(untrimmed_audio, video_path, line_time, next_line_time)
|
148
|
+
|
149
|
+
|
150
|
+
def get_video_duration(file_path):
    """Return the duration of ``file_path`` in seconds, as reported by ffprobe.

    Raises whatever ``json.loads`` or the key lookups raise when ffprobe does
    not produce the expected JSON.
    """
    probe = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "json", file_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    report = json.loads(probe.stdout)
    return float(report["format"]["duration"])  # seconds
|
161
|
+
|
162
|
+
|
163
|
+
def _format_ffmpeg_time(total_seconds):
    """Render a number of seconds as ``HH:MM:SS.mmm`` for ffmpeg."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)


def trim_audio_based_on_last_line(untrimmed_audio, video_path, line_time, next_line):
    """Cut ``untrimmed_audio`` so it starts at the line and optionally ends at the next line.

    Args:
        untrimmed_audio: Path of the full audio extracted from the video.
        video_path: Path of the video the audio came from (used for its
            modification time and duration).
        line_time: Timestamp of the text line.
        next_line: Timestamp of the following line, or a falsy value.

    Returns:
        Path of the trimmed audio, or ``untrimmed_audio`` unchanged when the
        computed start lies outside the file.
    """
    trimmed_audio = tempfile.NamedTemporaryFile(dir=configuration.temp_directory,
                                                suffix=f".{get_config().audio.extension}").name
    file_mod_time = get_file_modification_time(video_path)
    file_length = get_video_duration(video_path)
    time_delta = file_mod_time - line_time

    # Position of the line inside the recording, shifted by the configured offset.
    total_seconds = file_length - time_delta.total_seconds() + get_config().audio.beginning_offset
    if total_seconds < 0 or total_seconds >= file_length:
        logger.info(f"0 seconds trimmed off of beginning")
        return untrimmed_audio

    ffmpeg_command = ffmpeg_base_command_list + [
        "-i", untrimmed_audio,
        "-ss", _format_ffmpeg_time(total_seconds)]

    if next_line and next_line > line_time:
        # End one second after the next line appeared.
        end_total_seconds = total_seconds + (next_line - line_time).total_seconds() + 1
        end_trim_time = _format_ffmpeg_time(end_total_seconds)
        ffmpeg_command += ['-to', end_trim_time]
        logger.info(
            f"Looks like Clipboard/Websocket was modified before the script knew about the anki card! Trimming end of video to {end_trim_time}")

    # Stream copy avoids re-encoding.
    ffmpeg_command += ["-c", "copy", trimmed_audio]

    logger.debug(" ".join(ffmpeg_command))
    subprocess.run(ffmpeg_command)

    logger.info(f"{total_seconds} trimmed off of beginning")

    logger.info(f"Audio trimmed and saved to {trimmed_audio}")
    return trimmed_audio
|
203
|
+
|
204
|
+
|
205
|
+
def reencode_file_with_user_config(input_file, final_output_audio, user_ffmpeg_options):
    """Re-encode ``input_file`` with user-supplied ffmpeg options.

    The result is written to a temporary sibling file first and only moved
    over ``final_output_audio`` when ffmpeg succeeds; on failure the original
    audio is left untouched.

    Args:
        input_file: Path of the audio to re-encode.
        final_output_audio: Path that receives the re-encoded audio.
        user_ffmpeg_options: Option string; double quotes are stripped and the
            rest is split on whitespace.
    """
    logger.info(f"Re-encode running with settings: {user_ffmpeg_options}")
    temp_file = create_temp_file_with_same_name(input_file)
    # split() without an argument ignores repeated/leading/trailing spaces,
    # which split(" ") turned into empty arguments that ffmpeg rejects.
    user_args = user_ffmpeg_options.replace("\"", "").split()
    command = ffmpeg_base_command_list + [
        "-i", input_file,
        "-map", "0:a"
    ] + user_args + [
        temp_file
    ]

    logger.debug(" ".join(command))
    process = subprocess.run(command)

    if process.returncode != 0:
        logger.error("Re-encode failed, using original audio")
        return

    replace_file_with_retry(temp_file, final_output_audio)
|
223
|
+
|
224
|
+
|
225
|
+
def create_temp_file_with_same_name(input_file: str):
    """Return ``input_file`` with ``_temp`` inserted before its extension.

    Only the final extension is considered, so dots elsewhere in the path
    (directory names, multi-dot file names) are preserved. Splitting on the
    first dot used to mangle such paths. A path without an extension simply
    gets ``_temp`` appended.
    """
    root, extension = os.path.splitext(input_file)
    return f"{root}_temp{extension}"
|
228
|
+
|
229
|
+
|
230
|
+
def replace_file_with_retry(temp_file, input_file, retries=5, delay=1):
    """Move ``temp_file`` over ``input_file``, retrying while the OS refuses.

    Args:
        temp_file: Path of the file to move.
        input_file: Destination path that gets replaced.
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Raises:
        OSError: Re-raised from the last attempt when all attempts fail.
    """
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            os.replace(temp_file, input_file)
        except OSError as e:
            if attempt >= final_attempt:
                logger.error(f"Failed to replace the file after {retries} attempts. Error: {e}")
                raise
            logger.warning(f"Attempt {attempt + 1}: File still in use. Retrying in {delay} seconds...")
            time.sleep(delay)
        else:
            logger.info('Re-encode Finished!')
            return
|
243
|
+
|
244
|
+
|
245
|
+
def trim_audio_by_end_time(input_audio, end_time, output_audio):
    """Copy ``input_audio`` to ``output_audio``, stopping at ``end_time``.

    The stream is copied rather than re-encoded.
    """
    trim_args = ["-i", input_audio, "-to", str(end_time), "-c", "copy", output_audio]
    command = ffmpeg_base_command_list + trim_args
    logger.debug(" ".join(command))
    subprocess.run(command)
|
254
|
+
|
255
|
+
|
256
|
+
def convert_audio_to_wav(input_audio, output_wav):
    """Convert ``input_audio`` to a 16 kHz mono file at ``output_wav``.

    An ``afftdn`` filter is always applied; ``dialoguenhance`` is added
    everywhere except where ``util.is_linux()`` is true.
    """
    audio_filter = "afftdn" if util.is_linux() else "afftdn,dialoguenhance"
    command = ffmpeg_base_command_list + [
        "-i", input_audio,
        "-ar", "16000",
        "-ac", "1",
        "-af", audio_filter,
        output_wav,
    ]
    logger.debug(" ".join(command))
    subprocess.run(command)
|
266
|
+
|
267
|
+
|
268
|
+
# Trim the audio using FFmpeg based on detected speech timestamps
def trim_audio(input_audio, start_time, end_time, output_audio):
    """Copy the ``start_time``..``end_time`` span of ``input_audio`` to ``output_audio``.

    The start offset is only applied when the profile's ``vad.trim_beginning``
    is enabled and ``start_time`` is positive; otherwise the output begins at
    the start of the input. Times are passed to ffmpeg with two decimals and
    the stream is copied rather than re-encoded.
    """
    command = list(ffmpeg_base_command_list)

    should_trim_start = get_config().vad.trim_beginning and start_time > 0
    if should_trim_start:
        command += ['-ss', f"{start_time:.2f}"]

    # -ss/-to are placed before -i, i.e. they are given as input options.
    command += [
        '-to', f"{end_time:.2f}",
        '-i', input_audio,
        '-c', 'copy',
        output_audio,
    ]

    logger.debug(" ".join(command))

    subprocess.run(command)
|
285
|
+
|
286
|
+
|
287
|
+
def is_video_big_enough(file_path, min_size_kb=250):
    """Tell whether the file at ``file_path`` is at least ``min_size_kb`` KiB large.

    Any failure (missing file or otherwise) is logged and reported as
    ``False``.
    """
    try:
        # getsize() returns bytes; divide to compare in KB.
        return os.path.getsize(file_path) / 1024 >= min_size_kb
    except FileNotFoundError:
        logger.error("File not found!")
    except Exception as e:
        logger.error(f"Error: {e}")
    return False
|