GameSentenceMiner 2.9.29__py3-none-any.whl → 2.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/anki.py +9 -9
- GameSentenceMiner/config_gui.py +826 -628
- GameSentenceMiner/gametext.py +5 -2
- GameSentenceMiner/gsm.py +10 -10
- GameSentenceMiner/ocr/gsm_ocr_config.py +16 -0
- GameSentenceMiner/ocr/owocr_area_selector.py +2 -0
- GameSentenceMiner/ocr/owocr_helper.py +18 -33
- GameSentenceMiner/ocr/ss_picker.py +17 -1
- GameSentenceMiner/util/audio_offset_selector.py +205 -0
- GameSentenceMiner/util/configuration.py +45 -16
- GameSentenceMiner/util/ffmpeg.py +23 -95
- GameSentenceMiner/util/gsm_utils.py +56 -1
- GameSentenceMiner/util/text_log.py +2 -2
- GameSentenceMiner/vad.py +3 -14
- GameSentenceMiner/web/texthooking_page.py +2 -2
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/METADATA +4 -2
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/RECORD +21 -20
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/top_level.txt +0 -0
@@ -41,6 +41,14 @@ DEFAULT_CONFIG = 'Default'
|
|
41
41
|
|
42
42
|
current_game = ''
|
43
43
|
|
44
|
+
supported_formats = {
|
45
|
+
'opus': 'libopus',
|
46
|
+
'mp3': 'libmp3lame',
|
47
|
+
'ogg': 'libvorbis',
|
48
|
+
'aac': 'aac',
|
49
|
+
'm4a': 'aac',
|
50
|
+
}
|
51
|
+
|
44
52
|
def is_linux():
|
45
53
|
return platform == 'linux'
|
46
54
|
|
@@ -77,7 +85,6 @@ class General:
|
|
77
85
|
open_multimine_on_startup: bool = True
|
78
86
|
texthook_replacement_regex: str = ""
|
79
87
|
texthooker_port: int = 55000
|
80
|
-
use_old_texthooker: bool = False
|
81
88
|
|
82
89
|
|
83
90
|
@dataclass_json
|
@@ -90,6 +97,10 @@ class Paths:
|
|
90
97
|
remove_audio: bool = False
|
91
98
|
remove_screenshot: bool = False
|
92
99
|
|
100
|
+
def __post_init__(self):
|
101
|
+
self.folder_to_watch = os.path.normpath(self.folder_to_watch)
|
102
|
+
self.audio_destination = os.path.normpath(self.audio_destination)
|
103
|
+
self.screenshot_destination = os.path.normpath(self.screenshot_destination)
|
93
104
|
|
94
105
|
@dataclass_json
|
95
106
|
@dataclass
|
@@ -109,13 +120,10 @@ class Anki:
|
|
109
120
|
overwrite_audio: bool = False
|
110
121
|
overwrite_picture: bool = True
|
111
122
|
multi_overwrites_sentence: bool = True
|
112
|
-
anki_custom_fields: Dict[str, str] = None # Initialize to None and set it in __post_init__
|
113
123
|
|
114
124
|
def __post_init__(self):
|
115
125
|
if self.custom_tags is None:
|
116
|
-
self.custom_tags = []
|
117
|
-
if self.anki_custom_fields is None:
|
118
|
-
self.anki_custom_fields = {}
|
126
|
+
self.custom_tags = ['GSM']
|
119
127
|
if self.tags_to_check is None:
|
120
128
|
self.tags_to_check = []
|
121
129
|
|
@@ -145,7 +153,7 @@ class Screenshot:
|
|
145
153
|
seconds_after_line: float = 1.0
|
146
154
|
use_beginning_of_line_as_screenshot: bool = True
|
147
155
|
use_new_screenshot_logic: bool = False
|
148
|
-
screenshot_timing_setting: str = '' # 'middle', 'end'
|
156
|
+
screenshot_timing_setting: str = 'beginning' # 'middle', 'end'
|
149
157
|
use_screenshot_selector: bool = False
|
150
158
|
|
151
159
|
def __post_init__(self):
|
@@ -167,11 +175,19 @@ class Audio:
|
|
167
175
|
end_offset: float = 0.5
|
168
176
|
pre_vad_end_offset: float = 0.0
|
169
177
|
ffmpeg_reencode_options: str = '-c:a libopus -f opus -af \"afade=t=in:d=0.10\"' if is_windows() else ''
|
178
|
+
ffmpeg_reencode_options_to_use: str = ''
|
170
179
|
external_tool: str = ""
|
171
180
|
anki_media_collection: str = ""
|
172
181
|
external_tool_enabled: bool = True
|
173
182
|
custom_encode_settings: str = ''
|
174
183
|
|
184
|
+
def __post_init__(self):
|
185
|
+
self.ffmpeg_reencode_options_to_use = self.ffmpeg_reencode_options.replace("{format}", self.extension).replace("{encoder}", supported_formats.get(self.extension, ''))
|
186
|
+
|
187
|
+
self.anki_media_collection = os.path.normpath(self.anki_media_collection)
|
188
|
+
self.external_tool = os.path.normpath(self.external_tool)
|
189
|
+
|
190
|
+
|
175
191
|
|
176
192
|
@dataclass_json
|
177
193
|
@dataclass
|
@@ -231,7 +247,6 @@ class Advanced:
|
|
231
247
|
show_screenshot_buttons: bool = False
|
232
248
|
multi_line_line_break: str = '<br>'
|
233
249
|
multi_line_sentence_storage_field: str = ''
|
234
|
-
ocr_sends_to_clipboard: bool = True
|
235
250
|
ocr_websocket_port: int = 9002
|
236
251
|
texthooker_communication_websocket_port: int = 55001
|
237
252
|
use_anki_note_creation_time: bool = True
|
@@ -335,8 +350,6 @@ class ProfileConfig:
|
|
335
350
|
self.hotkeys.reset_line = config_data['hotkeys'].get('reset_line', self.hotkeys.reset_line)
|
336
351
|
self.hotkeys.take_screenshot = config_data['hotkeys'].get('take_screenshot', self.hotkeys.take_screenshot)
|
337
352
|
|
338
|
-
self.anki.anki_custom_fields = config_data.get('anki_custom_fields', {})
|
339
|
-
|
340
353
|
with open(get_config_path(), 'w') as f:
|
341
354
|
f.write(self.to_json(indent=4))
|
342
355
|
print(
|
@@ -446,12 +459,10 @@ class Config:
|
|
446
459
|
self.sync_shared_field(config.anki, profile.anki, "overwrite_audio")
|
447
460
|
self.sync_shared_field(config.anki, profile.anki, "overwrite_picture")
|
448
461
|
self.sync_shared_field(config.anki, profile.anki, "multi_overwrites_sentence")
|
449
|
-
self.sync_shared_field(config.anki, profile.anki, "anki_custom_fields")
|
450
462
|
self.sync_shared_field(config.general, profile.general, "open_config_on_startup")
|
451
463
|
self.sync_shared_field(config.general, profile.general, "open_multimine_on_startup")
|
452
464
|
self.sync_shared_field(config.general, profile.general, "websocket_uri")
|
453
465
|
self.sync_shared_field(config.general, profile.general, "texthooker_port")
|
454
|
-
self.sync_shared_field(config.general, profile.general, "use_old_texthooker")
|
455
466
|
self.sync_shared_field(config.audio, profile.audio, "external_tool")
|
456
467
|
self.sync_shared_field(config.audio, profile.audio, "anki_media_collection")
|
457
468
|
self.sync_shared_field(config.audio, profile.audio, "external_tool_enabled")
|
@@ -633,13 +644,15 @@ console_handler.setFormatter(formatter)
|
|
633
644
|
|
634
645
|
logger.addHandler(console_handler)
|
635
646
|
|
636
|
-
# Create rotating file handler with level DEBUG
|
637
647
|
file_path = get_log_path()
|
638
648
|
try:
|
639
|
-
if os.path.exists(file_path) and os.path.getsize(file_path) >
|
640
|
-
|
649
|
+
if os.path.exists(file_path) and os.path.getsize(file_path) > 1 * 1024 * 1024 and os.access(file_path, os.W_OK):
|
650
|
+
old_log_path = os.path.join(os.path.dirname(file_path), "gamesentenceminer_old.log")
|
651
|
+
if os.path.exists(old_log_path):
|
652
|
+
os.remove(old_log_path)
|
653
|
+
shutil.move(file_path, old_log_path)
|
641
654
|
except Exception as e:
|
642
|
-
logger.
|
655
|
+
logger.info("Couldn't rotate log, probably because the file is being written to by another process. NOT AN ERROR")
|
643
656
|
|
644
657
|
file_handler = logging.FileHandler(file_path, encoding='utf-8')
|
645
658
|
file_handler.setLevel(logging.DEBUG)
|
@@ -655,6 +668,7 @@ class GsmAppState:
|
|
655
668
|
self.anki_note_for_screenshot = None
|
656
669
|
self.previous_line_for_audio = None
|
657
670
|
self.previous_line_for_screenshot = None
|
671
|
+
self.previous_trim_args = None
|
658
672
|
self.previous_audio = None
|
659
673
|
self.previous_screenshot = None
|
660
674
|
self.previous_replay = None
|
@@ -700,6 +714,21 @@ class GsmStatus:
|
|
700
714
|
self.words_being_processed.remove(word)
|
701
715
|
|
702
716
|
|
717
|
+
def is_running_from_source():
|
718
|
+
# Check for .git directory at the project root
|
719
|
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
720
|
+
project_root = current_dir
|
721
|
+
while project_root != os.path.dirname(project_root): # Avoid infinite loop
|
722
|
+
if os.path.isdir(os.path.join(project_root, '.git')):
|
723
|
+
return True
|
724
|
+
if os.path.isfile(os.path.join(project_root, 'pyproject.toml')):
|
725
|
+
return True
|
726
|
+
project_root = os.path.dirname(project_root)
|
727
|
+
return False
|
728
|
+
|
703
729
|
gsm_status = GsmStatus()
|
704
730
|
anki_results = {}
|
705
|
-
gsm_state = GsmAppState()
|
731
|
+
gsm_state = GsmAppState()
|
732
|
+
is_dev = is_running_from_source()
|
733
|
+
|
734
|
+
logger.debug(f"Running in development mode: {is_dev}")
|
GameSentenceMiner/util/ffmpeg.py
CHANGED
@@ -282,83 +282,6 @@ def get_audio_and_trim(video_path, game_line, next_line_time, anki_card_creation
|
|
282
282
|
|
283
283
|
return trim_audio_based_on_last_line(untrimmed_audio, video_path, game_line, next_line_time, anki_card_creation_time)
|
284
284
|
|
285
|
-
def get_audio_and_trim_combined(video_path, game_line, next_line_time, anki_card_creation_time):
|
286
|
-
supported_formats = {
|
287
|
-
'opus': 'libopus',
|
288
|
-
'mp3': 'libmp3lame',
|
289
|
-
'ogg': 'libvorbis',
|
290
|
-
'aac': 'aac',
|
291
|
-
'm4a': 'aac',
|
292
|
-
}
|
293
|
-
|
294
|
-
codec = get_audio_codec(video_path)
|
295
|
-
output_extension = get_config().audio.extension
|
296
|
-
output_audio_path = tempfile.NamedTemporaryFile(
|
297
|
-
dir=configuration.get_temporary_directory(),
|
298
|
-
suffix=f".{output_extension}",
|
299
|
-
delete=False
|
300
|
-
).name
|
301
|
-
|
302
|
-
if codec == output_extension:
|
303
|
-
codec_command = ['-c:a', 'copy']
|
304
|
-
logger.debug(f"Extracting {output_extension} from video (copying)")
|
305
|
-
else:
|
306
|
-
codec_command = ["-c:a", f"{supported_formats[output_extension]}"]
|
307
|
-
logger.debug(f"Re-encoding {codec} to {output_extension}")
|
308
|
-
|
309
|
-
start_trim_time, start_time_float, total_seconds_after_offset, file_length = get_video_timings(video_path, game_line, anki_card_creation_time)
|
310
|
-
|
311
|
-
ffmpeg_command = ffmpeg_base_command_list + [
|
312
|
-
"-ss", str(start_trim_time),
|
313
|
-
"-i", video_path,
|
314
|
-
"-map", "0:a"
|
315
|
-
]
|
316
|
-
|
317
|
-
end_trim_time_str = ""
|
318
|
-
|
319
|
-
if next_line_time and next_line_time > game_line.time:
|
320
|
-
end_total_seconds = next_line_time + get_config().audio.pre_vad_end_offset
|
321
|
-
hours, remainder = divmod(end_total_seconds, 3600)
|
322
|
-
minutes, seconds = divmod(remainder, 60)
|
323
|
-
end_trim_time_str = "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)
|
324
|
-
ffmpeg_command.extend(['-to', end_trim_time_str])
|
325
|
-
logger.debug(
|
326
|
-
f"Trimming end of audio to {end_trim_time_str} based on next line time.")
|
327
|
-
elif get_config().audio.pre_vad_end_offset is not None and get_config().audio.pre_vad_end_offset < 0:
|
328
|
-
end_total_seconds = file_length + get_config().audio.pre_vad_end_offset
|
329
|
-
end_total_seconds = max(end_total_seconds, start_time_float)
|
330
|
-
|
331
|
-
hours, remainder = divmod(end_total_seconds, 3600)
|
332
|
-
minutes, seconds = divmod(remainder, 60)
|
333
|
-
end_trim_time_str = "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)
|
334
|
-
ffmpeg_command.extend(['-to', end_trim_time_str])
|
335
|
-
logger.debug(f"Trimming end of audio to {end_trim_time_str} due to negative pre-vad end offset.")
|
336
|
-
|
337
|
-
ffmpeg_command.extend(codec_command)
|
338
|
-
ffmpeg_command.append(output_audio_path)
|
339
|
-
|
340
|
-
logger.debug("Executing combined audio extraction and trimming command")
|
341
|
-
logger.debug(" ".join(ffmpeg_command))
|
342
|
-
|
343
|
-
try:
|
344
|
-
subprocess.run(ffmpeg_command, check=True)
|
345
|
-
logger.debug(f"{total_seconds_after_offset} trimmed off of beginning")
|
346
|
-
|
347
|
-
if end_trim_time_str:
|
348
|
-
logger.info(f"Audio Extracted and trimmed to {start_trim_time} seconds with end time {end_trim_time_str}")
|
349
|
-
else:
|
350
|
-
logger.info(f"Audio Extracted and trimmed to {start_trim_time} seconds (to end of file)")
|
351
|
-
|
352
|
-
logger.debug(f"Audio trimmed and saved to {output_audio_path}")
|
353
|
-
return output_audio_path
|
354
|
-
except subprocess.CalledProcessError as e:
|
355
|
-
logger.error(f"FFmpeg command failed: {e}")
|
356
|
-
logger.error(f"Command: {' '.join(ffmpeg_command)}")
|
357
|
-
raise
|
358
|
-
except Exception as e:
|
359
|
-
logger.error(f"An unexpected error occurred: {e}")
|
360
|
-
raise
|
361
|
-
|
362
285
|
|
363
286
|
def get_video_duration(file_path):
|
364
287
|
ffprobe_command = [
|
@@ -379,26 +302,22 @@ def trim_audio_based_on_last_line(untrimmed_audio, video_path, game_line, next_l
|
|
379
302
|
trimmed_audio = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(),
|
380
303
|
suffix=f".{get_config().audio.extension}").name
|
381
304
|
start_trim_time, total_seconds, total_seconds_after_offset, file_length = get_video_timings(video_path, game_line, anki_card_creation_time)
|
382
|
-
end_trim_time =
|
305
|
+
end_trim_time = 0
|
383
306
|
|
384
307
|
ffmpeg_command = ffmpeg_base_command_list + [
|
385
308
|
"-i", untrimmed_audio,
|
386
309
|
"-ss", str(start_trim_time)]
|
387
310
|
if next_line and next_line > game_line.time:
|
388
311
|
end_total_seconds = total_seconds + (next_line - game_line.time).total_seconds() + get_config().audio.pre_vad_end_offset
|
389
|
-
|
390
|
-
minutes, seconds = divmod(remainder, 60)
|
391
|
-
end_trim_time = "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)
|
312
|
+
end_trim_time = f"{end_total_seconds:.3f}"
|
392
313
|
ffmpeg_command.extend(['-to', end_trim_time])
|
393
314
|
logger.debug(
|
394
|
-
f"Looks Like this is mining from History, or Multiple Lines were selected Trimming end of audio to {end_trim_time}")
|
315
|
+
f"Looks Like this is mining from History, or Multiple Lines were selected Trimming end of audio to {end_trim_time} seconds")
|
395
316
|
elif get_config().audio.pre_vad_end_offset and get_config().audio.pre_vad_end_offset < 0:
|
396
317
|
end_total_seconds = file_length + get_config().audio.pre_vad_end_offset
|
397
|
-
|
398
|
-
minutes, seconds = divmod(remainder, 60)
|
399
|
-
end_trim_time = "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)
|
318
|
+
end_trim_time = f"{end_total_seconds:.3f}"
|
400
319
|
ffmpeg_command.extend(['-to', end_trim_time])
|
401
|
-
logger.debug(f"Trimming end of audio to {end_trim_time} due to pre-vad end offset")
|
320
|
+
logger.debug(f"Trimming end of audio to {end_trim_time} seconds due to pre-vad end offset")
|
402
321
|
|
403
322
|
ffmpeg_command.extend([
|
404
323
|
"-c", "copy", # Using copy to avoid re-encoding, adjust if needed
|
@@ -407,6 +326,7 @@ def trim_audio_based_on_last_line(untrimmed_audio, video_path, game_line, next_l
|
|
407
326
|
|
408
327
|
logger.debug(" ".join(ffmpeg_command))
|
409
328
|
subprocess.run(ffmpeg_command)
|
329
|
+
gsm_state.previous_trim_args = (untrimmed_audio, start_trim_time, end_trim_time)
|
410
330
|
|
411
331
|
logger.debug(f"{total_seconds_after_offset} trimmed off of beginning")
|
412
332
|
|
@@ -431,12 +351,9 @@ def get_video_timings(video_path, game_line, anki_card_creation_time=None):
|
|
431
351
|
if total_seconds < 0 or total_seconds >= file_length:
|
432
352
|
logger.error("Line mined is outside of the replay buffer! Defaulting to the beginning of the replay buffer. ")
|
433
353
|
logger.info("Recommend either increasing replay buffer length in OBS Settings or mining faster.")
|
434
|
-
return 0, 0, 0
|
354
|
+
return 0, 0, 0, file_length
|
435
355
|
|
436
|
-
|
437
|
-
minutes, seconds = divmod(remainder, 60)
|
438
|
-
start_trim_time = "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)
|
439
|
-
return start_trim_time, total_seconds, total_seconds_after_offset, file_length
|
356
|
+
return total_seconds_after_offset, total_seconds, total_seconds_after_offset, file_length
|
440
357
|
|
441
358
|
|
442
359
|
def reencode_file_with_user_config(input_file, final_output_audio, user_ffmpeg_options):
|
@@ -528,7 +445,7 @@ def convert_audio_to_mp3(input_audio):
|
|
528
445
|
|
529
446
|
|
530
447
|
# Trim the audio using FFmpeg based on detected speech timestamps
|
531
|
-
def trim_audio(input_audio, start_time, end_time, output_audio, trim_beginning=False, fade_in_duration=0.05,
|
448
|
+
def trim_audio(input_audio, start_time, end_time=0, output_audio=None, trim_beginning=False, fade_in_duration=0.05,
|
532
449
|
fade_out_duration=0.05):
|
533
450
|
command = ffmpeg_base_command_list.copy()
|
534
451
|
|
@@ -545,9 +462,10 @@ def trim_audio(input_audio, start_time, end_time, output_audio, trim_beginning=F
|
|
545
462
|
fade_filter.append(f'afade=t=out:st={end_time - fade_out_duration:.2f}:d={fade_out_duration}')
|
546
463
|
# fade_filter.append(f'afade=t=out:d={fade_out_duration}')
|
547
464
|
|
548
|
-
|
549
|
-
|
550
|
-
|
465
|
+
if end_time > 0:
|
466
|
+
command.extend([
|
467
|
+
'-to', f"{end_time:.2f}",
|
468
|
+
])
|
551
469
|
|
552
470
|
if fade_filter:
|
553
471
|
command.extend(['-af', f'afade=t=in:d={fade_in_duration},afade=t=out:st={end_time - fade_out_duration:.2f}:d={fade_out_duration}'])
|
@@ -596,3 +514,13 @@ def is_video_big_enough(file_path, min_size_kb=250):
|
|
596
514
|
logger.error(f"Error: {e}")
|
597
515
|
return False
|
598
516
|
|
517
|
+
|
518
|
+
def get_audio_length(path):
|
519
|
+
result = subprocess.run(
|
520
|
+
[get_ffprobe_path(), "-v", "error", "-show_entries", "format=duration", "-of",
|
521
|
+
"default=noprint_wrappers=1:nokey=1", path],
|
522
|
+
stdout=subprocess.PIPE,
|
523
|
+
stderr=subprocess.PIPE,
|
524
|
+
text=True
|
525
|
+
)
|
526
|
+
return float(result.stdout.strip())
|
@@ -10,6 +10,7 @@ import time
|
|
10
10
|
from datetime import datetime
|
11
11
|
from pathlib import Path
|
12
12
|
|
13
|
+
import requests
|
13
14
|
from rapidfuzz import process
|
14
15
|
|
15
16
|
from GameSentenceMiner.util.configuration import logger, get_config, get_app_directory
|
@@ -248,4 +249,58 @@ os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)
|
|
248
249
|
# with open(OCR_REPLACEMENTS_FILE, 'w', encoding='utf-8') as f:
|
249
250
|
# f.write(data)
|
250
251
|
# except Exception as e:
|
251
|
-
# logger.error(f"Failed to fetch JSON from {url}: {e}")
|
252
|
+
# logger.error(f"Failed to fetch JSON from {url}: {e}")
|
253
|
+
|
254
|
+
|
255
|
+
# Remove GitHub replacements from local OCR replacements file, these replacements are not needed
|
256
|
+
def remove_github_replacements_from_local_ocr():
|
257
|
+
github_url = "https://raw.githubusercontent.com/bpwhelan/GameSentenceMiner/main/electron-src/assets/ocr_replacements.json"
|
258
|
+
|
259
|
+
github_replacements = {}
|
260
|
+
try:
|
261
|
+
response = requests.get(github_url)
|
262
|
+
response.raise_for_status()
|
263
|
+
github_data = response.json()
|
264
|
+
github_replacements = github_data.get('args', {}).get('replacements', {})
|
265
|
+
logger.debug(f"Successfully fetched {len(github_replacements)} replacements from GitHub.")
|
266
|
+
except requests.exceptions.RequestException as e:
|
267
|
+
logger.debug(f"Failed to fetch GitHub replacements from {github_url}: {e}")
|
268
|
+
return
|
269
|
+
except json.JSONDecodeError as e:
|
270
|
+
logger.debug(f"Error decoding JSON from GitHub response: {e}")
|
271
|
+
return
|
272
|
+
|
273
|
+
if not os.path.exists(OCR_REPLACEMENTS_FILE):
|
274
|
+
logger.warning(f"Local file {OCR_REPLACEMENTS_FILE} does not exist. No replacements to remove.")
|
275
|
+
return
|
276
|
+
|
277
|
+
try:
|
278
|
+
with open(OCR_REPLACEMENTS_FILE, 'r', encoding='utf-8') as f:
|
279
|
+
local_ocr_data = json.load(f)
|
280
|
+
|
281
|
+
local_replacements = local_ocr_data.get('args', {}).get('replacements', {})
|
282
|
+
original_count = len(local_replacements)
|
283
|
+
logger.debug(f"Loaded {original_count} replacements from local file.")
|
284
|
+
|
285
|
+
removed_count = 0
|
286
|
+
for key_to_remove in github_replacements.keys():
|
287
|
+
if key_to_remove in local_replacements:
|
288
|
+
del local_replacements[key_to_remove]
|
289
|
+
removed_count += 1
|
290
|
+
|
291
|
+
if removed_count > 0:
|
292
|
+
local_ocr_data['args']['replacements'] = local_replacements
|
293
|
+
with open(OCR_REPLACEMENTS_FILE, 'w', encoding='utf-8') as f:
|
294
|
+
json.dump(local_ocr_data, f, ensure_ascii=False, indent=4)
|
295
|
+
logger.debug(f"Successfully removed {removed_count} replacements from {OCR_REPLACEMENTS_FILE}.")
|
296
|
+
logger.debug(f"Remaining replacements in local file: {len(local_replacements)}")
|
297
|
+
else:
|
298
|
+
logger.debug("No matching replacements from GitHub found in your local file to remove.")
|
299
|
+
|
300
|
+
except json.JSONDecodeError as e:
|
301
|
+
logger.debug(f"Error decoding JSON from {OCR_REPLACEMENTS_FILE}: {e}. Please ensure it's valid JSON.")
|
302
|
+
except Exception as e:
|
303
|
+
logger.debug(f"An unexpected error occurred while processing {OCR_REPLACEMENTS_FILE}: {e}")
|
304
|
+
|
305
|
+
|
306
|
+
remove_github_replacements_from_local_ocr()
|
@@ -113,7 +113,7 @@ def one_contains_the_other(a, b):
|
|
113
113
|
def lines_match(a, b):
|
114
114
|
similarity = similar(a, b)
|
115
115
|
logger.debug(f"Comparing: {a} with {b} - Similarity: {similarity}, Or One contains the other: {one_contains_the_other(a, b)}")
|
116
|
-
return similar(a, b) >= 0.
|
116
|
+
return similar(a, b) >= 0.80 or one_contains_the_other(a, b)
|
117
117
|
|
118
118
|
|
119
119
|
def get_text_event(last_note) -> GameLine:
|
@@ -163,7 +163,7 @@ def get_mined_line(last_note: AnkiCard, lines=None):
|
|
163
163
|
lines = get_all_lines()
|
164
164
|
|
165
165
|
sentence = last_note.get_field(get_config().anki.sentence_field)
|
166
|
-
for line in lines:
|
166
|
+
for line in reversed(lines):
|
167
167
|
if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
|
168
168
|
return line
|
169
169
|
return lines[-1]
|
GameSentenceMiner/vad.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
import subprocess
|
2
1
|
import tempfile
|
3
2
|
import time
|
4
3
|
import warnings
|
@@ -6,7 +5,7 @@ from abc import abstractmethod, ABC
|
|
6
5
|
|
7
6
|
from GameSentenceMiner.util import configuration, ffmpeg
|
8
7
|
from GameSentenceMiner.util.configuration import *
|
9
|
-
from GameSentenceMiner.util.ffmpeg import
|
8
|
+
from GameSentenceMiner.util.ffmpeg import get_audio_length
|
10
9
|
from GameSentenceMiner.util.gsm_utils import make_unique_file_name, run_new_thread
|
11
10
|
from GameSentenceMiner.util.model import VADResult
|
12
11
|
|
@@ -81,17 +80,6 @@ class VADProcessor(ABC):
|
|
81
80
|
def _detect_voice_activity(self, input_audio):
|
82
81
|
pass
|
83
82
|
|
84
|
-
@staticmethod
|
85
|
-
def get_audio_length(path):
|
86
|
-
result = subprocess.run(
|
87
|
-
[get_ffprobe_path(), "-v", "error", "-show_entries", "format=duration", "-of",
|
88
|
-
"default=noprint_wrappers=1:nokey=1", path],
|
89
|
-
stdout=subprocess.PIPE,
|
90
|
-
stderr=subprocess.PIPE,
|
91
|
-
text=True
|
92
|
-
)
|
93
|
-
return float(result.stdout.strip())
|
94
|
-
|
95
83
|
@staticmethod
|
96
84
|
def extract_audio_and_combine_segments(input_audio, segments, output_audio, padding=0.1):
|
97
85
|
files = []
|
@@ -138,7 +126,7 @@ class VADProcessor(ABC):
|
|
138
126
|
|
139
127
|
# Attempt to fix the end time if the last segment is too short
|
140
128
|
if game_line and game_line.next and len(voice_activity) > 1:
|
141
|
-
audio_length =
|
129
|
+
audio_length = get_audio_length(input_audio)
|
142
130
|
if 0 > audio_length - voice_activity[-1]['start'] + get_config().audio.beginning_offset:
|
143
131
|
end_time = voice_activity[-2]['end']
|
144
132
|
|
@@ -369,6 +357,7 @@ class GroqVADProcessor(VADProcessor):
|
|
369
357
|
logger.error(f"Error detecting voice with Groq: {e}")
|
370
358
|
return [], 0.0
|
371
359
|
|
360
|
+
|
372
361
|
vad_processor = VADSystem()
|
373
362
|
|
374
363
|
# test_vad = WhisperVADProcessor()
|
@@ -224,11 +224,11 @@ def serve_static(filename):
|
|
224
224
|
|
225
225
|
@app.route('/')
|
226
226
|
def index():
|
227
|
-
return
|
227
|
+
return send_from_directory('templates', 'index.html')
|
228
228
|
|
229
229
|
@app.route('/texthooker')
|
230
230
|
def texthooker():
|
231
|
-
return
|
231
|
+
return send_from_directory('templates', 'index.html')
|
232
232
|
|
233
233
|
@app.route('/textreplacements')
|
234
234
|
def textreplacements():
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: GameSentenceMiner
|
3
|
-
Version: 2.
|
4
|
-
Summary: A tool for mining sentences from games.
|
3
|
+
Version: 2.10.0
|
4
|
+
Summary: A tool for mining sentences from games. Update: Full UI Re-design
|
5
5
|
Author-email: Beangate <bpwhelan95@gmail.com>
|
6
6
|
License: MIT License
|
7
7
|
Project-URL: Homepage, https://github.com/bpwhelan/GameSentenceMiner
|
@@ -37,6 +37,8 @@ Requires-Dist: pygetwindow; sys_platform == "win32"
|
|
37
37
|
Requires-Dist: flask
|
38
38
|
Requires-Dist: groq
|
39
39
|
Requires-Dist: obsws-python~=1.7.2
|
40
|
+
Requires-Dist: matplotlib
|
41
|
+
Requires-Dist: sounddevice
|
40
42
|
Dynamic: license-file
|
41
43
|
|
42
44
|
# GameSentenceMiner (GSM)
|
@@ -1,10 +1,10 @@
|
|
1
1
|
GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
GameSentenceMiner/anki.py,sha256=
|
3
|
-
GameSentenceMiner/config_gui.py,sha256=
|
4
|
-
GameSentenceMiner/gametext.py,sha256=
|
5
|
-
GameSentenceMiner/gsm.py,sha256=
|
2
|
+
GameSentenceMiner/anki.py,sha256=ACIKAO3CBvO26XxTYdOvyDmdlA_hmMx9bvLjgLyr3xw,16392
|
3
|
+
GameSentenceMiner/config_gui.py,sha256=OqXEyWNlWwlBBxaDP4Iu-tN6FQHKVVcp4t239dGpylc,89688
|
4
|
+
GameSentenceMiner/gametext.py,sha256=6VkjmBeiuZfPk8T6PHFdIAElBH2Y_oLVYvmcafqN7RM,6747
|
5
|
+
GameSentenceMiner/gsm.py,sha256=vn-4T38PZvgVuLYJUE_0Yg9ehSVMtytmbcV2jaKMg3A,24298
|
6
6
|
GameSentenceMiner/obs.py,sha256=YG8LwBf9BTsGbROm_Uq6LhFDSrbf3jgogp78rBbJq94,14728
|
7
|
-
GameSentenceMiner/vad.py,sha256=
|
7
|
+
GameSentenceMiner/vad.py,sha256=G0NkaWFJaIfKQAV7LOFxyKoih7pPNYHDuy4SzeFVCkI,16389
|
8
8
|
GameSentenceMiner/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
GameSentenceMiner/ai/ai_prompting.py,sha256=0jBAnngNwmc3dqJiVWe_QRy4Syr-muV-ML2rq0FiUtU,10215
|
10
10
|
GameSentenceMiner/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -16,11 +16,11 @@ GameSentenceMiner/assets/icon512.png,sha256=HxUj2GHjyQsk8NV433256UxU9phPhtjCY-YB
|
|
16
16
|
GameSentenceMiner/assets/icon64.png,sha256=N8xgdZXvhqVQP9QUK3wX5iqxX9LxHljD7c-Bmgim6tM,9301
|
17
17
|
GameSentenceMiner/assets/pickaxe.png,sha256=VfIGyXyIZdzEnVcc4PmG3wszPMO1W4KCT7Q_nFK6eSE,1403829
|
18
18
|
GameSentenceMiner/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=
|
19
|
+
GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=6Ncq79Poolc7htBiusLZfMRlyThxm1Aky1Z4DlGIY58,3940
|
20
20
|
GameSentenceMiner/ocr/ocrconfig.py,sha256=_tY8mjnzHMJrLS8E5pHqYXZjMuLoGKYgJwdhYgN-ny4,6466
|
21
|
-
GameSentenceMiner/ocr/owocr_area_selector.py,sha256=
|
22
|
-
GameSentenceMiner/ocr/owocr_helper.py,sha256=
|
23
|
-
GameSentenceMiner/ocr/ss_picker.py,sha256=
|
21
|
+
GameSentenceMiner/ocr/owocr_area_selector.py,sha256=59zrzamPbBeU_Pfdeivc8RawlLXhXqNrhkBrhc69ZZo,47057
|
22
|
+
GameSentenceMiner/ocr/owocr_helper.py,sha256=nSrAQAUdLpdH2TIOq-3902MGaORLQqzPruGiIzbzcSA,19843
|
23
|
+
GameSentenceMiner/ocr/ss_picker.py,sha256=0IhxUdaKruFpZyBL-8SpxWg7bPrlGpy3lhTcMMZ5rwo,5224
|
24
24
|
GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9lKnRCj6oZgR0,49
|
25
25
|
GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
|
26
26
|
GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
|
@@ -29,15 +29,16 @@ GameSentenceMiner/owocr/owocr/ocr.py,sha256=fWrbKomSrdkSdlEiMGTKb6-F7wCgfaZZNBUo
|
|
29
29
|
GameSentenceMiner/owocr/owocr/run.py,sha256=wOileOoP1djCpOLgg7d-nWrYS78NctClNTscLZ3kwDc,55198
|
30
30
|
GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
|
31
31
|
GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
|
-
GameSentenceMiner/util/
|
32
|
+
GameSentenceMiner/util/audio_offset_selector.py,sha256=8EM5sueNBrJGNjQ_F4TzwpkTA_yQu1amkwwq0_qJaRs,8294
|
33
|
+
GameSentenceMiner/util/configuration.py,sha256=ibwIJlfhzjmlQJLhwnrcRJ7jImPN5S-pw62wa2eBZWo,28538
|
33
34
|
GameSentenceMiner/util/electron_config.py,sha256=3VmIrcXhC-wIMMc4uqV85NrNenRl4ZUbnQfSjWEwuig,9852
|
34
|
-
GameSentenceMiner/util/ffmpeg.py,sha256=
|
35
|
-
GameSentenceMiner/util/gsm_utils.py,sha256=
|
35
|
+
GameSentenceMiner/util/ffmpeg.py,sha256=t0tflxq170n8PZKkdw8fTZIUQfXD0p_qARa9JTdhBTc,21530
|
36
|
+
GameSentenceMiner/util/gsm_utils.py,sha256=JFWYSEf3cNw_d_m61RF3Fqf8j_UHUHPnwe_uW0WZ8wU,11348
|
36
37
|
GameSentenceMiner/util/model.py,sha256=ROH-uO55im7H4COonyyPZQ8l9-8EPtyOk7l_DNEonbk,6630
|
37
38
|
GameSentenceMiner/util/notification.py,sha256=0OnEYjn3DUEZ6c6OtPjdVZe-DG-QSoMAl9fetjjCvNU,3874
|
38
39
|
GameSentenceMiner/util/package.py,sha256=u1ym5z869lw5EHvIviC9h9uH97bzUXSXXA8KIn8rUvk,1157
|
39
40
|
GameSentenceMiner/util/ss_selector.py,sha256=oCzmDbpEGvVselF-oDPIrBcQktGIZT0Zt16uDLDAHMQ,4493
|
40
|
-
GameSentenceMiner/util/text_log.py,sha256=
|
41
|
+
GameSentenceMiner/util/text_log.py,sha256=KvNE0yT9FGh0aJNj22BgqFiallhB8D2Ibc19ZMZV1dM,5481
|
41
42
|
GameSentenceMiner/util/communication/__init__.py,sha256=xh__yn2MhzXi9eLi89PeZWlJPn-cbBSjskhi1BRraXg,643
|
42
43
|
GameSentenceMiner/util/communication/send.py,sha256=Wki9qIY2CgYnuHbmnyKVIYkcKAN_oYS4up93XMikBaI,222
|
43
44
|
GameSentenceMiner/util/communication/websocket.py,sha256=TbphRGmxVrgEupS7tNdifsmQfWDfIp0Hio2cSiUKgsk,3317
|
@@ -47,7 +48,7 @@ GameSentenceMiner/util/downloader/download_tools.py,sha256=mvnOjDHFlV1AbjHaNI7md
|
|
47
48
|
GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=w7WbPad2LTuz3TAKtJlrslLQlUe-gJMQfOnDwmO98h4,10341
|
48
49
|
GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
49
50
|
GameSentenceMiner/web/service.py,sha256=1m27LxMHbR-9H6VM7uBoWdvhUafefyYoRXqZHCOZ53A,5674
|
50
|
-
GameSentenceMiner/web/texthooking_page.py,sha256=
|
51
|
+
GameSentenceMiner/web/texthooking_page.py,sha256=4qIimQggb-RUtDsD2wpRYrXmAj7jfsQVGYCXRv44dRA,15370
|
51
52
|
GameSentenceMiner/web/static/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
53
|
GameSentenceMiner/web/static/apple-touch-icon.png,sha256=OcMI8af_68DA_tweOsQ5LytTyMwm7-hPW07IfrOVgEs,46132
|
53
54
|
GameSentenceMiner/web/static/favicon-96x96.png,sha256=lOePzjiKl1JY2J1kT_PMdyEnrlJmi5GWbmXJunM12B4,16502
|
@@ -61,9 +62,9 @@ GameSentenceMiner/web/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
61
62
|
GameSentenceMiner/web/templates/index.html,sha256=HZKiIjiGJV8PGQ9T2aLDUNSfJn71qOwbYCjbRuSIjpY,213583
|
62
63
|
GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
|
63
64
|
GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
|
64
|
-
gamesentenceminer-2.
|
65
|
-
gamesentenceminer-2.
|
66
|
-
gamesentenceminer-2.
|
67
|
-
gamesentenceminer-2.
|
68
|
-
gamesentenceminer-2.
|
69
|
-
gamesentenceminer-2.
|
65
|
+
gamesentenceminer-2.10.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
66
|
+
gamesentenceminer-2.10.0.dist-info/METADATA,sha256=EDRHC0Mp9BewmGMO_agNAzQgQ5oZhHYKwUSjYqGAasU,7354
|
67
|
+
gamesentenceminer-2.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
68
|
+
gamesentenceminer-2.10.0.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
|
69
|
+
gamesentenceminer-2.10.0.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
|
70
|
+
gamesentenceminer-2.10.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|