GameSentenceMiner 2.8.50__py3-none-any.whl → 2.8.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/anki.py +3 -2
- GameSentenceMiner/ffmpeg.py +5 -5
- GameSentenceMiner/gsm.py +15 -9
- GameSentenceMiner/ocr/owocr_helper.py +51 -68
- GameSentenceMiner/owocr/owocr/ocr.py +9 -2
- GameSentenceMiner/owocr/owocr/run.py +6 -8
- GameSentenceMiner/vad/silero_trim.py +8 -2
- GameSentenceMiner/vad/vosk_helper.py +8 -2
- GameSentenceMiner/vad/whisper_helper.py +9 -3
- GameSentenceMiner/web/templates/utility.html +15 -3
- {gamesentenceminer-2.8.50.dist-info → gamesentenceminer-2.8.51.dist-info}/METADATA +1 -1
- {gamesentenceminer-2.8.50.dist-info → gamesentenceminer-2.8.51.dist-info}/RECORD +16 -16
- {gamesentenceminer-2.8.50.dist-info → gamesentenceminer-2.8.51.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.8.50.dist-info → gamesentenceminer-2.8.51.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.8.50.dist-info → gamesentenceminer-2.8.51.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.8.50.dist-info → gamesentenceminer-2.8.51.dist-info}/top_level.txt +0 -0
GameSentenceMiner/anki.py
CHANGED
@@ -40,12 +40,13 @@ def update_anki_card(last_note: AnkiCard, note=None, audio_path='', video_path='
|
|
40
40
|
if update_audio:
|
41
41
|
audio_in_anki = store_media_file(audio_path)
|
42
42
|
if update_picture:
|
43
|
+
logger.info("Getting Screenshot...")
|
43
44
|
screenshot = ffmpeg.get_screenshot(video_path, ss_time)
|
44
45
|
wait_for_stable_file(screenshot)
|
45
46
|
screenshot_in_anki = store_media_file(screenshot)
|
46
47
|
if get_config().paths.remove_screenshot:
|
47
48
|
os.remove(screenshot)
|
48
|
-
if get_config().anki.previous_image_field:
|
49
|
+
if get_config().anki.previous_image_field and game_line.prev:
|
49
50
|
prev_screenshot = ffmpeg.get_screenshot_for_line(video_path, selected_lines[0].prev if selected_lines else game_line.prev)
|
50
51
|
wait_for_stable_file(prev_screenshot)
|
51
52
|
prev_screenshot_in_anki = store_media_file(prev_screenshot)
|
@@ -184,7 +185,7 @@ def store_media_file(path):
|
|
184
185
|
return invoke('storeMediaFile', filename=path, data=convert_to_base64(path))
|
185
186
|
except Exception as e:
|
186
187
|
logger.error(f"Error storing media file, check anki card for blank media fields: {e}")
|
187
|
-
return
|
188
|
+
return None
|
188
189
|
|
189
190
|
|
190
191
|
def convert_to_base64(file_path):
|
GameSentenceMiner/ffmpeg.py
CHANGED
@@ -146,19 +146,19 @@ def get_screenshot_time(video_path, game_line, default_beginning=False, vad_resu
|
|
146
146
|
# logger.info(f"Using VAD result {vad_result} for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
|
147
147
|
if get_config().screenshot.screenshot_timing_setting == "beginning":
|
148
148
|
screenshot_time_from_beginning = line_timestamp_in_video + screenshot_offset
|
149
|
-
logger.
|
149
|
+
logger.debug(f"Using 'beginning' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
|
150
150
|
elif get_config().screenshot.screenshot_timing_setting == "middle":
|
151
151
|
if game_line.next:
|
152
152
|
screenshot_time_from_beginning = line_timestamp_in_video + ((game_line.next.time - game_line.time).total_seconds() / 2) + screenshot_offset
|
153
153
|
else:
|
154
154
|
screenshot_time_from_beginning = (file_length - ((file_length - line_timestamp_in_video) / 2)) + screenshot_offset
|
155
|
-
logger.
|
155
|
+
logger.debug(f"Using 'middle' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
|
156
156
|
elif get_config().screenshot.screenshot_timing_setting == "end":
|
157
157
|
if game_line.next:
|
158
158
|
screenshot_time_from_beginning = line_timestamp_in_video + (game_line.next.time - game_line.time).total_seconds() - screenshot_offset
|
159
159
|
else:
|
160
160
|
screenshot_time_from_beginning = file_length - screenshot_offset
|
161
|
-
logger.
|
161
|
+
logger.debug(f"Using 'end' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
|
162
162
|
else:
|
163
163
|
logger.error(f"Invalid screenshot timing setting: {get_config().screenshot.screenshot_timing_setting}")
|
164
164
|
screenshot_time_from_beginning = line_timestamp_in_video + screenshot_offset
|
@@ -317,7 +317,7 @@ def trim_audio_based_on_last_line(untrimmed_audio, video_path, game_line, next_l
|
|
317
317
|
logger.debug(" ".join(ffmpeg_command))
|
318
318
|
subprocess.run(ffmpeg_command)
|
319
319
|
|
320
|
-
logger.
|
320
|
+
logger.debug(f"{total_seconds_after_offset} trimmed off of beginning")
|
321
321
|
|
322
322
|
logger.debug(f"Audio trimmed and saved to {trimmed_audio}")
|
323
323
|
return trimmed_audio
|
@@ -412,7 +412,7 @@ def trim_audio(input_audio, start_time, end_time, output_audio):
|
|
412
412
|
command.extend(['-i', input_audio])
|
413
413
|
|
414
414
|
if get_config().vad.trim_beginning and start_time > 0:
|
415
|
-
logger.
|
415
|
+
logger.debug(f"trimming beginning to {start_time}")
|
416
416
|
command.extend(['-ss', f"{start_time:.2f}"])
|
417
417
|
|
418
418
|
command.extend([
|
GameSentenceMiner/gsm.py
CHANGED
@@ -186,6 +186,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
186
186
|
|
187
187
|
@staticmethod
|
188
188
|
def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False, timing_only=False):
|
189
|
+
logger.info("Getting audio from video...")
|
189
190
|
trimmed_audio = get_audio_and_trim(video_path, game_line, next_line_time, anki_card_creation_time)
|
190
191
|
if temporary:
|
191
192
|
return trimmed_audio
|
@@ -195,13 +196,18 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
195
196
|
f"{obs.get_current_game(sanitize=True)}.{get_config().audio.extension}"))
|
196
197
|
result = VADResult(False, 0, 0)
|
197
198
|
if get_config().vad.do_vad_postprocessing:
|
198
|
-
|
199
|
+
logger.info("Trimming audio with Voice Detection...")
|
200
|
+
result = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio, vad_trimmed_audio, game_line=game_line)
|
199
201
|
if not result.success:
|
200
202
|
result = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio,
|
201
|
-
vad_trimmed_audio)
|
202
|
-
if not result.success
|
203
|
-
|
204
|
-
|
203
|
+
vad_trimmed_audio, game_line=game_line)
|
204
|
+
if not result.success:
|
205
|
+
if get_config().vad.add_audio_on_no_results:
|
206
|
+
logger.info("No voice activity detected, using full audio.")
|
207
|
+
vad_trimmed_audio = trimmed_audio
|
208
|
+
else:
|
209
|
+
logger.info("No voice activity detected.")
|
210
|
+
return None, result, None
|
205
211
|
if timing_only:
|
206
212
|
return result
|
207
213
|
if get_config().audio.ffmpeg_reencode_options and os.path.exists(vad_trimmed_audio):
|
@@ -212,19 +218,19 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
212
218
|
return final_audio_output, result, vad_trimmed_audio
|
213
219
|
|
214
220
|
|
215
|
-
def do_vad_processing(model, trimmed_audio, vad_trimmed_audio, second_pass=False):
|
221
|
+
def do_vad_processing(model, trimmed_audio, vad_trimmed_audio, game_line=None, second_pass=False):
|
216
222
|
match model:
|
217
223
|
case configuration.OFF:
|
218
224
|
pass
|
219
225
|
case configuration.SILERO:
|
220
226
|
from GameSentenceMiner.vad import silero_trim
|
221
|
-
return silero_trim.process_audio_with_silero(trimmed_audio, vad_trimmed_audio)
|
227
|
+
return silero_trim.process_audio_with_silero(trimmed_audio, vad_trimmed_audio, game_line)
|
222
228
|
case configuration.VOSK:
|
223
229
|
from GameSentenceMiner.vad import vosk_helper
|
224
|
-
return vosk_helper.process_audio_with_vosk(trimmed_audio, vad_trimmed_audio)
|
230
|
+
return vosk_helper.process_audio_with_vosk(trimmed_audio, vad_trimmed_audio, game_line)
|
225
231
|
case configuration.WHISPER:
|
226
232
|
from GameSentenceMiner.vad import whisper_helper
|
227
|
-
return whisper_helper.process_audio_with_whisper(trimmed_audio, vad_trimmed_audio)
|
233
|
+
return whisper_helper.process_audio_with_whisper(trimmed_audio, vad_trimmed_audio, game_line)
|
228
234
|
|
229
235
|
|
230
236
|
def play_audio_in_external(filepath):
|
@@ -180,7 +180,7 @@ class WebsocketServerThread(threading.Thread):
|
|
180
180
|
finally:
|
181
181
|
self.clients.remove(websocket)
|
182
182
|
|
183
|
-
def send_text(self, text, line_time: datetime):
|
183
|
+
async def send_text(self, text, line_time: datetime):
|
184
184
|
if text:
|
185
185
|
return asyncio.run_coroutine_threadsafe(
|
186
186
|
self.send_text_coroutine(json.dumps({"sentence": text, "time": line_time.isoformat()})), self.loop)
|
@@ -212,8 +212,6 @@ def do_second_ocr(ocr1_text, time, img, filtering, scrolling=False):
|
|
212
212
|
try:
|
213
213
|
orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
|
214
214
|
engine=ocr2)
|
215
|
-
print(filtering)
|
216
|
-
print(last_ocr2_result)
|
217
215
|
if scrolling:
|
218
216
|
return text
|
219
217
|
if fuzz.ratio(last_ocr2_result, orig_text) >= 80:
|
@@ -221,7 +219,7 @@ def do_second_ocr(ocr1_text, time, img, filtering, scrolling=False):
|
|
221
219
|
return
|
222
220
|
save_result_image(img)
|
223
221
|
last_ocr2_result = orig_text
|
224
|
-
send_result(text, time)
|
222
|
+
asyncio.run(send_result(text, time))
|
225
223
|
except json.JSONDecodeError:
|
226
224
|
print("Invalid JSON received.")
|
227
225
|
except Exception as e:
|
@@ -238,13 +236,17 @@ def save_result_image(img):
|
|
238
236
|
img.close()
|
239
237
|
|
240
238
|
|
241
|
-
def send_result(text, time):
|
239
|
+
async def send_result(text, time):
|
242
240
|
if text:
|
243
241
|
text = do_text_replacements(text, OCR_REPLACEMENTS_FILE)
|
244
|
-
if get_config().advanced.ocr_sends_to_clipboard:
|
242
|
+
if get_config().advanced.ocr_sends_to_clipboard or ssonly:
|
245
243
|
import pyperclip
|
246
244
|
pyperclip.copy(text)
|
247
|
-
|
245
|
+
if not ssonly:
|
246
|
+
try:
|
247
|
+
await websocket_server_thread.send_text(text, time)
|
248
|
+
except Exception as e:
|
249
|
+
logger.debug(f"Error sending text to websocket: {e}")
|
248
250
|
|
249
251
|
|
250
252
|
previous_text_list = []
|
@@ -253,74 +255,57 @@ previous_ocr1_result = "" # Store last OCR1 result
|
|
253
255
|
last_oneocr_time = None # Store last OCR time
|
254
256
|
text_stable_start_time = None # Store the start time when text becomes stable
|
255
257
|
previous_img = None
|
256
|
-
|
258
|
+
previous_orig_text = "" # Store original text result
|
257
259
|
TEXT_APPEARENCE_DELAY = get_ocr_scan_rate() * 1000 + 500 # Adjust as needed
|
258
260
|
force_stable = False
|
259
261
|
scrolling_text_images = []
|
260
262
|
|
261
|
-
def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering=None):
|
262
|
-
global twopassocr, ocr2, previous_text, last_oneocr_time, text_stable_start_time,
|
263
|
+
def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering=None, crop_coords=None):
|
264
|
+
global twopassocr, ocr2, previous_text, last_oneocr_time, text_stable_start_time, previous_orig_text, previous_img, force_stable, previous_ocr1_result, scrolling_text_images, previous_text_list
|
263
265
|
orig_text_string = ''.join([item for item in orig_text if item is not None]) if orig_text else ""
|
264
266
|
if came_from_ss:
|
265
267
|
save_result_image(img)
|
266
|
-
send_result(text, time)
|
268
|
+
asyncio.run(send_result(text, time))
|
267
269
|
return
|
268
270
|
|
269
271
|
line_start_time = time if time else datetime.now()
|
270
272
|
|
271
273
|
if not twopassocr:
|
272
|
-
if previous_text and fuzz.ratio(orig_text_string,
|
274
|
+
if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
|
273
275
|
logger.info("Seems like Text we already sent, not doing anything.")
|
274
276
|
return
|
275
277
|
save_result_image(img)
|
276
|
-
send_result(text, time)
|
277
|
-
|
278
|
-
previous_text =
|
278
|
+
asyncio.run(send_result(text, time))
|
279
|
+
previous_orig_text = orig_text_string
|
280
|
+
previous_text = None
|
279
281
|
previous_img = None
|
280
282
|
text_stable_start_time = None
|
281
283
|
last_oneocr_time = None
|
282
284
|
return
|
283
285
|
if not text or force_stable:
|
284
|
-
# if scrolling_text_images:
|
285
|
-
# stable_time = text_stable_start_time
|
286
|
-
# full_text = "".join([do_second_ocr(orig_text_string, line_start_time, img, filtering, True) for img in scrolling_text_images])
|
287
|
-
# scrolling_text_images = []
|
288
|
-
# send_result(full_text, stable_time)
|
289
|
-
# orig_text_result = orig_text_string
|
290
|
-
# previous_text = previous_text
|
291
|
-
# previous_img = None
|
292
|
-
# text_stable_start_time = None
|
293
|
-
# last_oneocr_time = None
|
294
286
|
force_stable = False
|
295
|
-
if previous_text:
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
287
|
+
if previous_text and text_stable_start_time:
|
288
|
+
stable_time = text_stable_start_time
|
289
|
+
previous_img_local = previous_img
|
290
|
+
if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
|
291
|
+
logger.info("Seems like Text we already sent, not doing anything.")
|
292
|
+
return
|
293
|
+
previous_orig_text = orig_text_string
|
294
|
+
previous_ocr1_result = previous_text
|
295
|
+
if crop_coords:
|
296
|
+
previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
|
297
|
+
previous_img_local = previous_img_local.crop(crop_coords)
|
298
|
+
do_second_ocr(previous_text, stable_time, previous_img_local, filtering)
|
299
|
+
previous_img = None
|
300
|
+
previous_text = None
|
301
|
+
text_stable_start_time = None
|
302
|
+
last_oneocr_time = None
|
303
|
+
previous_text = None
|
309
304
|
return
|
310
|
-
# elif previous_text_list and all(
|
311
|
-
# fuzz.partial_ratio(token, prev_token) >= 95 for token in orig_text for prev_token in
|
312
|
-
# previous_text_list[1:]):
|
313
|
-
# logger.info(f"Previous text: {previous_text_list}. Current text: {orig_text}.")
|
314
|
-
# logger.info("Seems like Scrolling text potentially...")
|
315
|
-
# previous_img_local = previous_img
|
316
|
-
# scrolling_text_images.append(previous_img_local)
|
317
|
-
# previous_text_list = orig_text
|
318
|
-
# previous_text = orig_text_string
|
319
|
-
# return
|
320
305
|
|
321
306
|
if not text_stable_start_time:
|
322
307
|
text_stable_start_time = line_start_time
|
323
|
-
previous_text =
|
308
|
+
previous_text = text
|
324
309
|
previous_text_list = orig_text
|
325
310
|
last_oneocr_time = line_start_time
|
326
311
|
previous_img = img
|
@@ -333,15 +318,17 @@ def run_oneocr(ocr_config: OCRConfig, area=False):
|
|
333
318
|
print("Running OneOCR")
|
334
319
|
screen_area = None
|
335
320
|
screen_areas = []
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
321
|
+
if not ssonly:
|
322
|
+
for rect_config in ocr_config.rectangles:
|
323
|
+
coords = rect_config.coordinates
|
324
|
+
monitor_config = rect_config.monitor
|
325
|
+
screen_area = ",".join(str(c) for c in coords) if area else None
|
326
|
+
if screen_area:
|
327
|
+
screen_areas.append(screen_area)
|
342
328
|
exclusions = list(rect.coordinates for rect in list(filter(lambda x: x.is_excluded, ocr_config.rectangles)))
|
343
|
-
run.
|
344
|
-
|
329
|
+
run.init_config(False)
|
330
|
+
run.run(read_from="screencapture" if not ssonly else "clipboard",
|
331
|
+
read_from_secondary="clipboard" if not ssonly else None,
|
345
332
|
write_to="callback",
|
346
333
|
screen_capture_area=screen_area,
|
347
334
|
# screen_capture_monitor=monitor_config['index'],
|
@@ -387,7 +374,7 @@ def set_force_stable_hotkey():
|
|
387
374
|
print("Press Ctrl+Shift+F to toggle force stable mode.")
|
388
375
|
|
389
376
|
if __name__ == "__main__":
|
390
|
-
global ocr1, ocr2, twopassocr, language
|
377
|
+
global ocr1, ocr2, twopassocr, language, ssonly
|
391
378
|
import sys
|
392
379
|
|
393
380
|
args = sys.argv[1:]
|
@@ -411,6 +398,8 @@ if __name__ == "__main__":
|
|
411
398
|
ocr1 = "oneocr"
|
412
399
|
ocr2 = "glens"
|
413
400
|
twopassocr = True
|
401
|
+
|
402
|
+
ssonly = "--ssonly" in args
|
414
403
|
logger.info(f"Received arguments: ocr1={ocr1}, ocr2={ocr2}, twopassocr={twopassocr}")
|
415
404
|
# set_force_stable_hotkey()
|
416
405
|
global ocr_config
|
@@ -431,15 +420,9 @@ if __name__ == "__main__":
|
|
431
420
|
if ocr_config:
|
432
421
|
rectangles = list(filter(lambda rect: not rect.is_excluded, ocr_config.rectangles))
|
433
422
|
oneocr_threads = []
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
oneocr_threads.append(thread)
|
438
|
-
thread.start()
|
439
|
-
else:
|
440
|
-
single_ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config,False, ), daemon=True)
|
441
|
-
oneocr_threads.append(single_ocr_thread)
|
442
|
-
single_ocr_thread.start()
|
423
|
+
single_ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config,ocr_config.rectangles ), daemon=True)
|
424
|
+
oneocr_threads.append(single_ocr_thread)
|
425
|
+
single_ocr_thread.start()
|
443
426
|
websocket_server_thread = WebsocketServerThread(read=True)
|
444
427
|
websocket_server_thread.start()
|
445
428
|
try:
|
@@ -17,6 +17,8 @@ from google.generativeai import GenerationConfig
|
|
17
17
|
from loguru import logger
|
18
18
|
import requests
|
19
19
|
|
20
|
+
from ...configuration import get_temporary_directory
|
21
|
+
|
20
22
|
try:
|
21
23
|
from manga_ocr import MangaOcr as MOCR
|
22
24
|
except ImportError:
|
@@ -765,11 +767,16 @@ class OneOCR:
|
|
765
767
|
img = input_to_pil_image(img)
|
766
768
|
if not img:
|
767
769
|
return (False, 'Invalid image provided')
|
768
|
-
|
770
|
+
crop_coords = None
|
769
771
|
if sys.platform == 'win32':
|
770
772
|
try:
|
771
773
|
ocr_resp = self.model.recognize_pil(img)
|
772
774
|
# print(json.dumps(ocr_resp))
|
775
|
+
x_coords = [line['bounding_rect'][f'x{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
|
776
|
+
y_coords = [line['bounding_rect'][f'y{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
|
777
|
+
if x_coords and y_coords:
|
778
|
+
crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
|
779
|
+
|
773
780
|
res = ocr_resp['text']
|
774
781
|
except RuntimeError as e:
|
775
782
|
return (False, e)
|
@@ -787,7 +794,7 @@ class OneOCR:
|
|
787
794
|
|
788
795
|
res = res.json()['text']
|
789
796
|
|
790
|
-
x = (True, res)
|
797
|
+
x = (True, res, crop_coords)
|
791
798
|
|
792
799
|
# img.close()
|
793
800
|
return x
|
@@ -381,10 +381,7 @@ class TextFiltering:
|
|
381
381
|
else:
|
382
382
|
orig_text_filtered.append(None)
|
383
383
|
|
384
|
-
if not isinstance(last_result, tuple):
|
385
|
-
print(type(last_result))
|
386
384
|
if isinstance(last_result, list):
|
387
|
-
print("last_result is a list")
|
388
385
|
last_text = last_result
|
389
386
|
elif last_result and last_result[1] == engine_index:
|
390
387
|
last_text = last_result[0]
|
@@ -406,7 +403,6 @@ class TextFiltering:
|
|
406
403
|
break
|
407
404
|
else:
|
408
405
|
for block in new_blocks:
|
409
|
-
print(block)
|
410
406
|
if lang not in ["ja", "zh"] or self.classify(block)[0] == lang:
|
411
407
|
final_blocks.append(block)
|
412
408
|
|
@@ -752,7 +748,6 @@ class ScreenshotClass:
|
|
752
748
|
|
753
749
|
if rand_int == 1:
|
754
750
|
img.save(os.path.join(get_temporary_directory(), 'after_crop.png'), 'PNG')
|
755
|
-
print(f'OCR images saved to {get_temporary_directory()} if debugging is needed, this is 1/20 chance')
|
756
751
|
|
757
752
|
return img
|
758
753
|
|
@@ -897,7 +892,9 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
|
|
897
892
|
engine_color = config.get_general('engine_color')
|
898
893
|
|
899
894
|
start_time = time.time()
|
900
|
-
|
895
|
+
result = engine_instance(img_or_path)
|
896
|
+
res, text, crop_coords = (*result, None)[:3]
|
897
|
+
|
901
898
|
end_time = time.time()
|
902
899
|
|
903
900
|
if not res and ocr_2 == engine:
|
@@ -908,7 +905,8 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
|
|
908
905
|
if last_result:
|
909
906
|
last_result = []
|
910
907
|
break
|
911
|
-
|
908
|
+
result = engine_instance(img_or_path)
|
909
|
+
res, text, crop_coords = (*result, None)[:3]
|
912
910
|
|
913
911
|
orig_text = []
|
914
912
|
# print(filtering)
|
@@ -933,7 +931,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
|
|
933
931
|
elif write_to == 'clipboard':
|
934
932
|
pyperclipfix.copy(text)
|
935
933
|
elif write_to == "callback":
|
936
|
-
txt_callback(text, orig_text, ocr_start_time, img_or_path, bool(engine), filtering)
|
934
|
+
txt_callback(text, orig_text, ocr_start_time, img_or_path, bool(engine), filtering, crop_coords)
|
937
935
|
elif write_to:
|
938
936
|
with Path(write_to).open('a', encoding='utf-8') as f:
|
939
937
|
f.write(text + '\n')
|
@@ -27,7 +27,7 @@ def detect_voice_with_silero(input_audio):
|
|
27
27
|
|
28
28
|
|
29
29
|
# Example usage of Silero with trimming
|
30
|
-
def process_audio_with_silero(input_audio, output_audio):
|
30
|
+
def process_audio_with_silero(input_audio, output_audio, game_line):
|
31
31
|
voice_activity = detect_voice_with_silero(input_audio)
|
32
32
|
|
33
33
|
if not voice_activity:
|
@@ -35,7 +35,13 @@ def process_audio_with_silero(input_audio, output_audio):
|
|
35
35
|
|
36
36
|
# Trim based on the first and last speech detected
|
37
37
|
start_time = voice_activity[0]['start'] if voice_activity else 0
|
38
|
-
|
38
|
+
if (game_line.next and len(voice_activity) > 1
|
39
|
+
and voice_activity[-1]['end'] - get_config().audio.beginning_offset > len(input_audio) / 16000
|
40
|
+
and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
|
41
|
+
end_time = voice_activity[-2]['end']
|
42
|
+
logger.info("Using the second last timestamp for trimming")
|
43
|
+
else:
|
44
|
+
end_time = voice_activity[-1]['end'] if voice_activity else 0
|
39
45
|
|
40
46
|
# Trim the audio using FFmpeg
|
41
47
|
ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
|
@@ -123,7 +123,7 @@ def detect_voice_with_vosk(input_audio):
|
|
123
123
|
|
124
124
|
|
125
125
|
# Example usage of Vosk with trimming
|
126
|
-
def process_audio_with_vosk(input_audio, output_audio):
|
126
|
+
def process_audio_with_vosk(input_audio, output_audio, game_line):
|
127
127
|
voice_activity, total_duration = detect_voice_with_vosk(input_audio)
|
128
128
|
|
129
129
|
if not voice_activity:
|
@@ -132,7 +132,13 @@ def process_audio_with_vosk(input_audio, output_audio):
|
|
132
132
|
|
133
133
|
# Trim based on the first and last speech detected
|
134
134
|
start_time = voice_activity[0]['start'] if voice_activity else 0
|
135
|
-
|
135
|
+
if (game_line.next and len(voice_activity) > 1
|
136
|
+
and voice_activity[-1]['end'] - get_config().audio.beginning_offset > len(input_audio) / 16000
|
137
|
+
and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
|
138
|
+
end_time = voice_activity[-2]['end']
|
139
|
+
logger.info("Using the second last timestamp for trimming")
|
140
|
+
else:
|
141
|
+
end_time = voice_activity[-1]['end'] if voice_activity else 0
|
136
142
|
|
137
143
|
if get_config().vad.trim_beginning:
|
138
144
|
logger.info(f"VAD Trimmed Beginning of Audio to {start_time}")
|
@@ -70,7 +70,7 @@ def detect_voice_with_whisper(input_audio):
|
|
70
70
|
|
71
71
|
|
72
72
|
# Example usage of Whisper with trimming
|
73
|
-
def process_audio_with_whisper(input_audio, output_audio):
|
73
|
+
def process_audio_with_whisper(input_audio, output_audio, game_line):
|
74
74
|
voice_activity = detect_voice_with_whisper(input_audio)
|
75
75
|
|
76
76
|
if not voice_activity:
|
@@ -78,8 +78,14 @@ def process_audio_with_whisper(input_audio, output_audio):
|
|
78
78
|
return VADResult(False, 0, 0)
|
79
79
|
|
80
80
|
# Trim based on the first and last speech detected
|
81
|
-
start_time = voice_activity[0]['start']
|
82
|
-
|
81
|
+
start_time = voice_activity[0]['start'] if voice_activity else 0
|
82
|
+
if (game_line.next and len(voice_activity) > 1
|
83
|
+
and voice_activity[-1]['end'] - get_config().audio.beginning_offset > len(input_audio) / 16000
|
84
|
+
and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
|
85
|
+
end_time = voice_activity[-2]['end']
|
86
|
+
logger.info("Using the second last timestamp for trimming")
|
87
|
+
else:
|
88
|
+
end_time = voice_activity[-1]['end'] if voice_activity else 0
|
83
89
|
|
84
90
|
if get_config().vad.trim_beginning:
|
85
91
|
logger.info(f"VAD Trimmed Beginning of Audio to {start_time}")
|
@@ -126,7 +126,6 @@
|
|
126
126
|
<script>
|
127
127
|
let mainStyle = document.querySelector('head style');
|
128
128
|
let deleteHistoryButton = document.getElementById('delete-history');
|
129
|
-
console.log(mainStyle);
|
130
129
|
let displayedEventIds = new Set();
|
131
130
|
let isTabActive = true;
|
132
131
|
let isFetching = false; // Flag to track if a fetch is in progress
|
@@ -141,6 +140,8 @@
|
|
141
140
|
let hoveredCheckboxes = new Set();
|
142
141
|
let checkboxes = []; // Will hold all checkbox elements
|
143
142
|
let checkboxMap = {};
|
143
|
+
let textLines = []; // Will hold all textline elements
|
144
|
+
let textLineMap = {};
|
144
145
|
let checkboxes_being_updated = new Set();
|
145
146
|
|
146
147
|
// Shift click selection variable
|
@@ -170,7 +171,6 @@
|
|
170
171
|
}
|
171
172
|
}
|
172
173
|
if (!ev.history) {
|
173
|
-
console.log(checkboxMap[ev.id])
|
174
174
|
if (!checkboxes_being_updated.has(ev.id)) {
|
175
175
|
const checkbox = checkboxMap[ev.id];
|
176
176
|
if (checkbox) {
|
@@ -219,6 +219,7 @@
|
|
219
219
|
});
|
220
220
|
}
|
221
221
|
|
222
|
+
|
222
223
|
function addNewEvent(event) {
|
223
224
|
displayedEventIds.add(event.id);
|
224
225
|
const container = document.getElementById('session-events');
|
@@ -237,7 +238,7 @@
|
|
237
238
|
aria-label="Mark item"
|
238
239
|
data-event-id="${event.id}"
|
239
240
|
onchange="toggleCheckbox('${event.id}', this.checked)">
|
240
|
-
<p>${event.text}</p>
|
241
|
+
<p id="textline-${event.id}" contenteditable="false" ondblclick="this.contentEditable = this.contentEditable === 'true' ? 'false' : 'true'; if (this.contentEditable === 'true') this.focus();">${event.text}</p>
|
241
242
|
<div class="textline-buttons">
|
242
243
|
<button onclick="buttonClick('${event.id}', 'Screenshot')" title="Screenshot" style="background-color: #333; color: #fff; border: 1px solid #555; padding: 6px 10px; font-size: 10px; border-radius: 4px; cursor: pointer; transition: background-color 0.3s;">
|
243
244
|
📷
|
@@ -258,6 +259,11 @@
|
|
258
259
|
checkboxes.push(checkbox);
|
259
260
|
checkboxMap[event.id] = checkbox; // Store the checkbox in the map for easy access
|
260
261
|
|
262
|
+
let textline = shadowRoot.querySelector('#textline-' + event.id);
|
263
|
+
textLines.push(textline);
|
264
|
+
textLineMap[event.id] = textline; // Store the textline in the map for easy access
|
265
|
+
|
266
|
+
|
261
267
|
container.appendChild(div);
|
262
268
|
window.scrollTo({
|
263
269
|
top: document.documentElement.scrollHeight,
|
@@ -286,6 +292,12 @@
|
|
286
292
|
});
|
287
293
|
}
|
288
294
|
|
295
|
+
function textDoubleClicked(id) {
|
296
|
+
const textElement = textLineMap[id];
|
297
|
+
console.log(textElement);
|
298
|
+
textElement.contenteditable = textElement.contenteditable === "true" ? "false" : "true";
|
299
|
+
}
|
300
|
+
|
289
301
|
async function toggleCheckbox(id, checked) {
|
290
302
|
try {
|
291
303
|
checkboxes_being_updated.add(id);
|
@@ -1,11 +1,11 @@
|
|
1
1
|
GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
GameSentenceMiner/anki.py,sha256=
|
2
|
+
GameSentenceMiner/anki.py,sha256=bChJ1YU80muhvR8fjY9KAJEs0M0bpe-X_uMhjJBUC4k,14530
|
3
3
|
GameSentenceMiner/config_gui.py,sha256=J3R_oh4edAULY9_0UEuEnRhRczakyta_f5hnegpC1uQ,77373
|
4
4
|
GameSentenceMiner/configuration.py,sha256=vAKzDD8bUZehQK2XRKTnMDCLhjUVxoQLYrVkHuLPTEY,22493
|
5
5
|
GameSentenceMiner/electron_config.py,sha256=dGcPYCISPehXubYSzsDuI2Gl092MYK0u3bTnkL9Jh1Y,9787
|
6
|
-
GameSentenceMiner/ffmpeg.py,sha256=
|
6
|
+
GameSentenceMiner/ffmpeg.py,sha256=DX-2J1KZBKOPC8syR73YhDHwKIR4oNPrGuPl4l8is-4,18255
|
7
7
|
GameSentenceMiner/gametext.py,sha256=hcyZQ69B7xB5ZG85wLzM5au7ZPKxmeUXsmUD26oyk_0,5660
|
8
|
-
GameSentenceMiner/gsm.py,sha256
|
8
|
+
GameSentenceMiner/gsm.py,sha256=-76Gyrk2m9Hqobt6YByvV_sw1BI9xH_B9l0-aFAxBvc,27629
|
9
9
|
GameSentenceMiner/model.py,sha256=1lRyJFf_LND_4O16h8CWVqDfosLgr0ZS6ufBZ3qJHpY,5699
|
10
10
|
GameSentenceMiner/notification.py,sha256=pXKoLfmRQLH55IQ5G6uxdMuczqX7D6l3ubVEY1e6hXg,2859
|
11
11
|
GameSentenceMiner/obs.py,sha256=JwcVPnjO-Lm0H5007o3rF-gMf4ypgIm5m8ntthfbTk8,14789
|
@@ -27,19 +27,19 @@ GameSentenceMiner/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
27
27
|
GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=fEQ2o2NXksGRHpueO8c4TfAp75GEdAtAr1ngTFOsdpg,2257
|
28
28
|
GameSentenceMiner/ocr/ocrconfig.py,sha256=_tY8mjnzHMJrLS8E5pHqYXZjMuLoGKYgJwdhYgN-ny4,6466
|
29
29
|
GameSentenceMiner/ocr/owocr_area_selector.py,sha256=Q8ETMHL7BKMA1mbtjrntDLyqCQB0lZ5T4RCZsodjH7Y,47186
|
30
|
-
GameSentenceMiner/ocr/owocr_helper.py,sha256=
|
30
|
+
GameSentenceMiner/ocr/owocr_helper.py,sha256=ZUMz1moBdoFwYkUGBBzqW_V8MANL-zzQd8vsj5Br538,17661
|
31
31
|
GameSentenceMiner/owocr/owocr/__init__.py,sha256=opjBOyGGyEqZCE6YdZPnyt7nVfiwyELHsXA0jAsjm14,25
|
32
32
|
GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
|
33
33
|
GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
|
34
34
|
GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
|
35
|
-
GameSentenceMiner/owocr/owocr/ocr.py,sha256=
|
36
|
-
GameSentenceMiner/owocr/owocr/run.py,sha256=
|
35
|
+
GameSentenceMiner/owocr/owocr/ocr.py,sha256=rtKoIonyzqAMRPK92GvfYgGsU5M2yIWcWz9MQngNstc,41602
|
36
|
+
GameSentenceMiner/owocr/owocr/run.py,sha256=0UyjOKEP0MqSdCaagCUMGdqO-BMexPxCl7ZabGlic4E,54749
|
37
37
|
GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
|
38
38
|
GameSentenceMiner/vad/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
39
|
GameSentenceMiner/vad/result.py,sha256=C08HsYH4qVjTRh_dvrWrskmXHJ950w0GWxPjGx_BfGY,275
|
40
|
-
GameSentenceMiner/vad/silero_trim.py,sha256=
|
41
|
-
GameSentenceMiner/vad/vosk_helper.py,sha256=
|
42
|
-
GameSentenceMiner/vad/whisper_helper.py,sha256=
|
40
|
+
GameSentenceMiner/vad/silero_trim.py,sha256=InYsCy29WjK5EIB4e-KYb91rdHLCc5ZGbKtn9W5WmZI,2021
|
41
|
+
GameSentenceMiner/vad/vosk_helper.py,sha256=YYQct4J4EeVr6xHsIz9yMF-H1SeFXXqAKkbmH_13OE4,6457
|
42
|
+
GameSentenceMiner/vad/whisper_helper.py,sha256=8GXPBSHir2VGV5cowHDoA2KK1qKKiMJW230Esk7LykA,4005
|
43
43
|
GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
GameSentenceMiner/web/texthooking_page.py,sha256=t58PCfqwnJ197CtIUbGbE_cOyNkR81e4oi84VayTj5g,13497
|
45
45
|
GameSentenceMiner/web/static/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -53,10 +53,10 @@ GameSentenceMiner/web/static/web-app-manifest-192x192.png,sha256=EfSNnBmsSaLfESb
|
|
53
53
|
GameSentenceMiner/web/static/web-app-manifest-512x512.png,sha256=wyqgCWCrLEUxSRXmaA3iJEESd-vM-ZmlTtZFBY4V8Pk,230819
|
54
54
|
GameSentenceMiner/web/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
55
55
|
GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
|
56
|
-
GameSentenceMiner/web/templates/utility.html,sha256=
|
57
|
-
gamesentenceminer-2.8.
|
58
|
-
gamesentenceminer-2.8.
|
59
|
-
gamesentenceminer-2.8.
|
60
|
-
gamesentenceminer-2.8.
|
61
|
-
gamesentenceminer-2.8.
|
62
|
-
gamesentenceminer-2.8.
|
56
|
+
GameSentenceMiner/web/templates/utility.html,sha256=1vN3nK3IT-iNhMEj-k6JRz9uc6D87UUL9k7Bp2Espiw,16946
|
57
|
+
gamesentenceminer-2.8.51.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
58
|
+
gamesentenceminer-2.8.51.dist-info/METADATA,sha256=ohGZjJi67WnaD8AfD4uxgpOBkfIQhZWeLmMSvFDNYOM,7218
|
59
|
+
gamesentenceminer-2.8.51.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
60
|
+
gamesentenceminer-2.8.51.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
|
61
|
+
gamesentenceminer-2.8.51.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
|
62
|
+
gamesentenceminer-2.8.51.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|