GameSentenceMiner 2.10.10__py3-none-any.whl → 2.10.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,6 +80,7 @@ class ConfigApp:
80
80
  def __init__(self, root):
81
81
  self.window = root
82
82
  self.on_exit = None
83
+ self.window.tk.call('tk', 'scaling', 1.5) # Set DPI scaling factor
83
84
  # self.window = ttk.Window(themename='darkly')
84
85
  self.window.title('GameSentenceMiner Configuration')
85
86
  self.window.protocol("WM_DELETE_WINDOW", self.hide)
GameSentenceMiner/gsm.py CHANGED
@@ -452,6 +452,13 @@ def cleanup():
452
452
  if icon:
453
453
  icon.stop()
454
454
 
455
+ for video in gsm_state.videos_to_remove:
456
+ try:
457
+ if os.path.exists(video):
458
+ os.remove(video)
459
+ except Exception as e:
460
+ logger.error(f"Error removing temporary video file {video}: {e}")
461
+
455
462
  settings_window.window.destroy()
456
463
  time.sleep(5)
457
464
  logger.info("Cleanup complete.")
@@ -44,13 +44,13 @@ class WindowGeometry:
44
44
  class OCRConfig:
45
45
  scene: str
46
46
  rectangles: List[Rectangle]
47
- pre_scale_rectangles: List[Rectangle] = None
47
+ pre_scale_rectangles: Optional[List[Rectangle]] = None
48
48
  coordinate_system: str = None
49
49
  window_geometry: Optional[WindowGeometry] = None
50
50
  window: Optional[str] = None
51
51
  language: str = "ja"
52
52
 
53
- def __post_init__(self):
53
+ def scale_coords(self):
54
54
  self.pre_scale_rectangles = deepcopy(self.rectangles)
55
55
  if self.coordinate_system and self.coordinate_system == "percentage" and self.window:
56
56
  import pygetwindow as gw
@@ -20,6 +20,7 @@ except ImportError:
20
20
 
21
21
  try:
22
22
  import tkinter as tk
23
+ from tkinter import font as tkfont # NEW: Import for better font control
23
24
 
24
25
  selector_available = True
25
26
  except ImportError:
@@ -206,6 +207,66 @@ class ScreenSelector:
206
207
  self.drawn_rect_ids.append(new_rect_id)
207
208
  print("Redo: Restored rectangle.")
208
209
 
210
+ # --- NEW METHOD TO DISPLAY INSTRUCTIONS ---
211
+ def _create_instructions_widget(self, canvas):
212
+ """Creates a text box with usage instructions on the canvas."""
213
+ instructions_text = (
214
+ "How to Use:\n"
215
+ " • Left Click + Drag: Create a capture area (green).\n"
216
+ " • Shift + Left Click + Drag: Create an exclusion area (orange).\n"
217
+ " • Right-Click on a box: Delete it.\n\n"
218
+ "Hotkeys:\n"
219
+ " • Ctrl + S: Save and Quit\n"
220
+ " • Ctrl + Z / Ctrl + Y: Undo / Redo\n"
221
+ " • M: Toggle background visibility\n"
222
+ " • I: Toggle these instructions\n"
223
+ " • Esc: Quit without saving"
224
+ " "
225
+ )
226
+
227
+ # Use a common, readable font
228
+ instruction_font = tkfont.Font(family="Segoe UI", size=10, weight="normal")
229
+
230
+ # Create the text item first to get its size
231
+ text_id = canvas.create_text(
232
+ 20, 20, # Position with a small margin
233
+ text=instructions_text,
234
+ anchor=tk.NW,
235
+ fill='white',
236
+ font=instruction_font,
237
+ justify=tk.LEFT
238
+ )
239
+
240
+ # Get the bounding box of the text to draw a background
241
+ text_bbox = canvas.bbox(text_id)
242
+
243
+ # Create a background rectangle with padding
244
+ rect_id = canvas.create_rectangle(
245
+ text_bbox[0] - 10, # left
246
+ text_bbox[1] - 10, # top
247
+ text_bbox[2] + 10, # right
248
+ text_bbox[3] + 10, # bottom
249
+ fill='#2B2B2B', # Dark, semi-opaque background
250
+ outline='white',
251
+ width=1
252
+ )
253
+
254
+ # Lower the rectangle so it's behind the text
255
+ canvas.tag_lower(rect_id, text_id)
256
+
257
+ def toggle_instructions(self, event=None):
258
+ canvas = event.widget.winfo_toplevel().winfo_children()[0]
259
+ # Find all text and rectangle items (assuming only one of each for instructions)
260
+ text_items = [item for item in canvas.find_all() if canvas.type(item) == 'text']
261
+ rect_items = [item for item in canvas.find_all() if canvas.type(item) == 'rectangle']
262
+
263
+ if text_items and rect_items:
264
+ current_state = canvas.itemcget(text_items[0], 'state')
265
+ new_state = tk.NORMAL if current_state == tk.HIDDEN else tk.HIDDEN
266
+ for item in text_items + rect_items:
267
+ canvas.itemconfigure(item, state=new_state)
268
+ print("Toggled instructions visibility.")
269
+
209
270
  def start(self):
210
271
  self.root = tk.Tk()
211
272
  self.root.withdraw()
@@ -230,6 +291,10 @@ class ScreenSelector:
230
291
  canvas.pack(fill=tk.BOTH, expand=True)
231
292
  canvas.create_image(0, 0, image=self.photo_image, anchor=tk.NW)
232
293
 
294
+ # --- MODIFIED: CALL THE INSTRUCTION WIDGET CREATOR ---
295
+ self._create_instructions_widget(canvas)
296
+ # --- END MODIFICATION ---
297
+
233
298
  # Draw existing rectangles (which were converted to absolute pixels on load)
234
299
  for _, abs_coords, is_excluded in self.rectangles:
235
300
  x_abs, y_abs, w_abs, h_abs = abs_coords
@@ -275,17 +340,37 @@ class ScreenSelector:
275
340
  self.current_rect_id = self.start_x = self.start_y = None
276
341
 
277
342
  def on_right_click(event):
278
- items = canvas.find_closest(event.x, event.y)
279
- if items and items[0] in self.drawn_rect_ids:
280
- item_id = items[0]
281
- idx_to_del = self.drawn_rect_ids.index(item_id)
282
- del self.drawn_rect_ids[idx_to_del]
283
- del self.rectangles[idx_to_del]
284
- self.redo_stack.clear()
285
- canvas.delete(item_id)
343
+ # Iterate through our rectangles in reverse to find the topmost one.
344
+ for i in range(len(self.rectangles) - 1, -1, -1):
345
+ _monitor, abs_coords, _is_excluded = self.rectangles[i]
346
+ x_abs, y_abs, w_abs, h_abs = abs_coords
347
+ canvas_x1 = x_abs - self.bounding_box['left']
348
+ canvas_y1 = y_abs - self.bounding_box['top']
349
+ canvas_x2 = canvas_x1 + w_abs
350
+ canvas_y2 = canvas_y1 + h_abs
351
+
352
+ if canvas_x1 <= event.x <= canvas_x2 and canvas_y1 <= event.y <= canvas_y2:
353
+ # --- UNDO/REDO CHANGE ---
354
+ # We found the rectangle. Prepare the 'remove' action.
355
+ # We need to save the data AND its original index to restore it correctly.
356
+ rect_tuple_to_del = self.rectangles[i]
357
+ item_id_to_del = self.drawn_rect_ids[i]
358
+
359
+ self.redo_stack.append((*rect_tuple_to_del, i))
360
+
361
+ # Now, perform the deletion
362
+ del self.rectangles[i]
363
+ del self.drawn_rect_ids[i]
364
+ canvas.delete(item_id_to_del)
365
+ print("Deleted rectangle.")
366
+
367
+ break # Stop after deleting the topmost one
286
368
 
287
369
  def toggle_image_mode(e=None):
288
- self.image_mode = not self.image_mode; window.attributes("-alpha", 1.0 if self.image_mode else 0.25)
370
+ self.image_mode = not self.image_mode
371
+ # Only change alpha of the main window, not the text widget
372
+ window.attributes("-alpha", 1.0 if self.image_mode else 0.25)
373
+ print("Toggled background visibility.")
289
374
 
290
375
  def on_enter(e=None):
291
376
  canvas.focus_set()
@@ -296,13 +381,15 @@ class ScreenSelector:
296
381
  canvas.bind('<ButtonRelease-1>', on_release)
297
382
  canvas.bind('<Button-3>', on_right_click)
298
383
  canvas.bind('<Control-s>', self.save_rects)
299
- canvas.bind('<Control-z>', self.undo_last_rect)
300
384
  canvas.bind('<Control-y>', self.redo_last_rect)
385
+ canvas.bind('<Control-z>', self.undo_last_rect)
301
386
  canvas.bind("<Escape>", self.quit_app)
302
387
  canvas.bind("<m>", toggle_image_mode)
388
+ canvas.bind("<i>", self.toggle_instructions)
303
389
 
304
390
  canvas.focus_set()
305
- print("Starting UI. Press Esc to quit, Ctrl+S to save, M to toggle background.")
391
+ # The print message is now redundant but kept for console feedback
392
+ print("Starting UI. See on-screen instructions. Press Esc to quit, Ctrl+S to save.")
306
393
  self.root.mainloop()
307
394
 
308
395
  def quit_app(self, event=None):
@@ -350,10 +437,6 @@ if __name__ == "__main__":
350
437
  target_window_title = "Windowed Projector (Preview)" # Default
351
438
  if len(sys.argv) > 1:
352
439
  target_window_title = sys.argv[1]
353
- # else:
354
- # print("Usage: python your_script_name.py \"Target Window Title\"", file=sys.stderr)
355
- # print("Example: python selector.py \"Windowed Projector (Preview)\"", file=sys.stderr)
356
- # sys.exit(1)
357
440
 
358
441
  selection_result = get_screen_selection(target_window_title)
359
442
 
@@ -178,17 +178,28 @@ class WebsocketServerThread(threading.Thread):
178
178
  asyncio.run(main())
179
179
 
180
180
 
181
+ def compare_ocr_results(prev_text, new_text, threshold=90):
182
+ if not prev_text or not new_text:
183
+ return False
184
+ if isinstance(prev_text, list):
185
+ prev_text = ''.join([item for item in prev_text if item is not None]) if prev_text else ""
186
+ if isinstance(new_text, list):
187
+ new_text = ''.join([item for item in new_text if item is not None]) if new_text else ""
188
+ similarity = fuzz.ratio(prev_text, new_text)
189
+ return similarity >= threshold
190
+
181
191
  all_cords = None
182
192
  rectangles = None
183
- last_ocr2_result = ""
193
+ last_ocr2_result = []
184
194
 
185
195
  def do_second_ocr(ocr1_text, time, img, filtering, ignore_furigana_filter=False):
186
196
  global twopassocr, ocr2, last_ocr2_result
187
197
  try:
188
198
  orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
189
199
  engine=ocr2, furigana_filter_sensitivity=furigana_filter_sensitivity if not ignore_furigana_filter else 0)
190
- if fuzz.ratio(last_ocr2_result, orig_text) >= 90:
191
- logger.info("Seems like the same text from previous ocr2 result, not sending")
200
+
201
+ if compare_ocr_results(last_ocr2_result, orig_text):
202
+ logger.info("Detected similar text from previous OCR2 result, not sending")
192
203
  return
193
204
  save_result_image(img)
194
205
  last_ocr2_result = orig_text
@@ -241,12 +252,12 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
241
252
 
242
253
  line_start_time = time if time else datetime.now()
243
254
 
244
- if not twopassocr:
245
- if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
255
+ if manual or not twopassocr:
256
+ if compare_ocr_results(previous_orig_text, orig_text_string):
246
257
  logger.info("Seems like Text we already sent, not doing anything.")
247
258
  return
248
259
  save_result_image(img)
249
- asyncio.run(send_result(text, time))
260
+ asyncio.run(send_result(text, line_start_time))
250
261
  previous_orig_text = orig_text_string
251
262
  previous_text = None
252
263
  previous_img = None
@@ -260,13 +271,13 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
260
271
  if previous_text and text_stable_start_time:
261
272
  stable_time = text_stable_start_time
262
273
  previous_img_local = previous_img
263
- if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
274
+ if compare_ocr_results(previous_orig_text, orig_text_string):
264
275
  logger.info("Seems like Text we already sent, not doing anything.")
265
276
  previous_text = None
266
277
  return
267
278
  previous_orig_text = orig_text_string
268
279
  previous_ocr1_result = previous_text
269
- if crop_coords:
280
+ if crop_coords and optimize_second_scan:
270
281
  previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
271
282
  previous_img_local = previous_img_local.crop(crop_coords)
272
283
  second_ocr_queue.put((previous_text, stable_time, previous_img_local, filtering))
@@ -389,70 +400,88 @@ def set_force_stable_hotkey():
389
400
  print("Press Ctrl+Shift+F to toggle force stable mode.")
390
401
 
391
402
  if __name__ == "__main__":
392
- global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window
393
- import sys
394
-
395
- import argparse
396
-
397
- parser = argparse.ArgumentParser(description="OCR Configuration")
398
- parser.add_argument("--language", type=str, default="ja", help="Language for OCR (default: ja)")
399
- parser.add_argument("--ocr1", type=str, default="oneocr", help="Primary OCR engine (default: oneocr)")
400
- parser.add_argument("--ocr2", type=str, default="glens", help="Secondary OCR engine (default: glens)")
401
- parser.add_argument("--twopassocr", type=int, choices=[0, 1], default=1, help="Enable two-pass OCR (default: 1)")
402
- parser.add_argument("--manual", action="store_true", help="Use screenshot-only mode")
403
- parser.add_argument("--clipboard", action="store_true", help="Use clipboard for input")
404
- parser.add_argument("--clipboard-output", action="store_true", default=False, help="Use clipboard for output")
405
- parser.add_argument("--window", type=str, help="Specify the window name for OCR")
406
- parser.add_argument("--furigana_filter_sensitivity", type=float, default=0, help="Furigana Filter Sensitivity for OCR (default: 0)")
407
- parser.add_argument("--manual_ocr_hotkey", type=str, default=None, help="Hotkey for manual OCR (default: None)")
408
- parser.add_argument("--area_select_ocr_hotkey", type=str, default="ctrl+shift+o", help="Hotkey for area selection OCR (default: ctrl+shift+o)")
409
-
410
- args = parser.parse_args()
411
-
412
- language = args.language
413
- ocr1 = args.ocr1
414
- ocr2 = args.ocr2 if args.ocr2 else None
415
- twopassocr = bool(args.twopassocr)
416
- manual = args.manual
417
- ss_clipboard = args.clipboard
418
- window_name = args.window
419
- furigana_filter_sensitivity = args.furigana_filter_sensitivity
420
- ss_hotkey = args.area_select_ocr_hotkey.lower()
421
- manual_ocr_hotkey = args.manual_ocr_hotkey.lower().replace("ctrl", "<ctrl>").replace("shift", "<shift>").replace("alt", "<alt>") if args.manual_ocr_hotkey else None
422
- clipboard_output = args.clipboard_output
423
-
424
- logger.info(f"Received arguments: {vars(args)}")
425
- # set_force_stable_hotkey()
426
- ocr_config: OCRConfig = get_ocr_config(window=window_name)
427
- if ocr_config:
428
- if ocr_config.window:
429
- start_time = time.time()
430
- while time.time() - start_time < 30:
431
- window = get_window(ocr_config.window)
432
- if window or manual:
433
- break
434
- logger.info(f"Window: {ocr_config.window} Could not be found, retrying in 1 second...")
435
- time.sleep(1)
436
- else:
437
- logger.error(f"Window '{ocr_config.window}' not found within 30 seconds.")
438
- sys.exit(1)
439
- logger.info(f"Starting OCR with configuration: Window: {ocr_config.window}, Rectangles: {ocr_config.rectangles}, Engine 1: {ocr1}, Engine 2: {ocr2}, Two-pass OCR: {twopassocr}")
440
- set_dpi_awareness()
441
- if manual or ocr_config:
442
- rectangles = ocr_config.rectangles if ocr_config and ocr_config.rectangles else []
443
- oneocr_threads = []
444
- ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config,rectangles ), daemon=True)
445
- ocr_thread.start()
446
- if not manual:
447
- worker_thread = threading.Thread(target=process_task_queue, daemon=True)
448
- worker_thread.start()
449
- websocket_server_thread = WebsocketServerThread(read=True)
450
- websocket_server_thread.start()
451
- add_ss_hotkey(ss_hotkey)
452
- try:
453
- while not done:
454
- time.sleep(1)
455
- except KeyboardInterrupt as e:
456
- pass
457
- else:
458
- print("Failed to load OCR configuration. Please check the logs.")
403
+ try:
404
+ global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan
405
+ import sys
406
+
407
+ import argparse
408
+
409
+ parser = argparse.ArgumentParser(description="OCR Configuration")
410
+ parser.add_argument("--language", type=str, default="ja", help="Language for OCR (default: ja)")
411
+ parser.add_argument("--ocr1", type=str, default="oneocr", help="Primary OCR engine (default: oneocr)")
412
+ parser.add_argument("--ocr2", type=str, default="glens", help="Secondary OCR engine (default: glens)")
413
+ parser.add_argument("--twopassocr", type=int, choices=[0, 1], default=1,
414
+ help="Enable two-pass OCR (default: 1)")
415
+ parser.add_argument("--manual", action="store_true", help="Use screenshot-only mode")
416
+ parser.add_argument("--clipboard", action="store_true", help="Use clipboard for input")
417
+ parser.add_argument("--clipboard-output", action="store_true", default=False, help="Use clipboard for output")
418
+ parser.add_argument("--window", type=str, help="Specify the window name for OCR")
419
+ parser.add_argument("--furigana_filter_sensitivity", type=float, default=0,
420
+ help="Furigana Filter Sensitivity for OCR (default: 0)")
421
+ parser.add_argument("--manual_ocr_hotkey", type=str, default=None, help="Hotkey for manual OCR (default: None)")
422
+ parser.add_argument("--area_select_ocr_hotkey", type=str, default="ctrl+shift+o",
423
+ help="Hotkey for area selection OCR (default: ctrl+shift+o)")
424
+ parser.add_argument("--optimize_second_scan", action="store_true",
425
+ help="Optimize second scan by cropping based on first scan results")
426
+
427
+ args = parser.parse_args()
428
+
429
+ language = args.language
430
+ ocr1 = args.ocr1
431
+ ocr2 = args.ocr2 if args.ocr2 else None
432
+ twopassocr = bool(args.twopassocr)
433
+ manual = args.manual
434
+ ss_clipboard = args.clipboard
435
+ window_name = args.window
436
+ furigana_filter_sensitivity = args.furigana_filter_sensitivity
437
+ ss_hotkey = args.area_select_ocr_hotkey.lower()
438
+ manual_ocr_hotkey = args.manual_ocr_hotkey.lower().replace("ctrl", "<ctrl>").replace("shift",
439
+ "<shift>").replace(
440
+ "alt", "<alt>") if args.manual_ocr_hotkey else None
441
+ clipboard_output = args.clipboard_output
442
+ optimize_second_scan = args.optimize_second_scan
443
+
444
+ window = None
445
+ logger.info(f"Received arguments: {vars(args)}")
446
+ # set_force_stable_hotkey()
447
+ ocr_config: OCRConfig = get_ocr_config(window=window_name)
448
+ if ocr_config:
449
+ if ocr_config.window:
450
+ start_time = time.time()
451
+ while time.time() - start_time < 30:
452
+ window = get_window(ocr_config.window)
453
+ if window or manual:
454
+ if window:
455
+ ocr_config.scale_coords()
456
+ break
457
+ logger.info(f"Window: {ocr_config.window} Could not be found, retrying in 1 second...")
458
+ time.sleep(1)
459
+ else:
460
+ logger.error(f"Window '{ocr_config.window}' not found within 30 seconds.")
461
+ sys.exit(1)
462
+ logger.info(
463
+ f"Starting OCR with configuration: Window: {ocr_config.window}, Rectangles: {ocr_config.rectangles}, Engine 1: {ocr1}, Engine 2: {ocr2}, Two-pass OCR: {twopassocr}")
464
+ set_dpi_awareness()
465
+ if manual or ocr_config:
466
+ rectangles = ocr_config.rectangles if ocr_config and ocr_config.rectangles else []
467
+ oneocr_threads = []
468
+ ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config, rectangles), daemon=True)
469
+ ocr_thread.start()
470
+ if not manual:
471
+ worker_thread = threading.Thread(target=process_task_queue, daemon=True)
472
+ worker_thread.start()
473
+ websocket_server_thread = WebsocketServerThread(read=True)
474
+ websocket_server_thread.start()
475
+ add_ss_hotkey(ss_hotkey)
476
+ try:
477
+ while not done:
478
+ time.sleep(1)
479
+ except KeyboardInterrupt as e:
480
+ pass
481
+ else:
482
+ print("Failed to load OCR configuration. Please check the logs.")
483
+ except Exception as e:
484
+ logger.info(e, exc_info=True)
485
+ logger.debug(e, exc_info=True)
486
+ logger.info("Closing in 5 seconds...")
487
+ time.sleep(5)
@@ -1,6 +1,7 @@
1
1
  import re
2
2
  import os
3
3
  import io
4
+ import time
4
5
  from pathlib import Path
5
6
  import sys
6
7
  import platform
@@ -17,8 +18,6 @@ from google.generativeai import GenerationConfig
17
18
  from loguru import logger
18
19
  import requests
19
20
 
20
- from GameSentenceMiner.util.configuration import get_app_directory, get_temporary_directory
21
-
22
21
  try:
23
22
  from manga_ocr import MangaOcr as MOCR
24
23
  except ImportError:
@@ -1247,6 +1246,68 @@ class GroqOCR:
1247
1246
  def _preprocess(self, img):
1248
1247
  return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')
1249
1248
 
1249
+ # class QWENOCR:
1250
+ # name = 'qwenvl'
1251
+ # readable_name = 'Qwen2-VL'
1252
+ # key = 'q'
1253
+ # available = False
1254
+ #
1255
+ # def __init__(self, config={}):
1256
+ # try:
1257
+ # import torch
1258
+ # from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
1259
+ # self.model = Qwen2VLForConditionalGeneration.from_pretrained(
1260
+ # "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"
1261
+ # )
1262
+ # self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
1263
+ # self.device = "cuda" if torch.cuda.is_available() else "cpu"
1264
+ # print(self.device)
1265
+ # self.available = True
1266
+ # logger.info('Qwen2-VL ready')
1267
+ # except Exception as e:
1268
+ # logger.warning(f'Qwen2-VL not available: {e}')
1269
+ #
1270
+ # def __call__(self, img, furigana_filter_sensitivity=0):
1271
+ # if not self.available:
1272
+ # return (False, 'Qwen2-VL is not available.')
1273
+ # try:
1274
+ # img = input_to_pil_image(img)
1275
+ # conversation = [
1276
+ # {
1277
+ # "role": "user",
1278
+ # "content": [
1279
+ # {"type": "image"},
1280
+ # {"type": "text", "text": "Analyze the image. Extract text *only* from within dialogue boxes (speech bubbles or panels containing character dialogue). If Text appears to be vertical, read the text from top to bottom, right to left. From the extracted dialogue text, filter out any furigana. Ignore and do not include any text found outside of dialogue boxes, including character names, speaker labels, or sound effects. Return *only* the filtered dialogue text. If no text is found within dialogue boxes after applying filters, return nothing. Do not include any other output, formatting markers, or commentary."},
1281
+ # ],
1282
+ # }
1283
+ # ]
1284
+ # text_prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
1285
+ # inputs = self.processor(
1286
+ # text=[text_prompt], images=[img], padding=True, return_tensors="pt"
1287
+ # )
1288
+ # inputs = inputs.to(self.device)
1289
+ # output_ids = self.model.generate(**inputs, max_new_tokens=128)
1290
+ # generated_ids = [
1291
+ # output_ids[len(input_ids):]
1292
+ # for input_ids, output_ids in zip(inputs.input_ids, output_ids)
1293
+ # ]
1294
+ # output_text = self.processor.batch_decode(
1295
+ # generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
1296
+ # )
1297
+ # return (True, output_text[0] if output_text else "")
1298
+ # except Exception as e:
1299
+ # return (False, f'Qwen2-VL inference failed: {e}')
1300
+
1301
+
1302
+ # qwenocr = QWENOCR()
1303
+ #
1304
+ # for i in range(10):
1305
+ # start_time = time.time()
1306
+ # res, text = qwenocr(Image.open('test_furigana.png'), furigana_filter_sensitivity=0) # Example usage
1307
+ # end_time = time.time()
1308
+ #
1309
+ # print(f"Time taken: {end_time - start_time:.2f} seconds")
1310
+ # print(text)
1250
1311
  # class LocalOCR:
1251
1312
  # name = 'local_ocr'
1252
1313
  # readable_name = 'Local OCR'
@@ -172,7 +172,7 @@ class Screenshot:
172
172
  class Audio:
173
173
  enabled: bool = True
174
174
  extension: str = 'opus'
175
- beginning_offset: float = 0.0
175
+ beginning_offset: float = -0.5
176
176
  end_offset: float = 0.5
177
177
  pre_vad_end_offset: float = 0.0
178
178
  ffmpeg_reencode_options: str = '-c:a libopus -f opus -af \"afade=t=in:d=0.10\"' if is_windows() else ''
@@ -683,6 +683,7 @@ class GsmAppState:
683
683
  self.last_mined_line = None
684
684
  self.keep_running = True
685
685
  self.current_game = ''
686
+ self.videos_to_remove = set()
686
687
 
687
688
  @dataclass_json
688
689
  @dataclass
@@ -21,7 +21,7 @@ def handle_texthooker_button(video_path='', get_audio_from_video=None):
21
21
  if get_config().advanced.audio_player_path:
22
22
  play_audio_in_external(gsm_state.previous_audio)
23
23
  elif get_config().advanced.video_player_path:
24
- play_video_in_external(line, gsm_state.previous_audio)
24
+ play_video_in_external(line, video_path)
25
25
  else:
26
26
  import sounddevice as sd
27
27
  data, samplerate = gsm_state.previous_audio
@@ -35,9 +35,7 @@ def handle_texthooker_button(video_path='', get_audio_from_video=None):
35
35
  play_audio_in_external(audio)
36
36
  gsm_state.previous_audio = audio
37
37
  elif get_config().advanced.video_player_path:
38
- new_video_path = play_video_in_external(line, video_path)
39
- gsm_state.previous_audio = new_video_path
40
- gsm_state.previous_replay = new_video_path
38
+ play_video_in_external(line, video_path)
41
39
  else:
42
40
  import sounddevice as sd
43
41
  import soundfile as sf
@@ -75,8 +73,8 @@ def handle_texthooker_button(video_path='', get_audio_from_video=None):
75
73
  logger.debug(f"Error Playing Audio/Video: {e}", exc_info=True)
76
74
  return
77
75
  finally:
78
- if video_path and get_config().paths.remove_video and os.path.exists(video_path):
79
- os.remove(video_path)
76
+ gsm_state.previous_replay = video_path
77
+ gsm_state.videos_to_remove.add(video_path)
80
78
 
81
79
 
82
80
  def play_audio_in_external(filepath):
@@ -94,37 +92,28 @@ def play_audio_in_external(filepath):
94
92
 
95
93
 
96
94
  def play_video_in_external(line, filepath):
97
- def move_video_when_closed(p, fp):
98
- p.wait()
99
- os.remove(fp)
100
-
101
- shutil.move(filepath, get_temporary_directory())
102
- new_filepath = os.path.join(get_temporary_directory(), os.path.basename(filepath))
103
-
104
95
  command = [get_config().advanced.video_player_path]
105
96
 
106
- start, _, _, _ = get_video_timings(new_filepath, line)
97
+ start, _, _, _ = get_video_timings(filepath, line)
107
98
 
108
99
  if start:
109
100
  if "vlc" in get_config().advanced.video_player_path:
110
101
  command.extend(["--start-time", convert_to_vlc_seconds(start), '--one-instance'])
111
102
  else:
112
103
  command.extend(["--start", convert_to_vlc_seconds(start)])
113
- command.append(os.path.normpath(new_filepath))
104
+ command.append(os.path.normpath(filepath))
114
105
 
115
106
  logger.info(" ".join(command))
116
107
 
117
108
 
118
109
 
119
110
  try:
120
- proc = subprocess.Popen(command)
121
- print(f"Opened {filepath} in {get_config().advanced.video_player_path}.")
122
- threading.Thread(target=move_video_when_closed, args=(proc, filepath)).start()
111
+ subprocess.Popen(command)
112
+ logger.info(f"Opened {filepath} in {get_config().advanced.video_player_path}.")
123
113
  except FileNotFoundError:
124
- print("VLC not found. Make sure it's installed and in your PATH.")
114
+ logger.error("VLC not found. Make sure it's installed and in your PATH.")
125
115
  except Exception as e:
126
- print(f"An error occurred: {e}")
127
- return new_filepath
116
+ logger.error(f"An error occurred: {e}")
128
117
 
129
118
 
130
119
  def convert_to_vlc_seconds(time_str):
@@ -287,8 +287,8 @@ def get_screenshot():
287
287
  if event_id is None:
288
288
  return jsonify({'error': 'Missing id'}), 400
289
289
  gsm_state.line_for_screenshot = get_line_by_id(event_id)
290
- if gsm_state.previous_line_for_screenshot and gsm_state.line_for_screenshot.id == gsm_state.previous_line_for_screenshot.id:
291
- handle_texthooker_button()
290
+ if gsm_state.previous_line_for_screenshot and gsm_state.line_for_screenshot.id == gsm_state.previous_line_for_screenshot.id or gsm_state.previous_line_for_audio:
291
+ handle_texthooker_button(gsm_state.previous_replay)
292
292
  else:
293
293
  obs.save_replay_buffer()
294
294
  return jsonify({}), 200
@@ -301,8 +301,8 @@ def play_audio():
301
301
  if event_id is None:
302
302
  return jsonify({'error': 'Missing id'}), 400
303
303
  gsm_state.line_for_audio = get_line_by_id(event_id)
304
- if gsm_state.previous_line_for_audio and gsm_state.line_for_audio == gsm_state.previous_line_for_audio:
305
- handle_texthooker_button()
304
+ if gsm_state.previous_line_for_audio and gsm_state.line_for_audio == gsm_state.previous_line_for_audio or gsm_state.previous_line_for_screenshot:
305
+ handle_texthooker_button(gsm_state.previous_replay)
306
306
  else:
307
307
  obs.save_replay_buffer()
308
308
  return jsonify({}), 200
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.10.10
3
+ Version: 2.10.12
4
4
  Summary: A tool for mining sentences from games. Update: Full UI Re-design
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -1,8 +1,8 @@
1
1
  GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  GameSentenceMiner/anki.py,sha256=kWw3PV_Jj5-lHcttCB3lRXejHlaAbiJ2Ag_NAGX-RI8,16632
3
- GameSentenceMiner/config_gui.py,sha256=h-vDxpFCC347iK_mDJAjwKm7Qubeu-NWaxvd9SvzqzY,90942
3
+ GameSentenceMiner/config_gui.py,sha256=Xa_a-sdQzht3kzR-Z9gkLy4qnaPyP1bdVadYTHp5lUQ,91018
4
4
  GameSentenceMiner/gametext.py,sha256=6VkjmBeiuZfPk8T6PHFdIAElBH2Y_oLVYvmcafqN7RM,6747
5
- GameSentenceMiner/gsm.py,sha256=PSL_J723k23SIfgeNhoXgTqlG-V3MQTFJtLDcrZDFqs,24625
5
+ GameSentenceMiner/gsm.py,sha256=p4DVa_Jx1EOsgUxAAdC7st7VXLKWnP2BLDGT78ToO8w,24864
6
6
  GameSentenceMiner/obs.py,sha256=ZV9Vk39hrsJLT-AlIxa3qgncKxXaL3Myl33vVJEDEoA,14670
7
7
  GameSentenceMiner/vad.py,sha256=G0NkaWFJaIfKQAV7LOFxyKoih7pPNYHDuy4SzeFVCkI,16389
8
8
  GameSentenceMiner/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -16,21 +16,21 @@ GameSentenceMiner/assets/icon512.png,sha256=HxUj2GHjyQsk8NV433256UxU9phPhtjCY-YB
16
16
  GameSentenceMiner/assets/icon64.png,sha256=N8xgdZXvhqVQP9QUK3wX5iqxX9LxHljD7c-Bmgim6tM,9301
17
17
  GameSentenceMiner/assets/pickaxe.png,sha256=VfIGyXyIZdzEnVcc4PmG3wszPMO1W4KCT7Q_nFK6eSE,1403829
18
18
  GameSentenceMiner/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=0hZmNIvZmlAEcy_NaTukG_ALUORULUT7sQ8q5VlDJU4,4047
19
+ GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=jtTzAWtMAx8GuA1XIJ_BmyNn3aYaO3u_c5Q7m5D4gS8,4056
20
20
  GameSentenceMiner/ocr/ocrconfig.py,sha256=_tY8mjnzHMJrLS8E5pHqYXZjMuLoGKYgJwdhYgN-ny4,6466
21
- GameSentenceMiner/ocr/owocr_area_selector.py,sha256=GEqIIhRc3WCIAx3HunuYo6ayJsCnZWT-x9fwZMCy2e8,16183
22
- GameSentenceMiner/ocr/owocr_helper.py,sha256=YHhG3PuJsPWP4352TAu4dtdX7itRiOybngzZVT4B50c,20184
21
+ GameSentenceMiner/ocr/owocr_area_selector.py,sha256=boAqarX17jvFscu-7s6C9rqesjQ54s-kfuW0bjCru-M,19834
22
+ GameSentenceMiner/ocr/owocr_helper.py,sha256=VDcuBfyZ1B7TN6yImJVuNxqWY7pr95R2cRM9jgD5Rk8,21670
23
23
  GameSentenceMiner/ocr/ss_picker.py,sha256=0IhxUdaKruFpZyBL-8SpxWg7bPrlGpy3lhTcMMZ5rwo,5224
24
24
  GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9lKnRCj6oZgR0,49
25
25
  GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
26
26
  GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
27
27
  GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
28
- GameSentenceMiner/owocr/owocr/ocr.py,sha256=fWrbKomSrdkSdlEiMGTKb6-F7wCgfaZZNBUo2gCqmuA,52247
28
+ GameSentenceMiner/owocr/owocr/ocr.py,sha256=Mri_zB_COk7x9GmolyhYCINJ-lQlD45GuJ4m4M0IBFM,55328
29
29
  GameSentenceMiner/owocr/owocr/run.py,sha256=mZIGDm3fGYrYbSNuFOk7Sbslfgi36YN0YqfC1xYh_eY,55286
30
30
  GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
31
31
  GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  GameSentenceMiner/util/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
33
- GameSentenceMiner/util/configuration.py,sha256=iXgfrawPPpAmQdXv2zPR-LuZnXx1ORGAwwP55OmLOs8,28778
33
+ GameSentenceMiner/util/configuration.py,sha256=8PZk4IhtWFimfRy7biREcfG1NGkFNzKzFjlOjxNEFd0,28817
34
34
  GameSentenceMiner/util/electron_config.py,sha256=3VmIrcXhC-wIMMc4uqV85NrNenRl4ZUbnQfSjWEwuig,9852
35
35
  GameSentenceMiner/util/ffmpeg.py,sha256=t0tflxq170n8PZKkdw8fTZIUQfXD0p_qARa9JTdhBTc,21530
36
36
  GameSentenceMiner/util/gsm_utils.py,sha256=_279Fu9CU6FEh4cP6h40TWOt_BrqmPgytfumi8y53Ew,11491
@@ -47,8 +47,8 @@ GameSentenceMiner/util/downloader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
47
47
  GameSentenceMiner/util/downloader/download_tools.py,sha256=mvnOjDHFlV1AbjHaNI7mdnC5_CH5k3N4n1ezqzzbzGA,8139
48
48
  GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=EJbKISaZ9p2x9P4x0rpMM5nAInTTc9b7arraGBcd-SA,10381
49
49
  GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- GameSentenceMiner/web/service.py,sha256=2o62dZfDSBxBH5zCjrcHCX5yAc3PmGeP2lr07n8-dgo,5779
51
- GameSentenceMiner/web/texthooking_page.py,sha256=rX2rBFIlVlKmVXB8dseuyWfMzcDcjNNQosncwUolMu8,16054
50
+ GameSentenceMiner/web/service.py,sha256=S7bYf2kSk08u-8R9Qpv7piM-pxfFjYZUvU825xupmuI,5279
51
+ GameSentenceMiner/web/texthooking_page.py,sha256=EmcIBEPGWNgI2LGL3kKUsm0rs2Vn0CPq9PVKKnuIt2g,16183
52
52
  GameSentenceMiner/web/static/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  GameSentenceMiner/web/static/apple-touch-icon.png,sha256=OcMI8af_68DA_tweOsQ5LytTyMwm7-hPW07IfrOVgEs,46132
54
54
  GameSentenceMiner/web/static/favicon-96x96.png,sha256=lOePzjiKl1JY2J1kT_PMdyEnrlJmi5GWbmXJunM12B4,16502
@@ -62,9 +62,9 @@ GameSentenceMiner/web/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
62
62
  GameSentenceMiner/web/templates/index.html,sha256=n0J-dV8eksj8JXUuaCTIh0fIxIjfgm2EvxGBdQ6gWoM,214113
63
63
  GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
64
64
  GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
65
- gamesentenceminer-2.10.10.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
66
- gamesentenceminer-2.10.10.dist-info/METADATA,sha256=KJtMtM6AUz0qc8xsuSrNxd_I53gcYtAPNWO9VkDGSsY,7355
67
- gamesentenceminer-2.10.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- gamesentenceminer-2.10.10.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
69
- gamesentenceminer-2.10.10.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
70
- gamesentenceminer-2.10.10.dist-info/RECORD,,
65
+ gamesentenceminer-2.10.12.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
66
+ gamesentenceminer-2.10.12.dist-info/METADATA,sha256=YzGn0pkP-I00xGsRHt-5GK8x9pCKpoKR5lHBYL_z8Ho,7355
67
+ gamesentenceminer-2.10.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ gamesentenceminer-2.10.12.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
69
+ gamesentenceminer-2.10.12.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
70
+ gamesentenceminer-2.10.12.dist-info/RECORD,,