GameSentenceMiner 2.10.10__py3-none-any.whl → 2.10.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/config_gui.py +1 -0
- GameSentenceMiner/gsm.py +7 -0
- GameSentenceMiner/ocr/gsm_ocr_config.py +2 -2
- GameSentenceMiner/ocr/owocr_area_selector.py +98 -15
- GameSentenceMiner/ocr/owocr_helper.py +104 -75
- GameSentenceMiner/owocr/owocr/ocr.py +63 -2
- GameSentenceMiner/util/configuration.py +2 -1
- GameSentenceMiner/web/service.py +10 -21
- GameSentenceMiner/web/texthooking_page.py +4 -4
- {gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/METADATA +1 -1
- {gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/RECORD +15 -15
- {gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/top_level.txt +0 -0
GameSentenceMiner/config_gui.py
CHANGED
@@ -80,6 +80,7 @@ class ConfigApp:
|
|
80
80
|
def __init__(self, root):
|
81
81
|
self.window = root
|
82
82
|
self.on_exit = None
|
83
|
+
self.window.tk.call('tk', 'scaling', 1.5) # Set DPI scaling factor
|
83
84
|
# self.window = ttk.Window(themename='darkly')
|
84
85
|
self.window.title('GameSentenceMiner Configuration')
|
85
86
|
self.window.protocol("WM_DELETE_WINDOW", self.hide)
|
GameSentenceMiner/gsm.py
CHANGED
@@ -452,6 +452,13 @@ def cleanup():
|
|
452
452
|
if icon:
|
453
453
|
icon.stop()
|
454
454
|
|
455
|
+
for video in gsm_state.videos_to_remove:
|
456
|
+
try:
|
457
|
+
if os.path.exists(video):
|
458
|
+
os.remove(video)
|
459
|
+
except Exception as e:
|
460
|
+
logger.error(f"Error removing temporary video file {video}: {e}")
|
461
|
+
|
455
462
|
settings_window.window.destroy()
|
456
463
|
time.sleep(5)
|
457
464
|
logger.info("Cleanup complete.")
|
@@ -44,13 +44,13 @@ class WindowGeometry:
|
|
44
44
|
class OCRConfig:
|
45
45
|
scene: str
|
46
46
|
rectangles: List[Rectangle]
|
47
|
-
pre_scale_rectangles: List[Rectangle] = None
|
47
|
+
pre_scale_rectangles: Optional[List[Rectangle]] = None
|
48
48
|
coordinate_system: str = None
|
49
49
|
window_geometry: Optional[WindowGeometry] = None
|
50
50
|
window: Optional[str] = None
|
51
51
|
language: str = "ja"
|
52
52
|
|
53
|
-
def
|
53
|
+
def scale_coords(self):
|
54
54
|
self.pre_scale_rectangles = deepcopy(self.rectangles)
|
55
55
|
if self.coordinate_system and self.coordinate_system == "percentage" and self.window:
|
56
56
|
import pygetwindow as gw
|
@@ -20,6 +20,7 @@ except ImportError:
|
|
20
20
|
|
21
21
|
try:
|
22
22
|
import tkinter as tk
|
23
|
+
from tkinter import font as tkfont # NEW: Import for better font control
|
23
24
|
|
24
25
|
selector_available = True
|
25
26
|
except ImportError:
|
@@ -206,6 +207,66 @@ class ScreenSelector:
|
|
206
207
|
self.drawn_rect_ids.append(new_rect_id)
|
207
208
|
print("Redo: Restored rectangle.")
|
208
209
|
|
210
|
+
# --- NEW METHOD TO DISPLAY INSTRUCTIONS ---
|
211
|
+
def _create_instructions_widget(self, canvas):
|
212
|
+
"""Creates a text box with usage instructions on the canvas."""
|
213
|
+
instructions_text = (
|
214
|
+
"How to Use:\n"
|
215
|
+
" • Left Click + Drag: Create a capture area (green).\n"
|
216
|
+
" • Shift + Left Click + Drag: Create an exclusion area (orange).\n"
|
217
|
+
" • Right-Click on a box: Delete it.\n\n"
|
218
|
+
"Hotkeys:\n"
|
219
|
+
" • Ctrl + S: Save and Quit\n"
|
220
|
+
" • Ctrl + Z / Ctrl + Y: Undo / Redo\n"
|
221
|
+
" • M: Toggle background visibility\n"
|
222
|
+
" • I: Toggle these instructions\n"
|
223
|
+
" • Esc: Quit without saving"
|
224
|
+
" "
|
225
|
+
)
|
226
|
+
|
227
|
+
# Use a common, readable font
|
228
|
+
instruction_font = tkfont.Font(family="Segoe UI", size=10, weight="normal")
|
229
|
+
|
230
|
+
# Create the text item first to get its size
|
231
|
+
text_id = canvas.create_text(
|
232
|
+
20, 20, # Position with a small margin
|
233
|
+
text=instructions_text,
|
234
|
+
anchor=tk.NW,
|
235
|
+
fill='white',
|
236
|
+
font=instruction_font,
|
237
|
+
justify=tk.LEFT
|
238
|
+
)
|
239
|
+
|
240
|
+
# Get the bounding box of the text to draw a background
|
241
|
+
text_bbox = canvas.bbox(text_id)
|
242
|
+
|
243
|
+
# Create a background rectangle with padding
|
244
|
+
rect_id = canvas.create_rectangle(
|
245
|
+
text_bbox[0] - 10, # left
|
246
|
+
text_bbox[1] - 10, # top
|
247
|
+
text_bbox[2] + 10, # right
|
248
|
+
text_bbox[3] + 10, # bottom
|
249
|
+
fill='#2B2B2B', # Dark, semi-opaque background
|
250
|
+
outline='white',
|
251
|
+
width=1
|
252
|
+
)
|
253
|
+
|
254
|
+
# Lower the rectangle so it's behind the text
|
255
|
+
canvas.tag_lower(rect_id, text_id)
|
256
|
+
|
257
|
+
def toggle_instructions(self, event=None):
|
258
|
+
canvas = event.widget.winfo_toplevel().winfo_children()[0]
|
259
|
+
# Find all text and rectangle items (assuming only one of each for instructions)
|
260
|
+
text_items = [item for item in canvas.find_all() if canvas.type(item) == 'text']
|
261
|
+
rect_items = [item for item in canvas.find_all() if canvas.type(item) == 'rectangle']
|
262
|
+
|
263
|
+
if text_items and rect_items:
|
264
|
+
current_state = canvas.itemcget(text_items[0], 'state')
|
265
|
+
new_state = tk.NORMAL if current_state == tk.HIDDEN else tk.HIDDEN
|
266
|
+
for item in text_items + rect_items:
|
267
|
+
canvas.itemconfigure(item, state=new_state)
|
268
|
+
print("Toggled instructions visibility.")
|
269
|
+
|
209
270
|
def start(self):
|
210
271
|
self.root = tk.Tk()
|
211
272
|
self.root.withdraw()
|
@@ -230,6 +291,10 @@ class ScreenSelector:
|
|
230
291
|
canvas.pack(fill=tk.BOTH, expand=True)
|
231
292
|
canvas.create_image(0, 0, image=self.photo_image, anchor=tk.NW)
|
232
293
|
|
294
|
+
# --- MODIFIED: CALL THE INSTRUCTION WIDGET CREATOR ---
|
295
|
+
self._create_instructions_widget(canvas)
|
296
|
+
# --- END MODIFICATION ---
|
297
|
+
|
233
298
|
# Draw existing rectangles (which were converted to absolute pixels on load)
|
234
299
|
for _, abs_coords, is_excluded in self.rectangles:
|
235
300
|
x_abs, y_abs, w_abs, h_abs = abs_coords
|
@@ -275,17 +340,37 @@ class ScreenSelector:
|
|
275
340
|
self.current_rect_id = self.start_x = self.start_y = None
|
276
341
|
|
277
342
|
def on_right_click(event):
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
343
|
+
# Iterate through our rectangles in reverse to find the topmost one.
|
344
|
+
for i in range(len(self.rectangles) - 1, -1, -1):
|
345
|
+
_monitor, abs_coords, _is_excluded = self.rectangles[i]
|
346
|
+
x_abs, y_abs, w_abs, h_abs = abs_coords
|
347
|
+
canvas_x1 = x_abs - self.bounding_box['left']
|
348
|
+
canvas_y1 = y_abs - self.bounding_box['top']
|
349
|
+
canvas_x2 = canvas_x1 + w_abs
|
350
|
+
canvas_y2 = canvas_y1 + h_abs
|
351
|
+
|
352
|
+
if canvas_x1 <= event.x <= canvas_x2 and canvas_y1 <= event.y <= canvas_y2:
|
353
|
+
# --- UNDO/REDO CHANGE ---
|
354
|
+
# We found the rectangle. Prepare the 'remove' action.
|
355
|
+
# We need to save the data AND its original index to restore it correctly.
|
356
|
+
rect_tuple_to_del = self.rectangles[i]
|
357
|
+
item_id_to_del = self.drawn_rect_ids[i]
|
358
|
+
|
359
|
+
self.redo_stack.append((*rect_tuple_to_del, i))
|
360
|
+
|
361
|
+
# Now, perform the deletion
|
362
|
+
del self.rectangles[i]
|
363
|
+
del self.drawn_rect_ids[i]
|
364
|
+
canvas.delete(item_id_to_del)
|
365
|
+
print("Deleted rectangle.")
|
366
|
+
|
367
|
+
break # Stop after deleting the topmost one
|
286
368
|
|
287
369
|
def toggle_image_mode(e=None):
|
288
|
-
self.image_mode = not self.image_mode
|
370
|
+
self.image_mode = not self.image_mode
|
371
|
+
# Only change alpha of the main window, not the text widget
|
372
|
+
window.attributes("-alpha", 1.0 if self.image_mode else 0.25)
|
373
|
+
print("Toggled background visibility.")
|
289
374
|
|
290
375
|
def on_enter(e=None):
|
291
376
|
canvas.focus_set()
|
@@ -296,13 +381,15 @@ class ScreenSelector:
|
|
296
381
|
canvas.bind('<ButtonRelease-1>', on_release)
|
297
382
|
canvas.bind('<Button-3>', on_right_click)
|
298
383
|
canvas.bind('<Control-s>', self.save_rects)
|
299
|
-
canvas.bind('<Control-z>', self.undo_last_rect)
|
300
384
|
canvas.bind('<Control-y>', self.redo_last_rect)
|
385
|
+
canvas.bind('<Control-z>', self.undo_last_rect)
|
301
386
|
canvas.bind("<Escape>", self.quit_app)
|
302
387
|
canvas.bind("<m>", toggle_image_mode)
|
388
|
+
canvas.bind("<i>", self.toggle_instructions)
|
303
389
|
|
304
390
|
canvas.focus_set()
|
305
|
-
|
391
|
+
# The print message is now redundant but kept for console feedback
|
392
|
+
print("Starting UI. See on-screen instructions. Press Esc to quit, Ctrl+S to save.")
|
306
393
|
self.root.mainloop()
|
307
394
|
|
308
395
|
def quit_app(self, event=None):
|
@@ -350,10 +437,6 @@ if __name__ == "__main__":
|
|
350
437
|
target_window_title = "Windowed Projector (Preview)" # Default
|
351
438
|
if len(sys.argv) > 1:
|
352
439
|
target_window_title = sys.argv[1]
|
353
|
-
# else:
|
354
|
-
# print("Usage: python your_script_name.py \"Target Window Title\"", file=sys.stderr)
|
355
|
-
# print("Example: python selector.py \"Windowed Projector (Preview)\"", file=sys.stderr)
|
356
|
-
# sys.exit(1)
|
357
440
|
|
358
441
|
selection_result = get_screen_selection(target_window_title)
|
359
442
|
|
@@ -178,17 +178,28 @@ class WebsocketServerThread(threading.Thread):
|
|
178
178
|
asyncio.run(main())
|
179
179
|
|
180
180
|
|
181
|
+
def compare_ocr_results(prev_text, new_text, threshold=90):
|
182
|
+
if not prev_text or not new_text:
|
183
|
+
return False
|
184
|
+
if isinstance(prev_text, list):
|
185
|
+
prev_text = ''.join([item for item in prev_text if item is not None]) if prev_text else ""
|
186
|
+
if isinstance(new_text, list):
|
187
|
+
new_text = ''.join([item for item in new_text if item is not None]) if new_text else ""
|
188
|
+
similarity = fuzz.ratio(prev_text, new_text)
|
189
|
+
return similarity >= threshold
|
190
|
+
|
181
191
|
all_cords = None
|
182
192
|
rectangles = None
|
183
|
-
last_ocr2_result =
|
193
|
+
last_ocr2_result = []
|
184
194
|
|
185
195
|
def do_second_ocr(ocr1_text, time, img, filtering, ignore_furigana_filter=False):
|
186
196
|
global twopassocr, ocr2, last_ocr2_result
|
187
197
|
try:
|
188
198
|
orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
|
189
199
|
engine=ocr2, furigana_filter_sensitivity=furigana_filter_sensitivity if not ignore_furigana_filter else 0)
|
190
|
-
|
191
|
-
|
200
|
+
|
201
|
+
if compare_ocr_results(last_ocr2_result, orig_text):
|
202
|
+
logger.info("Detected similar text from previous OCR2 result, not sending")
|
192
203
|
return
|
193
204
|
save_result_image(img)
|
194
205
|
last_ocr2_result = orig_text
|
@@ -241,12 +252,12 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
|
|
241
252
|
|
242
253
|
line_start_time = time if time else datetime.now()
|
243
254
|
|
244
|
-
if not twopassocr:
|
245
|
-
if
|
255
|
+
if manual or not twopassocr:
|
256
|
+
if compare_ocr_results(previous_orig_text, orig_text_string):
|
246
257
|
logger.info("Seems like Text we already sent, not doing anything.")
|
247
258
|
return
|
248
259
|
save_result_image(img)
|
249
|
-
asyncio.run(send_result(text,
|
260
|
+
asyncio.run(send_result(text, line_start_time))
|
250
261
|
previous_orig_text = orig_text_string
|
251
262
|
previous_text = None
|
252
263
|
previous_img = None
|
@@ -260,13 +271,13 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
|
|
260
271
|
if previous_text and text_stable_start_time:
|
261
272
|
stable_time = text_stable_start_time
|
262
273
|
previous_img_local = previous_img
|
263
|
-
if
|
274
|
+
if compare_ocr_results(previous_orig_text, orig_text_string):
|
264
275
|
logger.info("Seems like Text we already sent, not doing anything.")
|
265
276
|
previous_text = None
|
266
277
|
return
|
267
278
|
previous_orig_text = orig_text_string
|
268
279
|
previous_ocr1_result = previous_text
|
269
|
-
if crop_coords:
|
280
|
+
if crop_coords and optimize_second_scan:
|
270
281
|
previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
|
271
282
|
previous_img_local = previous_img_local.crop(crop_coords)
|
272
283
|
second_ocr_queue.put((previous_text, stable_time, previous_img_local, filtering))
|
@@ -389,70 +400,88 @@ def set_force_stable_hotkey():
|
|
389
400
|
print("Press Ctrl+Shift+F to toggle force stable mode.")
|
390
401
|
|
391
402
|
if __name__ == "__main__":
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
403
|
+
try:
|
404
|
+
global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan
|
405
|
+
import sys
|
406
|
+
|
407
|
+
import argparse
|
408
|
+
|
409
|
+
parser = argparse.ArgumentParser(description="OCR Configuration")
|
410
|
+
parser.add_argument("--language", type=str, default="ja", help="Language for OCR (default: ja)")
|
411
|
+
parser.add_argument("--ocr1", type=str, default="oneocr", help="Primary OCR engine (default: oneocr)")
|
412
|
+
parser.add_argument("--ocr2", type=str, default="glens", help="Secondary OCR engine (default: glens)")
|
413
|
+
parser.add_argument("--twopassocr", type=int, choices=[0, 1], default=1,
|
414
|
+
help="Enable two-pass OCR (default: 1)")
|
415
|
+
parser.add_argument("--manual", action="store_true", help="Use screenshot-only mode")
|
416
|
+
parser.add_argument("--clipboard", action="store_true", help="Use clipboard for input")
|
417
|
+
parser.add_argument("--clipboard-output", action="store_true", default=False, help="Use clipboard for output")
|
418
|
+
parser.add_argument("--window", type=str, help="Specify the window name for OCR")
|
419
|
+
parser.add_argument("--furigana_filter_sensitivity", type=float, default=0,
|
420
|
+
help="Furigana Filter Sensitivity for OCR (default: 0)")
|
421
|
+
parser.add_argument("--manual_ocr_hotkey", type=str, default=None, help="Hotkey for manual OCR (default: None)")
|
422
|
+
parser.add_argument("--area_select_ocr_hotkey", type=str, default="ctrl+shift+o",
|
423
|
+
help="Hotkey for area selection OCR (default: ctrl+shift+o)")
|
424
|
+
parser.add_argument("--optimize_second_scan", action="store_true",
|
425
|
+
help="Optimize second scan by cropping based on first scan results")
|
426
|
+
|
427
|
+
args = parser.parse_args()
|
428
|
+
|
429
|
+
language = args.language
|
430
|
+
ocr1 = args.ocr1
|
431
|
+
ocr2 = args.ocr2 if args.ocr2 else None
|
432
|
+
twopassocr = bool(args.twopassocr)
|
433
|
+
manual = args.manual
|
434
|
+
ss_clipboard = args.clipboard
|
435
|
+
window_name = args.window
|
436
|
+
furigana_filter_sensitivity = args.furigana_filter_sensitivity
|
437
|
+
ss_hotkey = args.area_select_ocr_hotkey.lower()
|
438
|
+
manual_ocr_hotkey = args.manual_ocr_hotkey.lower().replace("ctrl", "<ctrl>").replace("shift",
|
439
|
+
"<shift>").replace(
|
440
|
+
"alt", "<alt>") if args.manual_ocr_hotkey else None
|
441
|
+
clipboard_output = args.clipboard_output
|
442
|
+
optimize_second_scan = args.optimize_second_scan
|
443
|
+
|
444
|
+
window = None
|
445
|
+
logger.info(f"Received arguments: {vars(args)}")
|
446
|
+
# set_force_stable_hotkey()
|
447
|
+
ocr_config: OCRConfig = get_ocr_config(window=window_name)
|
448
|
+
if ocr_config:
|
449
|
+
if ocr_config.window:
|
450
|
+
start_time = time.time()
|
451
|
+
while time.time() - start_time < 30:
|
452
|
+
window = get_window(ocr_config.window)
|
453
|
+
if window or manual:
|
454
|
+
if window:
|
455
|
+
ocr_config.scale_coords()
|
456
|
+
break
|
457
|
+
logger.info(f"Window: {ocr_config.window} Could not be found, retrying in 1 second...")
|
458
|
+
time.sleep(1)
|
459
|
+
else:
|
460
|
+
logger.error(f"Window '{ocr_config.window}' not found within 30 seconds.")
|
461
|
+
sys.exit(1)
|
462
|
+
logger.info(
|
463
|
+
f"Starting OCR with configuration: Window: {ocr_config.window}, Rectangles: {ocr_config.rectangles}, Engine 1: {ocr1}, Engine 2: {ocr2}, Two-pass OCR: {twopassocr}")
|
464
|
+
set_dpi_awareness()
|
465
|
+
if manual or ocr_config:
|
466
|
+
rectangles = ocr_config.rectangles if ocr_config and ocr_config.rectangles else []
|
467
|
+
oneocr_threads = []
|
468
|
+
ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config, rectangles), daemon=True)
|
469
|
+
ocr_thread.start()
|
470
|
+
if not manual:
|
471
|
+
worker_thread = threading.Thread(target=process_task_queue, daemon=True)
|
472
|
+
worker_thread.start()
|
473
|
+
websocket_server_thread = WebsocketServerThread(read=True)
|
474
|
+
websocket_server_thread.start()
|
475
|
+
add_ss_hotkey(ss_hotkey)
|
476
|
+
try:
|
477
|
+
while not done:
|
478
|
+
time.sleep(1)
|
479
|
+
except KeyboardInterrupt as e:
|
480
|
+
pass
|
481
|
+
else:
|
482
|
+
print("Failed to load OCR configuration. Please check the logs.")
|
483
|
+
except Exception as e:
|
484
|
+
logger.info(e, exc_info=True)
|
485
|
+
logger.debug(e, exc_info=True)
|
486
|
+
logger.info("Closing in 5 seconds...")
|
487
|
+
time.sleep(5)
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import re
|
2
2
|
import os
|
3
3
|
import io
|
4
|
+
import time
|
4
5
|
from pathlib import Path
|
5
6
|
import sys
|
6
7
|
import platform
|
@@ -17,8 +18,6 @@ from google.generativeai import GenerationConfig
|
|
17
18
|
from loguru import logger
|
18
19
|
import requests
|
19
20
|
|
20
|
-
from GameSentenceMiner.util.configuration import get_app_directory, get_temporary_directory
|
21
|
-
|
22
21
|
try:
|
23
22
|
from manga_ocr import MangaOcr as MOCR
|
24
23
|
except ImportError:
|
@@ -1247,6 +1246,68 @@ class GroqOCR:
|
|
1247
1246
|
def _preprocess(self, img):
|
1248
1247
|
return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')
|
1249
1248
|
|
1249
|
+
# class QWENOCR:
|
1250
|
+
# name = 'qwenvl'
|
1251
|
+
# readable_name = 'Qwen2-VL'
|
1252
|
+
# key = 'q'
|
1253
|
+
# available = False
|
1254
|
+
#
|
1255
|
+
# def __init__(self, config={}):
|
1256
|
+
# try:
|
1257
|
+
# import torch
|
1258
|
+
# from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
|
1259
|
+
# self.model = Qwen2VLForConditionalGeneration.from_pretrained(
|
1260
|
+
# "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"
|
1261
|
+
# )
|
1262
|
+
# self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
|
1263
|
+
# self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
1264
|
+
# print(self.device)
|
1265
|
+
# self.available = True
|
1266
|
+
# logger.info('Qwen2-VL ready')
|
1267
|
+
# except Exception as e:
|
1268
|
+
# logger.warning(f'Qwen2-VL not available: {e}')
|
1269
|
+
#
|
1270
|
+
# def __call__(self, img, furigana_filter_sensitivity=0):
|
1271
|
+
# if not self.available:
|
1272
|
+
# return (False, 'Qwen2-VL is not available.')
|
1273
|
+
# try:
|
1274
|
+
# img = input_to_pil_image(img)
|
1275
|
+
# conversation = [
|
1276
|
+
# {
|
1277
|
+
# "role": "user",
|
1278
|
+
# "content": [
|
1279
|
+
# {"type": "image"},
|
1280
|
+
# {"type": "text", "text": "Analyze the image. Extract text *only* from within dialogue boxes (speech bubbles or panels containing character dialogue). If Text appears to be vertical, read the text from top to bottom, right to left. From the extracted dialogue text, filter out any furigana. Ignore and do not include any text found outside of dialogue boxes, including character names, speaker labels, or sound effects. Return *only* the filtered dialogue text. If no text is found within dialogue boxes after applying filters, return nothing. Do not include any other output, formatting markers, or commentary."},
|
1281
|
+
# ],
|
1282
|
+
# }
|
1283
|
+
# ]
|
1284
|
+
# text_prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
|
1285
|
+
# inputs = self.processor(
|
1286
|
+
# text=[text_prompt], images=[img], padding=True, return_tensors="pt"
|
1287
|
+
# )
|
1288
|
+
# inputs = inputs.to(self.device)
|
1289
|
+
# output_ids = self.model.generate(**inputs, max_new_tokens=128)
|
1290
|
+
# generated_ids = [
|
1291
|
+
# output_ids[len(input_ids):]
|
1292
|
+
# for input_ids, output_ids in zip(inputs.input_ids, output_ids)
|
1293
|
+
# ]
|
1294
|
+
# output_text = self.processor.batch_decode(
|
1295
|
+
# generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
|
1296
|
+
# )
|
1297
|
+
# return (True, output_text[0] if output_text else "")
|
1298
|
+
# except Exception as e:
|
1299
|
+
# return (False, f'Qwen2-VL inference failed: {e}')
|
1300
|
+
|
1301
|
+
|
1302
|
+
# qwenocr = QWENOCR()
|
1303
|
+
#
|
1304
|
+
# for i in range(10):
|
1305
|
+
# start_time = time.time()
|
1306
|
+
# res, text = qwenocr(Image.open('test_furigana.png'), furigana_filter_sensitivity=0) # Example usage
|
1307
|
+
# end_time = time.time()
|
1308
|
+
#
|
1309
|
+
# print(f"Time taken: {end_time - start_time:.2f} seconds")
|
1310
|
+
# print(text)
|
1250
1311
|
# class LocalOCR:
|
1251
1312
|
# name = 'local_ocr'
|
1252
1313
|
# readable_name = 'Local OCR'
|
@@ -172,7 +172,7 @@ class Screenshot:
|
|
172
172
|
class Audio:
|
173
173
|
enabled: bool = True
|
174
174
|
extension: str = 'opus'
|
175
|
-
beginning_offset: float = 0.
|
175
|
+
beginning_offset: float = -0.5
|
176
176
|
end_offset: float = 0.5
|
177
177
|
pre_vad_end_offset: float = 0.0
|
178
178
|
ffmpeg_reencode_options: str = '-c:a libopus -f opus -af \"afade=t=in:d=0.10\"' if is_windows() else ''
|
@@ -683,6 +683,7 @@ class GsmAppState:
|
|
683
683
|
self.last_mined_line = None
|
684
684
|
self.keep_running = True
|
685
685
|
self.current_game = ''
|
686
|
+
self.videos_to_remove = set()
|
686
687
|
|
687
688
|
@dataclass_json
|
688
689
|
@dataclass
|
GameSentenceMiner/web/service.py
CHANGED
@@ -21,7 +21,7 @@ def handle_texthooker_button(video_path='', get_audio_from_video=None):
|
|
21
21
|
if get_config().advanced.audio_player_path:
|
22
22
|
play_audio_in_external(gsm_state.previous_audio)
|
23
23
|
elif get_config().advanced.video_player_path:
|
24
|
-
play_video_in_external(line,
|
24
|
+
play_video_in_external(line, video_path)
|
25
25
|
else:
|
26
26
|
import sounddevice as sd
|
27
27
|
data, samplerate = gsm_state.previous_audio
|
@@ -35,9 +35,7 @@ def handle_texthooker_button(video_path='', get_audio_from_video=None):
|
|
35
35
|
play_audio_in_external(audio)
|
36
36
|
gsm_state.previous_audio = audio
|
37
37
|
elif get_config().advanced.video_player_path:
|
38
|
-
|
39
|
-
gsm_state.previous_audio = new_video_path
|
40
|
-
gsm_state.previous_replay = new_video_path
|
38
|
+
play_video_in_external(line, video_path)
|
41
39
|
else:
|
42
40
|
import sounddevice as sd
|
43
41
|
import soundfile as sf
|
@@ -75,8 +73,8 @@ def handle_texthooker_button(video_path='', get_audio_from_video=None):
|
|
75
73
|
logger.debug(f"Error Playing Audio/Video: {e}", exc_info=True)
|
76
74
|
return
|
77
75
|
finally:
|
78
|
-
|
79
|
-
|
76
|
+
gsm_state.previous_replay = video_path
|
77
|
+
gsm_state.videos_to_remove.add(video_path)
|
80
78
|
|
81
79
|
|
82
80
|
def play_audio_in_external(filepath):
|
@@ -94,37 +92,28 @@ def play_audio_in_external(filepath):
|
|
94
92
|
|
95
93
|
|
96
94
|
def play_video_in_external(line, filepath):
|
97
|
-
def move_video_when_closed(p, fp):
|
98
|
-
p.wait()
|
99
|
-
os.remove(fp)
|
100
|
-
|
101
|
-
shutil.move(filepath, get_temporary_directory())
|
102
|
-
new_filepath = os.path.join(get_temporary_directory(), os.path.basename(filepath))
|
103
|
-
|
104
95
|
command = [get_config().advanced.video_player_path]
|
105
96
|
|
106
|
-
start, _, _, _ = get_video_timings(
|
97
|
+
start, _, _, _ = get_video_timings(filepath, line)
|
107
98
|
|
108
99
|
if start:
|
109
100
|
if "vlc" in get_config().advanced.video_player_path:
|
110
101
|
command.extend(["--start-time", convert_to_vlc_seconds(start), '--one-instance'])
|
111
102
|
else:
|
112
103
|
command.extend(["--start", convert_to_vlc_seconds(start)])
|
113
|
-
command.append(os.path.normpath(
|
104
|
+
command.append(os.path.normpath(filepath))
|
114
105
|
|
115
106
|
logger.info(" ".join(command))
|
116
107
|
|
117
108
|
|
118
109
|
|
119
110
|
try:
|
120
|
-
|
121
|
-
|
122
|
-
threading.Thread(target=move_video_when_closed, args=(proc, filepath)).start()
|
111
|
+
subprocess.Popen(command)
|
112
|
+
logger.info(f"Opened {filepath} in {get_config().advanced.video_player_path}.")
|
123
113
|
except FileNotFoundError:
|
124
|
-
|
114
|
+
logger.error("VLC not found. Make sure it's installed and in your PATH.")
|
125
115
|
except Exception as e:
|
126
|
-
|
127
|
-
return new_filepath
|
116
|
+
logger.error(f"An error occurred: {e}")
|
128
117
|
|
129
118
|
|
130
119
|
def convert_to_vlc_seconds(time_str):
|
@@ -287,8 +287,8 @@ def get_screenshot():
|
|
287
287
|
if event_id is None:
|
288
288
|
return jsonify({'error': 'Missing id'}), 400
|
289
289
|
gsm_state.line_for_screenshot = get_line_by_id(event_id)
|
290
|
-
if gsm_state.previous_line_for_screenshot and gsm_state.line_for_screenshot.id == gsm_state.previous_line_for_screenshot.id:
|
291
|
-
handle_texthooker_button()
|
290
|
+
if gsm_state.previous_line_for_screenshot and gsm_state.line_for_screenshot.id == gsm_state.previous_line_for_screenshot.id or gsm_state.previous_line_for_audio:
|
291
|
+
handle_texthooker_button(gsm_state.previous_replay)
|
292
292
|
else:
|
293
293
|
obs.save_replay_buffer()
|
294
294
|
return jsonify({}), 200
|
@@ -301,8 +301,8 @@ def play_audio():
|
|
301
301
|
if event_id is None:
|
302
302
|
return jsonify({'error': 'Missing id'}), 400
|
303
303
|
gsm_state.line_for_audio = get_line_by_id(event_id)
|
304
|
-
if gsm_state.previous_line_for_audio and gsm_state.line_for_audio == gsm_state.previous_line_for_audio:
|
305
|
-
handle_texthooker_button()
|
304
|
+
if gsm_state.previous_line_for_audio and gsm_state.line_for_audio == gsm_state.previous_line_for_audio or gsm_state.previous_line_for_screenshot:
|
305
|
+
handle_texthooker_button(gsm_state.previous_replay)
|
306
306
|
else:
|
307
307
|
obs.save_replay_buffer()
|
308
308
|
return jsonify({}), 200
|
@@ -1,8 +1,8 @@
|
|
1
1
|
GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
GameSentenceMiner/anki.py,sha256=kWw3PV_Jj5-lHcttCB3lRXejHlaAbiJ2Ag_NAGX-RI8,16632
|
3
|
-
GameSentenceMiner/config_gui.py,sha256=
|
3
|
+
GameSentenceMiner/config_gui.py,sha256=Xa_a-sdQzht3kzR-Z9gkLy4qnaPyP1bdVadYTHp5lUQ,91018
|
4
4
|
GameSentenceMiner/gametext.py,sha256=6VkjmBeiuZfPk8T6PHFdIAElBH2Y_oLVYvmcafqN7RM,6747
|
5
|
-
GameSentenceMiner/gsm.py,sha256=
|
5
|
+
GameSentenceMiner/gsm.py,sha256=p4DVa_Jx1EOsgUxAAdC7st7VXLKWnP2BLDGT78ToO8w,24864
|
6
6
|
GameSentenceMiner/obs.py,sha256=ZV9Vk39hrsJLT-AlIxa3qgncKxXaL3Myl33vVJEDEoA,14670
|
7
7
|
GameSentenceMiner/vad.py,sha256=G0NkaWFJaIfKQAV7LOFxyKoih7pPNYHDuy4SzeFVCkI,16389
|
8
8
|
GameSentenceMiner/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -16,21 +16,21 @@ GameSentenceMiner/assets/icon512.png,sha256=HxUj2GHjyQsk8NV433256UxU9phPhtjCY-YB
|
|
16
16
|
GameSentenceMiner/assets/icon64.png,sha256=N8xgdZXvhqVQP9QUK3wX5iqxX9LxHljD7c-Bmgim6tM,9301
|
17
17
|
GameSentenceMiner/assets/pickaxe.png,sha256=VfIGyXyIZdzEnVcc4PmG3wszPMO1W4KCT7Q_nFK6eSE,1403829
|
18
18
|
GameSentenceMiner/ocr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=
|
19
|
+
GameSentenceMiner/ocr/gsm_ocr_config.py,sha256=jtTzAWtMAx8GuA1XIJ_BmyNn3aYaO3u_c5Q7m5D4gS8,4056
|
20
20
|
GameSentenceMiner/ocr/ocrconfig.py,sha256=_tY8mjnzHMJrLS8E5pHqYXZjMuLoGKYgJwdhYgN-ny4,6466
|
21
|
-
GameSentenceMiner/ocr/owocr_area_selector.py,sha256=
|
22
|
-
GameSentenceMiner/ocr/owocr_helper.py,sha256=
|
21
|
+
GameSentenceMiner/ocr/owocr_area_selector.py,sha256=boAqarX17jvFscu-7s6C9rqesjQ54s-kfuW0bjCru-M,19834
|
22
|
+
GameSentenceMiner/ocr/owocr_helper.py,sha256=VDcuBfyZ1B7TN6yImJVuNxqWY7pr95R2cRM9jgD5Rk8,21670
|
23
23
|
GameSentenceMiner/ocr/ss_picker.py,sha256=0IhxUdaKruFpZyBL-8SpxWg7bPrlGpy3lhTcMMZ5rwo,5224
|
24
24
|
GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9lKnRCj6oZgR0,49
|
25
25
|
GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
|
26
26
|
GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
|
27
27
|
GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
|
28
|
-
GameSentenceMiner/owocr/owocr/ocr.py,sha256=
|
28
|
+
GameSentenceMiner/owocr/owocr/ocr.py,sha256=Mri_zB_COk7x9GmolyhYCINJ-lQlD45GuJ4m4M0IBFM,55328
|
29
29
|
GameSentenceMiner/owocr/owocr/run.py,sha256=mZIGDm3fGYrYbSNuFOk7Sbslfgi36YN0YqfC1xYh_eY,55286
|
30
30
|
GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
|
31
31
|
GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
32
|
GameSentenceMiner/util/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
|
33
|
-
GameSentenceMiner/util/configuration.py,sha256=
|
33
|
+
GameSentenceMiner/util/configuration.py,sha256=8PZk4IhtWFimfRy7biREcfG1NGkFNzKzFjlOjxNEFd0,28817
|
34
34
|
GameSentenceMiner/util/electron_config.py,sha256=3VmIrcXhC-wIMMc4uqV85NrNenRl4ZUbnQfSjWEwuig,9852
|
35
35
|
GameSentenceMiner/util/ffmpeg.py,sha256=t0tflxq170n8PZKkdw8fTZIUQfXD0p_qARa9JTdhBTc,21530
|
36
36
|
GameSentenceMiner/util/gsm_utils.py,sha256=_279Fu9CU6FEh4cP6h40TWOt_BrqmPgytfumi8y53Ew,11491
|
@@ -47,8 +47,8 @@ GameSentenceMiner/util/downloader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
47
47
|
GameSentenceMiner/util/downloader/download_tools.py,sha256=mvnOjDHFlV1AbjHaNI7mdnC5_CH5k3N4n1ezqzzbzGA,8139
|
48
48
|
GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=EJbKISaZ9p2x9P4x0rpMM5nAInTTc9b7arraGBcd-SA,10381
|
49
49
|
GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
|
-
GameSentenceMiner/web/service.py,sha256=
|
51
|
-
GameSentenceMiner/web/texthooking_page.py,sha256=
|
50
|
+
GameSentenceMiner/web/service.py,sha256=S7bYf2kSk08u-8R9Qpv7piM-pxfFjYZUvU825xupmuI,5279
|
51
|
+
GameSentenceMiner/web/texthooking_page.py,sha256=EmcIBEPGWNgI2LGL3kKUsm0rs2Vn0CPq9PVKKnuIt2g,16183
|
52
52
|
GameSentenceMiner/web/static/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
53
|
GameSentenceMiner/web/static/apple-touch-icon.png,sha256=OcMI8af_68DA_tweOsQ5LytTyMwm7-hPW07IfrOVgEs,46132
|
54
54
|
GameSentenceMiner/web/static/favicon-96x96.png,sha256=lOePzjiKl1JY2J1kT_PMdyEnrlJmi5GWbmXJunM12B4,16502
|
@@ -62,9 +62,9 @@ GameSentenceMiner/web/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
62
62
|
GameSentenceMiner/web/templates/index.html,sha256=n0J-dV8eksj8JXUuaCTIh0fIxIjfgm2EvxGBdQ6gWoM,214113
|
63
63
|
GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
|
64
64
|
GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
|
65
|
-
gamesentenceminer-2.10.
|
66
|
-
gamesentenceminer-2.10.
|
67
|
-
gamesentenceminer-2.10.
|
68
|
-
gamesentenceminer-2.10.
|
69
|
-
gamesentenceminer-2.10.
|
70
|
-
gamesentenceminer-2.10.
|
65
|
+
gamesentenceminer-2.10.12.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
66
|
+
gamesentenceminer-2.10.12.dist-info/METADATA,sha256=YzGn0pkP-I00xGsRHt-5GK8x9pCKpoKR5lHBYL_z8Ho,7355
|
67
|
+
gamesentenceminer-2.10.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
68
|
+
gamesentenceminer-2.10.12.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
|
69
|
+
gamesentenceminer-2.10.12.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
|
70
|
+
gamesentenceminer-2.10.12.dist-info/RECORD,,
|
File without changes
|
{gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/entry_points.txt
RENAMED
File without changes
|
{gamesentenceminer-2.10.10.dist-info → gamesentenceminer-2.10.12.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|