GameSentenceMiner 2.12.0.dev2__py3-none-any.whl → 2.12.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -114,7 +114,7 @@ class ConfigApp:
114
114
  self.window.protocol("WM_DELETE_WINDOW", self.hide)
115
115
  self.obs_scene_listbox_changed = False
116
116
 
117
- self.window.geometry("800x700")
117
+ # self.window.geometry("800x500")
118
118
  self.current_row = 0
119
119
 
120
120
  self.master_config: Config = configuration.load_config()
@@ -137,6 +137,7 @@ class ConfigApp:
137
137
  self.profiles_tab = None
138
138
  self.ai_tab = None
139
139
  self.advanced_tab = None
140
+ self.wip_tab = None
140
141
 
141
142
  self.create_tabs()
142
143
 
@@ -158,6 +159,8 @@ class ConfigApp:
158
159
  "Saves Settings and Syncs CHANGED SETTINGS to all profiles.", row=0,
159
160
  column=2)
160
161
 
162
+ self.window.update_idletasks()
163
+ self.window.geometry("")
161
164
  self.window.withdraw()
162
165
 
163
166
  def create_tabs(self):
@@ -172,6 +175,7 @@ class ConfigApp:
172
175
  self.create_profiles_tab()
173
176
  self.create_ai_tab()
174
177
  self.create_advanced_tab()
178
+ self.create_wip_tab()
175
179
 
176
180
  def add_reset_button(self, frame, category, row, column=0, recreate_tab=None):
177
181
  """
@@ -230,6 +234,7 @@ class ConfigApp:
230
234
  if self.window is not None:
231
235
  self.window.deiconify()
232
236
  self.window.lift()
237
+ self.window.update_idletasks()
233
238
  return
234
239
 
235
240
  def hide(self):
@@ -250,7 +255,8 @@ class ConfigApp:
250
255
  open_multimine_on_startup=self.open_multimine_on_startup.get(),
251
256
  texthook_replacement_regex=self.texthook_replacement_regex.get(),
252
257
  use_both_clipboard_and_websocket=self.use_both_clipboard_and_websocket.get(),
253
- texthooker_port=int(self.texthooker_port.get())
258
+ texthooker_port=int(self.texthooker_port.get()),
259
+ native_language=CommonLanguages.from_name(self.native_language.get()) if self.native_language.get() else CommonLanguages.ENGLISH.value,
254
260
  ),
255
261
  paths=Paths(
256
262
  folder_to_watch=self.folder_to_watch.get(),
@@ -362,6 +368,10 @@ class ConfigApp:
362
368
  use_canned_context_prompt=self.use_canned_context_prompt.get(),
363
369
  custom_prompt=self.custom_prompt.get("1.0", tk.END),
364
370
  dialogue_context_length=int(self.ai_dialogue_context_length.get()),
371
+ ),
372
+ wip=WIP(
373
+ overlay_websocket_port=int(self.overlay_websocket_port.get()),
374
+ overlay_websocket_send=self.overlay_websocket_send.get()
365
375
  )
366
376
  )
367
377
 
@@ -547,6 +557,15 @@ class ConfigApp:
547
557
  self.latest_version = ttk.Label(self.general_tab, text=get_latest_version(), bootstyle="secondary")
548
558
  self.latest_version.grid(row=self.current_row, column=1, sticky='W', pady=2)
549
559
  self.current_row += 1
560
+
561
+ # Native Language Selection
562
+ HoverInfoLabelWidget(self.general_tab, text="Native Language:",
563
+ tooltip="Select your native language. This is used for various features, but will not change the look of GSM.",
564
+ row=self.current_row, column=0)
565
+ self.native_language = ttk.Combobox(self.general_tab, values=CommonLanguages.get_all_names_pretty(), state="readonly")
566
+ self.native_language.set(CommonLanguages.from_code(self.settings.general.native_language).name.replace('_', ' ').title())
567
+ self.native_language.grid(row=self.current_row, column=1, sticky='EW', pady=2)
568
+ self.current_row += 1
550
569
 
551
570
  ttk.Label(self.general_tab, text="Indicates important/required settings.", foreground="dark orange",
552
571
  font=("Helvetica", 10, "bold")).grid(row=self.current_row, column=0, columnspan=2, sticky='W', pady=2)
@@ -1674,6 +1693,54 @@ class ConfigApp:
1674
1693
  #
1675
1694
  #
1676
1695
  # help_frame.grid_columnconfigure(0, weight=1)
1696
+
1697
+ @new_tab
1698
+ def create_wip_tab(self):
1699
+ if self.wip_tab is None:
1700
+ self.wip_tab = ttk.Frame(self.notebook, padding=15)
1701
+ self.notebook.add(self.wip_tab, text='WIP')
1702
+ else:
1703
+ for widget in self.wip_tab.winfo_children():
1704
+ widget.destroy()
1705
+
1706
+ wip_frame = self.wip_tab
1707
+
1708
+ ttk.Label(wip_frame, text="Warning: These features are experimental and may not work as expected.",
1709
+ foreground="red", font=("Helvetica", 10, "bold")).grid(row=self.current_row, column=0, columnspan=2,
1710
+ sticky='W', pady=5)
1711
+
1712
+ self.current_row += 1
1713
+
1714
+ ttk.Label(wip_frame, text="Overlay requires OwOCR dependencies to be installed, and requires an external app to be running.",
1715
+ foreground="red", font=("Helvetica", 10, "bold")).grid(row=self.current_row, column=0, columnspan=2,
1716
+ sticky='W', pady=5)
1717
+
1718
+ self.current_row += 1
1719
+
1720
+ HoverInfoLabelWidget(wip_frame, text="Overlay WebSocket Port:",
1721
+ tooltip="Port for the overlay WebSocket communication. Used for experimental overlay features.",
1722
+ row=self.current_row, column=0)
1723
+ self.overlay_websocket_port = ttk.Entry(wip_frame)
1724
+ self.overlay_websocket_port.insert(0, str(self.settings.wip.overlay_websocket_port))
1725
+ self.overlay_websocket_port.grid(row=self.current_row, column=1, sticky='EW', pady=2)
1726
+ self.current_row += 1
1727
+
1728
+ HoverInfoLabelWidget(wip_frame, text="Overlay WebSocket Send:",
1729
+ tooltip="Enable to send overlay data via WebSocket. Experimental feature.",
1730
+ row=self.current_row, column=0)
1731
+ self.overlay_websocket_send = tk.BooleanVar(value=self.settings.wip.overlay_websocket_send)
1732
+ ttk.Checkbutton(wip_frame, variable=self.overlay_websocket_send, bootstyle="round-toggle").grid(
1733
+ row=self.current_row, column=1, sticky='W', pady=2)
1734
+ self.current_row += 1
1735
+
1736
+ self.add_reset_button(wip_frame, "wip", self.current_row, 0, self.create_wip_tab)
1737
+
1738
+ for col in range(2):
1739
+ wip_frame.grid_columnconfigure(col, weight=0)
1740
+ for row in range(self.current_row):
1741
+ wip_frame.grid_rowconfigure(row, minsize=30)
1742
+
1743
+ return wip_frame
1677
1744
 
1678
1745
  def on_profile_change(self, event):
1679
1746
  self.save_settings(profile_change=True)
@@ -9,7 +9,7 @@ from websockets import InvalidStatus
9
9
  from GameSentenceMiner.util.gsm_utils import do_text_replacements, TEXT_REPLACEMENTS_FILE, run_new_thread
10
10
  from GameSentenceMiner.util.configuration import *
11
11
  from GameSentenceMiner.util.text_log import *
12
- from GameSentenceMiner.web.texthooking_page import add_event_to_texthooker
12
+ from GameSentenceMiner.web.texthooking_page import add_event_to_texthooker, send_word_coordinates_to_overlay
13
13
  from GameSentenceMiner.wip import get_overlay_coords
14
14
 
15
15
  current_line = ''
@@ -123,9 +123,13 @@ async def handle_new_text_event(current_clipboard, line_time=None):
123
123
  current_line_time = line_time if line_time else datetime.now()
124
124
  gsm_status.last_line_received = current_line_time.strftime("%Y-%m-%d %H:%M:%S")
125
125
  add_line(current_line_after_regex, line_time)
126
- boxes = await find_box_for_sentence(current_line)
127
126
  if len(get_text_log().values) > 0:
128
- await add_event_to_texthooker(get_text_log()[-1], boxes=boxes)
127
+ await add_event_to_texthooker(get_text_log()[-1])
128
+ if get_config().wip.overlay_websocket_port and get_config().wip.overlay_websocket_send:
129
+ boxes = await find_box_for_sentence(current_line_after_regex)
130
+ if boxes:
131
+ await send_word_coordinates_to_overlay(boxes)
132
+
129
133
 
130
134
  async def find_box_for_sentence(sentence):
131
135
  boxes = []
@@ -6,14 +6,13 @@ from pathlib import Path
6
6
  import sys
7
7
  import platform
8
8
  import logging
9
- from math import sqrt, floor
9
+ from math import sqrt
10
10
  import json
11
11
  import base64
12
12
  from urllib.parse import urlparse, parse_qs
13
13
 
14
14
  import jaconv
15
15
  import numpy as np
16
- import rapidfuzz.fuzz
17
16
  from PIL import Image
18
17
  from loguru import logger
19
18
  import requests
@@ -165,28 +164,6 @@ def limit_image_size(img, max_size):
165
164
  return False, ''
166
165
 
167
166
 
168
- def get_regex(lang):
169
- if lang == "ja":
170
- return re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
171
- elif lang == "zh":
172
- return re.compile(r'[\u4E00-\u9FFF]')
173
- elif lang == "ko":
174
- return re.compile(r'[\uAC00-\uD7AF]')
175
- elif lang == "ar":
176
- return re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
177
- elif lang == "ru":
178
- return re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
179
- elif lang == "el":
180
- return re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
181
- elif lang == "he":
182
- return re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
183
- elif lang == "th":
184
- return re.compile(r'[\u0E00-\u0E7F]')
185
- else:
186
- return re.compile(
187
- r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
188
-
189
-
190
167
  class MangaOcr:
191
168
  name = 'mangaocr'
192
169
  readable_name = 'Manga OCR'
@@ -266,20 +243,15 @@ class GoogleLens:
266
243
  available = False
267
244
 
268
245
  def __init__(self, lang='ja'):
269
- self.regex = get_regex(lang)
270
- self.initial_lang = lang
246
+ self.kana_kanji_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
271
247
  if 'betterproto' not in sys.modules:
272
248
  logger.warning('betterproto not available, Google Lens will not work!')
273
249
  else:
274
250
  self.available = True
275
251
  logger.info('Google Lens ready')
276
252
 
277
- def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False):
278
- lang = get_ocr_language()
253
+ def __call__(self, img, furigana_filter_sensitivity=0):
279
254
  img, is_path = input_to_pil_image(img)
280
- if lang != self.initial_lang:
281
- self.initial_lang = lang
282
- self.regex = get_regex(lang)
283
255
  if not img:
284
256
  return (False, 'Invalid image provided')
285
257
 
@@ -337,14 +309,12 @@ class GoogleLens:
337
309
  response_proto = LensOverlayServerResponse().FromString(res.content)
338
310
  response_dict = response_proto.to_dict(betterproto.Casing.SNAKE)
339
311
 
340
- if os.path.exists(r"C:\Users\Beangate\GSM\Electron App\test"):
341
- with open(os.path.join(r"C:\Users\Beangate\GSM\Electron App\test", 'glens_response.json'), 'w', encoding='utf-8') as f:
342
- json.dump(response_dict, f, indent=4, ensure_ascii=False)
312
+ # with open(os.path.join(r"C:\Users\Beangate\GSM\Electron App\test", 'glens_response.json'), 'w', encoding='utf-8') as f:
313
+ # json.dump(response_dict, f, indent=4, ensure_ascii=False)
343
314
  res = ''
344
315
  text = response_dict['objects_response']['text']
345
316
  skipped = []
346
317
  previous_line = None
347
- lines = []
348
318
  if 'text_layout' in text:
349
319
  for paragraph in text['text_layout']['paragraphs']:
350
320
  if previous_line:
@@ -360,38 +330,18 @@ class GoogleLens:
360
330
  if vertical_space > avg_height * 2:
361
331
  res += 'BLANK_LINE'
362
332
  for line in paragraph['lines']:
363
- # Build a list of word boxes for this line
364
- words_info = []
365
- for word in line['words']:
366
- word_info = {
367
- "word": word['plain_text'],
368
- "x1": int(word['geometry']['bounding_box']['center_x'] * img.width - (word['geometry']['bounding_box']['width'] * img.width) / 2),
369
- "y1": int(word['geometry']['bounding_box']['center_y'] * img.height - (word['geometry']['bounding_box']['height'] * img.height) / 2),
370
- "x2": int(word['geometry']['bounding_box']['center_x'] * img.width + (word['geometry']['bounding_box']['width'] * img.width) / 2),
371
- "y2": int(word['geometry']['bounding_box']['center_y'] * img.height + (word['geometry']['bounding_box']['height'] * img.height) / 2)
372
- }
373
- words_info.append(word_info)
374
-
375
- line_text = ''.join([w['word'] for w in words_info])
376
- line_box = {
377
- "sentence": line_text,
378
- "words": words_info
379
- }
380
-
381
- # Optionally apply furigana filter
382
333
  if furigana_filter_sensitivity:
383
- line_width = line['geometry']['bounding_box']['width'] * img.width
384
- line_height = line['geometry']['bounding_box']['height'] * img.height
385
- if furigana_filter_sensitivity < line_width and furigana_filter_sensitivity < line_height and self.regex.search(line_text):
386
- for w in words_info:
387
- res += w['word']
334
+ if furigana_filter_sensitivity < line['geometry']['bounding_box']['width'] * img.width and furigana_filter_sensitivity < line['geometry']['bounding_box']['height'] * img.height:
335
+ for word in line['words']:
336
+ res += word['plain_text'] + word['text_separator']
388
337
  else:
389
- skipped.extend([w['word'] for w in words_info])
338
+ skipped.append(word['plain_text'] for word in line['words'])
390
339
  continue
391
340
  else:
392
- for w in words_info:
393
- res += w['word']
394
- lines.append(line_box)
341
+ for word in line['words']:
342
+ res += word['plain_text'] + word['text_separator']
343
+ else:
344
+ continue
395
345
  previous_line = paragraph
396
346
  res += '\n'
397
347
  # logger.info(
@@ -434,11 +384,8 @@ class GoogleLens:
434
384
  # else:
435
385
  # continue
436
386
  # res += '\n'
437
-
438
- if return_coords:
439
- x = (True, res, lines)
440
- else:
441
- x = (True, res)
387
+
388
+ x = (True, res)
442
389
 
443
390
  # img.close()
444
391
  return x
@@ -865,7 +812,7 @@ class OneOCR:
865
812
 
866
813
  def __init__(self, config={}, lang='ja'):
867
814
  self.initial_lang = lang
868
- self.regex = get_regex(lang)
815
+ self.get_regex(lang)
869
816
  if sys.platform == 'win32':
870
817
  if int(platform.release()) < 10:
871
818
  logger.warning('OneOCR is not supported on Windows older than 10!')
@@ -887,11 +834,32 @@ class OneOCR:
887
834
  except:
888
835
  logger.warning('Error reading URL from config, OneOCR will not work!')
889
836
 
890
- def __call__(self, img, furigana_filter_sensitivity=0, sentence_to_check=None):
837
+ def get_regex(self, lang):
838
+ if lang == "ja":
839
+ self.regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
840
+ elif lang == "zh":
841
+ self.regex = re.compile(r'[\u4E00-\u9FFF]')
842
+ elif lang == "ko":
843
+ self.regex = re.compile(r'[\uAC00-\uD7AF]')
844
+ elif lang == "ar":
845
+ self.regex = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
846
+ elif lang == "ru":
847
+ self.regex = re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
848
+ elif lang == "el":
849
+ self.regex = re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
850
+ elif lang == "he":
851
+ self.regex = re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
852
+ elif lang == "th":
853
+ self.regex = re.compile(r'[\u0E00-\u0E7F]')
854
+ else:
855
+ self.regex = re.compile(
856
+ r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
857
+
858
+ def __call__(self, img, furigana_filter_sensitivity=0):
891
859
  lang = get_ocr_language()
892
860
  if lang != self.initial_lang:
893
861
  self.initial_lang = lang
894
- self.regex = get_regex(lang)
862
+ self.get_regex(lang)
895
863
  img, is_path = input_to_pil_image(img)
896
864
  if img.width < 51 or img.height < 51:
897
865
  new_width = max(img.width, 51)
@@ -911,6 +879,7 @@ class OneOCR:
911
879
  y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
912
880
  if x_coords and y_coords:
913
881
  crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
882
+
914
883
  # with open(os.path.join(get_temporary_directory(), 'oneocr_response.json'), 'w',
915
884
  # encoding='utf-8') as f:
916
885
  # json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
@@ -965,37 +934,8 @@ class OneOCR:
965
934
  # else:
966
935
  # continue
967
936
  # res += '\n'
968
- elif sentence_to_check:
969
- lines_to_build_area = []
970
- widths = []
971
- heights = []
972
- for line in ocr_resp['lines']:
973
- print(line['text'])
974
- if sentence_to_check in line['text'] or line['text'] in sentence_to_check or rapidfuzz.fuzz.partial_ratio(sentence_to_check, line['text']) > 50:
975
- lines_to_build_area.append(line)
976
- res += line['text']
977
- for word in line['words']:
978
- widths.append(word['bounding_rect']['x2'] - word['bounding_rect']['x1'])
979
- heights.append(word['bounding_rect']['y3'] - word['bounding_rect']['y1'])
980
-
981
- x_coords = [line['bounding_rect'][f'x{i}'] for line in lines_to_build_area for i in
982
- range(1, 5)]
983
- y_coords = [line['bounding_rect'][f'y{i}'] for line in lines_to_build_area for i in
984
- range(1, 5)]
985
- if widths:
986
- avg_width = sum(widths) / len(widths)
987
- if heights:
988
- avg_height = sum(heights) / len(heights)
989
- if x_coords and y_coords:
990
- crop_coords = (
991
- min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
992
937
  else:
993
- x_coords = [line['bounding_rect'][f'x{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
994
- y_coords = [line['bounding_rect'][f'y{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
995
- if x_coords and y_coords:
996
- crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
997
938
  res = ocr_resp['text']
998
-
999
939
  except RuntimeError as e:
1000
940
  return (False, e)
1001
941
  else:
@@ -1010,9 +950,10 @@ class OneOCR:
1010
950
  return (False, 'Unknown error!')
1011
951
 
1012
952
  res = res.json()['text']
953
+
1013
954
  x = (True, res, crop_coords)
1014
- if is_path:
1015
- img.close()
955
+
956
+ # img.close()
1016
957
  return x
1017
958
 
1018
959
  def _preprocess(self, img):
@@ -384,6 +384,7 @@ class TextFiltering:
384
384
  block_filtered = self.latin_extended_regex.findall(block)
385
385
  else:
386
386
  block_filtered = self.latin_extended_regex.findall(block)
387
+
387
388
  if block_filtered:
388
389
  orig_text_filtered.append(''.join(block_filtered))
389
390
  else:
@@ -547,39 +548,6 @@ class ScreenshotThread(threading.Thread):
547
548
  else:
548
549
  raise ValueError('Window capture is only currently supported on Windows and macOS')
549
550
 
550
- def __del__(self):
551
- if self.macos_window_tracker_instance:
552
- self.macos_window_tracker_instance.join()
553
- elif self.windows_window_tracker_instance:
554
- self.windows_window_tracker_instance.join()
555
-
556
- def setup_persistent_windows_window_tracker(self):
557
- global window_open
558
- window_open = False
559
- def setup_tracker():
560
- global window_open
561
- self.window_handle, window_title = self.get_windows_window_handle(self.screen_capture_window)
562
-
563
- if not self.window_handle:
564
- # print(f"Window '{screen_capture_window}' not found.")
565
- return
566
-
567
- set_dpi_awareness()
568
- window_open = True
569
- self.windows_window_tracker_instance = threading.Thread(target=self.windows_window_tracker)
570
- self.windows_window_tracker_instance.start()
571
- logger.opt(ansi=True).info(f'Selected window: {window_title}')
572
-
573
- while not terminated:
574
- if not window_open:
575
- try:
576
- setup_tracker()
577
- except ValueError as e:
578
- logger.error(f"Error setting up persistent windows window tracker: {e}")
579
- break
580
- time.sleep(5)
581
-
582
-
583
551
  def get_windows_window_handle(self, window_title):
584
552
  def callback(hwnd, window_title_part):
585
553
  window_title = win32gui.GetWindowText(hwnd)
@@ -602,7 +570,7 @@ class ScreenshotThread(threading.Thread):
602
570
 
603
571
  def windows_window_tracker(self):
604
572
  found = True
605
- while not terminated or window_open:
573
+ while not terminated:
606
574
  found = win32gui.IsWindow(self.window_handle)
607
575
  if not found:
608
576
  break
@@ -1118,11 +1086,10 @@ def signal_handler(sig, frame):
1118
1086
 
1119
1087
 
1120
1088
  def on_window_closed(alive):
1121
- global terminated, window_open
1089
+ global terminated
1122
1090
  if not (alive or terminated):
1123
1091
  logger.info('Window closed or error occurred, terminated!')
1124
- window_open = False
1125
- # terminated = True
1092
+ terminated = True
1126
1093
 
1127
1094
 
1128
1095
  def on_screenshot_combo():
@@ -15,6 +15,7 @@ from enum import Enum
15
15
  import toml
16
16
  from dataclasses_json import dataclass_json
17
17
 
18
+
18
19
  OFF = 'OFF'
19
20
  # VOSK = 'VOSK'
20
21
  SILERO = 'SILERO'
@@ -71,10 +72,232 @@ class Language(Enum):
71
72
  PORTUGUESE = "pt"
72
73
  HINDI = "hi"
73
74
  ARABIC = "ar"
75
+ TURKISH = "tr"
76
+ DUTCH = "nl"
77
+ SWEDISH = "sv"
78
+ FINNISH = "fi"
79
+ DANISH = "da"
80
+ NORWEGIAN = "no"
81
+
74
82
 
75
83
  AVAILABLE_LANGUAGES = [lang.value for lang in Language]
76
84
  AVAILABLE_LANGUAGES_DICT = {lang.value: lang for lang in Language}
77
85
 
86
+ class CommonLanguages(str, Enum):
87
+ """
88
+ An Enum of the world's most common languages, based on total speaker count.
89
+
90
+ The enum member is the common English name (e.g., ENGLISH) and its
91
+ value is the ISO 639-1 two-letter code (e.g., 'en').
92
+
93
+ Inheriting from `str` allows for direct comparison and use in functions
94
+ that expect a string, e.g., `CommonLanguages.FRENCH == 'fr'`.
95
+
96
+ This list is curated from Wikipedia's "List of languages by total number of speakers"
97
+ and contains over 200 entries to provide broad but practical coverage.
98
+ """
99
+ ENGLISH = 'en'
100
+ AFRIKAANS = 'af'
101
+ AKAN = 'ak'
102
+ ALBANIAN = 'sq'
103
+ ALGERIAN_SPOKEN_ARABIC = 'arq'
104
+ AMHARIC = 'am'
105
+ ARMENIAN = 'hy'
106
+ ASSAMESE = 'as'
107
+ BAMBARA = 'bm'
108
+ BASQUE = 'eu'
109
+ BELARUSIAN = 'be'
110
+ BENGALI = 'bn'
111
+ BHOJPURI = 'bho'
112
+ BOSNIAN = 'bs'
113
+ BODO = 'brx'
114
+ BULGARIAN = 'bg'
115
+ BURMESE = 'my'
116
+ CAPE_VERDEAN_CREOLE = 'kea'
117
+ CATALAN = 'ca'
118
+ CEBUANO = 'ceb'
119
+ CHHATTISGARHI = 'hns'
120
+ CHITTAGONIAN = 'ctg'
121
+ CROATIAN = 'hr'
122
+ CZECH = 'cs'
123
+ DANISH = 'da'
124
+ DECCAN = 'dcc'
125
+ DOGRI = 'doi'
126
+ DZONGKHA = 'dz'
127
+ DUTCH = 'nl'
128
+ EGYPTIAN_SPOKEN_ARABIC = 'arz'
129
+ ESTONIAN = 'et'
130
+ EWE = 'ee'
131
+ FAROESE = 'fo'
132
+ FIJIAN = 'fj'
133
+ FINNISH = 'fi'
134
+ FRENCH = 'fr'
135
+ GALICIAN = 'gl'
136
+ GAN_CHINESE = 'gan'
137
+ GEORGIAN = 'ka'
138
+ GERMAN = 'de'
139
+ GREEK = 'el'
140
+ GREENLANDIC = 'kl'
141
+ GUJARATI = 'gu'
142
+ HAITIAN_CREOLE = 'ht'
143
+ HAUSA = 'ha'
144
+ HAKKA_CHINESE = 'hak'
145
+ HARYANVI = 'bgc'
146
+ HEBREW = 'he'
147
+ HINDI = 'hi'
148
+ HUNGARIAN = 'hu'
149
+ ICELANDIC = 'is'
150
+ IGBO = 'ig'
151
+ INDONESIAN = 'id'
152
+ IRANIAN_PERSIAN = 'fa'
153
+ IRISH = 'ga'
154
+ ITALIAN = 'it'
155
+ JAVANESE = 'jv'
156
+ JAMAICAN_PATOIS = 'jam'
157
+ JAPANESE = 'ja'
158
+ KANNADA = 'kn'
159
+ KASHMIRI = 'ks'
160
+ KAZAKH = 'kk'
161
+ KHMER = 'km'
162
+ KONGO = 'kg'
163
+ KONKANI = 'kok'
164
+ KOREAN = 'ko'
165
+ KURDISH = 'kmr'
166
+ LAO = 'lo'
167
+ LATVIAN = 'lv'
168
+ LINGALA = 'ln'
169
+ LITHUANIAN = 'lt'
170
+ LUBA_KASAI = 'lua'
171
+ LUXEMBOURGISH = 'lb'
172
+ MACEDONIAN = 'mk'
173
+ MADURESE = 'mad'
174
+ MAGAHI = 'mag'
175
+ MAITHILI = 'mai'
176
+ MALAGASY = 'mg'
177
+ MALAYALAM = 'ml'
178
+ MALTESE = 'mt'
179
+ MANDARIN_CHINESE = 'zh'
180
+ MANIPURI = 'mni'
181
+ MARATHI = 'mr'
182
+ MAORI = 'mi'
183
+ MAURITIAN_CREOLE = 'mfe'
184
+ MIN_NAN_CHINESE = 'nan'
185
+ MINANGKABAU = 'min'
186
+ MONGOLIAN = 'mn'
187
+ MONTENEGRIN = 'cnr'
188
+ MOROCCAN_SPOKEN_ARABIC = 'ary'
189
+ NDEBELE = 'nr'
190
+ NEPALI = 'ne'
191
+ NIGERIAN_PIDGIN = 'pcm'
192
+ NORTHERN_KURDISH = 'kmr'
193
+ NORTHERN_PASHTO = 'pbu'
194
+ NORTHERN_UZBEK = 'uz'
195
+ NORWEGIAN = 'no'
196
+ ODIA = 'or'
197
+ PAPIAMENTO = 'pap'
198
+ POLISH = 'pl'
199
+ PORTUGUESE = 'pt'
200
+ ROMANIAN = 'ro'
201
+ RWANDA = 'rw'
202
+ RUSSIAN = 'ru'
203
+ SAMOAN = 'sm'
204
+ SANTALI = 'sat'
205
+ SARAIKI = 'skr'
206
+ SCOTTISH_GAELIC = 'gd'
207
+ SEYCHELLOIS_CREOLE = 'crs'
208
+ SERBIAN = 'sr'
209
+ SHONA = 'sn'
210
+ SINDHI = 'sd'
211
+ SINHALA = 'si'
212
+ SLOVAK = 'sk'
213
+ SLOVENIAN = 'sl'
214
+ SOMALI = 'so'
215
+ SOTHO = 'st'
216
+ SOUTH_AZERBAIJANI = 'azb'
217
+ SOUTHERN_PASHTO = 'ps'
218
+ SPANISH = 'es'
219
+ STANDARD_ARABIC = 'ar'
220
+ SUDANESE_SPOKEN_ARABIC = 'apd'
221
+ SUNDANESE = 'su'
222
+ SWAHILI = 'sw'
223
+ SWATI = 'ss'
224
+ SWEDISH = 'sv'
225
+ SYLHETI = 'syl'
226
+ TAGALOG = 'tl'
227
+ TAMIL = 'ta'
228
+ TELUGU = 'te'
229
+ THAI = 'th'
230
+ TIGRINYA = 'ti'
231
+ TIBETAN = 'bo'
232
+ TONGAN = 'to'
233
+ TSONGA = 'ts'
234
+ TSWANA = 'tn'
235
+ TWI = 'twi'
236
+ UKRAINIAN = 'uk'
237
+ URDU = 'ur'
238
+ UYGHUR = 'ug'
239
+ VENDA = 've'
240
+ VIETNAMESE = 'vi'
241
+ WELSH = 'cy'
242
+ WESTERN_PUNJABI = 'pnb'
243
+ WOLOF = 'wo'
244
+ WU_CHINESE = 'wuu'
245
+ XHOSA = 'xh'
246
+ YORUBA = 'yo'
247
+ YUE_CHINESE = 'yue'
248
+ ZULU = 'zu'
249
+
250
+
251
+ # Helper methods
252
+ @classmethod
253
+ def get_all_codes(cls) -> list[str]:
254
+ """Returns a list of all language codes (e.g., ['en', 'zh', 'hi'])."""
255
+ return [lang.value for lang in cls]
256
+
257
+ @classmethod
258
+ def get_all_names(cls) -> list[str]:
259
+ """Returns a list of all language names (e.g., ['ENGLISH', 'MANDARIN_CHINESE'])."""
260
+ return [lang.name for lang in cls]
261
+
262
+ @classmethod
263
+ def get_all_names_pretty(cls) -> list[str]:
264
+ """Returns a list of all language names formatted for display (e.g., ['English', 'Mandarin Chinese'])."""
265
+ return [lang.name.replace('_', ' ').title() for lang in cls]
266
+
267
+ @classmethod
268
+ def get_choices(cls) -> list[tuple[str, str]]:
269
+ """
270
+ Returns a list of (value, label) tuples for use in web framework
271
+ choice fields (e.g., Django, Flask).
272
+
273
+ Example: [('en', 'English'), ('zh', 'Mandarin Chinese')]
274
+ """
275
+ return [(lang.value, lang.name.replace('_', ' ').title()) for lang in cls]
276
+
277
+ # Method to lookup language by it's name
278
+ @classmethod
279
+ def from_name(cls, name: str) -> 'CommonLanguages':
280
+ """
281
+ Looks up a language by its name (e.g., 'ENGLISH') and returns the corresponding enum member.
282
+ Raises ValueError if not found.
283
+ """
284
+ try:
285
+ return cls[name]
286
+ except KeyError:
287
+ raise ValueError(f"Language '{name}' not found in CommonLanguages")
288
+
289
+ # Method to lookup language by its code
290
+ @classmethod
291
+ def from_code(cls, code: str) -> 'CommonLanguages':
292
+ """
293
+ Looks up a language by its code (e.g., 'en') and returns the corresponding enum member.
294
+ Raises ValueError if not found.
295
+ """
296
+ for lang in cls:
297
+ if lang.value == code:
298
+ return lang
299
+ raise ValueError(f"Language code '{code}' not found in CommonLanguages")
300
+
78
301
  @dataclass_json
79
302
  @dataclass
80
303
  class General:
@@ -86,6 +309,7 @@ class General:
86
309
  open_multimine_on_startup: bool = True
87
310
  texthook_replacement_regex: str = ""
88
311
  texthooker_port: int = 55000
312
+ native_language: str = CommonLanguages.ENGLISH.value
89
313
 
90
314
 
91
315
  @dataclass_json
@@ -283,6 +507,16 @@ class Ai:
283
507
  self.provider = AI_GEMINI
284
508
  if self.provider == 'groq':
285
509
  self.provider = AI_GROQ
510
+
511
+
512
+ # Experimental Features section, will change often
513
+ @dataclass_json
514
+ @dataclass
515
+ class WIP:
516
+ overlay_websocket_port: int = 55003
517
+ overlay_websocket_send: bool = False
518
+
519
+
286
520
 
287
521
  @dataclass_json
288
522
  @dataclass
@@ -300,6 +534,7 @@ class ProfileConfig:
300
534
  vad: VAD = field(default_factory=VAD)
301
535
  advanced: Advanced = field(default_factory=Advanced)
302
536
  ai: Ai = field(default_factory=Ai)
537
+ wip: WIP = field(default_factory=WIP)
303
538
 
304
539
 
305
540
  # This is just for legacy support
@@ -481,6 +716,7 @@ class Config:
481
716
  self.sync_shared_field(config, profile, "advanced")
482
717
  self.sync_shared_field(config, profile, "paths")
483
718
  self.sync_shared_field(config, profile, "obs")
719
+ self.sync_shared_field(config, profile, "wip")
484
720
  self.sync_shared_field(config.ai, profile.ai, "anki_field")
485
721
  self.sync_shared_field(config.ai, profile.ai, "provider")
486
722
  self.sync_shared_field(config.ai, profile.ai, "api_key")
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass
2
2
  from typing import Optional, List
3
+ from enum import Enum
3
4
 
4
5
  from dataclasses_json import dataclass_json
5
6
 
@@ -259,7 +259,7 @@ def clear_history():
259
259
  return jsonify({'message': 'History cleared successfully'}), 200
260
260
 
261
261
 
262
- async def add_event_to_texthooker(line: GameLine, boxes=None):
262
+ async def add_event_to_texthooker(line: GameLine):
263
263
  new_event = event_manager.add_gameline(line)
264
264
  await websocket_server_thread.send_text({
265
265
  'event': 'text_received',
@@ -268,6 +268,9 @@ async def add_event_to_texthooker(line: GameLine, boxes=None):
268
268
  })
269
269
  if get_config().advanced.plaintext_websocket_port:
270
270
  await plaintext_websocket_server_thread.send_text(line.text)
271
+
272
+
273
+ async def send_word_coordinates_to_overlay(boxes):
271
274
  if boxes and len(boxes) > 0 and overlay_server_thread:
272
275
  await overlay_server_thread.send_text(boxes)
273
276
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.12.0.dev2
3
+ Version: 2.12.0.dev3
4
4
  Summary: A tool for mining sentences from games. Update: Overlay?
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -1,7 +1,7 @@
1
1
  GameSentenceMiner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  GameSentenceMiner/anki.py,sha256=3BVFXAM7tpJAxHMbsMpnMHUoDfyqHQ1JSYJThW18QWA,16846
3
- GameSentenceMiner/config_gui.py,sha256=QTK1yBDcfHaIUR_JyekkRQY9CVI_rh3Cae0bi7lviIo,99198
4
- GameSentenceMiner/gametext.py,sha256=boj55Sf5spEHDKHh3uv5x3rzn-I1EuN8XZYXcqSiBF8,7503
3
+ GameSentenceMiner/config_gui.py,sha256=Zl4Ad46DYkb4NsBkxk05lKBswGQur7AbO1-WNwnDyHs,102899
4
+ GameSentenceMiner/gametext.py,sha256=0QbG9HuNjJtDi0TljeiTTHV4lTMxluvbsxZpysS8WsA,7713
5
5
  GameSentenceMiner/gsm.py,sha256=qVHxnvly-yJ85v9RAxsGN2MqZxU-C1JA5wSRxVxMPMg,24950
6
6
  GameSentenceMiner/obs.py,sha256=-5j4k1_sYYR1Lnbn9C-_yN9prqgGLICgx5l3uguv4xk,15917
7
7
  GameSentenceMiner/vad.py,sha256=zo9JpuEOCXczPXM-dq8lbr-zM-MPpfJ8aajggR3mKk4,18710
@@ -25,16 +25,16 @@ GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9
25
25
  GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
26
26
  GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
27
27
  GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
28
- GameSentenceMiner/owocr/owocr/ocr.py,sha256=JB9dfln9FJIWd9WaFIuOykAuQ656OY6-UJj500r-VQk,63154
29
- GameSentenceMiner/owocr/owocr/run.py,sha256=nkDpXICJCTKgJTS4MYRnaz-GYqAS-GskcSg1ZkGIRuE,67285
28
+ GameSentenceMiner/owocr/owocr/ocr.py,sha256=z0w7kcPjXvFabMQTWaQyiBehxmjeIVaS2p53yvFyPbg,59707
29
+ GameSentenceMiner/owocr/owocr/run.py,sha256=p7DBHTbhey1DeW1SRqNQ5-y3H4Cq2zoMPCMED5C0Rws,65945
30
30
  GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
31
31
  GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  GameSentenceMiner/util/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
33
- GameSentenceMiner/util/configuration.py,sha256=4VxVO_rOyhaSaZ9peIuJ0a_M04BfTfHMuACTWoRCJ9I,28954
33
+ GameSentenceMiner/util/configuration.py,sha256=r5zWr9dQxzRp2QRZeOdIa7E-21gCzXMCd3fiPpmtrNQ,35045
34
34
  GameSentenceMiner/util/electron_config.py,sha256=8LZwl-T_uF5z_ig-IZcm9QI-VKaD7zaHX9u6MaLYuo4,8648
35
35
  GameSentenceMiner/util/ffmpeg.py,sha256=t0tflxq170n8PZKkdw8fTZIUQfXD0p_qARa9JTdhBTc,21530
36
36
  GameSentenceMiner/util/gsm_utils.py,sha256=iRyLVcodMptRhkCzLf3hyqc6_RCktXnwApi6mLju6oQ,11565
37
- GameSentenceMiner/util/model.py,sha256=AaOzgqSbaN7yks_rr1dQpLQR45FpBYdoLebMbrIYm34,6638
37
+ GameSentenceMiner/util/model.py,sha256=hmA_seopP2bK40v9T4ulua9TrAeWtbkdCv-sTBPBQDk,6660
38
38
  GameSentenceMiner/util/notification.py,sha256=0OnEYjn3DUEZ6c6OtPjdVZe-DG-QSoMAl9fetjjCvNU,3874
39
39
  GameSentenceMiner/util/package.py,sha256=u1ym5z869lw5EHvIviC9h9uH97bzUXSXXA8KIn8rUvk,1157
40
40
  GameSentenceMiner/util/ss_selector.py,sha256=cbjMxiKOCuOfbRvLR_PCRlykBrGtm1LXd6u5czPqkmc,4793
@@ -49,7 +49,7 @@ GameSentenceMiner/util/downloader/download_tools.py,sha256=zR-aEHiFVkyo-9oPoSx6n
49
49
  GameSentenceMiner/util/downloader/oneocr_dl.py,sha256=EJbKISaZ9p2x9P4x0rpMM5nAInTTc9b7arraGBcd-SA,10381
50
50
  GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  GameSentenceMiner/web/service.py,sha256=S7bYf2kSk08u-8R9Qpv7piM-pxfFjYZUvU825xupmuI,5279
52
- GameSentenceMiner/web/texthooking_page.py,sha256=su58fY2PoVzXgduNngU9oIYh71Xqf7KUPACfMmCkNPc,17128
52
+ GameSentenceMiner/web/texthooking_page.py,sha256=uuPxVsyFxGrnNh-aM4VpuYYRTdDqjGfQT-gUD-G5aDM,17185
53
53
  GameSentenceMiner/web/static/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
54
  GameSentenceMiner/web/static/apple-touch-icon.png,sha256=OcMI8af_68DA_tweOsQ5LytTyMwm7-hPW07IfrOVgEs,46132
55
55
  GameSentenceMiner/web/static/favicon-96x96.png,sha256=lOePzjiKl1JY2J1kT_PMdyEnrlJmi5GWbmXJunM12B4,16502
@@ -64,9 +64,9 @@ GameSentenceMiner/web/templates/index.html,sha256=Gv3CJvNnhAzIVV_QxhNq4OD-pXDt1v
64
64
  GameSentenceMiner/web/templates/text_replacements.html,sha256=tV5c8mCaWSt_vKuUpbdbLAzXZ3ATZeDvQ9PnnAfqY0M,8598
65
65
  GameSentenceMiner/web/templates/utility.html,sha256=3flZinKNqUJ7pvrZk6xu__v67z44rXnaK7UTZ303R-8,16946
66
66
  GameSentenceMiner/wip/get_overlay_coords.py,sha256=yE8LzXlR-Sw3rz3N-judgQa5z4egJptfJ97KEvdmEH4,3189
67
- gamesentenceminer-2.12.0.dev2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
68
- gamesentenceminer-2.12.0.dev2.dist-info/METADATA,sha256=j50ckHGGV5kIbDY3wtzR0UpQ95ql3TwWcR6MKMgwvb4,7004
69
- gamesentenceminer-2.12.0.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
- gamesentenceminer-2.12.0.dev2.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
71
- gamesentenceminer-2.12.0.dev2.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
72
- gamesentenceminer-2.12.0.dev2.dist-info/RECORD,,
67
+ gamesentenceminer-2.12.0.dev3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
68
+ gamesentenceminer-2.12.0.dev3.dist-info/METADATA,sha256=RsMrLlOr1lSzBxen38wGLgT4YxnIMel83EE1vJX7t7A,7004
69
+ gamesentenceminer-2.12.0.dev3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
+ gamesentenceminer-2.12.0.dev3.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
71
+ gamesentenceminer-2.12.0.dev3.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
72
+ gamesentenceminer-2.12.0.dev3.dist-info/RECORD,,