GameSentenceMiner 2.11.8__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,10 +33,10 @@ TRANSLATION_PROMPT = f"""
33
33
  **Professional Game Localization Task**
34
34
 
35
35
  **Task Directive:**
36
- Translate ONLY the single line of game dialogue specified below into natural-sounding, context-aware English. The translation must preserve the original tone and intent of the character.
36
+ Translate ONLY the single line of game dialogue specified below into natural-sounding, context-aware {get_config().general.get_native_language_name()}. The translation must preserve the original tone and intent of the character.
37
37
 
38
38
  **Output Requirements:**
39
- - Provide only the single, best English translation.
39
+ - Provide only the single, best {get_config().general.get_native_language_name()} translation.
40
40
  - Use expletives if they are natural for the context and enhance the translation's impact, but do not over-exaggerate.
41
41
  - Preserve or add HTML tags (e.g., `<i>`, `<b>`) if appropriate for emphasis.
42
42
  - Do not include notes, alternatives, explanations, or any other surrounding text. Absolutely nothing but the translated line.
@@ -47,7 +47,7 @@ Translate ONLY the single line of game dialogue specified below into natural-sou
47
47
  CONTEXT_PROMPT = textwrap.dedent(f"""
48
48
 
49
49
  **Task Directive:**
50
- Provide a very brief summary of the scene in English based on the provided Japanese dialogue and context. Focus on the characters' actions and the immediate situation being described.
50
+ Provide a very brief summary of the scene in {get_config().general.get_native_language_name()} based on the provided Japanese dialogue and context. Focus on the characters' actions and the immediate situation being described.
51
51
 
52
52
  Current Sentence:
53
53
  """)
GameSentenceMiner/anki.py CHANGED
@@ -19,6 +19,7 @@ from GameSentenceMiner.util.model import AnkiCard
19
19
  from GameSentenceMiner.util.text_log import get_all_lines, get_text_event, get_mined_line, lines_match
20
20
  from GameSentenceMiner.obs import get_current_game
21
21
  from GameSentenceMiner.web import texthooking_page
22
+ import re
22
23
 
23
24
  # Global variables to track state
24
25
  previous_note_ids = set()
@@ -161,6 +162,35 @@ def get_initial_card_info(last_note: AnkiCard, selected_lines):
161
162
  game_line = get_text_event(last_note)
162
163
  sentences = []
163
164
  sentences_text = ''
165
+
166
+ # TODO: REMOVE THIS, I DON'T THINK IT'S NEEDED
167
+ if get_config().wip.overlay_websocket_send:
168
+ sentence_in_anki = last_note.get_field(get_config().anki.sentence_field).replace("\n", "").replace("\r", "").strip()
169
+ if lines_match(game_line.text, remove_html_and_cloze_tags(sentence_in_anki)):
170
+ logger.info("Found matching line in Anki, Preserving HTML and fix spacing!")
171
+ if "<b>" in sentence_in_anki:
172
+ text_inside_bold = re.findall(r'<b>(.*?)</b>', sentence_in_anki)
173
+ logger.info(text_inside_bold)
174
+ if text_inside_bold:
175
+ text = text_inside_bold[0].replace(" ", "").replace('\n', '').strip()
176
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_bold[0], f"<b>{text}</b>")
177
+ logger.info(f"Preserved bold Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
178
+ if "<i>" in sentence_in_anki:
179
+ text_inside_italic = re.findall(r'<i>(.*?)</i>', sentence_in_anki)
180
+ if text_inside_italic:
181
+ text = text_inside_italic[0].replace(" ", "").replace('\n', '').strip()
182
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_italic[0], f"<i>{text}</i>")
183
+ logger.info(f"Preserved italic Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
184
+ if "<u>" in sentence_in_anki:
185
+ text_inside_underline = re.findall(r'<u>(.*?)</u>', sentence_in_anki)
186
+ if text_inside_underline:
187
+ text = text_inside_underline[0].replace(" ", "").replace('\n', '').strip()
188
+ note['fields'][get_config().anki.sentence_field] = game_line.text.replace(text_inside_underline[0], f"<u>{text}</u>")
189
+ logger.info(f"Preserved underline Tag for Sentence: {note['fields'][get_config().anki.sentence_field]}")
190
+
191
+ if get_config().anki.sentence_field not in note['fields']:
192
+ logger.info("No HTML tags found to preserve, just fixing spacing")
193
+ note['fields'][get_config().anki.sentence_field] = game_line.text
164
194
  if selected_lines:
165
195
  try:
166
196
  sentence_in_anki = last_note.get_field(get_config().anki.sentence_field)
@@ -113,8 +113,9 @@ class ConfigApp:
113
113
  self.window.title('GameSentenceMiner Configuration')
114
114
  self.window.protocol("WM_DELETE_WINDOW", self.hide)
115
115
  self.obs_scene_listbox_changed = False
116
+ self.test_func = None
116
117
 
117
- self.window.geometry("800x700")
118
+ # self.window.geometry("800x500")
118
119
  self.current_row = 0
119
120
 
120
121
  self.master_config: Config = configuration.load_config()
@@ -137,6 +138,7 @@ class ConfigApp:
137
138
  self.profiles_tab = None
138
139
  self.ai_tab = None
139
140
  self.advanced_tab = None
141
+ self.wip_tab = None
140
142
 
141
143
  self.create_tabs()
142
144
 
@@ -158,7 +160,12 @@ class ConfigApp:
158
160
  "Saves Settings and Syncs CHANGED SETTINGS to all profiles.", row=0,
159
161
  column=2)
160
162
 
163
+ self.window.update_idletasks()
164
+ self.window.geometry("")
161
165
  self.window.withdraw()
166
+
167
+ def set_test_func(self, func):
168
+ self.test_func = func
162
169
 
163
170
  def create_tabs(self):
164
171
  self.create_general_tab()
@@ -172,6 +179,7 @@ class ConfigApp:
172
179
  self.create_profiles_tab()
173
180
  self.create_ai_tab()
174
181
  self.create_advanced_tab()
182
+ self.create_wip_tab()
175
183
 
176
184
  def add_reset_button(self, frame, category, row, column=0, recreate_tab=None):
177
185
  """
@@ -230,6 +238,7 @@ class ConfigApp:
230
238
  if self.window is not None:
231
239
  self.window.deiconify()
232
240
  self.window.lift()
241
+ self.window.update_idletasks()
233
242
  return
234
243
 
235
244
  def hide(self):
@@ -250,7 +259,8 @@ class ConfigApp:
250
259
  open_multimine_on_startup=self.open_multimine_on_startup.get(),
251
260
  texthook_replacement_regex=self.texthook_replacement_regex.get(),
252
261
  use_both_clipboard_and_websocket=self.use_both_clipboard_and_websocket.get(),
253
- texthooker_port=int(self.texthooker_port.get())
262
+ texthooker_port=int(self.texthooker_port.get()),
263
+ native_language=CommonLanguages.from_name(self.native_language.get()) if self.native_language.get() else CommonLanguages.ENGLISH.value,
254
264
  ),
255
265
  paths=Paths(
256
266
  folder_to_watch=self.folder_to_watch.get(),
@@ -362,6 +372,10 @@ class ConfigApp:
362
372
  use_canned_context_prompt=self.use_canned_context_prompt.get(),
363
373
  custom_prompt=self.custom_prompt.get("1.0", tk.END),
364
374
  dialogue_context_length=int(self.ai_dialogue_context_length.get()),
375
+ ),
376
+ wip=WIP(
377
+ overlay_websocket_port=int(self.overlay_websocket_port.get()),
378
+ overlay_websocket_send=self.overlay_websocket_send.get()
365
379
  )
366
380
  )
367
381
 
@@ -547,6 +561,15 @@ class ConfigApp:
547
561
  self.latest_version = ttk.Label(self.general_tab, text=get_latest_version(), bootstyle="secondary")
548
562
  self.latest_version.grid(row=self.current_row, column=1, sticky='W', pady=2)
549
563
  self.current_row += 1
564
+
565
+ # Native Language Selection
566
+ HoverInfoLabelWidget(self.general_tab, text="Native Language:",
567
+ tooltip="Select your native language. This is used for various features, but will not change the look of GSM.",
568
+ row=self.current_row, column=0)
569
+ self.native_language = ttk.Combobox(self.general_tab, values=CommonLanguages.get_all_names_pretty(), state="readonly")
570
+ self.native_language.set(CommonLanguages.from_code(self.settings.general.native_language).name.replace('_', ' ').title())
571
+ self.native_language.grid(row=self.current_row, column=1, sticky='EW', pady=2)
572
+ self.current_row += 1
550
573
 
551
574
  ttk.Label(self.general_tab, text="Indicates important/required settings.", foreground="dark orange",
552
575
  font=("Helvetica", 10, "bold")).grid(row=self.current_row, column=0, columnspan=2, sticky='W', pady=2)
@@ -561,6 +584,12 @@ class ConfigApp:
561
584
  text="Every Label in settings has a tooltip with more information if you hover over them.",
562
585
  font=("Helvetica", 10, "bold")).grid(row=self.current_row, column=0, columnspan=2, sticky='W', pady=2)
563
586
  self.current_row += 1
587
+
588
+ if is_beangate:
589
+ ttk.Button(self.general_tab, text="Run Function", command=self.test_func, bootstyle="info").grid(
590
+ row=self.current_row, column=0, pady=5
591
+ )
592
+ self.current_row += 1
564
593
 
565
594
  # Add Reset to Default button
566
595
  self.add_reset_button(self.general_tab, "general", self.current_row, column=0, recreate_tab=self.create_general_tab)
@@ -1674,6 +1703,54 @@ class ConfigApp:
1674
1703
  #
1675
1704
  #
1676
1705
  # help_frame.grid_columnconfigure(0, weight=1)
1706
+
1707
+ @new_tab
1708
+ def create_wip_tab(self):
1709
+ if self.wip_tab is None:
1710
+ self.wip_tab = ttk.Frame(self.notebook, padding=15)
1711
+ self.notebook.add(self.wip_tab, text='WIP')
1712
+ else:
1713
+ for widget in self.wip_tab.winfo_children():
1714
+ widget.destroy()
1715
+
1716
+ wip_frame = self.wip_tab
1717
+
1718
+ ttk.Label(wip_frame, text="Warning: These features are experimental and may not work as expected.",
1719
+ foreground="red", font=("Helvetica", 10, "bold")).grid(row=self.current_row, column=0, columnspan=2,
1720
+ sticky='W', pady=5)
1721
+
1722
+ self.current_row += 1
1723
+
1724
+ ttk.Label(wip_frame, text="Overlay requires OwOCR dependencies to be installed, and requires an external app to be running.",
1725
+ foreground="red", font=("Helvetica", 10, "bold")).grid(row=self.current_row, column=0, columnspan=2,
1726
+ sticky='W', pady=5)
1727
+
1728
+ self.current_row += 1
1729
+
1730
+ HoverInfoLabelWidget(wip_frame, text="Overlay WebSocket Port:",
1731
+ tooltip="Port for the overlay WebSocket communication. Used for experimental overlay features.",
1732
+ row=self.current_row, column=0)
1733
+ self.overlay_websocket_port = ttk.Entry(wip_frame)
1734
+ self.overlay_websocket_port.insert(0, str(self.settings.wip.overlay_websocket_port))
1735
+ self.overlay_websocket_port.grid(row=self.current_row, column=1, sticky='EW', pady=2)
1736
+ self.current_row += 1
1737
+
1738
+ HoverInfoLabelWidget(wip_frame, text="Overlay WebSocket Send:",
1739
+ tooltip="Enable to send overlay data via WebSocket. Experimental feature.",
1740
+ row=self.current_row, column=0)
1741
+ self.overlay_websocket_send = tk.BooleanVar(value=self.settings.wip.overlay_websocket_send)
1742
+ ttk.Checkbutton(wip_frame, variable=self.overlay_websocket_send, bootstyle="round-toggle").grid(
1743
+ row=self.current_row, column=1, sticky='W', pady=2)
1744
+ self.current_row += 1
1745
+
1746
+ self.add_reset_button(wip_frame, "wip", self.current_row, 0, self.create_wip_tab)
1747
+
1748
+ for col in range(2):
1749
+ wip_frame.grid_columnconfigure(col, weight=0)
1750
+ for row in range(self.current_row):
1751
+ wip_frame.grid_rowconfigure(row, minsize=30)
1752
+
1753
+ return wip_frame
1677
1754
 
1678
1755
  def on_profile_change(self, event):
1679
1756
  self.save_settings(profile_change=True)
@@ -2,13 +2,15 @@ import asyncio
2
2
  import re
3
3
 
4
4
  import pyperclip
5
+ import requests
5
6
  import websockets
6
7
  from websockets import InvalidStatus
7
8
 
8
9
  from GameSentenceMiner.util.gsm_utils import do_text_replacements, TEXT_REPLACEMENTS_FILE, run_new_thread
9
10
  from GameSentenceMiner.util.configuration import *
10
11
  from GameSentenceMiner.util.text_log import *
11
- from GameSentenceMiner.web.texthooking_page import add_event_to_texthooker
12
+ from GameSentenceMiner.web.texthooking_page import add_event_to_texthooker, send_word_coordinates_to_overlay, overlay_server_thread
13
+ from GameSentenceMiner.wip import get_overlay_coords
12
14
 
13
15
  current_line = ''
14
16
  current_line_after_regex = ''
@@ -123,6 +125,23 @@ async def handle_new_text_event(current_clipboard, line_time=None):
123
125
  add_line(current_line_after_regex, line_time)
124
126
  if len(get_text_log().values) > 0:
125
127
  await add_event_to_texthooker(get_text_log()[-1])
128
+ if get_config().wip.overlay_websocket_port and get_config().wip.overlay_websocket_send and overlay_server_thread.has_clients():
129
+ boxes = await find_box_for_sentence(current_line_after_regex)
130
+ if boxes:
131
+ await send_word_coordinates_to_overlay(boxes)
132
+
133
+
134
async def find_box_for_sentence(sentence):
    """Look up the overlay boxes for ``sentence`` and return them.

    Delegates the lookup to ``get_overlay_coords.find_box_for_sentence`` and
    logs both the request and the result. The font size returned alongside
    the boxes is only logged, not returned.

    :param sentence: Text to locate.
    :return: The ``boxes`` value produced by
        ``get_overlay_coords.find_box_for_sentence``.
    """
    logger.info(f"Finding Box for Sentence: {sentence}")
    boxes, font_size = await get_overlay_coords.find_box_for_sentence(sentence)
    logger.info(f"Found Boxes: {boxes}, Font Size: {font_size}")
    return boxes
126
145
 
127
146
  def reset_line_hotkey_pressed():
128
147
  global current_line_time
GameSentenceMiner/gsm.py CHANGED
@@ -545,6 +545,8 @@ def async_loop():
545
545
  await check_obs_folder_is_correct()
546
546
  logger.info("Post-Initialization started.")
547
547
  vad_processor.init()
548
+ # if is_beangate:
549
+ # await run_test_code()
548
550
 
549
551
  asyncio.run(loop())
550
552
 
@@ -577,6 +579,13 @@ async def register_scene_switcher_callback():
577
579
  update_icon()
578
580
 
579
581
  await obs.register_scene_change_callback(scene_switcher_callback)
582
+
583
+ async def run_test_code():
584
+ if get_config().wip.overlay_websocket_port and get_config().wip.overlay_websocket_send:
585
+ boxes = await gametext.find_box_for_sentence("ちぇっ少しなの?")
586
+ if boxes:
587
+ await texthooking_page.send_word_coordinates_to_overlay(boxes)
588
+ await asyncio.sleep(2)
580
589
 
581
590
  async def async_main(reloading=False):
582
591
  global root, settings_window
GameSentenceMiner/obs.py CHANGED
@@ -3,6 +3,8 @@ import os.path
3
3
  import subprocess
4
4
  import threading
5
5
  import time
6
+ from pprint import pprint
7
+
6
8
  import psutil
7
9
 
8
10
  import obsws_python as obs
@@ -356,9 +358,9 @@ def get_screenshot(compression=-1):
356
358
  logger.error(f"Error getting screenshot: {e}")
357
359
  return None
358
360
 
359
- def get_screenshot_base64(compression=0, width=None, height=None):
361
+ def get_screenshot_base64(compression=75, width=None, height=None):
360
362
  try:
361
- # update_current_game()
363
+ update_current_game()
362
364
  current_game = get_current_game()
363
365
  if not current_game:
364
366
  logger.error("No active game scene found.")
@@ -368,7 +370,11 @@ def get_screenshot_base64(compression=0, width=None, height=None):
368
370
  if not current_source_name:
369
371
  logger.error("No active source found in the current scene.")
370
372
  return None
373
+ # version = client.send("GetVersion", raw=True)
374
+ # pprint(version)
375
+ # responseraw = client.send("GetSourceScreenshot", {"sourceName": current_source_name, "imageFormat": "png", "imageWidth": width, "imageHeight": height, "compressionQuality": compression}, raw=True)
371
376
  response = client.get_source_screenshot(name=current_source_name, img_format='png', quality=compression, width=width, height=height)
377
+ # print(responseraw)
372
378
  if response and response.image_data:
373
379
  return response.image_data.split(',', 1)[-1] # Remove data:image/png;base64, prefix if present
374
380
  else:
@@ -428,5 +434,7 @@ def main():
428
434
 
429
435
  if __name__ == '__main__':
430
436
  logging.basicConfig(level=logging.INFO)
431
- main()
437
+ # main()
438
+ connect_to_obs_sync()
439
+ print(get_screenshot_base64(compression=75, width=1280, height=720))
432
440
 
@@ -6,13 +6,14 @@ from pathlib import Path
6
6
  import sys
7
7
  import platform
8
8
  import logging
9
- from math import sqrt
9
+ from math import sqrt, floor
10
10
  import json
11
11
  import base64
12
12
  from urllib.parse import urlparse, parse_qs
13
13
 
14
14
  import jaconv
15
15
  import numpy as np
16
+ import rapidfuzz.fuzz
16
17
  from PIL import Image
17
18
  from loguru import logger
18
19
  import requests
@@ -164,6 +165,28 @@ def limit_image_size(img, max_size):
164
165
  return False, ''
165
166
 
166
167
 
168
# Ordered (language code, character class) pairs. Each class matches a single
# character from the Unicode ranges associated with that language code.
_SCRIPT_CHAR_CLASSES = (
    ("ja", r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]'),
    ("zh", r'[\u4E00-\u9FFF]'),
    ("ko", r'[\uAC00-\uD7AF]'),
    ("ar", r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]'),
    ("ru", r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]'),
    ("el", r'[\u0370-\u03FF\u1F00-\u1FFF]'),
    ("he", r'[\u0590-\u05FF\uFB1D-\uFB4F]'),
    ("th", r'[\u0E00-\u0E7F]'),
)

# Fallback class used for every language code without a dedicated entry:
# ASCII letters plus the Latin-range code points listed below.
_LATIN_CHAR_CLASS = (
    r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]'
)


def get_regex(lang):
    """Return a compiled pattern matching one character of ``lang``'s script.

    :param lang: Language code such as ``"ja"``, ``"zh"`` or ``"ko"``.
    :return: A compiled ``re`` pattern for the matching entry, or the
        Latin-script pattern when ``lang`` has no dedicated entry.
    """
    char_class = next(
        (candidate for code, candidate in _SCRIPT_CHAR_CLASSES if lang == code),
        _LATIN_CHAR_CLASS,
    )
    return re.compile(char_class)
188
+
189
+
167
190
  class MangaOcr:
168
191
  name = 'mangaocr'
169
192
  readable_name = 'Manga OCR'
@@ -243,15 +266,20 @@ class GoogleLens:
243
266
  available = False
244
267
 
245
268
  def __init__(self, lang='ja'):
246
- self.kana_kanji_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
269
+ self.regex = get_regex(lang)
270
+ self.initial_lang = lang
247
271
  if 'betterproto' not in sys.modules:
248
272
  logger.warning('betterproto not available, Google Lens will not work!')
249
273
  else:
250
274
  self.available = True
251
275
  logger.info('Google Lens ready')
252
276
 
253
- def __call__(self, img, furigana_filter_sensitivity=0):
277
+ def __call__(self, img, furigana_filter_sensitivity=0, return_coords=False):
278
+ lang = get_ocr_language()
254
279
  img, is_path = input_to_pil_image(img)
280
+ if lang != self.initial_lang:
281
+ self.initial_lang = lang
282
+ self.regex = get_regex(lang)
255
283
  if not img:
256
284
  return (False, 'Invalid image provided')
257
285
 
@@ -309,12 +337,14 @@ class GoogleLens:
309
337
  response_proto = LensOverlayServerResponse().FromString(res.content)
310
338
  response_dict = response_proto.to_dict(betterproto.Casing.SNAKE)
311
339
 
312
- # with open(os.path.join(r"C:\Users\Beangate\GSM\Electron App\test", 'glens_response.json'), 'w', encoding='utf-8') as f:
313
- # json.dump(response_dict, f, indent=4, ensure_ascii=False)
340
+ if os.path.exists(r"C:\Users\Beangate\GSM\Electron App\test"):
341
+ with open(os.path.join(r"C:\Users\Beangate\GSM\Electron App\test", 'glens_response.json'), 'w', encoding='utf-8') as f:
342
+ json.dump(response_dict, f, indent=4, ensure_ascii=False)
314
343
  res = ''
315
344
  text = response_dict['objects_response']['text']
316
345
  skipped = []
317
346
  previous_line = None
347
+ lines = []
318
348
  if 'text_layout' in text:
319
349
  for paragraph in text['text_layout']['paragraphs']:
320
350
  if previous_line:
@@ -330,18 +360,38 @@ class GoogleLens:
330
360
  if vertical_space > avg_height * 2:
331
361
  res += 'BLANK_LINE'
332
362
  for line in paragraph['lines']:
363
+ # Build a list of word boxes for this line
364
+ words_info = []
365
+ for word in line['words']:
366
+ word_info = {
367
+ "word": word['plain_text'],
368
+ "x1": int(word['geometry']['bounding_box']['center_x'] * img.width - (word['geometry']['bounding_box']['width'] * img.width) / 2),
369
+ "y1": int(word['geometry']['bounding_box']['center_y'] * img.height - (word['geometry']['bounding_box']['height'] * img.height) / 2),
370
+ "x2": int(word['geometry']['bounding_box']['center_x'] * img.width + (word['geometry']['bounding_box']['width'] * img.width) / 2),
371
+ "y2": int(word['geometry']['bounding_box']['center_y'] * img.height + (word['geometry']['bounding_box']['height'] * img.height) / 2)
372
+ }
373
+ words_info.append(word_info)
374
+
375
+ line_text = ''.join([w['word'] for w in words_info])
376
+ line_box = {
377
+ "sentence": line_text,
378
+ "words": words_info
379
+ }
380
+
381
+ # Optionally apply furigana filter
333
382
  if furigana_filter_sensitivity:
334
- if furigana_filter_sensitivity < line['geometry']['bounding_box']['width'] * img.width and furigana_filter_sensitivity < line['geometry']['bounding_box']['height'] * img.height:
335
- for word in line['words']:
336
- res += word['plain_text'] + word['text_separator']
383
+ line_width = line['geometry']['bounding_box']['width'] * img.width
384
+ line_height = line['geometry']['bounding_box']['height'] * img.height
385
+ if furigana_filter_sensitivity < line_width and furigana_filter_sensitivity < line_height and self.regex.search(line_text):
386
+ for w in words_info:
387
+ res += w['word']
337
388
  else:
338
- skipped.append(word['plain_text'] for word in line['words'])
389
+ skipped.extend([w['word'] for w in words_info])
339
390
  continue
340
391
  else:
341
- for word in line['words']:
342
- res += word['plain_text'] + word['text_separator']
343
- else:
344
- continue
392
+ for w in words_info:
393
+ res += w['word']
394
+ lines.append(line_box)
345
395
  previous_line = paragraph
346
396
  res += '\n'
347
397
  # logger.info(
@@ -384,8 +434,11 @@ class GoogleLens:
384
434
  # else:
385
435
  # continue
386
436
  # res += '\n'
387
-
388
- x = (True, res)
437
+
438
+ if return_coords:
439
+ x = (True, res, lines)
440
+ else:
441
+ x = (True, res)
389
442
 
390
443
  # img.close()
391
444
  return x
@@ -812,7 +865,7 @@ class OneOCR:
812
865
 
813
866
  def __init__(self, config={}, lang='ja'):
814
867
  self.initial_lang = lang
815
- self.get_regex(lang)
868
+ self.regex = get_regex(lang)
816
869
  if sys.platform == 'win32':
817
870
  if int(platform.release()) < 10:
818
871
  logger.warning('OneOCR is not supported on Windows older than 10!')
@@ -834,32 +887,11 @@ class OneOCR:
834
887
  except:
835
888
  logger.warning('Error reading URL from config, OneOCR will not work!')
836
889
 
837
- def get_regex(self, lang):
838
- if lang == "ja":
839
- self.regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
840
- elif lang == "zh":
841
- self.regex = re.compile(r'[\u4E00-\u9FFF]')
842
- elif lang == "ko":
843
- self.regex = re.compile(r'[\uAC00-\uD7AF]')
844
- elif lang == "ar":
845
- self.regex = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
846
- elif lang == "ru":
847
- self.regex = re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
848
- elif lang == "el":
849
- self.regex = re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
850
- elif lang == "he":
851
- self.regex = re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
852
- elif lang == "th":
853
- self.regex = re.compile(r'[\u0E00-\u0E7F]')
854
- else:
855
- self.regex = re.compile(
856
- r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
857
-
858
- def __call__(self, img, furigana_filter_sensitivity=0):
890
+ def __call__(self, img, furigana_filter_sensitivity=0, sentence_to_check=None, return_coords=False):
859
891
  lang = get_ocr_language()
860
892
  if lang != self.initial_lang:
861
893
  self.initial_lang = lang
862
- self.get_regex(lang)
894
+ self.regex = get_regex(lang)
863
895
  img, is_path = input_to_pil_image(img)
864
896
  if img.width < 51 or img.height < 51:
865
897
  new_width = max(img.width, 51)
@@ -873,20 +905,18 @@ class OneOCR:
873
905
  if sys.platform == 'win32':
874
906
  try:
875
907
  ocr_resp = self.model.recognize_pil(img)
908
+ if os.path.exists(os.path.expanduser("~/GSM/temp")):
909
+ with open(os.path.join(os.path.expanduser("~/GSM/temp"), 'oneocr_response.json'), 'w',
910
+ encoding='utf-8') as f:
911
+ json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
876
912
  # print(json.dumps(ocr_resp))
877
913
  filtered_lines = [line for line in ocr_resp['lines'] if self.regex.search(line['text'])]
878
- x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
879
- y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
880
- if x_coords and y_coords:
881
- crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
882
-
883
- # with open(os.path.join(get_temporary_directory(), 'oneocr_response.json'), 'w',
884
- # encoding='utf-8') as f:
885
- # json.dump(ocr_resp, f, indent=4, ensure_ascii=False)
914
+ # logger.info(filtered_lines)
886
915
  res = ''
887
916
  skipped = []
917
+ boxes = []
888
918
  if furigana_filter_sensitivity > 0:
889
- for line in ocr_resp['lines']:
919
+ for line in filtered_lines:
890
920
  x1, x2, x3, x4 = line['bounding_rect']['x1'], line['bounding_rect']['x2'], \
891
921
  line['bounding_rect']['x3'], line['bounding_rect']['x4']
892
922
  y1, y2, y3, y4 = line['bounding_rect']['y1'], line['bounding_rect']['y2'], \
@@ -934,8 +964,46 @@ class OneOCR:
934
964
  # else:
935
965
  # continue
936
966
  # res += '\n'
967
+ elif sentence_to_check:
968
+ lines_to_build_area = []
969
+ widths = []
970
+ heights = []
971
+ for line in ocr_resp['lines']:
972
+ print(line['text'])
973
+ if sentence_to_check in line['text'] or line['text'] in sentence_to_check or rapidfuzz.fuzz.partial_ratio(sentence_to_check, line['text']) > 50:
974
+ lines_to_build_area.append(line)
975
+ res += line['text']
976
+ for word in line['words']:
977
+ widths.append(word['bounding_rect']['x2'] - word['bounding_rect']['x1'])
978
+ heights.append(word['bounding_rect']['y3'] - word['bounding_rect']['y1'])
979
+
980
+ x_coords = [line['bounding_rect'][f'x{i}'] for line in lines_to_build_area for i in
981
+ range(1, 5)]
982
+ y_coords = [line['bounding_rect'][f'y{i}'] for line in lines_to_build_area for i in
983
+ range(1, 5)]
984
+ if widths:
985
+ avg_width = sum(widths) / len(widths)
986
+ if heights:
987
+ avg_height = sum(heights) / len(heights)
988
+ if x_coords and y_coords:
989
+ crop_coords = (
990
+ min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
991
+ elif return_coords:
992
+ for line in filtered_lines:
993
+ for word in line['words']:
994
+ box = {
995
+ "text": word['text'],
996
+ "bounding_rect": word['bounding_rect']
997
+ }
998
+ boxes.append(box)
999
+ res = ocr_resp['text']
937
1000
  else:
1001
+ x_coords = [line['bounding_rect'][f'x{i}'] for line in filtered_lines for i in range(1, 5)]
1002
+ y_coords = [line['bounding_rect'][f'y{i}'] for line in filtered_lines for i in range(1, 5)]
1003
+ if x_coords and y_coords:
1004
+ crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
938
1005
  res = ocr_resp['text']
1006
+
939
1007
  except RuntimeError as e:
940
1008
  return (False, e)
941
1009
  else:
@@ -950,10 +1018,12 @@ class OneOCR:
950
1018
  return (False, 'Unknown error!')
951
1019
 
952
1020
  res = res.json()['text']
953
-
954
- x = (True, res, crop_coords)
955
-
956
- # img.close()
1021
+ if return_coords:
1022
+ x = (True, res, boxes)
1023
+ else:
1024
+ x = (True, res, crop_coords)
1025
+ if is_path:
1026
+ img.close()
957
1027
  return x
958
1028
 
959
1029
  def _preprocess(self, img):