PyPI - GameSentenceMiner - Versions diffs - 2.10.17__py3-none-any.whl → 2.11.0__py3-none-any.whl - Mend

GameSentenceMiner 2.10.17__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

GameSentenceMiner/ocr/owocr_helper.py CHANGED Viewed

@@ -195,10 +195,10 @@ all_cords = None
 rectangles = None
 last_ocr2_result = []
-def do_second_ocr(ocr1_text, time, img, filtering, ignore_furigana_filter=False):
+def do_second_ocr(ocr1_text, time, img, filtering, ignore_furigana_filter=False, ignore_previous_result=False):
     global twopassocr, ocr2, last_ocr2_result
     try:
-        orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
+        orig_text, text = run.process_and_write_results(img, None, last_ocr2_result if not ignore_previous_result else None, filtering, None,
                                                         engine=ocr2, furigana_filter_sensitivity=furigana_filter_sensitivity if not ignore_furigana_filter else 0)
         if compare_ocr_results(last_ocr2_result, orig_text):
@@ -344,7 +344,8 @@ def run_oneocr(ocr_config: OCRConfig, rectangles):
                 gsm_ocr_config=ocr_config,
                 screen_capture_areas=screen_areas,
                 furigana_filter_sensitivity=furigana_filter_sensitivity,
-                screen_capture_combo=manual_ocr_hotkey if manual_ocr_hotkey and manual else None)
+                screen_capture_combo=manual_ocr_hotkey if manual_ocr_hotkey and manual else None,
+                keep_line_breaks=keep_newline)
     except Exception as e:
         logger.exception(f"Error running OneOCR: {e}")
     done = True
@@ -359,14 +360,14 @@ def add_ss_hotkey(ss_hotkey="ctrl+shift+g"):
     def capture():
         print("Taking screenshot...")
         img = cropper.run()
-        do_second_ocr("", datetime.now(), img, filtering, ignore_furigana_filter=True)
+        do_second_ocr("", datetime.now(), img, filtering, ignore_furigana_filter=True, ignore_previous_result=True)
     def capture_main_monitor():
         print("Taking screenshot of main monitor...")
         with mss.mss() as sct:
             main_monitor = sct.monitors[1] if len(sct.monitors) > 1 else sct.monitors[0]
             img = sct.grab(main_monitor)
             img_bytes = mss.tools.to_png(img.rgb, img.size)
-            do_second_ocr("", datetime.now(), img_bytes, filtering, ignore_furigana_filter=True)
+            do_second_ocr("", datetime.now(), img_bytes, filtering, ignore_furigana_filter=True, ignore_previous_result=True)
     hotkey_reg = None
     try:
         hotkey_reg = keyboard.add_hotkey(ss_hotkey, capture)
@@ -404,7 +405,7 @@ def set_force_stable_hotkey():
 if __name__ == "__main__":
     try:
-        global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config
+        global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline
         import sys
         import argparse
@@ -428,6 +429,7 @@ if __name__ == "__main__":
                             help="Optimize second scan by cropping based on first scan results")
         parser.add_argument("--use_window_for_config", action="store_true",
                             help="Use the specified window for loading OCR configuration")
+        parser.add_argument("--keep_newline", action="store_true", help="Keep new lines in OCR output")
         args = parser.parse_args()
@@ -446,6 +448,7 @@ if __name__ == "__main__":
         clipboard_output = args.clipboard_output
         optimize_second_scan = args.optimize_second_scan
         use_window_for_config = args.use_window_for_config
+        keep_newline = args.keep_newline
         window = None
         logger.info(f"Received arguments: {vars(args)}")

GameSentenceMiner/owocr/owocr/ocr.py CHANGED Viewed

@@ -91,8 +91,11 @@ def empty_post_process(text):
     return text
-def post_process(text):
-    text = ' '.join([''.join(i.split()) for i in text.splitlines()])
+def post_process(text, keep_blank_lines=False):
+    if keep_blank_lines:
+        text = '\n'.join([''.join(i.split()) for i in text.splitlines()])
+    else:
+        text = ''.join([''.join(i.split()) for i in text.splitlines()])
     text = text.replace('…', '...')
     text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
     text = jaconv.h2z(text, ascii=True, digit=True)
@@ -304,22 +307,42 @@ class GoogleLens:
         response_proto = LensOverlayServerResponse().FromString(res.content)
         response_dict = response_proto.to_dict(betterproto.Casing.SNAKE)
-        # with open(os.path.join(get_temporary_directory(), 'glens_response.json'), 'w', encoding='utf-8') as f:
-        #     json.dump(response_dict, f, indent=4, ensure_ascii=False)
+        with open(os.path.join(r"C:\Users\Beangate\GSM\Electron App\test", 'glens_response.json'), 'w', encoding='utf-8') as f:
+            json.dump(response_dict, f, indent=4, ensure_ascii=False)
         res = ''
         text = response_dict['objects_response']['text']
         skipped = []
-        if furigana_filter_sensitivity > 0:
-            if 'text_layout' in text:
-                for paragraph in text['text_layout']['paragraphs']:
-                    for line in paragraph['lines']:
+        previous_line = None
+        if 'text_layout' in text:
+            for paragraph in text['text_layout']['paragraphs']:
+                if previous_line:
+                    prev_bbox = previous_line['geometry']['bounding_box']
+                    curr_bbox = paragraph['geometry']['bounding_box']
+                    vertical_space = abs(curr_bbox['center_y'] - prev_bbox['center_y']) * img.height
+                    prev_height = prev_bbox['height'] * img.height
+                    current_height = curr_bbox['height'] * img.height
+                    avg_height = (prev_height + current_height) / 2
+                    # If vertical space is close to previous line's height, add a blank line
+                    # logger.info(f"Vertical space: {vertical_space}, Average height: {avg_height}")
+                    # logger.info(avg_height * 2)
+                    if vertical_space > avg_height * 2:
+                        logger.info('Adding blank line')
+                        res += 'BLANK_LINE'
+                for line in paragraph['lines']:
+                    if furigana_filter_sensitivity:
                         if furigana_filter_sensitivity < line['geometry']['bounding_box']['width'] * img.width and furigana_filter_sensitivity < line['geometry']['bounding_box']['height'] * img.height:
                             for word in line['words']:
                                 res += word['plain_text'] + word['text_separator']
                         else:
                             skipped.append(word['plain_text'] for word in line['words'])
                             continue
-                        res += '\n'
+                    else:
+                        for word in line['words']:
+                                res += word['plain_text'] + word['text_separator']
+                        else:
+                            continue
+                previous_line = paragraph
+                res += '\n'
             # logger.info(
             #     f"Skipped {len(skipped)} chars due to furigana filter sensitivity: {furigana_filter_sensitivity}")
             # widths = []
@@ -350,16 +373,16 @@ class GoogleLens:
             #             else:
             #                 continue
             #         res += '\n'
-        else:
-            if 'text_layout' in text:
-                paragraphs = text['text_layout']['paragraphs']
-                for paragraph in paragraphs:
-                    for line in paragraph['lines']:
-                        for word in line['words']:
-                                res += word['plain_text'] + word['text_separator']
-                        else:
-                            continue
-                    res += '\n'
+        # else:
+        #     if 'text_layout' in text:
+        #         paragraphs = text['text_layout']['paragraphs']
+        #         for paragraph in paragraphs:
+        #             for line in paragraph['lines']:
+        #                 for word in line['words']:
+        #                         res += word['plain_text'] + word['text_separator']
+        #                 else:
+        #                     continue
+        #             res += '\n'
         x = (True, res)

GameSentenceMiner/owocr/owocr/run.py CHANGED Viewed

@@ -353,7 +353,9 @@ class TextFiltering:
         orig_text_filtered = []
         for block in orig_text:
-            if lang == "ja":
+            if "BLANK_LINE" in block:
+                block_filtered = ["\n"]
+            elif lang == "ja":
                 block_filtered = self.kana_kanji_regex.findall(block)
             elif lang == "zh":
                 block_filtered = self.chinese_common_regex.findall(block)
@@ -394,7 +396,8 @@ class TextFiltering:
         new_blocks = []
         for idx, block in enumerate(orig_text):
             if orig_text_filtered[idx] and (orig_text_filtered[idx] not in last_text):
-                new_blocks.append(block)
+                new_blocks.append(str(block).strip().replace("BLANK_LINE", "\n"))
         final_blocks = []
         if self.accurate_filtering:
@@ -407,9 +410,10 @@ class TextFiltering:
         else:
             for block in new_blocks:
                 # This only filters out NON JA/ZH from text when lang is JA/ZH
-                if lang not in ["ja", "zh"] or self.classify(block)[0] in ['ja', 'zh']:
+                if lang not in ["ja", "zh"] or self.classify(block)[0] in ['ja', 'zh'] or block == "\n":
                     final_blocks.append(block)
         text = '\n'.join(final_blocks)
         return text, orig_text_filtered
@@ -937,7 +941,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
         if filtering:
             text, orig_text = filtering(text, last_result)
         if lang == "ja" or lang == "zh":
-            text = post_process(text)
+            text = post_process(text, keep_blank_lines=keep_new_lines)
         logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
         if notify and config.get_general('notifications'):
             notifier.send(title='owocr', message='Text recognized: ' + text)
@@ -999,6 +1003,7 @@ def run(read_from=None,
         ocr2=None,
         gsm_ocr_config=None,
         furigana_filter_sensitivity=None,
+        keep_line_breaks=False,
         ):
     """
     Japanese OCR client
@@ -1075,11 +1080,13 @@ def run(read_from=None,
     global engine_instances
     global engine_keys
     global lang
+    global keep_new_lines
     lang = language
     engine_instances = []
     config_engines = []
     engine_keys = []
     default_engine = ''
+    keep_new_lines = keep_line_breaks
     if len(config.get_general('engines')) > 0:
         for config_engine in config.get_general('engines').split(','):

GameSentenceMiner/util/configuration.py CHANGED Viewed

@@ -16,13 +16,13 @@ import toml
 from dataclasses_json import dataclass_json
 OFF = 'OFF'
-VOSK = 'VOSK'
+# VOSK = 'VOSK'
 SILERO = 'SILERO'
 WHISPER = 'WHISPER'
-GROQ = 'GROQ'
+# GROQ = 'GROQ'
-VOSK_BASE = 'BASE'
-VOSK_SMALL = 'SMALL'
+# VOSK_BASE = 'BASE'
+# VOSK_SMALL = 'SMALL'
 WHISPER_TINY = 'tiny'
 WHISPER_BASE = 'base'
@@ -33,6 +33,7 @@ WHISPER_TURBO = 'turbo'
 AI_GEMINI = 'Gemini'
 AI_GROQ = 'Groq'
+AI_LOCAL = 'Local'
 INFO = 'INFO'
 DEBUG = 'DEBUG'
@@ -219,7 +220,7 @@ class VAD:
     whisper_model: str = WHISPER_BASE
     do_vad_postprocessing: bool = True
     language: str = 'ja'
-    vosk_url: str = VOSK_BASE
+    # vosk_url: str = VOSK_BASE
     selected_vad_model: str = WHISPER
     backup_vad_model: str = SILERO
     trim_beginning: bool = False
@@ -234,11 +235,11 @@ class VAD:
     def is_whisper(self):
         return self.selected_vad_model == WHISPER or self.backup_vad_model == WHISPER
-    def is_vosk(self):
-        return self.selected_vad_model == VOSK or self.backup_vad_model == VOSK
+    # def is_vosk(self):
+    #     return self.selected_vad_model == VOSK or self.backup_vad_model == VOSK
-    def is_groq(self):
-        return self.selected_vad_model == GROQ or self.backup_vad_model == GROQ
+    # def is_groq(self):
+    #     return self.selected_vad_model == GROQ or self.backup_vad_model == GROQ
 @dataclass_json
@@ -266,6 +267,7 @@ class Ai:
     anki_field: str = ''
     provider: str = AI_GEMINI
     gemini_model: str = 'gemini-2.5-flash'
+    local_model: str = OFF
     groq_model: str = 'meta-llama/llama-4-scout-17b-16e-instruct'
     api_key: str = '' # Deprecated
     gemini_api_key: str = ''

GameSentenceMiner/util/text_log.py CHANGED Viewed

@@ -20,6 +20,7 @@ class GameLine:
     next: 'GameLine | None'
     index: int = 0
     scene: str = ""
+    TL: str = ""
     def get_previous_time(self):
         if self.prev:
@@ -31,6 +32,9 @@ class GameLine:
             return self.next.time
         return 0
+    def set_TL(self, tl: str):
+        self.TL = tl
     def __str__(self):
         return str({"text": self.text, "time": self.time})

GameSentenceMiner 2.10.17__py3-none-any.whl → 2.11.0__py3-none-any.whl

GameSentenceMiner 2.10.17__py3-none-any.whl → 2.11.0__py3-none-any.whl