GameSentenceMiner 2.8.25__py3-none-any.whl → 2.8.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
GameSentenceMiner/anki.py CHANGED
@@ -8,7 +8,7 @@ from datetime import datetime, timedelta
  from requests import post

  from GameSentenceMiner import obs, util, notification, ffmpeg
- from GameSentenceMiner.ai.ai_prompting import GeminiAI, get_ai_prompt_result
+ from GameSentenceMiner.ai.ai_prompting import get_ai_prompt_result
  from GameSentenceMiner.configuration import *
  from GameSentenceMiner.configuration import get_config
  from GameSentenceMiner.model import AnkiCard
@@ -28,7 +28,7 @@ card_queue = []


  def update_anki_card(last_note: AnkiCard, note=None, audio_path='', video_path='', tango='', reuse_audio=False,
-                      should_update_audio=True, ss_time=0, game_line=None, selected_lines=None):
+                      should_update_audio=True, ss_time=0, game_line=None, selected_lines=None, prev_ss_timing=0):
      global audio_in_anki, screenshot_in_anki, prev_screenshot_in_anki
      update_audio = should_update_audio and (get_config().anki.sentence_audio_field and not
                     last_note.get_field(get_config().anki.sentence_audio_field) or get_config().anki.overwrite_audio)
@@ -45,7 +45,11 @@ def update_anki_card(last_note: AnkiCard, note=None, audio_path='', video_path='
      if get_config().paths.remove_screenshot:
          os.remove(screenshot)
      if get_config().anki.previous_image_field:
-         prev_screenshot = ffmpeg.get_screenshot(video_path, ffmpeg.get_screenshot_time(video_path, selected_lines[0].prev if selected_lines else game_line.prev))
+         try:
+             prev_screenshot = ffmpeg.get_screenshot(video_path, prev_ss_timing)
+         except Exception as e:
+             logger.error(f"Error getting previous screenshot based on VAD, Falling back to previous logic: {e}")
+             prev_screenshot = ffmpeg.get_screenshot(video_path, ffmpeg.get_screenshot_time(video_path, selected_lines[0].prev if selected_lines else game_line.prev))
          prev_screenshot_in_anki = store_media_file(prev_screenshot)
          if get_config().paths.remove_screenshot:
              os.remove(prev_screenshot)
@@ -50,7 +50,7 @@ def get_screenshot_for_line(video_file, game_line):
      return get_screenshot(video_file, get_screenshot_time(video_file, game_line))


- def get_screenshot_time(video_path, game_line, default_beginning=False, vad_beginning=None, vad_end=None, doing_multi_line=False):
+ def get_screenshot_time(video_path, game_line, default_beginning=False, vad_result=None, doing_multi_line=False):
      if game_line:
          line_time = game_line.time
      else:
@@ -68,19 +68,19 @@ def get_screenshot_time(video_path, game_line, default_beginning=False, vad_begi
      screenshot_offset = get_config().screenshot.seconds_after_line

      # Calculate screenshot time from the beginning by adding the offset
-     if vad_beginning and vad_end and not doing_multi_line:
+     if vad_result and vad_result.success and not doing_multi_line:
          logger.debug("Using VAD to determine screenshot time")
-         screenshot_time_from_beginning = line_timestamp_in_video + vad_end - screenshot_offset
+         screenshot_time_from_beginning = line_timestamp_in_video + vad_result.end - 0.1
      elif get_config().screenshot.screenshot_timing_setting == "beginning":
          logger.debug("Using beginning of line for screenshot")
          screenshot_time_from_beginning = line_timestamp_in_video + screenshot_offset
      elif get_config().screenshot.screenshot_timing_setting == "middle":
          if game_line.next:
              logger.debug("Finding time between lines for screenshot")
-             screenshot_time_from_beginning = line_timestamp_in_video + ((game_line.next.time - game_line.time).total_seconds() / 2)
+             screenshot_time_from_beginning = line_timestamp_in_video + ((game_line.next.time - game_line.time).total_seconds() / 2) + screenshot_offset
          else:
              logger.debug("Using end of line for screenshot")
-             screenshot_time_from_beginning = file_length - screenshot_offset
+             screenshot_time_from_beginning = file_length - abs(screenshot_offset)
      elif get_config().screenshot.screenshot_timing_setting == "end":
          logger.debug("Using end of line for screenshot")
          if game_line.next:
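
For reference, the arithmetic of the new VAD branch above, with made-up numbers (only the vad_result.end - 0.1 rule comes from the hunk):

# Hypothetical values illustrating the VAD branch of get_screenshot_time.
line_timestamp_in_video = 12.0   # seconds into the replay where the mined line starts
vad_end = 3.2                    # end of detected speech relative to that point (vad_result.end)

# The screenshot is now taken 0.1 s before detected speech ends,
# rather than at line start plus seconds_after_line.
screenshot_time_from_beginning = line_timestamp_in_video + vad_end - 0.1
print(screenshot_time_from_beginning)  # 15.1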
GameSentenceMiner/gsm.py CHANGED
@@ -1,5 +1,7 @@
  import asyncio

+ from GameSentenceMiner.vad.result import VADResult
+
  try:
      import os.path
      import signal
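
The imported VADResult class is not shown in this diff; judging from how it is constructed (VADResult(False, 0, 0)) and consumed (.success, .end) in the hunks below, it is presumably shaped roughly like this sketch (an assumption, not the packaged implementation):

from dataclasses import dataclass

@dataclass
class VADResult:
    success: bool   # whether voice activity was detected
    start: float    # seconds from the start of the trimmed audio to the first speech
    end: float      # seconds from the start of the trimmed audio to the last speech

no_speech = VADResult(False, 0, 0)   # fallback used when audio processing is skipped
print(no_speech.success)             # False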
@@ -134,32 +136,33 @@ class VideoToAudioHandler(FileSystemEventHandler):

              if get_config().anki.sentence_audio_field and get_config().audio.enabled:
                  logger.debug("Attempting to get audio from video")
-                 final_audio_output, should_update_audio, vad_trimmed_audio, vad_beginning, vad_end = VideoToAudioHandler.get_audio(
+                 final_audio_output, vad_result, vad_trimmed_audio = VideoToAudioHandler.get_audio(
                      start_line,
                      line_cutoff,
                      video_path,
                      anki_card_creation_time)
              else:
                  final_audio_output = ""
-                 should_update_audio = False
+                 vad_result = VADResult(False, 0, 0)
                  vad_trimmed_audio = ""
-                 vad_beginning = 0
-                 vad_end = 0
                  if not get_config().audio.enabled:
                      logger.info("Audio is disabled in config, skipping audio processing!")
                  elif not get_config().anki.sentence_audio_field:
                      logger.info("No SentenceAudio Field in config, skipping audio processing!")

-             ss_timing = ffmpeg.get_screenshot_time(video_path, mined_line, vad_beginning=vad_beginning, vad_end=vad_end, doing_multi_line=bool(selected_lines))
+             ss_timing = ffmpeg.get_screenshot_time(video_path, mined_line, vad_result=vad_result, doing_multi_line=bool(selected_lines))
+             if get_config().anki.previous_image_field:
+                 prev_ss_timing = ffmpeg.get_screenshot_time(video_path, mined_line.prev, vad_result=VideoToAudioHandler.get_audio(mined_line.prev, mined_line.time, video_path, anki_card_creation_time=anki_card_creation_time, timing_only=True) ,doing_multi_line=bool(selected_lines))

              if get_config().anki.update_anki and last_note:
                  anki.update_anki_card(last_note, note, audio_path=final_audio_output, video_path=video_path,
                                        tango=tango,
-                                       should_update_audio=should_update_audio,
+                                       should_update_audio=vad_result.success,
                                        ss_time=ss_timing,
                                        game_line=start_line,
-                                       selected_lines=selected_lines)
-             elif get_config().features.notify_on_update and should_update_audio:
+                                       selected_lines=selected_lines,
+                                       prev_ss_timing=prev_ss_timing)
+             elif get_config().features.notify_on_update and vad_result.success:
                  notification.send_audio_generated_notification(vad_trimmed_audio)
          except Exception as e:
              logger.error(f"Failed Processing and/or adding to Anki: Reason {e}")
@@ -173,7 +176,7 @@ class VideoToAudioHandler(FileSystemEventHandler):


      @staticmethod
-     def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False):
+     def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False, timing_only=False):
          trimmed_audio = get_audio_and_trim(video_path, game_line, next_line_time, anki_card_creation_time)
          if temporary:
              return trimmed_audio
@@ -181,23 +184,23 @@ class VideoToAudioHandler(FileSystemEventHandler):
              f"{os.path.abspath(configuration.get_temporary_directory())}/{obs.get_current_game(sanitize=True)}.{get_config().audio.extension}")
          final_audio_output = make_unique_file_name(os.path.join(get_config().paths.audio_destination,
                                                                  f"{obs.get_current_game(sanitize=True)}.{get_config().audio.extension}"))
-         should_update_audio = True
-         vad_beginning, vad_end = 0, 0
+         result = VADResult(False, 0, 0)
          if get_config().vad.do_vad_postprocessing:
-             should_update_audio, vad_beginning, vad_end = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio, vad_trimmed_audio)
-             if not should_update_audio:
-                 should_update_audio, vad_beginning, vad_end = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio,
+             result = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio, vad_trimmed_audio)
+             if not result.success:
+                 result = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio,
                                             vad_trimmed_audio)
-             if not should_update_audio and get_config().vad.add_audio_on_no_results:
+             if not result.success and get_config().vad.add_audio_on_no_results:
                  logger.info("No voice activity detected, using full audio.")
                  vad_trimmed_audio = trimmed_audio
-                 should_update_audio = True
+         if timing_only:
+             return result
          if get_config().audio.ffmpeg_reencode_options and os.path.exists(vad_trimmed_audio):
              ffmpeg.reencode_file_with_user_config(vad_trimmed_audio, final_audio_output,
                                                    get_config().audio.ffmpeg_reencode_options)
          elif os.path.exists(vad_trimmed_audio):
              shutil.move(vad_trimmed_audio, final_audio_output)
-         return final_audio_output, should_update_audio, vad_trimmed_audio, vad_beginning, vad_end
+         return final_audio_output, result, vad_trimmed_audio


  def do_vad_processing(model, trimmed_audio, vad_trimmed_audio, second_pass=False):
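
A self-contained sketch of the refactored get_audio() control flow: VAD runs with one retry, and callers that only need timings (the prev_ss_timing lookup above) return early with just the VADResult. The run_vad callback and return values here are illustrative stand-ins, not the package's API:

from collections import namedtuple

VADResult = namedtuple("VADResult", "success start end")

def get_audio_sketch(trimmed_audio, run_vad, timing_only=False):
    result = run_vad(trimmed_audio)          # stand-in for do_vad_processing(...)
    if not result.success:
        result = run_vad(trimmed_audio)      # second pass, as in the hunk above
    if timing_only:
        return result                        # timing-only callers skip re-encoding entirely
    # ...the real method re-encodes or moves the trimmed audio here...
    return "final_audio.opus", result, trimmed_audio

print(get_audio_sketch("line.wav", lambda _: VADResult(True, 0.2, 2.8), timing_only=True))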
@@ -207,7 +207,6 @@ rectangles = None

  def do_second_ocr(ocr1_text, rectangle_index, time, img):
      global twopassocr, ocr2, last_ocr1_results, last_ocr2_results
-     last_result = ([], -1)
      try:
          orig_text, text = run.process_and_write_results(img, None, None, None, None,
                                                          engine=ocr2)
@@ -218,6 +217,7 @@ def do_second_ocr(ocr1_text, rectangle_index, time, img):
          img.save(os.path.join(get_temporary_directory(), "last_successful_ocr.png"))
          last_ocr2_results[rectangle_index] = text
          send_result(text, time)
+         img.close()
      except json.JSONDecodeError:
          print("Invalid JSON received.")
      except Exception as e:
@@ -4,16 +4,26 @@ import argparse
  import textwrap
  import urllib.request

+ def str2bool(value):
+     if value.lower() == 'true':
+         return True
+     elif value.lower() == 'false':
+         return False
+     else:
+         raise argparse.ArgumentTypeError('Boolean value expected.')
+
  parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
      Runs OCR in the background.
      It can read images copied to the system clipboard or placed in a directory, images sent via a websocket or a Unix domain socket, or directly capture a screen (or a portion of it) or a window.
-     Recognized texts can be either saved to system clipboard, appended to a text file or sent via a websocket.
+     Recognized text can be either saved to system clipboard, appended to a text file or sent via a websocket.
  '''))

  parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
-                     help='Specifies where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
+                     help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
+ parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS,
+                     help="Optional secondary source to read input images from. Same options as read_from, but they can't both be directory paths.")
  parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
-                     help='Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
+                     help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
  parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
                      help='OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
  parser.add_argument('-p', '--pause_at_startup', action='store_true', default=argparse.SUPPRESS,
@@ -23,21 +33,21 @@ parser.add_argument('-i', '--ignore_flag', action='store_true', default=argparse
  parser.add_argument('-d', '--delete_images', action='store_true', default=argparse.SUPPRESS,
                      help='Delete image files after processing when reading from a directory.')
  parser.add_argument('-n', '--notifications', action='store_true', default=argparse.SUPPRESS,
-                     help='Show an operating system notification with the detected text.')
+                     help='Show an operating system notification with the detected text. Will be ignored when reading with screen capture, unless screen_capture_combo is set.')
  parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
-                     help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture. 0 to disable.')
+                     help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. Will be ignored when reading with screen capture, unless screen_capture_combo is set. 0 to disable.')
  parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
-                     help='Specifies a combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
+                     help='Combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
  parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS,
-                     help='Specifies a combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
+                     help='Combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". To be used with combo_pause. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
  parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
-                     help='Specifies area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
+                     help='Area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
  parser.add_argument('-sd', '--screen_capture_delay_secs', type=float, default=argparse.SUPPRESS,
-                     help='Specifies the delay (in seconds) between screenshots when reading with screen capture.')
- parser.add_argument('-sw', '--screen_capture_only_active_windows', action='store_true', default=argparse.SUPPRESS,
-                     help="When reading with screen capture and screen_capture_area is a window name, specifies whether to only target the window while it's active.")
+                     help='Delay (in seconds) between screenshots when reading with screen capture.')
+ parser.add_argument('-sw', '--screen_capture_only_active_windows', type=str2bool, default=argparse.SUPPRESS,
+                     help="When reading with screen capture and screen_capture_area is a window name, only target the window while it's active.")
  parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
-                     help='When reading with screen capture, specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
+                     help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')

  class Config:
@@ -47,6 +57,7 @@ class Config:
      __engine_config = {}
      __default_config = {
          'read_from': 'clipboard',
+         'read_from_secondary': '',
          'write_to': 'clipboard',
          'engine': '',
          'pause_at_startup': False,
@@ -64,7 +75,8 @@ class Config:
          'screen_capture_area': '',
          'screen_capture_delay_secs': 3,
          'screen_capture_only_active_windows': True,
-         'screen_capture_combo': ''
+         'screen_capture_combo': '',
+         'screen_capture_old_macos_api': False
      }

      def __parse(self, value):
@@ -96,6 +96,22 @@ def post_process(text):
      return text


+ def input_to_pil_image(img):
+     if isinstance(img, Image.Image):
+         pil_image = img
+     elif isinstance(img, (bytes, bytearray)):
+         pil_image = Image.open(io.BytesIO(img))
+     elif isinstance(img, Path):
+         try:
+             pil_image = Image.open(img)
+             pil_image.load()
+         except (UnidentifiedImageError, OSError) as e:
+             return None
+     else:
+         raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
+     return pil_image
+
+
  def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
      if img_format == 'png' and optimized_png_encode and not optimize:
          raw_data = img.convert('RGBA').tobytes()
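
Every engine below now funnels its input through this helper, so __call__ accepts a PIL image, raw bytes, or a pathlib.Path and returns a (success, text) tuple. A short sketch of that calling convention (the in-memory PNG stands in for a real screenshot):

import io
from pathlib import Path
from PIL import Image

# A tiny in-memory PNG standing in for a captured frame.
buf = io.BytesIO()
Image.new('RGB', (32, 32), 'white').save(buf, format='PNG')
png_bytes = buf.getvalue()

# Any of these forms is accepted by the refactored engines:
candidates = [Image.open(io.BytesIO(png_bytes)), png_bytes, Path('screenshot.png')]

# engine = MangaOcr()               # or GoogleLens(), Bing(), ... from this module
# success, text = engine(candidates[0])
# if not success:
#     print('OCR failed:', text)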
@@ -157,15 +173,14 @@ class MangaOcr:
          self.available = True
          logger.info('Manga OCR ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          x = (True, self.model(img))
+
+         # img.close()
          return x

  class GoogleVision:
@@ -188,13 +203,10 @@ class GoogleVision:
          except:
              logger.warning('Error parsing Google credentials, Google Vision will not work!')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          image_bytes = self._preprocess(img)
          image = vision.Image(content=image_bytes)
@@ -207,6 +219,8 @@ class GoogleVision:
          texts = response.text_annotations
          res = texts[0].description if len(texts) > 0 else ''
          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -225,13 +239,10 @@ class GoogleLens:
          self.available = True
          logger.info('Google Lens ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          request = LensOverlayServerRequest()

@@ -298,6 +309,8 @@ class GoogleLens:
              res += '\n'

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -305,7 +318,9 @@ class GoogleLens:
          aspect_ratio = img.width / img.height
          new_w = int(sqrt(3000000 * aspect_ratio))
          new_h = int(new_w / aspect_ratio)
-         img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         # img.close()
+         img = img_resized

          return (pil_image_to_bytes(img), img.width, img.height)
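
The _preprocess change only moves the resized copy into a separate variable so the original handle could eventually be closed (the img.close() calls are left commented out); the target size is unchanged: roughly 3 megapixels at the source aspect ratio. A quick numeric check of that formula (example dimensions are arbitrary):

from math import sqrt

width, height = 1920, 1080                 # example input size
aspect_ratio = width / height
new_w = int(sqrt(3000000 * aspect_ratio))
new_h = int(new_w / aspect_ratio)

print(new_w, new_h, new_w * new_h)         # 2309 1298 2997082 (about 3 MP)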
 
@@ -323,13 +338,10 @@ class GoogleLensWeb:
          self.available = True
          logger.info('Google Lens (web) ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          url = 'https://lens.google.com/v3/upload'
          files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')}
@@ -393,6 +405,8 @@ class GoogleLensWeb:
              res += '\n'

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -400,7 +414,9 @@ class GoogleLensWeb:
          aspect_ratio = img.width / img.height
          new_w = int(sqrt(3000000 * aspect_ratio))
          new_h = int(new_w / aspect_ratio)
-         img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         # img.close()
+         img = img_resized

          return pil_image_to_bytes(img)

@@ -415,13 +431,10 @@ class Bing:
          self.available = True
          logger.info('Bing ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          img_bytes = self._preprocess(img)
          if not img_bytes:
@@ -515,6 +528,8 @@ class Bing:
              res += line['text'] + '\n'

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -526,7 +541,9 @@ class Bing:
          resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
          new_w = int(img.width * resize_factor)
          new_h = int(img.height * resize_factor)
-         img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         # img.close()
+         img = img_resized

          img_bytes, _ = limit_image_size(img, max_byte_size)

@@ -550,13 +567,10 @@ class AppleVision:
          self.available = True
          logger.info('Apple Vision ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          with objc.autorelease_pool():
              req = Vision.VNRecognizeTextRequest.alloc().init()
@@ -579,6 +593,7 @@ class AppleVision:
          else:
              x = (False, 'Unknown error!')

+         # img.close()
          return x

      def _preprocess(self, img):
@@ -631,13 +646,10 @@ class AppleLiveText:
          self.available = True
          logger.info('Apple Live Text ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          with objc.autorelease_pool():
              analyzer = self.VKCImageAnalyzer.alloc().init()
@@ -691,13 +703,10 @@ class WinRTOCR:
          except:
              logger.warning('Error reading URL from config, WinRT OCR will not work!')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          if sys.platform == 'win32':
              res = winocr.recognize_pil_sync(img, lang='ja')['text']
@@ -716,6 +725,8 @@ class WinRTOCR:
              res = res.json()['text']

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -749,13 +760,10 @@ class OneOCR:
          except:
              logger.warning('Error reading URL from config, OneOCR will not work!')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          if sys.platform == 'win32':
              try:
@@ -779,6 +787,8 @@ class OneOCR:
              res = res.json()['text']

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -802,13 +812,10 @@ class AzureImageAnalysis:
          except:
              logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          try:
              read_result = self.client.analyze(image_data=self._preprocess(img), visual_features=[VisualFeatures.READ])
@@ -826,6 +833,8 @@ class AzureImageAnalysis:
              return (False, 'Unknown error!')

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -833,7 +842,9 @@ class AzureImageAnalysis:
          resize_factor = max(50 / img.width, 50 / img.height)
          new_w = int(img.width * resize_factor)
          new_h = int(img.height * resize_factor)
-         img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         # img.close()
+         img = img_resized

          return pil_image_to_bytes(img)

@@ -853,13 +864,10 @@ class EasyOCR:
          self.available = True
          logger.info('EasyOCR ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          res = ''
          read_result = self.model.readtext(self._preprocess(img), detail=0)
@@ -867,6 +875,8 @@ class EasyOCR:
              res += text + '\n'

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -900,13 +910,10 @@ class RapidOCR:
          self.available = True
          logger.info('RapidOCR ready')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          res = ''
          read_results, elapsed = self.model(self._preprocess(img))
@@ -915,6 +922,8 @@ class RapidOCR:
              res += read_result[1] + '\n'

          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):
@@ -935,13 +944,10 @@ class OCRSpace:
          except:
              logger.warning('Error reading API key from config, OCRSpace will not work!')

-     def __call__(self, img_or_path):
-         if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
-             img = Image.open(img_or_path)
-         elif isinstance(img_or_path, Image.Image):
-             img = img_or_path
-         else:
-             raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+     def __call__(self, img):
+         img = input_to_pil_image(img)
+         if not img:
+             return (False, 'Invalid image provided')

          img_bytes, img_extension = self._preprocess(img)
          if not img_bytes:
@@ -972,6 +978,8 @@ class OCRSpace:

          res = res['ParsedResults'][0]['ParsedText']
          x = (True, res)
+
+         # img.close()
          return x

      def _preprocess(self, img):