GameSentenceMiner 2.18.6__py3-none-any.whl → 2.18.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@ from rapidfuzz import fuzz
 
 # Local application imports
 from GameSentenceMiner.ocr.gsm_ocr_config import set_dpi_awareness
-from GameSentenceMiner.util.configuration import OverlayEngine, get_config, get_temporary_directory, is_windows, is_beangate, logger
+from GameSentenceMiner.util.configuration import OverlayEngine, get_config, get_overlay_config, get_temporary_directory, is_windows, is_beangate, logger
 from GameSentenceMiner.util.electron_config import get_ocr_language
 from GameSentenceMiner.obs import get_screenshot_PIL
 from GameSentenceMiner.web.texthooking_page import send_word_coordinates_to_overlay
@@ -135,6 +135,7 @@ class OverlayProcessor:
         self.ready = False
         self.last_oneocr_result = None
         self.last_lens_result = None
+        self.current_task = None # Track current running task
 
         try:
             if self.config.overlay.websocket_port and all([GoogleLens, get_regex]):
@@ -163,8 +164,22 @@ class OverlayProcessor:
     async def find_box_and_send_to_overlay(self, sentence_to_check: str = None):
         """
         Sends the detected text boxes to the overlay via WebSocket.
+        Cancels any running OCR task before starting a new one.
         """
-        await self.find_box_for_sentence(sentence_to_check)
+        # Cancel any existing task
+        if self.current_task and not self.current_task.done():
+            self.current_task.cancel()
+            try:
+                await self.current_task
+            except asyncio.CancelledError:
+                logger.info("Previous OCR task was cancelled")
+
+        # Start new task
+        self.current_task = asyncio.create_task(self.find_box_for_sentence(sentence_to_check))
+        try:
+            await self.current_task
+        except asyncio.CancelledError:
+            logger.info("OCR task was cancelled")
         # logger.info(f"Sending {len(boxes)} boxes to overlay.")
         # await send_word_coordinates_to_overlay(boxes)
 
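The rewritten `find_box_and_send_to_overlay` is the standard asyncio "latest request wins" pattern: cancel the in-flight task, await it so its `CancelledError` is fully absorbed, then create and await the replacement. That lets a new hooked line interrupt a slow OCR pass instead of queueing behind it. A minimal standalone sketch of the same pattern (the `LatestOnlyRunner` name and structure are illustrative, not taken from the package):

```python
import asyncio

class LatestOnlyRunner:
    """Keeps at most one worker task alive; a new request supersedes the old one."""

    def __init__(self):
        self.current_task: asyncio.Task | None = None

    async def run(self, coro_factory):
        # Cancel and drain any in-flight task before starting a new one.
        if self.current_task and not self.current_task.done():
            self.current_task.cancel()
            try:
                await self.current_task
            except asyncio.CancelledError:
                pass  # expected: the previous task was superseded

        self.current_task = asyncio.create_task(coro_factory())
        try:
            return await self.current_task
        except asyncio.CancelledError:
            return None  # this run was itself superseded by a newer call
```

Awaiting the cancelled task before starting the next one matters: it guarantees the old pass has actually unwound before the new one touches shared state like `self.current_task`.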
@@ -191,38 +206,46 @@ class OverlayProcessor:
         # set_dpi_awareness()
         with mss.mss() as sct:
             monitors = sct.monitors[1:]
-            # return monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
-            if is_windows() and monitor_index == 0:
-                from ctypes import wintypes
-                import ctypes
-                # Get work area for primary monitor (ignores taskbar)
-                SPI_GETWORKAREA = 0x0030
-                rect = wintypes.RECT()
-                res = ctypes.windll.user32.SystemParametersInfoW(
-                    SPI_GETWORKAREA, 0, ctypes.byref(rect), 0
-                )
-                if not res:
-                    raise ctypes.WinError()
+            monitor = monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
+            # Return monitor but the Y is 1 less to avoid taskbar on Windows
+            return {
+                "left": monitor["left"],
+                "top": monitor["top"],
+                "width": monitor["width"],
+                "height": monitor["height"] - 1
+            }
+            # # return monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
+            # if is_windows() and monitor_index == 0:
+            #     from ctypes import wintypes
+            #     import ctypes
+            #     # Get work area for primary monitor (ignores taskbar)
+            #     SPI_GETWORKAREA = 0x0030
+            #     rect = wintypes.RECT()
+            #     res = ctypes.windll.user32.SystemParametersInfoW(
+            #         SPI_GETWORKAREA, 0, ctypes.byref(rect), 0
+            #     )
+            #     if not res:
+            #         raise ctypes.WinError()
 
-            return {
-                "left": rect.left,
-                "top": rect.top,
-                "width": rect.right - rect.left,
-                "height": rect.bottom - rect.top,
-            }
-            elif is_windows() and monitor_index > 0:
-                # Secondary monitors: just return with a guess of how tall the taskbar is
-                taskbar_height_guess = 48 # A common taskbar height, may vary
-                mon = monitors[monitor_index]
-                return {
-                    "left": mon["left"],
-                    "top": mon["top"],
-                    "width": mon["width"],
-                    "height": mon["height"] - taskbar_height_guess
-                }
-            else:
-                # For non-Windows systems or unspecified monitors, return the monitor area as-is
-                return monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
+            # return {
+            #     "left": rect.left,
+            #     "top": rect.top,
+            #     "width": rect.right - rect.left,
+            #     "height": rect.bottom - rect.top,
+            # }
+            # elif is_windows() and monitor_index > 0:
+            #     # Secondary monitors: just return with a guess of how tall the taskbar is
+            #     taskbar_height_guess = 48 # A common taskbar height, may vary
+            #     mon = monitors[monitor_index]
+            #     return {
+            #         "left": mon["left"],
+            #         "top": mon["top"],
+            #         "width": mon["width"],
+            #         "height": mon["height"] - taskbar_height_guess
+            #     }
+            # else:
+            #     # For non-Windows systems or unspecified monitors, return the monitor area as-is
+            #     return monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
 
 
     def _get_full_screenshot(self) -> Tuple[Image.Image | None, int, int]:
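This hunk retires the per-platform work-area logic (the Win32 `SPI_GETWORKAREA` query for the primary monitor, a 48 px taskbar guess for secondary ones) in favor of one rule everywhere: take the mss monitor rect and subtract one pixel from the height. Note the new inline comment says "the Y is 1 less", but the code actually shortens `height`; `top` is unchanged. To see what mss will report for each monitor on a given machine, a quick inspection sketch (assuming mss is installed):

```python
import mss

# Entry 0 of sct.monitors is the virtual bounding box of all displays;
# the real monitors start at index 1, matching sct.monitors[1:] above.
with mss.mss() as sct:
    for i, mon in enumerate(sct.monitors):
        label = "all displays" if i == 0 else f"monitor {i}"
        print(f"{label}: left={mon['left']} top={mon['top']} "
              f"width={mon['width']} height={mon['height']}")
```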
@@ -230,7 +253,8 @@ class OverlayProcessor:
         if not mss:
             raise RuntimeError("MSS screenshot library is not installed.")
         with mss.mss() as sct:
-            monitor = self.get_monitor_workarea(0) # Get primary monitor work area
+            logger.info(get_overlay_config())
+            monitor = self.get_monitor_workarea(get_overlay_config().monitor_to_capture) # Get primary monitor work area
             sct_img = sct.grab(monitor)
             img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
 
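The captured monitor now comes from `get_overlay_config().monitor_to_capture` instead of a hardcoded primary display (the trailing `# Get primary monitor work area` comment is now slightly stale, and the `logger.info(get_overlay_config())` line reads like debug output that shipped). The capture itself is the standard mss-to-Pillow idiom: grab the monitor rect, then reinterpret mss's raw BGRA buffer as an RGB image. Isolated for reference (a sketch assuming mss and Pillow; the `grab_monitor` helper is illustrative):

```python
import mss
from PIL import Image

def grab_monitor(index: int = 1) -> Image.Image:
    """Capture one monitor and convert mss's BGRA buffer to a PIL RGB image."""
    with mss.mss() as sct:
        sct_img = sct.grab(sct.monitors[index])
        # mss hands back raw BGRA bytes; the 'BGRX' raw mode tells Pillow
        # to read blue/green/red and skip the fourth (padding) byte.
        return Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
```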
@@ -281,19 +305,32 @@ class OverlayProcessor:
         return composite_img
 
     async def _do_work(self, sentence_to_check: str = None) -> Tuple[List[Dict[str, Any]], int]:
-        """The main OCR workflow."""
+        """The main OCR workflow with cancellation support."""
         if not self.lens:
             logger.error("OCR engines are not initialized. Cannot perform OCR for Overlay.")
             return []
 
         if get_config().overlay.scan_delay > 0:
-            await asyncio.sleep(get_config().overlay.scan_delay)
+            try:
+                await asyncio.sleep(get_config().overlay.scan_delay)
+            except asyncio.CancelledError:
+                logger.info("OCR task cancelled during scan delay")
+                raise
+
+        # Check for cancellation before taking screenshot
+        if asyncio.current_task().cancelled():
+            raise asyncio.CancelledError()
 
         # 1. Get screenshot
         full_screenshot, monitor_width, monitor_height = self._get_full_screenshot()
         if not full_screenshot:
             logger.warning("Failed to get a screenshot.")
             return []
+
+        # Check for cancellation after screenshot
+        if asyncio.current_task().cancelled():
+            raise asyncio.CancelledError()
+
         if self.oneocr:
             # 2. Use OneOCR to find general text areas (fast)
             res, text, oneocr_results, crop_coords_list = self.oneocr(
@@ -304,6 +341,10 @@ class OverlayProcessor:
                 furigana_filter_sensitivity=None, # Disable furigana filtering
             )
 
+            # Check for cancellation after OneOCR
+            if asyncio.current_task().cancelled():
+                raise asyncio.CancelledError()
+
             text_str = "".join([text for text in text if self.regex.match(text)])
 
             # RapidFuzz fuzzy match 90% to not send the same results repeatedly
 
@@ -325,6 +366,10 @@ class OverlayProcessor:
                 logger.info("Sent %d text boxes to overlay.", len(oneocr_results))
                 return
 
+            # Check for cancellation before creating composite image
+            if asyncio.current_task().cancelled():
+                raise asyncio.CancelledError()
+
             # 3. Create a composite image with only the detected text regions
             composite_image = self._create_composite_image(
                 full_screenshot,
@@ -335,6 +380,10 @@ class OverlayProcessor:
         else:
             composite_image = full_screenshot
 
+        # Check for cancellation before Google Lens processing
+        if asyncio.current_task().cancelled():
+            raise asyncio.CancelledError()
+
         # 4. Use Google Lens on the cleaner composite image for higher accuracy
         res = self.lens(
             composite_image,
@@ -342,6 +391,10 @@ class OverlayProcessor:
             furigana_filter_sensitivity=None # Disable furigana filtering
         )
 
+        # Check for cancellation after Google Lens
+        if asyncio.current_task().cancelled():
+            raise asyncio.CancelledError()
+
        if len(res) != 3:
            return
 
@@ -360,6 +413,10 @@ class OverlayProcessor:
         if not success or not coords:
             return
 
+        # Check for cancellation before final processing
+        if asyncio.current_task().cancelled():
+            raise asyncio.CancelledError()
+
         # 5. Process the high-accuracy results into the desired format
         extracted_data = self._extract_text_with_pixel_boxes(
             api_response=coords,
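Between the OCR stages, `_do_work` polls `asyncio.current_task().cancelled()` and re-raises `CancelledError`. A detail worth knowing when reading this pattern: `Task.cancelled()` only returns `True` once a task has already *finished* because of cancellation, so inside a still-running coroutine these polls normally stay `False`; asyncio actually delivers cancellation by raising `CancelledError` at the next `await`. The conventional explicit checkpoint therefore yields to the event loop, as in this sketch (not code from the package):

```python
import asyncio

async def checkpoint():
    """Let the event loop deliver a pending cancellation.

    If cancel() has been requested on this task, the zero-length sleep
    raises asyncio.CancelledError here rather than at some later await.
    """
    await asyncio.sleep(0)

async def do_work(stages):
    for stage in stages:
        stage()             # synchronous, uncancellable work (e.g. one OCR pass)
        await checkpoint()  # cancellation takes effect between stages
```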
GameSentenceMiner/vad.py CHANGED
@@ -103,6 +103,10 @@ class VADProcessor(ABC):
 
     def process_audio(self, input_audio, output_audio, game_line, text_mined):
         voice_activity = self._detect_voice_activity(input_audio, text_mined)
+        text_similarity = 0
+
+        if voice_activity and isinstance(voice_activity, tuple):
+            voice_activity, text_similarity = voice_activity
 
         if not voice_activity:
             logger.info("No voice activity detected in the audio.")
@@ -117,16 +121,17 @@ class VADProcessor(ABC):
         if 0 > audio_length - voice_activity[-1]['start'] + get_config().audio.beginning_offset:
             end_time = voice_activity[-2]['end']
 
-        # if detected text is much shorter than game_line.text, if no text, guess based on length
-        if 'text' in voice_activity[0]:
-            dectected_text = ''.join([item['text'] for item in voice_activity])
-            if game_line and game_line.text and len(dectected_text) < len(game_line.text) / 2:
-                logger.info(f"Detected text '{dectected_text}' is much shorter than expected '{game_line.text}', skipping.")
-                return VADResult(False, 0, 0, self.vad_system_name)
-        else:
-            if game_line and game_line.text and (end_time - start_time) < max(0.5, len(game_line.text) * 0.05):
-                logger.info(f"Detected audio length {end_time - start_time} is much shorter than expected for text '{game_line.text}', skipping.")
-                return VADResult(False, 0, 0, self.vad_system_name)
+        # if detected text is much shorter than game_line.text, if no text, guess based on length, only check if text_similarity is low
+        if text_similarity < 50:
+            if 'text' in voice_activity[0]:
+                detected_text = ''.join([item['text'] for item in voice_activity])
+                if game_line and game_line.text and len(detected_text) < len(game_line.text) / 4:
+                    logger.info(f"Detected text '{detected_text}' is much shorter than expected '{game_line.text}', skipping.")
+                    return VADResult(False, 0, 0, self.vad_system_name)
+            else:
+                if game_line and game_line.text and (end_time - start_time) < max(0.5, len(game_line.text) * 0.05):
+                    logger.info(f"Detected audio length {end_time - start_time} is much shorter than expected for text '{game_line.text}', skipping.")
+                    return VADResult(False, 0, 0, self.vad_system_name)
 
         if get_config().vad.cut_and_splice_segments:
             self.extract_audio_and_combine_segments(input_audio, voice_activity, output_audio, padding=get_config().vad.splice_padding)
@@ -185,13 +190,14 @@ class WhisperVADProcessor(VADProcessor):
         logger.debug(json.dumps(result.to_dict()))
 
         text = result.text.strip()
+        text_similarity = 0
 
         # If both mined text and Whisper transcription are available, compare their similarity
         if text_mined and text:
             from rapidfuzz import fuzz
             similarity = fuzz.partial_ratio(text_mined, text)
             logger.info(f"Whisper transcription: '{text}' | Mined text: '{text_mined}' | Partial similarity: {similarity:.1f}")
-            # If similarity is very low, treat as no voice activity detected
+            text_similarity = similarity
             if similarity < 20:
                 logger.info(f"Partial similarity {similarity:.1f} is below threshold, skipping voice activity.")
                 return []
@@ -247,7 +253,7 @@ class WhisperVADProcessor(VADProcessor):
 
             previous_segment = segment
         # Return the detected voice activity and the total duration
-        return voice_activity
+        return voice_activity, text_similarity
 
 # Add a new class for Vosk-based VAD
 # class VoskVADProcessor(VADProcessor):
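`WhisperVADProcessor._detect_voice_activity` now returns a `(voice_activity, text_similarity)` tuple, and `process_audio` unpacks it defensively with an `isinstance(..., tuple)` check so VAD backends that still return a bare list keep working. The score is RapidFuzz's `partial_ratio` on a 0-100 scale, which rates the best-matching substring alignment rather than the whole strings, so a transcription that merely *contains* the mined line still scores near 100; that is what makes it a reasonable gate (the too-short-text heuristic now only runs when the score is below 50). A small demonstration (assuming rapidfuzz is installed):

```python
from rapidfuzz import fuzz

mined = "こんにちは"
transcribed = "えーと、こんにちは。元気ですか"

# partial_ratio aligns the shorter string against the best window of the
# longer one, so containment scores 100 even though the full-string ratio
# is dragged down by the filler around the match.
print(fuzz.partial_ratio(mined, transcribed))  # 100.0
print(fuzz.ratio(mined, transcribed))          # well below 100
```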
@@ -9,6 +9,7 @@ from GameSentenceMiner.util import ffmpeg, notification
 from GameSentenceMiner.util.configuration import gsm_state, logger, get_config, get_temporary_directory
 from GameSentenceMiner.util.ffmpeg import get_video_timings
 from GameSentenceMiner.util.text_log import GameLine
+from GameSentenceMiner.util.audio_player import AudioPlayer
 
 
 def set_get_audio_from_video_callback(func):
@@ -16,41 +17,91 @@ def set_get_audio_from_video_callback(func):
     get_audio_from_video = func
 
 
+# Global audio player instance
+_audio_player = None
+
+
+def get_audio_player():
+    """Get or create the global audio player instance."""
+    global _audio_player
+    if _audio_player is None:
+        _audio_player = AudioPlayer(finished_callback=_on_audio_finished)
+    return _audio_player
+
+
+def _on_audio_finished():
+    """Callback when audio playback finishes."""
+    # Clear the current audio stream reference from gsm_state
+    gsm_state.current_audio_stream = None
+
+
+def stop_current_audio():
+    """Stop the currently playing audio."""
+    player = get_audio_player()
+    player.stop_audio()
+    gsm_state.current_audio_stream = None
+
+
+def play_audio_data_safe(data, samplerate):
+    """
+    Play audio data using the safe audio player.
+
+    Args:
+        data: Audio data as numpy array
+        samplerate: Sample rate of the audio
+
+    Returns:
+        True if playback started successfully, False otherwise
+    """
+    player = get_audio_player()
+    success = player.play_audio_data(data, samplerate)
+    if success:
+        # Store reference in gsm_state for compatibility
+        gsm_state.current_audio_stream = player.current_audio_stream
+    return success
+
+
 def handle_texthooker_button(video_path=''):
     try:
         if gsm_state.line_for_audio:
+            # Check if audio is currently playing and stop it
+            if gsm_state.current_audio_stream:
+                stop_current_audio()
+                gsm_state.line_for_audio = None
+                return
+
             line: GameLine = gsm_state.line_for_audio
             gsm_state.line_for_audio = None
+
             if line == gsm_state.previous_line_for_audio:
                 logger.info("Line is the same as the last one, skipping processing.")
-                if get_config().advanced.audio_player_path:
-                    play_audio_in_external(gsm_state.previous_audio)
-                elif get_config().advanced.video_player_path:
+                if get_config().advanced.video_player_path:
                     play_video_in_external(line, video_path)
                 else:
-                    import sounddevice as sd
-                    data, samplerate = gsm_state.previous_audio
-                    sd.play(data, samplerate)
-                    sd.wait()
+                    # Use cached audio data with safe playback
+                    if gsm_state.previous_audio:
+                        data, samplerate = gsm_state.previous_audio
+                        play_audio_data_safe(data, samplerate)
                 return
+
             gsm_state.previous_line_for_audio = line
-            if get_config().advanced.audio_player_path:
-                audio = get_audio_from_video(line, line.next.time if line.next else None, video_path,
-                                             temporary=True)
-                play_audio_in_external(audio)
-                gsm_state.previous_audio = audio
-            elif get_config().advanced.video_player_path:
+
+            if get_config().advanced.video_player_path:
                 play_video_in_external(line, video_path)
             else:
-                import sounddevice as sd
+                # Extract audio and play with safe method
                 import soundfile as sf
                 audio = get_audio_from_video(line, line.next.time if line.next else None, video_path,
                                              temporary=True)
                 data, samplerate = sf.read(audio)
-                sd.play(data, samplerate)
-                sd.wait()
-                gsm_state.previous_audio = (data, samplerate)
+                data = data.astype('float32')
+
+                # Use safe audio playback
+                success = play_audio_data_safe(data, samplerate)
+                if success:
+                    gsm_state.previous_audio = (data, samplerate)
             return
+
         if gsm_state.line_for_screenshot:
             line: GameLine = gsm_state.line_for_screenshot
             gsm_state.line_for_screenshot = None
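The new module itself (`GameSentenceMiner/util/audio_player.py`, per the import) is not shown in this diff, but the call sites fix its surface: a constructor taking `finished_callback`, `play_audio_data(data, samplerate)` returning a bool, `stop_audio()`, and a `current_audio_stream` attribute. The point of the change is visible in what was deleted: the old path called `sd.play(...)` followed by `sd.wait()`, which blocked until playback finished, whereas the new player must return immediately so a second button press can stop playback mid-line. A plausible shape for such a wrapper over sounddevice (purely a sketch of the inferred interface, not the package's actual implementation):

```python
import sounddevice as sd

class AudioPlayer:
    """Non-blocking playback that can be stopped from another call site."""

    def __init__(self, finished_callback=None):
        self.finished_callback = finished_callback
        self.current_audio_stream = None

    def play_audio_data(self, data, samplerate) -> bool:
        """Start playback without blocking; return True if it started."""
        try:
            self.stop_audio()            # never overlap two playbacks
            sd.play(data, samplerate)    # returns immediately (no sd.wait())
            self.current_audio_stream = sd.get_stream()
            return True
        except Exception:
            return False

    def stop_audio(self):
        """Stop any in-flight playback and fire the finished callback."""
        if self.current_audio_stream is not None:
            sd.stop()
            self.current_audio_stream = None
            if self.finished_callback:
                self.finished_callback()
```

This sketch only fires `finished_callback` on an explicit stop; detecting natural end-of-playback (so `_on_audio_finished` also runs when the clip simply ends) would need a stream-level completion hook such as `sounddevice.OutputStream`'s `finished_callback`, which the real class presumably wires up.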