GameSentenceMiner 2.17.7__py3-none-any.whl → 2.18.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of GameSentenceMiner might be problematic.

Files changed (76)
  1. GameSentenceMiner/ai/ai_prompting.py +6 -6
  2. GameSentenceMiner/anki.py +236 -152
  3. GameSentenceMiner/gametext.py +7 -4
  4. GameSentenceMiner/gsm.py +49 -10
  5. GameSentenceMiner/locales/en_us.json +7 -3
  6. GameSentenceMiner/locales/ja_jp.json +8 -4
  7. GameSentenceMiner/locales/zh_cn.json +8 -4
  8. GameSentenceMiner/obs.py +238 -59
  9. GameSentenceMiner/ocr/owocr_helper.py +1 -1
  10. GameSentenceMiner/tools/ss_selector.py +7 -8
  11. GameSentenceMiner/ui/__init__.py +0 -0
  12. GameSentenceMiner/ui/anki_confirmation.py +187 -0
  13. GameSentenceMiner/{config_gui.py → ui/config_gui.py} +100 -35
  14. GameSentenceMiner/ui/screenshot_selector.py +215 -0
  15. GameSentenceMiner/util/configuration.py +124 -22
  16. GameSentenceMiner/util/db.py +22 -13
  17. GameSentenceMiner/util/downloader/download_tools.py +2 -2
  18. GameSentenceMiner/util/ffmpeg.py +24 -30
  19. GameSentenceMiner/util/get_overlay_coords.py +34 -34
  20. GameSentenceMiner/util/gsm_utils.py +31 -1
  21. GameSentenceMiner/util/text_log.py +11 -9
  22. GameSentenceMiner/vad.py +31 -12
  23. GameSentenceMiner/web/database_api.py +742 -123
  24. GameSentenceMiner/web/static/css/dashboard-shared.css +241 -0
  25. GameSentenceMiner/web/static/css/kanji-grid.css +94 -2
  26. GameSentenceMiner/web/static/css/overview.css +850 -0
  27. GameSentenceMiner/web/static/css/popups-shared.css +126 -0
  28. GameSentenceMiner/web/static/css/shared.css +97 -0
  29. GameSentenceMiner/web/static/css/stats.css +192 -597
  30. GameSentenceMiner/web/static/js/anki_stats.js +6 -4
  31. GameSentenceMiner/web/static/js/database.js +209 -5
  32. GameSentenceMiner/web/static/js/goals.js +610 -0
  33. GameSentenceMiner/web/static/js/kanji-grid.js +267 -4
  34. GameSentenceMiner/web/static/js/overview.js +1176 -0
  35. GameSentenceMiner/web/static/js/shared.js +25 -0
  36. GameSentenceMiner/web/static/js/stats.js +154 -1459
  37. GameSentenceMiner/web/stats.py +2 -2
  38. GameSentenceMiner/web/templates/anki_stats.html +5 -0
  39. GameSentenceMiner/web/templates/components/kanji_grid/basic_kanji_book_bkb_v1_v2.json +17 -0
  40. GameSentenceMiner/web/templates/components/kanji_grid/duolingo_kanji.json +29 -0
  41. GameSentenceMiner/web/templates/components/kanji_grid/grade.json +17 -0
  42. GameSentenceMiner/web/templates/components/kanji_grid/hk_primary_learning.json +17 -0
  43. GameSentenceMiner/web/templates/components/kanji_grid/hkscs2016.json +13 -0
  44. GameSentenceMiner/web/templates/components/kanji_grid/hsk_levels.json +33 -0
  45. GameSentenceMiner/web/templates/components/kanji_grid/humanum_frequency_list.json +41 -0
  46. GameSentenceMiner/web/templates/components/kanji_grid/jis_levels.json +25 -0
  47. GameSentenceMiner/web/templates/components/kanji_grid/jlpt_level.json +29 -0
  48. GameSentenceMiner/web/templates/components/kanji_grid/jpdb_kanji_frequency_list.json +37 -0
  49. GameSentenceMiner/web/templates/components/kanji_grid/jpdbv2_kanji_frequency_list.json +161 -0
  50. GameSentenceMiner/web/templates/components/kanji_grid/jun_das_modern_chinese_character_frequency_list.json +13 -0
  51. GameSentenceMiner/web/templates/components/kanji_grid/kanji_in_context_revised_edition.json +37 -0
  52. GameSentenceMiner/web/templates/components/kanji_grid/kanji_kentei_level.json +61 -0
  53. GameSentenceMiner/web/templates/components/kanji_grid/mainland_china_elementary_textbook_characters.json +33 -0
  54. GameSentenceMiner/web/templates/components/kanji_grid/moe_way_quiz.json +47 -0
  55. GameSentenceMiner/web/templates/components/kanji_grid/official_kanji.json +25 -0
  56. GameSentenceMiner/web/templates/components/kanji_grid/remembering_the_kanji.json +25 -0
  57. GameSentenceMiner/web/templates/components/kanji_grid/standard_form_of_national_characters.json +25 -0
  58. GameSentenceMiner/web/templates/components/kanji_grid/table_of_general_standard_chinese_characters.json +21 -0
  59. GameSentenceMiner/web/templates/components/kanji_grid/the_kodansha_kanji_learners_course_klc.json +45 -0
  60. GameSentenceMiner/web/templates/components/kanji_grid/thousand_character_classic.json +13 -0
  61. GameSentenceMiner/web/templates/components/kanji_grid/wanikani_levels.json +249 -0
  62. GameSentenceMiner/web/templates/components/kanji_grid/words_hk_frequency_list.json +33 -0
  63. GameSentenceMiner/web/templates/components/navigation.html +3 -1
  64. GameSentenceMiner/web/templates/database.html +73 -1
  65. GameSentenceMiner/web/templates/goals.html +376 -0
  66. GameSentenceMiner/web/templates/index.html +13 -11
  67. GameSentenceMiner/web/templates/overview.html +416 -0
  68. GameSentenceMiner/web/templates/stats.html +46 -251
  69. GameSentenceMiner/web/texthooking_page.py +18 -0
  70. {gamesentenceminer-2.17.7.dist-info → gamesentenceminer-2.18.1.dist-info}/METADATA +5 -1
  71. gamesentenceminer-2.18.1.dist-info/RECORD +132 -0
  72. gamesentenceminer-2.17.7.dist-info/RECORD +0 -98
  73. {gamesentenceminer-2.17.7.dist-info → gamesentenceminer-2.18.1.dist-info}/WHEEL +0 -0
  74. {gamesentenceminer-2.17.7.dist-info → gamesentenceminer-2.18.1.dist-info}/entry_points.txt +0 -0
  75. {gamesentenceminer-2.17.7.dist-info → gamesentenceminer-2.18.1.dist-info}/licenses/LICENSE +0 -0
  76. {gamesentenceminer-2.17.7.dist-info → gamesentenceminer-2.18.1.dist-info}/top_level.txt +0 -0
GameSentenceMiner/util/ffmpeg.py CHANGED
@@ -5,23 +5,20 @@ import sys
 import tempfile
 import time
 from pathlib import Path
+import subprocess
+from pathlib import Path
+import shutil
+
 
 from GameSentenceMiner import obs
-from GameSentenceMiner.util.configuration import get_app_directory, is_windows, logger, get_config, \
+from GameSentenceMiner.ui.config_gui import ConfigApp
+from GameSentenceMiner.util.configuration import ffmpeg_base_command_list, get_ffprobe_path, logger, get_config, \
     get_temporary_directory, gsm_state, is_linux
 from GameSentenceMiner.util.gsm_utils import make_unique_file_name, get_file_modification_time
 from GameSentenceMiner.util import configuration
 from GameSentenceMiner.util.text_log import initial_time
 
 
-def get_ffmpeg_path():
-    return os.path.join(get_app_directory(), "ffmpeg", "ffmpeg.exe") if is_windows() else "ffmpeg"
-
-def get_ffprobe_path():
-    return os.path.join(get_app_directory(), "ffmpeg", "ffprobe.exe") if is_windows() else "ffprobe"
-
-ffmpeg_base_command_list = [get_ffmpeg_path(), "-hide_banner", "-loglevel", "error", '-nostdin']
-
 supported_formats = {
     'opus': 'libopus',
     'mp3': 'libmp3lame',
@@ -30,11 +27,6 @@ supported_formats = {
     'm4a': 'aac',
 }
 
-import subprocess
-from pathlib import Path
-import shutil
-
-
 def video_to_anim(
     input_path: str | Path,
     output_path: str | Path = None,
@@ -184,22 +176,24 @@ def call_frame_extractor(video_path, timestamp):
         str: The path of the selected image, or None on error.
     """
     try:
-        logger.info(' '.join([sys.executable, "-m", "GameSentenceMiner.tools.ss_selector", video_path, str(timestamp)]))
-
-        # Run the script using subprocess.run()
-        result = subprocess.run(
-            [sys.executable, "-m", "GameSentenceMiner.tools.ss_selector", video_path, str(timestamp), get_config().screenshot.screenshot_timing_setting],  # Use sys.executable
-            capture_output=True,
-            text=True,  # Get output as text
-            check=False  # Raise an exception for non-zero exit codes
-        )
-        if result.returncode != 0:
-            logger.error(f"Script failed with return code: {result.returncode}")
-            return None
-        logger.info(result)
-        # Print the standard output
-        logger.info(f"Frame extractor script output: {result.stdout.strip()}")
-        return result.stdout.strip()  # Return the output
+        config_app: ConfigApp = gsm_state.config_app
+        return config_app.show_screenshot_selector(video_path, timestamp, get_config().screenshot.screenshot_timing_setting)
+        # logger.info(' '.join([sys.executable, "-m", "GameSentenceMiner.tools.ss_selector", video_path, str(timestamp)]))
+
+        # # Run the script using subprocess.run()
+        # result = subprocess.run(
+        #     [sys.executable, "-m", "GameSentenceMiner.tools.ss_selector", video_path, str(timestamp), get_config().screenshot.screenshot_timing_setting],  # Use sys.executable
+        #     capture_output=True,
+        #     text=True,  # Get output as text
+        #     check=False  # Raise an exception for non-zero exit codes
+        # )
+        # if result.returncode != 0:
+        #     logger.error(f"Script failed with return code: {result.returncode}")
+        #     return None
+        # logger.info(result)
+        # # Print the standard output
+        # logger.info(f"Frame extractor script output: {result.stdout.strip()}")
+        # return result.stdout.strip()  # Return the output
 
     except subprocess.CalledProcessError as e:
         logger.error(f"Error calling script: {e}")
GameSentenceMiner/util/get_overlay_coords.py CHANGED
@@ -190,37 +190,38 @@ class OverlayProcessor:
         """
         with mss.mss() as sct:
             monitors = sct.monitors[1:]
-            if is_windows() and monitor_index == 0:
-                from ctypes import wintypes
-                import ctypes
-                # Get work area for primary monitor (ignores taskbar)
-                SPI_GETWORKAREA = 0x0030
-                rect = wintypes.RECT()
-                res = ctypes.windll.user32.SystemParametersInfoW(
-                    SPI_GETWORKAREA, 0, ctypes.byref(rect), 0
-                )
-                if not res:
-                    raise ctypes.WinError()
+            return monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
+            # if is_windows() and monitor_index == 0:
+            #     from ctypes import wintypes
+            #     import ctypes
+            #     # Get work area for primary monitor (ignores taskbar)
+            #     SPI_GETWORKAREA = 0x0030
+            #     rect = wintypes.RECT()
+            #     res = ctypes.windll.user32.SystemParametersInfoW(
+            #         SPI_GETWORKAREA, 0, ctypes.byref(rect), 0
+            #     )
+            #     if not res:
+            #         raise ctypes.WinError()
 
-                return {
-                    "left": rect.left,
-                    "top": rect.top,
-                    "width": rect.right - rect.left,
-                    "height": rect.bottom - rect.top,
-                }
-            elif is_windows() and monitor_index > 0:
-                # Secondary monitors: just return with a guess of how tall the taskbar is
-                taskbar_height_guess = 48  # A common taskbar height, may vary
-                mon = monitors[monitor_index]
-                return {
-                    "left": mon["left"],
-                    "top": mon["top"],
-                    "width": mon["width"],
-                    "height": mon["height"] - taskbar_height_guess
-                }
-            else:
-                # For non-Windows systems or unspecified monitors, return the monitor area as-is
-                return monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
+            # return {
+            #     "left": rect.left,
+            #     "top": rect.top,
+            #     "width": rect.right - rect.left,
+            #     "height": rect.bottom - rect.top,
+            # }
+            # elif is_windows() and monitor_index > 0:
+            #     # Secondary monitors: just return with a guess of how tall the taskbar is
+            #     taskbar_height_guess = 48  # A common taskbar height, may vary
+            #     mon = monitors[monitor_index]
+            #     return {
+            #         "left": mon["left"],
+            #         "top": mon["top"],
+            #         "width": mon["width"],
+            #         "height": mon["height"] - taskbar_height_guess
+            #     }
+            # else:
+            #     # For non-Windows systems or unspecified monitors, return the monitor area as-is
+            #     return monitors[monitor_index] if 0 <= monitor_index < len(monitors) else monitors[0]
 
 
     def _get_full_screenshot(self) -> Tuple[Image.Image | None, int, int]:
@@ -309,11 +310,9 @@ class OverlayProcessor:
 
         score = fuzz.ratio(text_str, self.last_oneocr_result)
         if score >= 80:
-            logger.info("OneOCR results are similar to the last results (score: %d). Skipping overlay update.", score)
             return
         self.last_oneocr_result = text_str
 
-        logger.info("Sending OneOCR results to overlay.")
         await send_word_coordinates_to_overlay(self._convert_oneocr_results_to_percentages(oneocr_results, monitor_width, monitor_height))
 
         # If User Home is beangate
@@ -322,7 +321,7 @@
             f.write(json.dumps(oneocr_results, ensure_ascii=False, indent=2))
 
         if get_config().overlay.engine == OverlayEngine.ONEOCR.value and self.oneocr:
-            logger.info("Using OneOCR results for overlay as configured.")
+            logger.info("Sent %d text boxes to overlay.", len(oneocr_results))
             return
 
         # 3. Create a composite image with only the detected text regions
@@ -371,8 +370,9 @@
             crop_height=composite_image.height,
             use_percentages=True
         )
-        logger.info("Sending Google Lens results to overlay.")
         await send_word_coordinates_to_overlay(extracted_data)
+
+        logger.info("Sent %d text boxes to overlay.", len(extracted_data))
 
     def _extract_text_with_pixel_boxes(
         self,
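The overlay hunks above hand OCR boxes to `_convert_oneocr_results_to_percentages` (and pass `use_percentages=True` on the Google Lens path) before sending them to the overlay, but the conversion itself is not part of this diff. As a rough illustration only, a pixel-to-percentage mapping for one box could look like the hypothetical helper below; the box layout and field names are assumptions, not the package's actual structures.

    def box_to_percentages(box: dict, monitor_width: int, monitor_height: int) -> dict:
        """Hypothetical sketch: scale absolute pixel coordinates into 0-100
        percentages of the captured monitor so the overlay stays resolution-independent."""
        return {
            "x1": box["x1"] / monitor_width * 100,
            "y1": box["y1"] / monitor_height * 100,
            "x2": box["x2"] / monitor_width * 100,
            "y2": box["y2"] / monitor_height * 100,
        }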
GameSentenceMiner/util/gsm_utils.py CHANGED
@@ -13,7 +13,7 @@ from pathlib import Path
 import requests
 from rapidfuzz import process
 
-from GameSentenceMiner.util.configuration import logger, get_config, get_app_directory
+from GameSentenceMiner.util.configuration import gsm_state, logger, get_config, get_app_directory, get_temporary_directory
 
 SCRIPTS_DIR = r"E:\Japanese Stuff\agent-v0.1.4-win32-x64\data\scripts"
 
@@ -22,6 +22,13 @@ def run_new_thread(func):
     thread.start()
     return thread
 
+def make_unique_temp_file(path):
+    path = Path(path)
+    current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
+    temp_dir = get_temporary_directory()
+    os.makedirs(temp_dir, exist_ok=True)
+    return str(Path(temp_dir) / f"{path.stem}_{current_time}{path.suffix}")
+
 def make_unique_file_name(path):
     path = Path(path)
     current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
@@ -258,6 +265,29 @@ TEXT_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'text_repla
 OCR_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'ocr_replacements.json')
 os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)
 
+
+def add_srt_line(line_time, new_line):
+    global srt_index
+    if get_config().features.generate_longplay and gsm_state.recording_started_time and new_line.prev:
+        logger.info(f"Adding SRT line {new_line.prev.text}... for longplay")
+        with open(gsm_state.current_srt, 'a', encoding='utf-8') as srt_file:
+            # Calculate start and end times for the previous line
+            prev_start_time = new_line.prev.time - gsm_state.recording_started_time
+            prev_end_time = (line_time if line_time else datetime.now()) - gsm_state.recording_started_time
+            # Format times as SRT timestamps (HH:MM:SS,mmm)
+            def format_srt_time(td, offset=0):
+                total_seconds = int(td.total_seconds()) + offset
+                hours = total_seconds // 3600
+                minutes = (total_seconds % 3600) // 60
+                seconds = total_seconds % 60
+                milliseconds = int(td.microseconds / 1000)
+                return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
+
+            srt_file.write(f"{gsm_state.srt_index}\n")
+            srt_file.write(f"{format_srt_time(prev_start_time)} --> {format_srt_time(prev_end_time, offset=-1)}\n")
+            srt_file.write(f"{new_line.prev.text}\n\n")
+            gsm_state.srt_index += 1
+
 # if not os.path.exists(OCR_REPLACEMENTS_FILE):
 #     url = "https://raw.githubusercontent.com/bpwhelan/GameSentenceMiner/refs/heads/main/electron-src/assets/ocr_replacements.json"
 #     try:
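For reference, the `format_srt_time` helper added above converts a `timedelta` into an SRT timestamp, and each `add_srt_line` call appends one numbered cue (index, time range, text, blank line). A minimal standalone check of the timestamp formatting, with an arbitrarily chosen duration:

    from datetime import timedelta

    def format_srt_time(td, offset=0):
        # Same logic as the helper introduced in the hunk above.
        total_seconds = int(td.total_seconds()) + offset
        hours = total_seconds // 3600
        minutes = (total_seconds % 3600) // 60
        seconds = total_seconds % 60
        milliseconds = int(td.microseconds / 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"

    print(format_srt_time(timedelta(minutes=1, seconds=2, milliseconds=345)))  # -> 00:01:02,345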
GameSentenceMiner/util/text_log.py CHANGED
@@ -89,11 +89,11 @@ class GameText:
             scene=gsm_state.current_game or ""
         )
         self.values_dict[line_id] = new_line
-        logger.debug(f"Adding line: {new_line}")
         self.game_line_index += 1
         if self.values:
             self.values[-1].next = new_line
         self.values.append(new_line)
+        return new_line
         # self.remove_old_events(datetime.now() - timedelta(minutes=10))
 
     def has_line(self, line_text) -> bool:
@@ -119,16 +119,17 @@ def strip_whitespace_and_punctuation(text: str) -> str:
     return re.sub(r'[\s 、。「」【】《》., ]', '', text).strip()
 
 
+# TODO See if partial_ratio is better than ratio
 def lines_match(texthooker_sentence, anki_sentence, similarity_threshold=80) -> bool:
     # Replace newlines, spaces, other whitespace characters, AND japanese punctuation
     texthooker_sentence = strip_whitespace_and_punctuation(texthooker_sentence)
     anki_sentence = strip_whitespace_and_punctuation(anki_sentence)
     similarity = rapidfuzz.fuzz.ratio(texthooker_sentence, anki_sentence)
-    logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
-    if texthooker_sentence in anki_sentence:
-        logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
-    elif anki_sentence in texthooker_sentence:
-        logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
+    # logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
+    # if texthooker_sentence in anki_sentence:
+    #     logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
+    # elif anki_sentence in texthooker_sentence:
+    #     logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
     return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence) or (similarity >= similarity_threshold)
 
 
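The TODO added above asks whether `rapidfuzz.fuzz.partial_ratio` would serve `lines_match` better than `fuzz.ratio`. The practical difference is that `partial_ratio` scores the best-matching substring, so a short hooked line embedded in a longer Anki sentence can still score near 100, while `ratio` is penalized by the length difference. A minimal comparison with made-up strings:

    from rapidfuzz import fuzz

    hooked = "こんにちは"
    anki = "こんにちは、世界。今日はいい天気ですね。"

    print(fuzz.ratio(hooked, anki))          # low score: penalized by the length difference
    print(fuzz.partial_ratio(hooked, anki))  # ~100: compares against the best-matching substring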
@@ -145,7 +146,8 @@ def get_text_event(last_note) -> GameLine:
     if not sentence:
         return lines[-1]
 
-    for line in reversed(lines):
+    # Check the last 50 lines for a match
+    for line in reversed(lines[-50:]):
         if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
             return line
 
@@ -181,7 +183,7 @@ def get_mined_line(last_note: AnkiCard, lines=None):
         raise Exception("No voicelines in GSM. GSM can only do work on text that has been sent to it since it started. If you are not getting any text into GSM, please check your setup/config.")
 
     sentence = last_note.get_field(get_config().anki.sentence_field)
-    for line in reversed(lines):
+    for line in reversed(lines[-50:]):
         if lines_match(line.get_stripped_text(), remove_html_and_cloze_tags(sentence)):
             return line
     return lines[-1]
@@ -199,7 +201,7 @@ def get_text_log() -> GameText:
     return game_log
 
 def add_line(current_line_after_regex, line_time):
-    game_log.add_line(current_line_after_regex, line_time)
+    return game_log.add_line(current_line_after_regex, line_time)
 
 def get_line_by_id(line_id: str) -> Optional[GameLine]:
     """
GameSentenceMiner/vad.py CHANGED
@@ -5,6 +5,7 @@ import shutil
 import tempfile
 import time
 import warnings
+import re
 
 from abc import abstractmethod, ABC
 from GameSentenceMiner.util import configuration, ffmpeg
@@ -35,26 +36,26 @@ class VADSystem:
         # if not self.groq:
         #     self.groq = GroqVADProcessor()
 
-    def trim_audio_with_vad(self, input_audio, output_audio, game_line):
+    def trim_audio_with_vad(self, input_audio, output_audio, game_line, full_text):
         if get_config().vad.do_vad_postprocessing:
-            result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line)
+            result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line, full_text)
             if not result.success and get_config().vad.backup_vad_model != configuration.OFF:
                 logger.info("No voice activity detected, using backup VAD model.")
-                result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line)
+                result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line, full_text)
             return result
 
-    def _do_vad_processing(self, model, input_audio, output_audio, game_line):
+    def _do_vad_processing(self, model, input_audio, output_audio, game_line, text_mined):
         match model:
             case configuration.OFF:
                 return VADResult(False, 0, 0, "OFF")
             case configuration.SILERO:
                 if not self.silero:
                     self.silero = SileroVADProcessor()
-                return self.silero.process_audio(input_audio, output_audio, game_line)
+                return self.silero.process_audio(input_audio, output_audio, game_line, text_mined)
             case configuration.WHISPER:
                 if not self.whisper:
                     self.whisper = WhisperVADProcessor()
-                return self.whisper.process_audio(input_audio, output_audio, game_line)
+                return self.whisper.process_audio(input_audio, output_audio, game_line, text_mined)
 
 # Base class for VAD systems
 class VADProcessor(ABC):
@@ -63,7 +64,7 @@ class VADProcessor(ABC):
         self.vad_system_name = None
 
     @abstractmethod
-    def _detect_voice_activity(self, input_audio):
+    def _detect_voice_activity(self, input_audio, text_mined):
        pass
 
     @staticmethod
@@ -100,8 +101,8 @@ class VADProcessor(ABC):
         shutil.move(files[0], output_audio)
 
 
-    def process_audio(self, input_audio, output_audio, game_line):
-        voice_activity = self._detect_voice_activity(input_audio)
+    def process_audio(self, input_audio, output_audio, game_line, text_mined):
+        voice_activity = self._detect_voice_activity(input_audio, text_mined)
 
         if not voice_activity:
             logger.info("No voice activity detected in the audio.")
@@ -140,7 +141,7 @@ class SileroVADProcessor(VADProcessor):
         self.vad_model = load_silero_vad()
         self.vad_system_name = SILERO
 
-    def _detect_voice_activity(self, input_audio):
+    def _detect_voice_activity(self, input_audio, text_mined):
         from silero_vad import read_audio, get_speech_timestamps
         temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
         ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
@@ -166,7 +167,7 @@ class WhisperVADProcessor(VADProcessor):
         logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
         return self.vad_model
 
-    def _detect_voice_activity(self, input_audio):
+    def _detect_voice_activity(self, input_audio, text_mined):
         from stable_whisper import WhisperResult
         # Convert the audio to 16kHz mono WAV, evidence https://discord.com/channels/1286409772383342664/1286518821913362445/1407017127529152533
         temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
@@ -178,10 +179,22 @@ class WhisperVADProcessor(VADProcessor):
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             result: WhisperResult = self.vad_model.transcribe(temp_wav, vad=True, language=get_config().vad.language, vad_filter=get_config().vad.use_vad_filter_for_whisper,
-                                                              temperature=0.0)
+                                                              temperature=0.0, chunk_length=60)
         voice_activity = []
 
         logger.debug(json.dumps(result.to_dict()))
+
+        text = result.text.strip()
+
+        # If both mined text and Whisper transcription are available, compare their similarity
+        if text_mined and text:
+            from rapidfuzz import fuzz
+            similarity = fuzz.partial_ratio(text_mined, text)
+            logger.info(f"Whisper transcription: '{text}' | Mined text: '{text_mined}' | Partial similarity: {similarity:.1f}")
+            # If similarity is very low, treat as no voice activity detected
+            if similarity < 20:
+                logger.info(f"Partial similarity {similarity:.1f} is below threshold, skipping voice activity.")
+                return []
 
         # Process the segments to extract tokens, timestamps, and confidence
         previous_segment = None
@@ -193,6 +206,12 @@
                 else:
                     logger.info(
                         "Unknown single character segment, not skipping, but logging, please report if this is a mistake: " + segment.text)
+
+                # Skip segments with excessive repeating sequences of at least 3 characters
+                match = re.search(r'(.{3,})\1{4,}', segment.text)
+                if match:
+                    logger.debug(f"Skipping segment with excessive repeating sequence (>=5): '{segment.text}' at {segment.start}-{segment.end}. Likely Hallucination.")
+                    continue
 
                 if segment.no_speech_prob and segment.no_speech_prob > 0.9:
                     logger.debug(f"Skipping segment with high no_speech_prob: {segment.no_speech_prob} for segment {segment.text} at {segment.start}-{segment.end}")