PyPI - GameSentenceMiner - Versions diffs - 2.17.6__py3-none-any.whl → 2.18.0__py3-none-any.whl - Mend

GameSentenceMiner 2.17.6__py3-none-any.whl → 2.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

GameSentenceMiner/ai/ai_prompting.py +51 -51
GameSentenceMiner/anki.py +236 -152
GameSentenceMiner/gametext.py +7 -4
GameSentenceMiner/gsm.py +49 -10
GameSentenceMiner/locales/en_us.json +7 -3
GameSentenceMiner/locales/ja_jp.json +8 -4
GameSentenceMiner/locales/zh_cn.json +8 -4
GameSentenceMiner/obs.py +238 -59
GameSentenceMiner/ocr/owocr_helper.py +1 -1
GameSentenceMiner/tools/ss_selector.py +7 -8
GameSentenceMiner/ui/__init__.py +0 -0
GameSentenceMiner/ui/anki_confirmation.py +187 -0
GameSentenceMiner/{config_gui.py → ui/config_gui.py} +102 -37
GameSentenceMiner/ui/screenshot_selector.py +215 -0
GameSentenceMiner/util/configuration.py +124 -22
GameSentenceMiner/util/db.py +22 -13
GameSentenceMiner/util/downloader/download_tools.py +2 -2
GameSentenceMiner/util/ffmpeg.py +24 -30
GameSentenceMiner/util/get_overlay_coords.py +34 -34
GameSentenceMiner/util/gsm_utils.py +31 -1
GameSentenceMiner/util/text_log.py +11 -9
GameSentenceMiner/vad.py +31 -12
GameSentenceMiner/web/database_api.py +742 -123
GameSentenceMiner/web/static/css/dashboard-shared.css +241 -0
GameSentenceMiner/web/static/css/kanji-grid.css +94 -2
GameSentenceMiner/web/static/css/overview.css +850 -0
GameSentenceMiner/web/static/css/popups-shared.css +126 -0
GameSentenceMiner/web/static/css/shared.css +97 -0
GameSentenceMiner/web/static/css/stats.css +192 -597
GameSentenceMiner/web/static/js/anki_stats.js +6 -4
GameSentenceMiner/web/static/js/database.js +209 -5
GameSentenceMiner/web/static/js/goals.js +610 -0
GameSentenceMiner/web/static/js/kanji-grid.js +267 -4
GameSentenceMiner/web/static/js/overview.js +1176 -0
GameSentenceMiner/web/static/js/shared.js +25 -0
GameSentenceMiner/web/static/js/stats.js +154 -1459
GameSentenceMiner/web/stats.py +2 -2
GameSentenceMiner/web/templates/anki_stats.html +5 -0
GameSentenceMiner/web/templates/components/navigation.html +3 -1
GameSentenceMiner/web/templates/database.html +73 -1
GameSentenceMiner/web/templates/goals.html +376 -0
GameSentenceMiner/web/templates/index.html +13 -11
GameSentenceMiner/web/templates/overview.html +416 -0
GameSentenceMiner/web/templates/stats.html +46 -251
GameSentenceMiner/web/texthooking_page.py +18 -0
{gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/METADATA +5 -1
{gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/RECORD +51 -41
{gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/WHEEL +0 -0
{gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/entry_points.txt +0 -0
{gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/licenses/LICENSE +0 -0
{gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/top_level.txt +0 -0

GameSentenceMiner/util/text_log.py CHANGED Viewed

@@ -89,11 +89,11 @@ class GameText:
             scene=gsm_state.current_game or ""
         )
         self.values_dict[line_id] = new_line
-        logger.debug(f"Adding line: {new_line}")
         self.game_line_index += 1
         if self.values:
             self.values[-1].next = new_line
         self.values.append(new_line)
+        return new_line
         # self.remove_old_events(datetime.now() - timedelta(minutes=10))
     def has_line(self, line_text) -> bool:
@@ -119,16 +119,17 @@ def strip_whitespace_and_punctuation(text: str) -> str:
     return re.sub(r'[\s　、。「」【】《》., ]', '', text).strip()
+# TODO See if partial_ratio is better than ratio
 def lines_match(texthooker_sentence, anki_sentence, similarity_threshold=80) -> bool:
     # Replace newlines, spaces, other whitespace characters, AND japanese punctuation
     texthooker_sentence = strip_whitespace_and_punctuation(texthooker_sentence)
     anki_sentence = strip_whitespace_and_punctuation(anki_sentence)
     similarity = rapidfuzz.fuzz.ratio(texthooker_sentence, anki_sentence)
-    logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
-    if texthooker_sentence in anki_sentence:
-        logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
-    elif anki_sentence in texthooker_sentence:
-        logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
+    # logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
+    # if texthooker_sentence in anki_sentence:
+    #     logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
+    # elif anki_sentence in texthooker_sentence:
+    #     logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
     return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence) or (similarity >= similarity_threshold)
@@ -145,7 +146,8 @@ def get_text_event(last_note) -> GameLine:
     if not sentence:
         return lines[-1]
-    for line in reversed(lines):
+    # Check the last 50 lines for a match
+    for line in reversed(lines[-50:]):
         if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
             return line
@@ -181,7 +183,7 @@ def get_mined_line(last_note: AnkiCard, lines=None):
         raise Exception("No voicelines in GSM. GSM can only do work on text that has been sent to it since it started. If you are not getting any text into GSM, please check your setup/config.")
     sentence = last_note.get_field(get_config().anki.sentence_field)
-    for line in reversed(lines):
+    for line in reversed(lines[-50:]):
         if lines_match(line.get_stripped_text(), remove_html_and_cloze_tags(sentence)):
             return line
     return lines[-1]
@@ -199,7 +201,7 @@ def get_text_log() -> GameText:
     return game_log
 def add_line(current_line_after_regex, line_time):
-    game_log.add_line(current_line_after_regex, line_time)
+    return game_log.add_line(current_line_after_regex, line_time)
 def get_line_by_id(line_id: str) -> Optional[GameLine]:
     """

GameSentenceMiner/vad.py CHANGED Viewed

@@ -5,6 +5,7 @@ import shutil
 import tempfile
 import time
 import warnings
+import re
 from abc import abstractmethod, ABC
 from GameSentenceMiner.util import configuration, ffmpeg
@@ -35,26 +36,26 @@ class VADSystem:
         #     if not self.groq:
         #         self.groq = GroqVADProcessor()
-    def trim_audio_with_vad(self, input_audio, output_audio, game_line):
+    def trim_audio_with_vad(self, input_audio, output_audio, game_line, full_text):
         if get_config().vad.do_vad_postprocessing:
-            result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line)
+            result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line, full_text)
             if not result.success and get_config().vad.backup_vad_model != configuration.OFF:
                 logger.info("No voice activity detected, using backup VAD model.")
-                result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line)
+                result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line, full_text)
             return result
-    def _do_vad_processing(self, model, input_audio, output_audio, game_line):
+    def _do_vad_processing(self, model, input_audio, output_audio, game_line, text_mined):
         match model:
             case configuration.OFF:
                 return VADResult(False, 0, 0, "OFF")
             case configuration.SILERO:
                 if not self.silero:
                     self.silero = SileroVADProcessor()
-                return self.silero.process_audio(input_audio, output_audio, game_line)
+                return self.silero.process_audio(input_audio, output_audio, game_line, text_mined)
             case configuration.WHISPER:
                 if not self.whisper:
                     self.whisper = WhisperVADProcessor()
-                return self.whisper.process_audio(input_audio, output_audio, game_line)
+                return self.whisper.process_audio(input_audio, output_audio, game_line, text_mined)
 # Base class for VAD systems
 class VADProcessor(ABC):
@@ -63,7 +64,7 @@ class VADProcessor(ABC):
         self.vad_system_name = None
     @abstractmethod
-    def _detect_voice_activity(self, input_audio):
+    def _detect_voice_activity(self, input_audio, text_mined):
         pass
     @staticmethod
@@ -100,8 +101,8 @@ class VADProcessor(ABC):
             shutil.move(files[0], output_audio)
-    def process_audio(self, input_audio, output_audio, game_line):
-        voice_activity = self._detect_voice_activity(input_audio)
+    def process_audio(self, input_audio, output_audio, game_line, text_mined):
+        voice_activity = self._detect_voice_activity(input_audio, text_mined)
         if not voice_activity:
             logger.info("No voice activity detected in the audio.")
@@ -140,7 +141,7 @@ class SileroVADProcessor(VADProcessor):
         self.vad_model = load_silero_vad()
         self.vad_system_name = SILERO
-    def _detect_voice_activity(self, input_audio):
+    def _detect_voice_activity(self, input_audio, text_mined):
         from silero_vad import read_audio, get_speech_timestamps
         temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
         ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
@@ -166,7 +167,7 @@ class WhisperVADProcessor(VADProcessor):
             logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
         return self.vad_model
-    def _detect_voice_activity(self, input_audio):
+    def _detect_voice_activity(self, input_audio, text_mined):
         from stable_whisper import WhisperResult
         # Convert the audio to 16kHz mono WAV, evidence https://discord.com/channels/1286409772383342664/1286518821913362445/1407017127529152533
         temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
@@ -178,10 +179,22 @@ class WhisperVADProcessor(VADProcessor):
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             result: WhisperResult = self.vad_model.transcribe(temp_wav, vad=True, language=get_config().vad.language, vad_filter=get_config().vad.use_vad_filter_for_whisper,
-                                                             temperature=0.0)
+                                                             temperature=0.0, chunk_length=60)
         voice_activity = []
         logger.debug(json.dumps(result.to_dict()))
+        text = result.text.strip()
+        # If both mined text and Whisper transcription are available, compare their similarity
+        if text_mined and text:
+            from rapidfuzz import fuzz
+            similarity = fuzz.partial_ratio(text_mined, text)
+            logger.info(f"Whisper transcription: '{text}' | Mined text: '{text_mined}' | Partial similarity: {similarity:.1f}")
+            # If similarity is very low, treat as no voice activity detected
+            if similarity < 20:
+                logger.info(f"Partial similarity {similarity:.1f} is below threshold, skipping voice activity.")
+                return []
         # Process the segments to extract tokens, timestamps, and confidence
         previous_segment = None
@@ -193,6 +206,12 @@ class WhisperVADProcessor(VADProcessor):
                 else:
                     logger.info(
                         "Unknown single character segment, not skipping, but logging, please report if this is a mistake: " + segment.text)
+            # Skip segments with excessive repeating sequences of at least 3 characters
+            match = re.search(r'(.{3,})\1{4,}', segment.text)
+            if match:
+                logger.debug(f"Skipping segment with excessive repeating sequence (>=5): '{segment.text}' at {segment.start}-{segment.end}. Likely Hallucination.")
+                continue
             if segment.no_speech_prob and segment.no_speech_prob > 0.9:
                 logger.debug(f"Skipping segment with high no_speech_prob: {segment.no_speech_prob} for segment {segment.text} at {segment.start}-{segment.end}")

GameSentenceMiner 2.17.6__py3-none-any.whl → 2.18.0__py3-none-any.whl

GameSentenceMiner 2.17.6__py3-none-any.whl → 2.18.0__py3-none-any.whl