GameSentenceMiner 2.17.6__py3-none-any.whl → 2.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. GameSentenceMiner/ai/ai_prompting.py +51 -51
  2. GameSentenceMiner/anki.py +236 -152
  3. GameSentenceMiner/gametext.py +7 -4
  4. GameSentenceMiner/gsm.py +49 -10
  5. GameSentenceMiner/locales/en_us.json +7 -3
  6. GameSentenceMiner/locales/ja_jp.json +8 -4
  7. GameSentenceMiner/locales/zh_cn.json +8 -4
  8. GameSentenceMiner/obs.py +238 -59
  9. GameSentenceMiner/ocr/owocr_helper.py +1 -1
  10. GameSentenceMiner/tools/ss_selector.py +7 -8
  11. GameSentenceMiner/ui/__init__.py +0 -0
  12. GameSentenceMiner/ui/anki_confirmation.py +187 -0
  13. GameSentenceMiner/{config_gui.py → ui/config_gui.py} +102 -37
  14. GameSentenceMiner/ui/screenshot_selector.py +215 -0
  15. GameSentenceMiner/util/configuration.py +124 -22
  16. GameSentenceMiner/util/db.py +22 -13
  17. GameSentenceMiner/util/downloader/download_tools.py +2 -2
  18. GameSentenceMiner/util/ffmpeg.py +24 -30
  19. GameSentenceMiner/util/get_overlay_coords.py +34 -34
  20. GameSentenceMiner/util/gsm_utils.py +31 -1
  21. GameSentenceMiner/util/text_log.py +11 -9
  22. GameSentenceMiner/vad.py +31 -12
  23. GameSentenceMiner/web/database_api.py +742 -123
  24. GameSentenceMiner/web/static/css/dashboard-shared.css +241 -0
  25. GameSentenceMiner/web/static/css/kanji-grid.css +94 -2
  26. GameSentenceMiner/web/static/css/overview.css +850 -0
  27. GameSentenceMiner/web/static/css/popups-shared.css +126 -0
  28. GameSentenceMiner/web/static/css/shared.css +97 -0
  29. GameSentenceMiner/web/static/css/stats.css +192 -597
  30. GameSentenceMiner/web/static/js/anki_stats.js +6 -4
  31. GameSentenceMiner/web/static/js/database.js +209 -5
  32. GameSentenceMiner/web/static/js/goals.js +610 -0
  33. GameSentenceMiner/web/static/js/kanji-grid.js +267 -4
  34. GameSentenceMiner/web/static/js/overview.js +1176 -0
  35. GameSentenceMiner/web/static/js/shared.js +25 -0
  36. GameSentenceMiner/web/static/js/stats.js +154 -1459
  37. GameSentenceMiner/web/stats.py +2 -2
  38. GameSentenceMiner/web/templates/anki_stats.html +5 -0
  39. GameSentenceMiner/web/templates/components/navigation.html +3 -1
  40. GameSentenceMiner/web/templates/database.html +73 -1
  41. GameSentenceMiner/web/templates/goals.html +376 -0
  42. GameSentenceMiner/web/templates/index.html +13 -11
  43. GameSentenceMiner/web/templates/overview.html +416 -0
  44. GameSentenceMiner/web/templates/stats.html +46 -251
  45. GameSentenceMiner/web/texthooking_page.py +18 -0
  46. {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/METADATA +5 -1
  47. {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/RECORD +51 -41
  48. {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/WHEEL +0 -0
  49. {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/entry_points.txt +0 -0
  50. {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/licenses/LICENSE +0 -0
  51. {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/top_level.txt +0 -0
GameSentenceMiner/util/text_log.py CHANGED
@@ -89,11 +89,11 @@ class GameText:
89
89
  scene=gsm_state.current_game or ""
90
90
  )
91
91
  self.values_dict[line_id] = new_line
92
- logger.debug(f"Adding line: {new_line}")
93
92
  self.game_line_index += 1
94
93
  if self.values:
95
94
  self.values[-1].next = new_line
96
95
  self.values.append(new_line)
96
+ return new_line
97
97
  # self.remove_old_events(datetime.now() - timedelta(minutes=10))
98
98
 
99
99
  def has_line(self, line_text) -> bool:
@@ -119,16 +119,17 @@ def strip_whitespace_and_punctuation(text: str) -> str:
119
119
  return re.sub(r'[\s 、。「」【】《》., ]', '', text).strip()
120
120
 
121
121
 
122
+ # TODO See if partial_ratio is better than ratio
122
123
  def lines_match(texthooker_sentence, anki_sentence, similarity_threshold=80) -> bool:
123
124
  # Replace newlines, spaces, other whitespace characters, AND japanese punctuation
124
125
  texthooker_sentence = strip_whitespace_and_punctuation(texthooker_sentence)
125
126
  anki_sentence = strip_whitespace_and_punctuation(anki_sentence)
126
127
  similarity = rapidfuzz.fuzz.ratio(texthooker_sentence, anki_sentence)
127
- logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
128
- if texthooker_sentence in anki_sentence:
129
- logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
130
- elif anki_sentence in texthooker_sentence:
131
- logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
128
+ # logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
129
+ # if texthooker_sentence in anki_sentence:
130
+ # logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
131
+ # elif anki_sentence in texthooker_sentence:
132
+ # logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
132
133
  return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence) or (similarity >= similarity_threshold)
133
134
 
134
135
 
@@ -145,7 +146,8 @@ def get_text_event(last_note) -> GameLine:
145
146
  if not sentence:
146
147
  return lines[-1]
147
148
 
148
- for line in reversed(lines):
149
+ # Check the last 50 lines for a match
150
+ for line in reversed(lines[-50:]):
149
151
  if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
150
152
  return line
151
153
 
@@ -181,7 +183,7 @@ def get_mined_line(last_note: AnkiCard, lines=None):
181
183
  raise Exception("No voicelines in GSM. GSM can only do work on text that has been sent to it since it started. If you are not getting any text into GSM, please check your setup/config.")
182
184
 
183
185
  sentence = last_note.get_field(get_config().anki.sentence_field)
184
- for line in reversed(lines):
186
+ for line in reversed(lines[-50:]):
185
187
  if lines_match(line.get_stripped_text(), remove_html_and_cloze_tags(sentence)):
186
188
  return line
187
189
  return lines[-1]
@@ -199,7 +201,7 @@ def get_text_log() -> GameText:
199
201
  return game_log
200
202
 
201
203
  def add_line(current_line_after_regex, line_time):
202
- game_log.add_line(current_line_after_regex, line_time)
204
+ return game_log.add_line(current_line_after_regex, line_time)
203
205
 
204
206
  def get_line_by_id(line_id: str) -> Optional[GameLine]:
205
207
  """
GameSentenceMiner/vad.py CHANGED
@@ -5,6 +5,7 @@ import shutil
5
5
  import tempfile
6
6
  import time
7
7
  import warnings
8
+ import re
8
9
  from abc import abstractmethod, ABC
9
10
 
10
11
  from GameSentenceMiner.util import configuration, ffmpeg
@@ -35,26 +36,26 @@ class VADSystem:
35
36
  # if not self.groq:
36
37
  # self.groq = GroqVADProcessor()
37
38
 
38
- def trim_audio_with_vad(self, input_audio, output_audio, game_line):
39
+ def trim_audio_with_vad(self, input_audio, output_audio, game_line, full_text):
39
40
  if get_config().vad.do_vad_postprocessing:
40
- result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line)
41
+ result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line, full_text)
41
42
  if not result.success and get_config().vad.backup_vad_model != configuration.OFF:
42
43
  logger.info("No voice activity detected, using backup VAD model.")
43
- result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line)
44
+ result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line, full_text)
44
45
  return result
45
46
 
46
- def _do_vad_processing(self, model, input_audio, output_audio, game_line):
47
+ def _do_vad_processing(self, model, input_audio, output_audio, game_line, text_mined):
47
48
  match model:
48
49
  case configuration.OFF:
49
50
  return VADResult(False, 0, 0, "OFF")
50
51
  case configuration.SILERO:
51
52
  if not self.silero:
52
53
  self.silero = SileroVADProcessor()
53
- return self.silero.process_audio(input_audio, output_audio, game_line)
54
+ return self.silero.process_audio(input_audio, output_audio, game_line, text_mined)
54
55
  case configuration.WHISPER:
55
56
  if not self.whisper:
56
57
  self.whisper = WhisperVADProcessor()
57
- return self.whisper.process_audio(input_audio, output_audio, game_line)
58
+ return self.whisper.process_audio(input_audio, output_audio, game_line, text_mined)
58
59
 
59
60
  # Base class for VAD systems
60
61
  class VADProcessor(ABC):
@@ -63,7 +64,7 @@ class VADProcessor(ABC):
63
64
  self.vad_system_name = None
64
65
 
65
66
  @abstractmethod
66
- def _detect_voice_activity(self, input_audio):
67
+ def _detect_voice_activity(self, input_audio, text_mined):
67
68
  pass
68
69
 
69
70
  @staticmethod
@@ -100,8 +101,8 @@ class VADProcessor(ABC):
100
101
  shutil.move(files[0], output_audio)
101
102
 
102
103
 
103
- def process_audio(self, input_audio, output_audio, game_line):
104
- voice_activity = self._detect_voice_activity(input_audio)
104
+ def process_audio(self, input_audio, output_audio, game_line, text_mined):
105
+ voice_activity = self._detect_voice_activity(input_audio, text_mined)
105
106
 
106
107
  if not voice_activity:
107
108
  logger.info("No voice activity detected in the audio.")
@@ -140,7 +141,7 @@ class SileroVADProcessor(VADProcessor):
140
141
  self.vad_model = load_silero_vad()
141
142
  self.vad_system_name = SILERO
142
143
 
143
- def _detect_voice_activity(self, input_audio):
144
+ def _detect_voice_activity(self, input_audio, text_mined):
144
145
  from silero_vad import read_audio, get_speech_timestamps
145
146
  temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
146
147
  ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
@@ -166,7 +167,7 @@ class WhisperVADProcessor(VADProcessor):
166
167
  logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
167
168
  return self.vad_model
168
169
 
169
- def _detect_voice_activity(self, input_audio):
170
+ def _detect_voice_activity(self, input_audio, text_mined):
170
171
  from stable_whisper import WhisperResult
171
172
  # Convert the audio to 16kHz mono WAV, evidence https://discord.com/channels/1286409772383342664/1286518821913362445/1407017127529152533
172
173
  temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
@@ -178,10 +179,22 @@ class WhisperVADProcessor(VADProcessor):
178
179
  with warnings.catch_warnings():
179
180
  warnings.simplefilter("ignore")
180
181
  result: WhisperResult = self.vad_model.transcribe(temp_wav, vad=True, language=get_config().vad.language, vad_filter=get_config().vad.use_vad_filter_for_whisper,
181
- temperature=0.0)
182
+ temperature=0.0, chunk_length=60)
182
183
  voice_activity = []
183
184
 
184
185
  logger.debug(json.dumps(result.to_dict()))
186
+
187
+ text = result.text.strip()
188
+
189
+ # If both mined text and Whisper transcription are available, compare their similarity
190
+ if text_mined and text:
191
+ from rapidfuzz import fuzz
192
+ similarity = fuzz.partial_ratio(text_mined, text)
193
+ logger.info(f"Whisper transcription: '{text}' | Mined text: '{text_mined}' | Partial similarity: {similarity:.1f}")
194
+ # If similarity is very low, treat as no voice activity detected
195
+ if similarity < 20:
196
+ logger.info(f"Partial similarity {similarity:.1f} is below threshold, skipping voice activity.")
197
+ return []
185
198
 
186
199
  # Process the segments to extract tokens, timestamps, and confidence
187
200
  previous_segment = None
@@ -193,6 +206,12 @@ class WhisperVADProcessor(VADProcessor):
193
206
  else:
194
207
  logger.info(
195
208
  "Unknown single character segment, not skipping, but logging, please report if this is a mistake: " + segment.text)
209
+
210
+ # Skip segments with excessive repeating sequences of at least 3 characters
211
+ match = re.search(r'(.{3,})\1{4,}', segment.text)
212
+ if match:
213
+ logger.debug(f"Skipping segment with excessive repeating sequence (>=5): '{segment.text}' at {segment.start}-{segment.end}. Likely Hallucination.")
214
+ continue
196
215
 
197
216
  if segment.no_speech_prob and segment.no_speech_prob > 0.9:
198
217
  logger.debug(f"Skipping segment with high no_speech_prob: {segment.no_speech_prob} for segment {segment.text} at {segment.start}-{segment.end}")