GameSentenceMiner 2.17.6__py3-none-any.whl → 2.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/ai/ai_prompting.py +51 -51
- GameSentenceMiner/anki.py +236 -152
- GameSentenceMiner/gametext.py +7 -4
- GameSentenceMiner/gsm.py +49 -10
- GameSentenceMiner/locales/en_us.json +7 -3
- GameSentenceMiner/locales/ja_jp.json +8 -4
- GameSentenceMiner/locales/zh_cn.json +8 -4
- GameSentenceMiner/obs.py +238 -59
- GameSentenceMiner/ocr/owocr_helper.py +1 -1
- GameSentenceMiner/tools/ss_selector.py +7 -8
- GameSentenceMiner/ui/__init__.py +0 -0
- GameSentenceMiner/ui/anki_confirmation.py +187 -0
- GameSentenceMiner/{config_gui.py → ui/config_gui.py} +102 -37
- GameSentenceMiner/ui/screenshot_selector.py +215 -0
- GameSentenceMiner/util/configuration.py +124 -22
- GameSentenceMiner/util/db.py +22 -13
- GameSentenceMiner/util/downloader/download_tools.py +2 -2
- GameSentenceMiner/util/ffmpeg.py +24 -30
- GameSentenceMiner/util/get_overlay_coords.py +34 -34
- GameSentenceMiner/util/gsm_utils.py +31 -1
- GameSentenceMiner/util/text_log.py +11 -9
- GameSentenceMiner/vad.py +31 -12
- GameSentenceMiner/web/database_api.py +742 -123
- GameSentenceMiner/web/static/css/dashboard-shared.css +241 -0
- GameSentenceMiner/web/static/css/kanji-grid.css +94 -2
- GameSentenceMiner/web/static/css/overview.css +850 -0
- GameSentenceMiner/web/static/css/popups-shared.css +126 -0
- GameSentenceMiner/web/static/css/shared.css +97 -0
- GameSentenceMiner/web/static/css/stats.css +192 -597
- GameSentenceMiner/web/static/js/anki_stats.js +6 -4
- GameSentenceMiner/web/static/js/database.js +209 -5
- GameSentenceMiner/web/static/js/goals.js +610 -0
- GameSentenceMiner/web/static/js/kanji-grid.js +267 -4
- GameSentenceMiner/web/static/js/overview.js +1176 -0
- GameSentenceMiner/web/static/js/shared.js +25 -0
- GameSentenceMiner/web/static/js/stats.js +154 -1459
- GameSentenceMiner/web/stats.py +2 -2
- GameSentenceMiner/web/templates/anki_stats.html +5 -0
- GameSentenceMiner/web/templates/components/navigation.html +3 -1
- GameSentenceMiner/web/templates/database.html +73 -1
- GameSentenceMiner/web/templates/goals.html +376 -0
- GameSentenceMiner/web/templates/index.html +13 -11
- GameSentenceMiner/web/templates/overview.html +416 -0
- GameSentenceMiner/web/templates/stats.html +46 -251
- GameSentenceMiner/web/texthooking_page.py +18 -0
- {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/METADATA +5 -1
- {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/RECORD +51 -41
- {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.17.6.dist-info → gamesentenceminer-2.18.0.dist-info}/top_level.txt +0 -0
|
@@ -89,11 +89,11 @@ class GameText:
|
|
|
89
89
|
scene=gsm_state.current_game or ""
|
|
90
90
|
)
|
|
91
91
|
self.values_dict[line_id] = new_line
|
|
92
|
-
logger.debug(f"Adding line: {new_line}")
|
|
93
92
|
self.game_line_index += 1
|
|
94
93
|
if self.values:
|
|
95
94
|
self.values[-1].next = new_line
|
|
96
95
|
self.values.append(new_line)
|
|
96
|
+
return new_line
|
|
97
97
|
# self.remove_old_events(datetime.now() - timedelta(minutes=10))
|
|
98
98
|
|
|
99
99
|
def has_line(self, line_text) -> bool:
|
|
@@ -119,16 +119,17 @@ def strip_whitespace_and_punctuation(text: str) -> str:
|
|
|
119
119
|
return re.sub(r'[\s 、。「」【】《》., ]', '', text).strip()
|
|
120
120
|
|
|
121
121
|
|
|
122
|
+
# TODO See if partial_ratio is better than ratio
|
|
122
123
|
def lines_match(texthooker_sentence, anki_sentence, similarity_threshold=80) -> bool:
|
|
123
124
|
# Replace newlines, spaces, other whitespace characters, AND japanese punctuation
|
|
124
125
|
texthooker_sentence = strip_whitespace_and_punctuation(texthooker_sentence)
|
|
125
126
|
anki_sentence = strip_whitespace_and_punctuation(anki_sentence)
|
|
126
127
|
similarity = rapidfuzz.fuzz.ratio(texthooker_sentence, anki_sentence)
|
|
127
|
-
logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
|
|
128
|
-
if texthooker_sentence in anki_sentence:
|
|
129
|
-
|
|
130
|
-
elif anki_sentence in texthooker_sentence:
|
|
131
|
-
|
|
128
|
+
# logger.debug(f"Comparing sentences: '{texthooker_sentence}' and '{anki_sentence}' - Similarity: {similarity}")
|
|
129
|
+
# if texthooker_sentence in anki_sentence:
|
|
130
|
+
# logger.debug(f"One contains the other: {texthooker_sentence} in {anki_sentence} - Similarity: {similarity}")
|
|
131
|
+
# elif anki_sentence in texthooker_sentence:
|
|
132
|
+
# logger.debug(f"One contains the other: {anki_sentence} in {texthooker_sentence} - Similarity: {similarity}")
|
|
132
133
|
return (anki_sentence in texthooker_sentence) or (texthooker_sentence in anki_sentence) or (similarity >= similarity_threshold)
|
|
133
134
|
|
|
134
135
|
|
|
@@ -145,7 +146,8 @@ def get_text_event(last_note) -> GameLine:
|
|
|
145
146
|
if not sentence:
|
|
146
147
|
return lines[-1]
|
|
147
148
|
|
|
148
|
-
|
|
149
|
+
# Check the last 50 lines for a match
|
|
150
|
+
for line in reversed(lines[-50:]):
|
|
149
151
|
if lines_match(line.text, remove_html_and_cloze_tags(sentence)):
|
|
150
152
|
return line
|
|
151
153
|
|
|
@@ -181,7 +183,7 @@ def get_mined_line(last_note: AnkiCard, lines=None):
|
|
|
181
183
|
raise Exception("No voicelines in GSM. GSM can only do work on text that has been sent to it since it started. If you are not getting any text into GSM, please check your setup/config.")
|
|
182
184
|
|
|
183
185
|
sentence = last_note.get_field(get_config().anki.sentence_field)
|
|
184
|
-
for line in reversed(lines):
|
|
186
|
+
for line in reversed(lines[-50:]):
|
|
185
187
|
if lines_match(line.get_stripped_text(), remove_html_and_cloze_tags(sentence)):
|
|
186
188
|
return line
|
|
187
189
|
return lines[-1]
|
|
@@ -199,7 +201,7 @@ def get_text_log() -> GameText:
|
|
|
199
201
|
return game_log
|
|
200
202
|
|
|
201
203
|
def add_line(current_line_after_regex, line_time):
|
|
202
|
-
game_log.add_line(current_line_after_regex, line_time)
|
|
204
|
+
return game_log.add_line(current_line_after_regex, line_time)
|
|
203
205
|
|
|
204
206
|
def get_line_by_id(line_id: str) -> Optional[GameLine]:
|
|
205
207
|
"""
|
GameSentenceMiner/vad.py
CHANGED
|
@@ -5,6 +5,7 @@ import shutil
|
|
|
5
5
|
import tempfile
|
|
6
6
|
import time
|
|
7
7
|
import warnings
|
|
8
|
+
import re
|
|
8
9
|
from abc import abstractmethod, ABC
|
|
9
10
|
|
|
10
11
|
from GameSentenceMiner.util import configuration, ffmpeg
|
|
@@ -35,26 +36,26 @@ class VADSystem:
|
|
|
35
36
|
# if not self.groq:
|
|
36
37
|
# self.groq = GroqVADProcessor()
|
|
37
38
|
|
|
38
|
-
def trim_audio_with_vad(self, input_audio, output_audio, game_line):
|
|
39
|
+
def trim_audio_with_vad(self, input_audio, output_audio, game_line, full_text):
|
|
39
40
|
if get_config().vad.do_vad_postprocessing:
|
|
40
|
-
result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line)
|
|
41
|
+
result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line, full_text)
|
|
41
42
|
if not result.success and get_config().vad.backup_vad_model != configuration.OFF:
|
|
42
43
|
logger.info("No voice activity detected, using backup VAD model.")
|
|
43
|
-
result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line)
|
|
44
|
+
result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line, full_text)
|
|
44
45
|
return result
|
|
45
46
|
|
|
46
|
-
def _do_vad_processing(self, model, input_audio, output_audio, game_line):
|
|
47
|
+
def _do_vad_processing(self, model, input_audio, output_audio, game_line, text_mined):
|
|
47
48
|
match model:
|
|
48
49
|
case configuration.OFF:
|
|
49
50
|
return VADResult(False, 0, 0, "OFF")
|
|
50
51
|
case configuration.SILERO:
|
|
51
52
|
if not self.silero:
|
|
52
53
|
self.silero = SileroVADProcessor()
|
|
53
|
-
return self.silero.process_audio(input_audio, output_audio, game_line)
|
|
54
|
+
return self.silero.process_audio(input_audio, output_audio, game_line, text_mined)
|
|
54
55
|
case configuration.WHISPER:
|
|
55
56
|
if not self.whisper:
|
|
56
57
|
self.whisper = WhisperVADProcessor()
|
|
57
|
-
return self.whisper.process_audio(input_audio, output_audio, game_line)
|
|
58
|
+
return self.whisper.process_audio(input_audio, output_audio, game_line, text_mined)
|
|
58
59
|
|
|
59
60
|
# Base class for VAD systems
|
|
60
61
|
class VADProcessor(ABC):
|
|
@@ -63,7 +64,7 @@ class VADProcessor(ABC):
|
|
|
63
64
|
self.vad_system_name = None
|
|
64
65
|
|
|
65
66
|
@abstractmethod
|
|
66
|
-
def _detect_voice_activity(self, input_audio):
|
|
67
|
+
def _detect_voice_activity(self, input_audio, text_mined):
|
|
67
68
|
pass
|
|
68
69
|
|
|
69
70
|
@staticmethod
|
|
@@ -100,8 +101,8 @@ class VADProcessor(ABC):
|
|
|
100
101
|
shutil.move(files[0], output_audio)
|
|
101
102
|
|
|
102
103
|
|
|
103
|
-
def process_audio(self, input_audio, output_audio, game_line):
|
|
104
|
-
voice_activity = self._detect_voice_activity(input_audio)
|
|
104
|
+
def process_audio(self, input_audio, output_audio, game_line, text_mined):
|
|
105
|
+
voice_activity = self._detect_voice_activity(input_audio, text_mined)
|
|
105
106
|
|
|
106
107
|
if not voice_activity:
|
|
107
108
|
logger.info("No voice activity detected in the audio.")
|
|
@@ -140,7 +141,7 @@ class SileroVADProcessor(VADProcessor):
|
|
|
140
141
|
self.vad_model = load_silero_vad()
|
|
141
142
|
self.vad_system_name = SILERO
|
|
142
143
|
|
|
143
|
-
def _detect_voice_activity(self, input_audio):
|
|
144
|
+
def _detect_voice_activity(self, input_audio, text_mined):
|
|
144
145
|
from silero_vad import read_audio, get_speech_timestamps
|
|
145
146
|
temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
|
|
146
147
|
ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
|
|
@@ -166,7 +167,7 @@ class WhisperVADProcessor(VADProcessor):
|
|
|
166
167
|
logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
|
|
167
168
|
return self.vad_model
|
|
168
169
|
|
|
169
|
-
def _detect_voice_activity(self, input_audio):
|
|
170
|
+
def _detect_voice_activity(self, input_audio, text_mined):
|
|
170
171
|
from stable_whisper import WhisperResult
|
|
171
172
|
# Convert the audio to 16kHz mono WAV, evidence https://discord.com/channels/1286409772383342664/1286518821913362445/1407017127529152533
|
|
172
173
|
temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
|
|
@@ -178,10 +179,22 @@ class WhisperVADProcessor(VADProcessor):
|
|
|
178
179
|
with warnings.catch_warnings():
|
|
179
180
|
warnings.simplefilter("ignore")
|
|
180
181
|
result: WhisperResult = self.vad_model.transcribe(temp_wav, vad=True, language=get_config().vad.language, vad_filter=get_config().vad.use_vad_filter_for_whisper,
|
|
181
|
-
temperature=0.0)
|
|
182
|
+
temperature=0.0, chunk_length=60)
|
|
182
183
|
voice_activity = []
|
|
183
184
|
|
|
184
185
|
logger.debug(json.dumps(result.to_dict()))
|
|
186
|
+
|
|
187
|
+
text = result.text.strip()
|
|
188
|
+
|
|
189
|
+
# If both mined text and Whisper transcription are available, compare their similarity
|
|
190
|
+
if text_mined and text:
|
|
191
|
+
from rapidfuzz import fuzz
|
|
192
|
+
similarity = fuzz.partial_ratio(text_mined, text)
|
|
193
|
+
logger.info(f"Whisper transcription: '{text}' | Mined text: '{text_mined}' | Partial similarity: {similarity:.1f}")
|
|
194
|
+
# If similarity is very low, treat as no voice activity detected
|
|
195
|
+
if similarity < 20:
|
|
196
|
+
logger.info(f"Partial similarity {similarity:.1f} is below threshold, skipping voice activity.")
|
|
197
|
+
return []
|
|
185
198
|
|
|
186
199
|
# Process the segments to extract tokens, timestamps, and confidence
|
|
187
200
|
previous_segment = None
|
|
@@ -193,6 +206,12 @@ class WhisperVADProcessor(VADProcessor):
|
|
|
193
206
|
else:
|
|
194
207
|
logger.info(
|
|
195
208
|
"Unknown single character segment, not skipping, but logging, please report if this is a mistake: " + segment.text)
|
|
209
|
+
|
|
210
|
+
# Skip segments with excessive repeating sequences of at least 3 characters
|
|
211
|
+
match = re.search(r'(.{3,})\1{4,}', segment.text)
|
|
212
|
+
if match:
|
|
213
|
+
logger.debug(f"Skipping segment with excessive repeating sequence (>=5): '{segment.text}' at {segment.start}-{segment.end}. Likely Hallucination.")
|
|
214
|
+
continue
|
|
196
215
|
|
|
197
216
|
if segment.no_speech_prob and segment.no_speech_prob > 0.9:
|
|
198
217
|
logger.debug(f"Skipping segment with high no_speech_prob: {segment.no_speech_prob} for segment {segment.text} at {segment.start}-{segment.end}")
|