GameSentenceMiner 2.9.4__py3-none-any.whl → 2.9.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. GameSentenceMiner/ai/ai_prompting.py +3 -3
  2. GameSentenceMiner/anki.py +16 -14
  3. GameSentenceMiner/config_gui.py +22 -7
  4. GameSentenceMiner/gametext.py +5 -5
  5. GameSentenceMiner/gsm.py +25 -67
  6. GameSentenceMiner/obs.py +7 -8
  7. GameSentenceMiner/ocr/owocr_area_selector.py +1 -1
  8. GameSentenceMiner/ocr/owocr_helper.py +30 -13
  9. GameSentenceMiner/owocr/owocr/ocr.py +0 -2
  10. GameSentenceMiner/owocr/owocr/run.py +1 -1
  11. GameSentenceMiner/{communication → util/communication}/__init__.py +1 -1
  12. GameSentenceMiner/{communication → util/communication}/send.py +1 -1
  13. GameSentenceMiner/{communication → util/communication}/websocket.py +2 -2
  14. GameSentenceMiner/{configuration.py → util/configuration.py} +6 -0
  15. GameSentenceMiner/{downloader → util/downloader}/download_tools.py +3 -3
  16. GameSentenceMiner/{electron_config.py → util/electron_config.py} +1 -1
  17. GameSentenceMiner/{ffmpeg.py → util/ffmpeg.py} +18 -10
  18. GameSentenceMiner/{util.py → util/gsm_utils.py} +4 -31
  19. GameSentenceMiner/{model.py → util/model.py} +1 -1
  20. GameSentenceMiner/{notification.py → util/notification.py} +3 -5
  21. GameSentenceMiner/{package.py → util/package.py} +1 -2
  22. GameSentenceMiner/{ss_selector.py → util/ss_selector.py} +5 -4
  23. GameSentenceMiner/{text_log.py → util/text_log.py} +3 -3
  24. GameSentenceMiner/vad.py +344 -0
  25. GameSentenceMiner/web/texthooking_page.py +15 -10
  26. {gamesentenceminer-2.9.4.dist-info → gamesentenceminer-2.9.6.dist-info}/METADATA +2 -3
  27. gamesentenceminer-2.9.6.dist-info/RECORD +67 -0
  28. GameSentenceMiner/vad/groq_trim.py +0 -82
  29. GameSentenceMiner/vad/result.py +0 -21
  30. GameSentenceMiner/vad/silero_trim.py +0 -52
  31. GameSentenceMiner/vad/vad_utils.py +0 -13
  32. GameSentenceMiner/vad/vosk_helper.py +0 -158
  33. GameSentenceMiner/vad/whisper_helper.py +0 -105
  34. gamesentenceminer-2.9.4.dist-info/RECORD +0 -72
  35. /GameSentenceMiner/{downloader → util}/__init__.py +0 -0
  36. /GameSentenceMiner/{downloader → util/downloader}/Untitled_json.py +0 -0
  37. /GameSentenceMiner/{vad → util/downloader}/__init__.py +0 -0
  38. /GameSentenceMiner/{downloader → util/downloader}/oneocr_dl.py +0 -0
  39. {gamesentenceminer-2.9.4.dist-info → gamesentenceminer-2.9.6.dist-info}/WHEEL +0 -0
  40. {gamesentenceminer-2.9.4.dist-info → gamesentenceminer-2.9.6.dist-info}/entry_points.txt +0 -0
  41. {gamesentenceminer-2.9.4.dist-info → gamesentenceminer-2.9.6.dist-info}/licenses/LICENSE +0 -0
  42. {gamesentenceminer-2.9.4.dist-info → gamesentenceminer-2.9.6.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,13 @@
1
- import shutil
1
+ import subprocess
2
2
  import tempfile
3
+ import time
4
+ from pathlib import Path
3
5
 
4
- import GameSentenceMiner.configuration
5
- from GameSentenceMiner import obs, util, configuration
6
- from GameSentenceMiner.configuration import *
7
- from GameSentenceMiner.text_log import initial_time
8
- from GameSentenceMiner.util import *
6
+ from GameSentenceMiner import obs
7
+ from GameSentenceMiner.util.gsm_utils import make_unique_file_name, get_file_modification_time
8
+ from GameSentenceMiner.util import configuration
9
+ from GameSentenceMiner.util.configuration import *
10
+ from GameSentenceMiner.util.text_log import initial_time
9
11
 
10
12
 
11
13
  def get_ffmpeg_path():
@@ -34,11 +36,11 @@ def call_frame_extractor(video_path, timestamp):
34
36
  # Construct the path to the frame extractor script
35
37
  script_path = os.path.join(current_dir, "ss_selector.py") # Replace with the actual script name if different
36
38
 
37
- logger.info(' '.join([sys.executable, "-m", "GameSentenceMiner.ss_selector", video_path, str(timestamp)]))
39
+ logger.info(' '.join([sys.executable, "-m", "GameSentenceMiner.util.ss_selector", video_path, str(timestamp)]))
38
40
 
39
41
  # Run the script using subprocess.run()
40
42
  result = subprocess.run(
41
- [sys.executable, "-m", "GameSentenceMiner.ss_selector", video_path, str(timestamp), get_config().screenshot.screenshot_timing_setting], # Use sys.executable
43
+ [sys.executable, "-m", "GameSentenceMiner.util.ss_selector", video_path, str(timestamp), get_config().screenshot.screenshot_timing_setting], # Use sys.executable
42
44
  capture_output=True,
43
45
  text=True, # Get output as text
44
46
  check=False # Raise an exception for non-zero exit codes
@@ -297,6 +299,7 @@ def trim_audio_based_on_last_line(untrimmed_audio, video_path, game_line, next_l
297
299
  trimmed_audio = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(),
298
300
  suffix=f".{get_config().audio.extension}").name
299
301
  start_trim_time, total_seconds, total_seconds_after_offset = get_video_timings(video_path, game_line, anki_card_creation_time)
302
+ end_trim_time = ""
300
303
 
301
304
  ffmpeg_command = ffmpeg_base_command_list + [
302
305
  "-i", untrimmed_audio,
@@ -320,6 +323,11 @@ def trim_audio_based_on_last_line(untrimmed_audio, video_path, game_line, next_l
320
323
 
321
324
  logger.debug(f"{total_seconds_after_offset} trimmed off of beginning")
322
325
 
326
+ if end_trim_time:
327
+ logger.info(f"Audio Extracted and trimmed to {start_trim_time} seconds with end time {end_trim_time}")
328
+ else:
329
+ logger.info(f"Audio Extracted and trimmed to {start_trim_time} seconds")
330
+
323
331
  logger.debug(f"Audio trimmed and saved to {trimmed_audio}")
324
332
  return trimmed_audio
325
333
 
@@ -365,8 +373,8 @@ def reencode_file_with_user_config(input_file, final_output_audio, user_ffmpeg_o
365
373
 
366
374
 
367
375
  def create_temp_file_with_same_name(input_file: str):
368
- split = input_file.split(".")
369
- return f"{split[0]}_temp.{split[1]}"
376
+ path = Path(input_file)
377
+ return str(path.with_name(f"{path.stem}_temp{path.suffix}"))
370
378
 
371
379
 
372
380
  def replace_file_with_retry(temp_file, input_file, retries=5, delay=1):
@@ -7,25 +7,14 @@ import subprocess
7
7
  import threading
8
8
  import time
9
9
  from datetime import datetime
10
+ from pathlib import Path
10
11
 
11
12
  from rapidfuzz import process
12
13
 
13
- from GameSentenceMiner.configuration import logger, get_config, get_app_directory
14
+ from GameSentenceMiner.util.configuration import logger, get_config, get_app_directory
14
15
 
15
16
  SCRIPTS_DIR = r"E:\Japanese Stuff\agent-v0.1.4-win32-x64\data\scripts"
16
17
 
17
- # Global variables to control script execution
18
- keep_running = True
19
- lock = threading.Lock()
20
- last_mined_line = None
21
-
22
- def get_last_mined_line():
23
- return last_mined_line
24
-
25
- def set_last_mined_line(line):
26
- global last_mined_line
27
- last_mined_line = line
28
-
29
18
  def run_new_thread(func):
30
19
  thread = threading.Thread(target=func, daemon=True)
31
20
  thread.start()
@@ -33,13 +22,9 @@ def run_new_thread(func):
33
22
 
34
23
 
35
24
  def make_unique_file_name(path):
36
- split = path.rsplit('.', 1)
37
- filename = split[0]
38
- extension = split[1]
39
-
25
+ path = Path(path)
40
26
  current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
41
-
42
- return f"{filename}_{current_time}.{extension}"
27
+ return f"{path.stem}_{current_time}{path.suffix}"
43
28
 
44
29
  def sanitize_filename(filename):
45
30
  return re.sub(r'[ <>:"/\\|?*\x00-\x1F]', '', filename)
@@ -194,18 +179,6 @@ def wait_for_stable_file(file_path, timeout=10, check_interval=0.1):
194
179
  logger.warning("File size did not stabilize within the timeout period. Continuing...")
195
180
  return False
196
181
 
197
-
198
- def import_vad_models():
199
- silero_trim, whisper_helper, vosk_helper = None, None, None
200
- if get_config().vad.is_silero():
201
- from GameSentenceMiner.vad import silero_trim
202
- if get_config().vad.is_whisper():
203
- from GameSentenceMiner.vad import whisper_helper
204
- if get_config().vad.is_vosk():
205
- from GameSentenceMiner.vad import vosk_helper
206
- return silero_trim, whisper_helper, vosk_helper
207
-
208
-
209
182
  def isascii(s: str):
210
183
  try:
211
184
  return s.isascii()
@@ -3,7 +3,7 @@ from typing import Optional, List
3
3
 
4
4
  from dataclasses_json import dataclass_json
5
5
 
6
- from GameSentenceMiner.configuration import get_config, logger, save_current_config
6
+ from GameSentenceMiner.util.configuration import get_config, logger, save_current_config
7
7
 
8
8
 
9
9
  # OBS
@@ -1,8 +1,6 @@
1
- import platform
2
-
3
1
  import requests
4
2
  from plyer import notification
5
- from GameSentenceMiner.configuration import logger, is_windows
3
+ from GameSentenceMiner.util.configuration import logger, is_windows
6
4
 
7
5
  if is_windows():
8
6
  from win10toast import ToastNotifier
@@ -21,7 +19,7 @@ if is_windows():
21
19
  else:
22
20
  notifier = notification
23
21
 
24
- def open_browser_window(note_id):
22
+ def open_browser_window(note_id, query=None):
25
23
  url = "http://localhost:8765"
26
24
  headers = {'Content-Type': 'application/json'}
27
25
 
@@ -29,7 +27,7 @@ def open_browser_window(note_id):
29
27
  "action": "guiBrowse",
30
28
  "version": 6,
31
29
  "params": {
32
- "query": f"nid:{note_id}"
30
+ "query": f"nid:{note_id}" if not query else query,
33
31
  }
34
32
  }
35
33
 
@@ -1,9 +1,8 @@
1
- import os
2
1
  from importlib import metadata
3
2
 
4
3
  import requests
5
4
 
6
- from GameSentenceMiner.configuration import logger, get_app_directory
5
+ from GameSentenceMiner.util.configuration import logger
7
6
 
8
7
  PACKAGE_NAME = "GameSentenceMiner"
9
8
 
@@ -4,10 +4,11 @@ import subprocess
4
4
  import os
5
5
  import sys
6
6
 
7
- from GameSentenceMiner import ffmpeg
8
- from GameSentenceMiner.configuration import get_temporary_directory, logger
9
- from GameSentenceMiner.ffmpeg import ffmpeg_base_command_list
10
- from GameSentenceMiner.util import sanitize_filename
7
+ from GameSentenceMiner.util.gsm_utils import sanitize_filename
8
+ from GameSentenceMiner.util.configuration import get_temporary_directory, logger
9
+ from GameSentenceMiner.util.ffmpeg import ffmpeg_base_command_list
10
+ from GameSentenceMiner.util import ffmpeg
11
+
11
12
 
12
13
  def extract_frames(video_path, timestamp, temp_dir, mode):
13
14
  frame_paths = []
@@ -4,9 +4,9 @@ from datetime import datetime
4
4
  from difflib import SequenceMatcher
5
5
  from typing import Optional
6
6
 
7
- from GameSentenceMiner.configuration import logger, get_config
8
- from GameSentenceMiner.model import AnkiCard
9
- from GameSentenceMiner.util import remove_html_and_cloze_tags
7
+ from GameSentenceMiner.util.gsm_utils import remove_html_and_cloze_tags
8
+ from GameSentenceMiner.util.configuration import logger, get_config
9
+ from GameSentenceMiner.util.model import AnkiCard
10
10
 
11
11
  initial_time = datetime.now()
12
12
 
@@ -0,0 +1,344 @@
1
+ import subprocess
2
+ import tempfile
3
+ import warnings
4
+ from abc import abstractmethod, ABC
5
+
6
+ from GameSentenceMiner.util import configuration, ffmpeg
7
+ from GameSentenceMiner.util.configuration import *
8
+ from GameSentenceMiner.util.ffmpeg import get_ffprobe_path
9
+
10
+
11
+ def get_audio_length(path):
12
+ result = subprocess.run(
13
+ [get_ffprobe_path(), "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", path],
14
+ stdout=subprocess.PIPE,
15
+ stderr=subprocess.PIPE,
16
+ text=True
17
+ )
18
+ return float(result.stdout.strip())
19
+
20
+ class VADResult:
21
+ def __init__(self, success: bool, start: float, end: float, model: str, output_audio: str = None):
22
+ self.success = success
23
+ self.start = start
24
+ self.end = end
25
+ self.model = model
26
+ self.output_audio = None
27
+
28
+ def __repr__(self):
29
+ return f"VADResult(success={self.success}, start={self.start}, end={self.end}, model={self.model}, output_audio={self.output_audio})"
30
+
31
+ def trim_successful_string(self):
32
+ if self.success:
33
+ if get_config().vad.trim_beginning:
34
+ return f"Trimmed audio from {self.start:.2f} to {self.end:.2f} seconds using {self.model}."
35
+ else:
36
+ return f"Trimmed end of audio to {self.end:.2f} seconds using {self.model}."
37
+ else:
38
+ return f"Failed to trim audio using {self.model}."
39
+
40
+ class VADSystem:
41
+ def __init__(self):
42
+ self.silero = None
43
+ self.whisper = None
44
+ self.vosk = None
45
+ self.groq = None
46
+
47
+ def init(self):
48
+ if get_config().vad.is_whisper():
49
+ if not self.whisper:
50
+ self.whisper = WhisperVADProcessor()
51
+ if get_config().vad.is_silero():
52
+ if not self.silero:
53
+ self.silero = SileroVADProcessor()
54
+ if get_config().vad.is_vosk():
55
+ if not self.vosk:
56
+ self.vosk = VoskVADProcessor()
57
+ if get_config().vad.is_groq():
58
+ if not self.groq:
59
+ self.groq = GroqVADProcessor()
60
+
61
+ def trim_audio_with_vad(self, input_audio, output_audio, game_line):
62
+ if get_config().vad.do_vad_postprocessing:
63
+ result = self._do_vad_processing(get_config().vad.selected_vad_model, input_audio, output_audio, game_line)
64
+ if not result.success and get_config().vad.backup_vad_model != configuration.OFF:
65
+ logger.info("No voice activity detected, using backup VAD model.")
66
+ result = self._do_vad_processing(get_config().vad.backup_vad_model, input_audio, output_audio, game_line)
67
+ if not result.success:
68
+ if get_config().vad.add_audio_on_no_results:
69
+ logger.info("No voice activity detected, using full audio.")
70
+ result.output_audio = input_audio
71
+ else:
72
+ logger.info("No voice activity detected.")
73
+ return result
74
+ else:
75
+ logger.info(result.trim_successful_string())
76
+ return result
77
+
78
+
79
+ def _do_vad_processing(self, model, input_audio, output_audio, game_line):
80
+ match model:
81
+ case configuration.OFF:
82
+ return VADResult(False, 0, 0, "OFF")
83
+ case configuration.GROQ:
84
+ if not self.groq:
85
+ self.groq = GroqVADProcessor()
86
+ return self.groq.process_audio(input_audio, output_audio, game_line)
87
+ case configuration.SILERO:
88
+ if not self.silero:
89
+ self.silero = SileroVADProcessor()
90
+ return self.silero.process_audio(input_audio, output_audio, game_line)
91
+ case configuration.VOSK:
92
+ if not self.vosk:
93
+ self.vosk = VoskVADProcessor()
94
+ return self.vosk.process_audio(input_audio, output_audio, game_line)
95
+ case configuration.WHISPER:
96
+ if not self.whisper:
97
+ self.whisper = WhisperVADProcessor()
98
+ return self.whisper.process_audio(input_audio, output_audio, game_line)
99
+
100
+ # Base class for VAD systems
101
+ class VADProcessor(ABC):
102
+ def __init__(self):
103
+ self.vad_model = None
104
+ self.vad_system_name = None
105
+
106
+ @abstractmethod
107
+ def _detect_voice_activity(self, input_audio):
108
+ pass
109
+
110
+ def process_audio(self, input_audio, output_audio, game_line):
111
+ voice_activity = self._detect_voice_activity(input_audio)
112
+
113
+ if not voice_activity:
114
+ logger.info("No voice activity detected in the audio.")
115
+ return VADResult(False, 0, 0, self.vad_system_name)
116
+
117
+ start_time = voice_activity[0]['start'] if voice_activity else 0
118
+ end_time = voice_activity[-1]['end'] if voice_activity else 0
119
+
120
+ # Attempt to fix the end time if the last segment is too short
121
+ if game_line and game_line.next and len(voice_activity) > 1:
122
+ audio_length = get_audio_length(input_audio)
123
+ if 0 > audio_length - voice_activity[-1]['start'] + get_config().audio.beginning_offset:
124
+ end_time = voice_activity[-2]['end']
125
+
126
+ ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
127
+ return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, self.vad_system_name, output_audio)
128
+
129
+ class SileroVADProcessor(VADProcessor):
130
+ def __init__(self):
131
+ super().__init__()
132
+ from silero_vad import load_silero_vad
133
+ self.vad_model = load_silero_vad()
134
+ self.vad_system_name = SILERO
135
+
136
+ def _detect_voice_activity(self, input_audio):
137
+ from silero_vad import read_audio, get_speech_timestamps
138
+ temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
139
+ ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
140
+ wav = read_audio(temp_wav)
141
+ speech_timestamps = get_speech_timestamps(wav, self.vad_model, return_seconds=True)
142
+ logger.debug(speech_timestamps)
143
+ return speech_timestamps
144
+
145
+ class WhisperVADProcessor(VADProcessor):
146
+ def __init__(self):
147
+ super().__init__()
148
+ self.vad_model = self.load_whisper_model()
149
+ self.vad_system_name = WHISPER
150
+
151
+ def load_whisper_model(self):
152
+ import stable_whisper as whisper
153
+ if not self.vad_model:
154
+ with warnings.catch_warnings(action="ignore"):
155
+ self.vad_model = whisper.load_model(get_config().vad.whisper_model)
156
+ logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
157
+ return self.vad_model
158
+
159
+ def _detect_voice_activity(self, input_audio):
160
+ from stable_whisper import WhisperResult
161
+ # Convert the audio to 16kHz mono WAV
162
+ temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
163
+ ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
164
+
165
+ logger.info('transcribing audio...')
166
+
167
+ # Transcribe the audio using Whisper
168
+ with warnings.catch_warnings(action="ignore"):
169
+ result: WhisperResult = self.vad_model.transcribe(temp_wav, vad=True, language=get_config().vad.language,
170
+ temperature=0.0)
171
+ voice_activity = []
172
+
173
+ logger.debug(result.to_dict())
174
+
175
+ # Process the segments to extract tokens, timestamps, and confidence
176
+ for segment in result.segments:
177
+ logger.debug(segment.to_dict())
178
+ for word in segment.words:
179
+ logger.debug(word.to_dict())
180
+ confidence = word.probability
181
+ if confidence > .1:
182
+ logger.debug(word)
183
+ voice_activity.append({
184
+ 'text': word.word,
185
+ 'start': word.start,
186
+ 'end': word.end,
187
+ 'confidence': word.probability
188
+ })
189
+
190
+ # Analyze the detected words to decide whether to use the audio
191
+ should_use = False
192
+ unique_words = set(word['text'] for word in voice_activity)
193
+ if len(unique_words) > 1 or not all(item in ['えー', 'ん'] for item in unique_words):
194
+ should_use = True
195
+
196
+ if not should_use:
197
+ return None
198
+
199
+ # Return the detected voice activity and the total duration
200
+ return voice_activity
201
+
202
+ # Add a new class for Vosk-based VAD
203
+ class VoskVADProcessor(VADProcessor):
204
+ def __init__(self):
205
+ super().__init__()
206
+ self.vad_model = self._load_vosk_model()
207
+ self.vad_system_name = VOSK
208
+
209
+ def _load_vosk_model(self):
210
+ if not self.vad_model:
211
+ import vosk
212
+ vosk_model_path = self._download_and_cache_vosk_model()
213
+ self.vad_model = vosk.Model(vosk_model_path)
214
+ logger.info(f"Vosk model loaded from {vosk_model_path}")
215
+ return self.vad_model
216
+
217
+ def _download_and_cache_vosk_model(self, model_dir="vosk_model_cache"):
218
+ # Ensure the cache directory exists
219
+ import requests
220
+ import zipfile
221
+ import tarfile
222
+ if not os.path.exists(os.path.join(get_app_directory(), model_dir)):
223
+ os.makedirs(os.path.join(get_app_directory(), model_dir))
224
+
225
+ # Extract the model name from the URL
226
+ model_filename = get_config().vad.vosk_url.split("/")[-1]
227
+ model_path = os.path.join(get_app_directory(), model_dir, model_filename)
228
+
229
+ # If the model is already downloaded, skip the download
230
+ if not os.path.exists(model_path):
231
+ logger.info(
232
+ f"Downloading the Vosk model from {get_config().vad.vosk_url}... This will take a while if using large model, ~1G")
233
+ response = requests.get(get_config().vad.vosk_url, stream=True)
234
+ with open(model_path, "wb") as file:
235
+ for chunk in response.iter_content(chunk_size=8192):
236
+ if chunk:
237
+ file.write(chunk)
238
+ logger.info("Download complete.")
239
+
240
+ # Extract the model if it's a zip or tar file
241
+ model_extract_path = os.path.join(get_app_directory(), model_dir, "vosk_model")
242
+ if not os.path.exists(model_extract_path):
243
+ logger.info("Extracting the Vosk model...")
244
+ if model_filename.endswith(".zip"):
245
+ with zipfile.ZipFile(model_path, "r") as zip_ref:
246
+ zip_ref.extractall(model_extract_path)
247
+ elif model_filename.endswith(".tar.gz"):
248
+ with tarfile.open(model_path, "r:gz") as tar_ref:
249
+ tar_ref.extractall(model_extract_path)
250
+ else:
251
+ logger.info("Unknown archive format. Model extraction skipped.")
252
+ logger.info(f"Model extracted to {model_extract_path}.")
253
+ else:
254
+ logger.info(f"Model already extracted at {model_extract_path}.")
255
+
256
+ # Return the path to the actual model folder inside the extraction directory
257
+ extracted_folders = os.listdir(model_extract_path)
258
+ if extracted_folders:
259
+ actual_model_folder = os.path.join(model_extract_path,
260
+ extracted_folders[0]) # Assuming the first folder is the model
261
+ return actual_model_folder
262
+ else:
263
+ return model_extract_path # In case there's no subfolder, return the extraction path directly
264
+
265
+ def _detect_voice_activity(self, input_audio):
266
+ import soundfile as sf
267
+ import vosk
268
+ import numpy as np
269
+ # Convert the audio to 16kHz mono WAV
270
+ temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
271
+ ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
272
+
273
+ # Initialize recognizer
274
+ with sf.SoundFile(temp_wav) as audio_file:
275
+ recognizer = vosk.KaldiRecognizer(self.vad_model, audio_file.samplerate)
276
+ voice_activity = []
277
+
278
+ recognizer.SetWords(True)
279
+
280
+ # Process audio in chunks
281
+ while True:
282
+ data = audio_file.buffer_read(4000, dtype='int16')
283
+ if len(data) == 0:
284
+ break
285
+
286
+ # Convert buffer to bytes using NumPy
287
+ data_bytes = np.frombuffer(data, dtype='int16').tobytes()
288
+
289
+ if recognizer.AcceptWaveform(data_bytes):
290
+ pass
291
+
292
+ final_result = json.loads(recognizer.FinalResult())
293
+ if 'result' in final_result:
294
+ for word in final_result['result']:
295
+ if word['conf'] >= 0.90:
296
+ voice_activity.append({
297
+ 'text': word['word'],
298
+ 'start': word['start'],
299
+ 'end': word['end']
300
+ })
301
+
302
+ # Return the detected voice activity
303
+ return voice_activity
304
+
305
+ class GroqVADProcessor(VADProcessor):
306
+ def __init__(self):
307
+ super().__init__()
308
+ from groq import Groq
309
+ self.client = Groq(api_key=get_config().ai.groq_api_key)
310
+ self.vad_model = self.load_groq_model()
311
+ self.vad_system_name = GROQ
312
+
313
+ def load_groq_model(self):
314
+ if not self.vad_model:
315
+ from groq import Groq
316
+ self.vad_model = Groq()
317
+ logger.info("Groq model loaded.")
318
+ return self.vad_model
319
+
320
+ def _detect_voice_activity(self, input_audio):
321
+ try:
322
+ with open(input_audio, "rb") as file:
323
+ transcription = self.client.audio.transcriptions.create(
324
+ file=(os.path.basename(input_audio), file.read()),
325
+ model="whisper-large-v3-turbo",
326
+ response_format="verbose_json",
327
+ language=get_config().vad.language,
328
+ temperature=0.0,
329
+ timestamp_granularities=["segment"],
330
+ prompt=f"Start detecting speech from the first spoken word. If there is music or background noise, ignore it completely. Be very careful to not hallucinate on silence. If the transcription is anything but language:{get_config().vad.language}, ignore it completely. If the end of the audio seems like the start of a new sentence, ignore it completely.",
331
+ )
332
+
333
+ logger.debug(transcription)
334
+ speech_segments = []
335
+ if hasattr(transcription, 'segments'):
336
+ speech_segments = transcription.segments
337
+ elif hasattr(transcription, 'words'):
338
+ speech_segments = transcription.words
339
+ return speech_segments
340
+ except Exception as e:
341
+ logger.error(f"Error detecting voice with Groq: {e}")
342
+ return [], 0.0
343
+
344
+ vad_processor = VADSystem()
@@ -10,12 +10,12 @@ from dataclasses import dataclass
10
10
  import flask
11
11
  import websockets
12
12
 
13
- from GameSentenceMiner.text_log import GameLine, get_line_by_id, initial_time, get_all_lines
13
+ from GameSentenceMiner.util.gsm_utils import TEXT_REPLACEMENTS_FILE
14
+ from GameSentenceMiner.util.text_log import GameLine, get_line_by_id, initial_time
14
15
  from flask import request, jsonify, send_from_directory
15
16
  import webbrowser
16
17
  from GameSentenceMiner import obs
17
- from GameSentenceMiner.configuration import logger, get_config, DB_PATH, gsm_state
18
- from GameSentenceMiner.util import TEXT_REPLACEMENTS_FILE
18
+ from GameSentenceMiner.util.configuration import logger, get_config, DB_PATH, gsm_state
19
19
 
20
20
  port = get_config().general.texthooker_port
21
21
  url = f"http://localhost:{port}"
@@ -350,7 +350,6 @@ def start_web_server():
350
350
 
351
351
  app.run(port=port, debug=False) # debug=True provides helpful error messages during development
352
352
 
353
- import signal
354
353
 
355
354
  websocket_server_thread = None
356
355
  websocket_queue = queue.Queue()
@@ -407,12 +406,18 @@ class WebsocketServerThread(threading.Thread):
407
406
  self._loop = asyncio.get_running_loop()
408
407
  self._stop_event = stop_event = asyncio.Event()
409
408
  self._event.set()
410
- self.server = start_server = websockets.serve(self.server_handler,
411
- "0.0.0.0",
412
- get_config().advanced.texthooker_communication_websocket_port,
413
- max_size=1000000000)
414
- async with start_server:
415
- await stop_event.wait()
409
+ while True:
410
+ try:
411
+ self.server = start_server = websockets.serve(self.server_handler,
412
+ "0.0.0.0",
413
+ get_config().advanced.texthooker_communication_websocket_port,
414
+ max_size=1000000000)
415
+ async with start_server:
416
+ await stop_event.wait()
417
+ return
418
+ except Exception as e:
419
+ logger.warning(f"WebSocket server encountered an error: {e}. Retrying...")
420
+ await asyncio.sleep(1)
416
421
 
417
422
  asyncio.run(main())
418
423
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.9.4
3
+ Version: 2.9.6
4
4
  Summary: A tool for mining sentences from games.
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -21,7 +21,6 @@ Requires-Dist: soundfile~=0.12.1
21
21
  Requires-Dist: toml~=0.10.2
22
22
  Requires-Dist: psutil~=6.0.0
23
23
  Requires-Dist: rapidfuzz~=3.9.7
24
- Requires-Dist: obs-websocket-py~=1.0
25
24
  Requires-Dist: plyer~=2.1.0
26
25
  Requires-Dist: keyboard~=0.13.5
27
26
  Requires-Dist: websockets~=15.0.1
@@ -37,7 +36,7 @@ Requires-Dist: google-generativeai
37
36
  Requires-Dist: pygetwindow; sys_platform == "win32"
38
37
  Requires-Dist: flask
39
38
  Requires-Dist: groq
40
- Requires-Dist: obsws-python
39
+ Requires-Dist: obsws-python~=1.7.2
41
40
  Requires-Dist: Flask-SocketIO
42
41
  Dynamic: license-file
43
42