GameSentenceMiner 2.9.3__py3-none-any.whl → 2.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. GameSentenceMiner/ai/ai_prompting.py +3 -3
  2. GameSentenceMiner/anki.py +17 -11
  3. GameSentenceMiner/assets/icon.png +0 -0
  4. GameSentenceMiner/assets/icon128.png +0 -0
  5. GameSentenceMiner/assets/icon256.png +0 -0
  6. GameSentenceMiner/assets/icon32.png +0 -0
  7. GameSentenceMiner/assets/icon512.png +0 -0
  8. GameSentenceMiner/assets/icon64.png +0 -0
  9. GameSentenceMiner/assets/pickaxe.png +0 -0
  10. GameSentenceMiner/config_gui.py +22 -7
  11. GameSentenceMiner/gametext.py +5 -5
  12. GameSentenceMiner/gsm.py +26 -67
  13. GameSentenceMiner/obs.py +7 -9
  14. GameSentenceMiner/ocr/owocr_area_selector.py +1 -1
  15. GameSentenceMiner/ocr/owocr_helper.py +30 -13
  16. GameSentenceMiner/owocr/owocr/ocr.py +0 -2
  17. GameSentenceMiner/owocr/owocr/run.py +1 -1
  18. GameSentenceMiner/{communication → util/communication}/__init__.py +1 -1
  19. GameSentenceMiner/{communication → util/communication}/send.py +1 -1
  20. GameSentenceMiner/{communication → util/communication}/websocket.py +2 -2
  21. GameSentenceMiner/{downloader → util/downloader}/download_tools.py +3 -3
  22. GameSentenceMiner/vad.py +344 -0
  23. GameSentenceMiner/web/texthooking_page.py +78 -55
  24. {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/METADATA +2 -3
  25. gamesentenceminer-2.9.5.dist-info/RECORD +57 -0
  26. GameSentenceMiner/configuration.py +0 -647
  27. GameSentenceMiner/electron_config.py +0 -315
  28. GameSentenceMiner/ffmpeg.py +0 -441
  29. GameSentenceMiner/model.py +0 -177
  30. GameSentenceMiner/notification.py +0 -105
  31. GameSentenceMiner/package.py +0 -39
  32. GameSentenceMiner/ss_selector.py +0 -121
  33. GameSentenceMiner/text_log.py +0 -186
  34. GameSentenceMiner/util.py +0 -262
  35. GameSentenceMiner/vad/groq_trim.py +0 -82
  36. GameSentenceMiner/vad/result.py +0 -21
  37. GameSentenceMiner/vad/silero_trim.py +0 -52
  38. GameSentenceMiner/vad/vad_utils.py +0 -13
  39. GameSentenceMiner/vad/vosk_helper.py +0 -158
  40. GameSentenceMiner/vad/whisper_helper.py +0 -105
  41. gamesentenceminer-2.9.3.dist-info/RECORD +0 -64
  42. /GameSentenceMiner/{downloader → assets}/__init__.py +0 -0
  43. /GameSentenceMiner/{downloader → util/downloader}/Untitled_json.py +0 -0
  44. /GameSentenceMiner/{vad → util/downloader}/__init__.py +0 -0
  45. /GameSentenceMiner/{downloader → util/downloader}/oneocr_dl.py +0 -0
  46. {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/WHEEL +0 -0
  47. {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/entry_points.txt +0 -0
  48. {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/licenses/LICENSE +0 -0
  49. {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/top_level.txt +0 -0
GameSentenceMiner/util.py DELETED
@@ -1,262 +0,0 @@
1
- import json
2
- import os
3
- import random
4
- import re
5
- import string
6
- import subprocess
7
- import threading
8
- import time
9
- from datetime import datetime
10
-
11
- from rapidfuzz import process
12
-
13
- from GameSentenceMiner.configuration import logger, get_config, get_app_directory
14
-
15
- SCRIPTS_DIR = r"E:\Japanese Stuff\agent-v0.1.4-win32-x64\data\scripts"
16
-
17
- # Global variables to control script execution
18
- keep_running = True
19
- lock = threading.Lock()
20
- last_mined_line = None
21
-
22
- def get_last_mined_line():
23
- return last_mined_line
24
-
25
- def set_last_mined_line(line):
26
- global last_mined_line
27
- last_mined_line = line
28
-
29
- def run_new_thread(func):
30
- thread = threading.Thread(target=func, daemon=True)
31
- thread.start()
32
- return thread
33
-
34
-
35
- def make_unique_file_name(path):
36
- split = path.rsplit('.', 1)
37
- filename = split[0]
38
- extension = split[1]
39
-
40
- current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]
41
-
42
- return f"{filename}_{current_time}.{extension}"
43
-
44
- def sanitize_filename(filename):
45
- return re.sub(r'[ <>:"/\\|?*\x00-\x1F]', '', filename)
46
-
47
-
48
- def get_random_digit_string():
49
- return ''.join(random.choice(string.digits) for i in range(9))
50
-
51
-
52
- def timedelta_to_ffmpeg_friendly_format(td_obj):
53
- total_seconds = td_obj.total_seconds()
54
- hours, remainder = divmod(total_seconds, 3600)
55
- minutes, seconds = divmod(remainder, 60)
56
- return "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)
57
-
58
-
59
- def get_file_modification_time(file_path):
60
- mod_time_epoch = os.path.getmtime(file_path)
61
- mod_time = datetime.fromtimestamp(mod_time_epoch)
62
- return mod_time
63
-
64
-
65
- def get_process_id_by_title(game_title):
66
- powershell_command = f"Get-Process | Where-Object {{$_.MainWindowTitle -like '*{game_title}*'}} | Select-Object -First 1 -ExpandProperty Id"
67
- process_id = subprocess.check_output(["powershell", "-Command", powershell_command], text=True).strip()
68
- logger.info(f"Process ID for {game_title}: {process_id}")
69
- return process_id
70
-
71
-
72
- def get_script_files(directory):
73
- script_files = []
74
- for root, dirs, files in os.walk(directory):
75
- for file in files:
76
- if file.endswith(".js"): # Assuming the scripts are .js files
77
- script_files.append(os.path.join(root, file))
78
- return script_files
79
-
80
-
81
- def filter_steam_scripts(scripts):
82
- return [script for script in scripts if "PC_Steam" in os.path.basename(script)]
83
-
84
-
85
- def extract_game_name(script_path):
86
- # Remove directory and file extension to get the name part
87
- script_name = os.path.basename(script_path)
88
- game_name = script_name.replace("PC_Steam_", "").replace(".js", "")
89
- return game_name.replace("_", " ").replace(".", " ")
90
-
91
-
92
- def find_most_similar_script(game_title, steam_scripts):
93
- # Create a list of game names from the script paths
94
- game_names = [extract_game_name(script) for script in steam_scripts]
95
-
96
- # Use rapidfuzz to find the closest match
97
- best_match = process.extractOne(game_title, game_names)
98
-
99
- if best_match:
100
- matched_game_name, confidence_score, index = best_match
101
- return steam_scripts[index], matched_game_name, confidence_score
102
- return None, None, None
103
-
104
-
105
- def find_script_for_game(game_title):
106
- script_files = get_script_files(SCRIPTS_DIR)
107
-
108
- steam_scripts = filter_steam_scripts(script_files)
109
-
110
- best_script, matched_game_name, confidence = find_most_similar_script(game_title, steam_scripts)
111
-
112
-
113
- if best_script:
114
- logger.info(f"Found Script: {best_script}")
115
- return best_script
116
- else:
117
- logger.warning("No similar script found.")
118
-
119
-
120
- def run_agent_and_hook(pname, agent_script):
121
- command = f'agent --script=\"{agent_script}\" --pname={pname}'
122
- logger.info("Running and Hooking Agent!")
123
- try:
124
- dos_process = subprocess.Popen(command, shell=True)
125
- dos_process.wait() # Wait for the process to complete
126
- logger.info("Agent script finished or closed.")
127
- except Exception as e:
128
- logger.error(f"Error occurred while running agent script: {e}")
129
-
130
- keep_running = False
131
-
132
-
133
- # def run_command(command, shell=False, input=None, capture_output=False, timeout=None, check=False, **kwargs):
134
- # # Use shell=True if the OS is Linux, otherwise shell=False
135
- # if is_linux():
136
- # return subprocess.run(command, shell=True, input=input, capture_output=capture_output, timeout=timeout,
137
- # check=check, **kwargs)
138
- # else:
139
- # return subprocess.run(command, shell=shell, input=input, capture_output=capture_output, timeout=timeout,
140
- # check=check, **kwargs)
141
- def remove_html_and_cloze_tags(text):
142
- text = re.sub(r'<.*?>', '', re.sub(r'{{c\d+::(.*?)(::.*?)?}}', r'\1', text))
143
- return text
144
-
145
-
146
- def combine_dialogue(dialogue_lines, new_lines=None):
147
- if not dialogue_lines: # Handle empty input
148
- return []
149
-
150
- if new_lines is None:
151
- new_lines = []
152
-
153
- if len(dialogue_lines) == 1 and '「' not in dialogue_lines[0]:
154
- new_lines.append(dialogue_lines[0])
155
- return new_lines
156
-
157
- character_name = dialogue_lines[0].split("「")[0]
158
- text = character_name + "「"
159
-
160
- for i, line in enumerate(dialogue_lines):
161
- if not line.startswith(character_name + "「"):
162
- text = text + "」" + get_config().advanced.multi_line_line_break
163
- new_lines.append(text)
164
- new_lines.extend(combine_dialogue(dialogue_lines[i:]))
165
- break
166
- else:
167
- text += (get_config().advanced.multi_line_line_break if i > 0 else "") + line.split("「")[1].rstrip("」") + ""
168
- else:
169
- text = text + "」"
170
- new_lines.append(text)
171
-
172
- return new_lines
173
-
174
- def wait_for_stable_file(file_path, timeout=10, check_interval=0.1):
175
- elapsed_time = 0
176
- last_size = -1
177
-
178
- while elapsed_time < timeout:
179
- try:
180
- current_size = os.path.getsize(file_path)
181
- if current_size == last_size:
182
- try:
183
- with open(file_path, 'rb') as f:
184
- return True
185
- except Exception as e:
186
- time.sleep(check_interval)
187
- elapsed_time += check_interval
188
- last_size = current_size
189
- time.sleep(check_interval)
190
- elapsed_time += check_interval
191
- except Exception as e:
192
- logger.warning(f"Error checking file size, will still try updating Anki Card!: {e}")
193
- return False
194
- logger.warning("File size did not stabilize within the timeout period. Continuing...")
195
- return False
196
-
197
-
198
- def import_vad_models():
199
- silero_trim, whisper_helper, vosk_helper = None, None, None
200
- if get_config().vad.is_silero():
201
- from GameSentenceMiner.vad import silero_trim
202
- if get_config().vad.is_whisper():
203
- from GameSentenceMiner.vad import whisper_helper
204
- if get_config().vad.is_vosk():
205
- from GameSentenceMiner.vad import vosk_helper
206
- return silero_trim, whisper_helper, vosk_helper
207
-
208
-
209
- def isascii(s: str):
210
- try:
211
- return s.isascii()
212
- except:
213
- try:
214
- s.encode("ascii")
215
- return True
216
- except:
217
- return False
218
-
219
- def do_text_replacements(text, replacements_json):
220
- if not text:
221
- return text
222
-
223
- replacements = {}
224
- if os.path.exists(replacements_json):
225
- with open(replacements_json, 'r', encoding='utf-8') as f:
226
- replacements.update(json.load(f))
227
-
228
- if replacements.get("enabled", False):
229
- orig_text = text
230
- filters = replacements.get("args", {}).get("replacements", {})
231
- for fil, replacement in filters.items():
232
- if not fil:
233
- continue
234
- if fil.startswith("re:"):
235
- pattern = fil[3:]
236
- try:
237
- text = re.sub(pattern, replacement, text)
238
- except Exception:
239
- logger.error(f"Invalid regex pattern: {pattern}")
240
- continue
241
- if isascii(fil):
242
- text = re.sub(r"\b{}\b".format(re.escape(fil)), replacement, text)
243
- else:
244
- text = text.replace(fil, replacement)
245
- if text != orig_text:
246
- logger.info(f"Text replaced: '{orig_text}' -> '{text}' using replacements.")
247
- return text
248
-
249
-
250
- TEXT_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'text_replacements.json')
251
- OCR_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'ocr_replacements.json')
252
- os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)
253
-
254
- # if not os.path.exists(OCR_REPLACEMENTS_FILE):
255
- # url = "https://raw.githubusercontent.com/bpwhelan/GameSentenceMiner/refs/heads/main/electron-src/assets/ocr_replacements.json"
256
- # try:
257
- # with urllib.request.urlopen(url) as response:
258
- # data = response.read().decode('utf-8')
259
- # with open(OCR_REPLACEMENTS_FILE, 'w', encoding='utf-8') as f:
260
- # f.write(data)
261
- # except Exception as e:
262
- # logger.error(f"Failed to fetch JSON from {url}: {e}")
GameSentenceMiner/vad/groq_trim.py DELETED
@@ -1,82 +0,0 @@
1
- import os
2
- import tempfile
3
- import time
4
-
5
- from groq import Groq
6
-
7
- # Assuming these are available from GameSentenceMiner
8
- from GameSentenceMiner import configuration, ffmpeg
9
- from GameSentenceMiner.configuration import get_config, logger, GROQ # Import specific functions/objects
10
- from GameSentenceMiner.vad.result import VADResult
11
- from GameSentenceMiner.vad.vad_utils import get_audio_length
12
-
13
- # Initialize Groq Client
14
- client = Groq(api_key=get_config().ai.groq_api_key)
15
-
16
- def detect_voice_with_groq(input_audio_path):
17
- """
18
- Detects voice activity and extracts speech timestamps using the Groq Whisper API.
19
- """
20
- try:
21
- with open(input_audio_path, "rb") as file:
22
- transcription = client.audio.transcriptions.create(
23
- file=(os.path.basename(input_audio_path), file.read()),
24
- model="whisper-large-v3-turbo",
25
- response_format="verbose_json",
26
- language=get_config().vad.language,
27
- temperature=0.0,
28
- timestamp_granularities=["segment"],
29
- prompt=f"Start detecting speech from the first spoken word. If there is music or background noise, ignore it completely. Be very careful to not hallucinate on silence. If the transcription is anything but language:{get_config().vad.language}, ignore it completely. If the end of the audio seems like the start of a new sentence, ignore it completely.",
30
- )
31
-
32
- logger.debug(transcription)
33
-
34
- # print(transcription)
35
-
36
- speech_segments = transcription.segments if hasattr(transcription, 'segments') else []
37
- # print(f"Groq speech segments: {speech_segments}")
38
-
39
- audio_length = get_audio_length(input_audio_path)
40
- # print(f"FFPROBE Length of input audio: {audio_length}")
41
-
42
- return speech_segments, audio_length
43
- except Exception as e:
44
- logger.error(f"Error detecting voice with Groq: {e}")
45
- return [], 0.0
46
-
47
- def process_audio_with_groq(input_audio, output_audio, game_line):
48
- """
49
- Processes an audio file by detecting voice activity using Groq Whisper API,
50
- trimming the audio based on detected speech timestamps, and saving the trimmed audio.
51
- """
52
- start = time.time()
53
- voice_activity, audio_length = detect_voice_with_groq(input_audio)
54
- logger.info(f"Processing time for Groq: {time.time() - start:.2f} seconds")
55
-
56
- if not voice_activity:
57
- logger.info(f"No voice activity detected in {input_audio}")
58
- return VADResult(False, 0, 0, GROQ)
59
-
60
- start_time = voice_activity[0]['start']
61
- end_time = voice_activity[-1]['end']
62
-
63
- # Logic to potentially use the second-to-last timestamp if a next game line is expected
64
- # and there's a significant pause before the very last segment.
65
- if (game_line and hasattr(game_line, 'next') and game_line.next and
66
- len(voice_activity) > 1 and
67
- (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
68
- end_time = voice_activity[-2]['end']
69
- logger.info("Using the second last timestamp for trimming due to game_line.next and significant pause.")
70
-
71
- # Apply offsets from configuration, ensuring times are within valid bounds
72
- final_start_time = max(0, start_time + get_config().vad.beginning_offset)
73
- final_end_time = min(audio_length, end_time + get_config().audio.end_offset)
74
-
75
- logger.debug(f"Trimming {input_audio} from {final_start_time:.2f}s to {final_end_time:.2f}s into {output_audio}")
76
-
77
- ffmpeg.trim_audio(input_audio, final_start_time, final_end_time, output_audio)
78
-
79
- return VADResult(True, final_start_time, final_end_time, GROQ)
80
-
81
- # Example usage (uncomment and modify with your actual file paths for testing)
82
- # process_audio_with_groq("tmp6x81cy27.opus", "tmp6x81cy27_trimmed_groq.opus", None)
GameSentenceMiner/vad/result.py DELETED
@@ -1,21 +0,0 @@
1
- from GameSentenceMiner.configuration import get_config
2
-
3
-
4
- class VADResult:
5
- def __init__(self, success: bool, start: float, end: float, model: str):
6
- self.success = success
7
- self.start = start
8
- self.end = end
9
- self.model = model
10
-
11
- def __repr__(self):
12
- return f"VADResult(success={self.success}, start={self.start}, end={self.end}, model={self.model})"
13
-
14
- def trim_successful_string(self):
15
- if self.success:
16
- if get_config().vad.trim_beginning:
17
- return f"Trimmed audio from {self.start:.2f} to {self.end:.2f} seconds using {self.model}."
18
- else:
19
- return f"Trimmed end of audio to {self.end:.2f} seconds using {self.model}."
20
- else:
21
- return f"Failed to trim audio using {self.model}."
GameSentenceMiner/vad/silero_trim.py DELETED
@@ -1,52 +0,0 @@
1
- import tempfile
2
-
3
- from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
4
-
5
- from GameSentenceMiner import configuration, ffmpeg
6
- from GameSentenceMiner.configuration import *
7
- from GameSentenceMiner.vad.result import VADResult
8
- from GameSentenceMiner.vad.vad_utils import get_audio_length
9
-
10
- # Silero VAD setup
11
- vad_model = load_silero_vad()
12
-
13
-
14
- # Use Silero to detect voice activity with timestamps in the audio
15
- def detect_voice_with_silero(input_audio):
16
- # Convert the audio to 16kHz mono WAV
17
- temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
18
- ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
19
-
20
- # Load the audio and detect speech timestamps
21
- wav = read_audio(temp_wav)
22
- speech_timestamps = get_speech_timestamps(wav, vad_model, return_seconds=True)
23
-
24
- logger.debug(speech_timestamps)
25
-
26
- # Return the speech timestamps (start and end in seconds)
27
- return speech_timestamps, len(wav) / 16000
28
-
29
-
30
- # Example usage of Silero with trimming
31
- def process_audio_with_silero(input_audio, output_audio, game_line):
32
- voice_activity, audio_length = detect_voice_with_silero(input_audio)
33
-
34
- if not voice_activity:
35
- return VADResult(False, 0, 0, SILERO)
36
-
37
- # Trim based on the first and last speech detected
38
- start_time = voice_activity[0]['start'] if voice_activity else 0
39
- if game_line and game_line.next and len(voice_activity) > 1 and 0 > audio_length - voice_activity[-1]['start'] + get_config().audio.beginning_offset:
40
- # and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
41
- end_time = voice_activity[-2]['end']
42
- logger.info("Using the second last timestamp for trimming")
43
- else:
44
- end_time = voice_activity[-1]['end'] if voice_activity else 0
45
-
46
- # Trim the audio using FFmpeg
47
- ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
48
- return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, SILERO)
49
-
50
-
51
- # process_audio_with_silero("tmp6x81cy27.opus", "tmp6x81cy27_trimmed.opus", None)
52
- # print(detect_voice_with_silero("tmp6x81cy27.opus"))
GameSentenceMiner/vad/vad_utils.py DELETED
@@ -1,13 +0,0 @@
1
- import subprocess
2
-
3
- from GameSentenceMiner.ffmpeg import get_ffprobe_path
4
-
5
-
6
- def get_audio_length(path):
7
- result = subprocess.run(
8
- [get_ffprobe_path(), "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", path],
9
- stdout=subprocess.PIPE,
10
- stderr=subprocess.PIPE,
11
- text=True
12
- )
13
- return float(result.stdout.strip())
GameSentenceMiner/vad/vosk_helper.py DELETED
@@ -1,158 +0,0 @@
1
- import tarfile
2
- import tempfile
3
- import zipfile
4
-
5
- import numpy as np
6
- import requests
7
- import soundfile as sf
8
- import vosk
9
-
10
- from GameSentenceMiner import configuration, ffmpeg
11
- from GameSentenceMiner.configuration import *
12
- from GameSentenceMiner.vad.result import VADResult
13
-
14
- ffmpeg_base_command_list = ["ffmpeg", "-hide_banner", "-loglevel", "error"]
15
- vosk.SetLogLevel(-1)
16
- vosk_model_path = ''
17
- vosk_model = None
18
-
19
-
20
- # Function to download and cache the Vosk model
21
- def download_and_cache_vosk_model(model_dir="vosk_model_cache"):
22
- # Ensure the cache directory exists
23
- if not os.path.exists(os.path.join(get_app_directory(), model_dir)):
24
- os.makedirs(os.path.join(get_app_directory(), model_dir))
25
-
26
- # Extract the model name from the URL
27
- model_filename = get_config().vad.vosk_url.split("/")[-1]
28
- model_path = os.path.join(get_app_directory(), model_dir, model_filename)
29
-
30
- # If the model is already downloaded, skip the download
31
- if not os.path.exists(model_path):
32
- logger.info(
33
- f"Downloading the Vosk model from {get_config().vad.vosk_url}... This will take a while if using large model, ~1G")
34
- response = requests.get(get_config().vad.vosk_url, stream=True)
35
- with open(model_path, "wb") as file:
36
- for chunk in response.iter_content(chunk_size=8192):
37
- if chunk:
38
- file.write(chunk)
39
- logger.info("Download complete.")
40
-
41
- # Extract the model if it's a zip or tar file
42
- model_extract_path = os.path.join(get_app_directory(), model_dir, "vosk_model")
43
- if not os.path.exists(model_extract_path):
44
- logger.info("Extracting the Vosk model...")
45
- if model_filename.endswith(".zip"):
46
- with zipfile.ZipFile(model_path, "r") as zip_ref:
47
- zip_ref.extractall(model_extract_path)
48
- elif model_filename.endswith(".tar.gz"):
49
- with tarfile.open(model_path, "r:gz") as tar_ref:
50
- tar_ref.extractall(model_extract_path)
51
- else:
52
- logger.info("Unknown archive format. Model extraction skipped.")
53
- logger.info(f"Model extracted to {model_extract_path}.")
54
- else:
55
- logger.info(f"Model already extracted at {model_extract_path}.")
56
-
57
- # Return the path to the actual model folder inside the extraction directory
58
- extracted_folders = os.listdir(model_extract_path)
59
- if extracted_folders:
60
- actual_model_folder = os.path.join(model_extract_path,
61
- extracted_folders[0]) # Assuming the first folder is the model
62
- return actual_model_folder
63
- else:
64
- return model_extract_path # In case there's no subfolder, return the extraction path directly
65
-
66
-
67
- # Use Vosk to detect voice activity with timestamps in the audio
68
- def detect_voice_with_vosk(input_audio):
69
- global vosk_model_path, vosk_model
70
- # Convert the audio to 16kHz mono WAV
71
- temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
72
- ffmpeg.convert_audio_to_wav(input_audio, temp_wav)
73
-
74
- if not vosk_model_path or not vosk_model:
75
- vosk_model_path = download_and_cache_vosk_model()
76
- vosk_model = vosk.Model(vosk_model_path)
77
-
78
- # Open the audio file
79
- with sf.SoundFile(temp_wav) as audio_file:
80
- recognizer = vosk.KaldiRecognizer(vosk_model, audio_file.samplerate)
81
- voice_activity = []
82
- total_duration = len(audio_file) / audio_file.samplerate # Get total duration in seconds
83
-
84
- recognizer.SetWords(True)
85
- # recognizer.SetPartialWords(True)
86
-
87
- # Process audio in chunks
88
- while True:
89
- data = audio_file.buffer_read(4000, dtype='int16')
90
- if len(data) == 0:
91
- break
92
-
93
- # Convert buffer to bytes using NumPy
94
- data_bytes = np.frombuffer(data, dtype='int16').tobytes()
95
-
96
- if recognizer.AcceptWaveform(data_bytes):
97
- pass
98
-
99
- final_result = json.loads(recognizer.FinalResult())
100
- if 'result' in final_result:
101
- should_use = False
102
- unique_words = set()
103
- for word in final_result['result']:
104
- if word['conf'] >= .90:
105
- logger.debug(word)
106
- should_use = True
107
- unique_words.add(word['word'])
108
- if len(unique_words) == 1 or all(item in ['えー', 'ん'] for item in unique_words):
109
- should_use = False
110
-
111
- if not should_use:
112
- return None, 0
113
-
114
- for word in final_result['result']:
115
- voice_activity.append({
116
- 'text': word['word'],
117
- 'start': word['start'],
118
- 'end': word['end']
119
- })
120
-
121
- # Return the detected voice activity and the total duration
122
- return voice_activity, total_duration
123
-
124
-
125
- # Example usage of Vosk with trimming
126
- def process_audio_with_vosk(input_audio, output_audio, game_line):
127
- voice_activity, total_duration = detect_voice_with_vosk(input_audio)
128
-
129
- if not voice_activity:
130
- logger.info("No voice activity detected in the audio.")
131
- return VADResult(False, 0, 0, VOSK)
132
-
133
- # Trim based on the first and last speech detected
134
- start_time = voice_activity[0]['start'] if voice_activity else 0
135
- # if (game_line.next and len(voice_activity) > 1
136
- # and voice_activity[-1]['start'] - get_config().audio.beginning_offset > len(input_audio) / 16000
137
- # and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 5.0):
138
- # end_time = voice_activity[-2]['end']
139
- # logger.info("Using the second last timestamp for trimming")
140
- # else:
141
- end_time = voice_activity[-1]['end'] if voice_activity else 0
142
-
143
- if get_config().vad.trim_beginning:
144
- logger.info(f"VAD Trimmed Beginning of Audio to {start_time}")
145
-
146
- # Print detected speech details with timestamps
147
- logger.info(f"VAD Trimmed End of Audio to {end_time} seconds:")
148
-
149
- # Trim the audio using FFmpeg
150
- ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
151
- return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, VOSK)
152
-
153
-
154
- def get_vosk_model():
155
- global vosk_model_path, vosk_model
156
- vosk_model_path = download_and_cache_vosk_model()
157
- vosk_model = vosk.Model(vosk_model_path)
158
- logger.info(f"Using Vosk model from {vosk_model_path}")