GameSentenceMiner 2.9.3__py3-none-any.whl → 2.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/ai/ai_prompting.py +3 -3
- GameSentenceMiner/anki.py +17 -11
- GameSentenceMiner/assets/icon.png +0 -0
- GameSentenceMiner/assets/icon128.png +0 -0
- GameSentenceMiner/assets/icon256.png +0 -0
- GameSentenceMiner/assets/icon32.png +0 -0
- GameSentenceMiner/assets/icon512.png +0 -0
- GameSentenceMiner/assets/icon64.png +0 -0
- GameSentenceMiner/assets/pickaxe.png +0 -0
- GameSentenceMiner/config_gui.py +22 -7
- GameSentenceMiner/gametext.py +5 -5
- GameSentenceMiner/gsm.py +26 -67
- GameSentenceMiner/obs.py +7 -9
- GameSentenceMiner/ocr/owocr_area_selector.py +1 -1
- GameSentenceMiner/ocr/owocr_helper.py +30 -13
- GameSentenceMiner/owocr/owocr/ocr.py +0 -2
- GameSentenceMiner/owocr/owocr/run.py +1 -1
- GameSentenceMiner/{communication → util/communication}/__init__.py +1 -1
- GameSentenceMiner/{communication → util/communication}/send.py +1 -1
- GameSentenceMiner/{communication → util/communication}/websocket.py +2 -2
- GameSentenceMiner/{downloader → util/downloader}/download_tools.py +3 -3
- GameSentenceMiner/vad.py +344 -0
- GameSentenceMiner/web/texthooking_page.py +78 -55
- {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/METADATA +2 -3
- gamesentenceminer-2.9.5.dist-info/RECORD +57 -0
- GameSentenceMiner/configuration.py +0 -647
- GameSentenceMiner/electron_config.py +0 -315
- GameSentenceMiner/ffmpeg.py +0 -441
- GameSentenceMiner/model.py +0 -177
- GameSentenceMiner/notification.py +0 -105
- GameSentenceMiner/package.py +0 -39
- GameSentenceMiner/ss_selector.py +0 -121
- GameSentenceMiner/text_log.py +0 -186
- GameSentenceMiner/util.py +0 -262
- GameSentenceMiner/vad/groq_trim.py +0 -82
- GameSentenceMiner/vad/result.py +0 -21
- GameSentenceMiner/vad/silero_trim.py +0 -52
- GameSentenceMiner/vad/vad_utils.py +0 -13
- GameSentenceMiner/vad/vosk_helper.py +0 -158
- GameSentenceMiner/vad/whisper_helper.py +0 -105
- gamesentenceminer-2.9.3.dist-info/RECORD +0 -64
- /GameSentenceMiner/{downloader → assets}/__init__.py +0 -0
- /GameSentenceMiner/{downloader → util/downloader}/Untitled_json.py +0 -0
- /GameSentenceMiner/{vad → util/downloader}/__init__.py +0 -0
- /GameSentenceMiner/{downloader → util/downloader}/oneocr_dl.py +0 -0
- {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.9.3.dist-info → gamesentenceminer-2.9.5.dist-info}/top_level.txt +0 -0
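
Judging by the rename entries above, the standalone communication and downloader helpers move under GameSentenceMiner.util, and the GameSentenceMiner/vad package appears to be replaced by a single GameSentenceMiner/vad.py module (+344 lines). A minimal sketch of how downstream imports would likely shift between the two versions — the 2.9.5 paths are inferred from the rename entries above and are not verified against the new wheel:

# Hypothetical before/after imports, inferred from the rename entries in this diff.
# 2.9.3
# from GameSentenceMiner.communication import send, websocket
# from GameSentenceMiner.downloader import download_tools
# from GameSentenceMiner.vad import silero_trim, vosk_helper, whisper_helper
# 2.9.5 (assumed)
from GameSentenceMiner.util.communication import send, websocket
from GameSentenceMiner.util.downloader import download_tools
from GameSentenceMiner import vad  # former vad/* helpers consolidated into one module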
GameSentenceMiner/util.py
DELETED
@@ -1,262 +0,0 @@
import json
import os
import random
import re
import string
import subprocess
import threading
import time
from datetime import datetime

from rapidfuzz import process

from GameSentenceMiner.configuration import logger, get_config, get_app_directory

SCRIPTS_DIR = r"E:\Japanese Stuff\agent-v0.1.4-win32-x64\data\scripts"

# Global variables to control script execution
keep_running = True
lock = threading.Lock()
last_mined_line = None

def get_last_mined_line():
    return last_mined_line

def set_last_mined_line(line):
    global last_mined_line
    last_mined_line = line

def run_new_thread(func):
    thread = threading.Thread(target=func, daemon=True)
    thread.start()
    return thread


def make_unique_file_name(path):
    split = path.rsplit('.', 1)
    filename = split[0]
    extension = split[1]

    current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')[:-3]

    return f"{filename}_{current_time}.{extension}"

def sanitize_filename(filename):
    return re.sub(r'[ <>:"/\\|?*\x00-\x1F]', '', filename)


def get_random_digit_string():
    return ''.join(random.choice(string.digits) for i in range(9))


def timedelta_to_ffmpeg_friendly_format(td_obj):
    total_seconds = td_obj.total_seconds()
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds)


def get_file_modification_time(file_path):
    mod_time_epoch = os.path.getmtime(file_path)
    mod_time = datetime.fromtimestamp(mod_time_epoch)
    return mod_time


def get_process_id_by_title(game_title):
    powershell_command = f"Get-Process | Where-Object {{$_.MainWindowTitle -like '*{game_title}*'}} | Select-Object -First 1 -ExpandProperty Id"
    process_id = subprocess.check_output(["powershell", "-Command", powershell_command], text=True).strip()
    logger.info(f"Process ID for {game_title}: {process_id}")
    return process_id


def get_script_files(directory):
    script_files = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".js"):  # Assuming the scripts are .js files
                script_files.append(os.path.join(root, file))
    return script_files


def filter_steam_scripts(scripts):
    return [script for script in scripts if "PC_Steam" in os.path.basename(script)]


def extract_game_name(script_path):
    # Remove directory and file extension to get the name part
    script_name = os.path.basename(script_path)
    game_name = script_name.replace("PC_Steam_", "").replace(".js", "")
    return game_name.replace("_", " ").replace(".", " ")


def find_most_similar_script(game_title, steam_scripts):
    # Create a list of game names from the script paths
    game_names = [extract_game_name(script) for script in steam_scripts]

    # Use rapidfuzz to find the closest match
    best_match = process.extractOne(game_title, game_names)

    if best_match:
        matched_game_name, confidence_score, index = best_match
        return steam_scripts[index], matched_game_name, confidence_score
    return None, None, None


def find_script_for_game(game_title):
    script_files = get_script_files(SCRIPTS_DIR)

    steam_scripts = filter_steam_scripts(script_files)

    best_script, matched_game_name, confidence = find_most_similar_script(game_title, steam_scripts)


    if best_script:
        logger.info(f"Found Script: {best_script}")
        return best_script
    else:
        logger.warning("No similar script found.")


def run_agent_and_hook(pname, agent_script):
    command = f'agent --script=\"{agent_script}\" --pname={pname}'
    logger.info("Running and Hooking Agent!")
    try:
        dos_process = subprocess.Popen(command, shell=True)
        dos_process.wait()  # Wait for the process to complete
        logger.info("Agent script finished or closed.")
    except Exception as e:
        logger.error(f"Error occurred while running agent script: {e}")

    keep_running = False


# def run_command(command, shell=False, input=None, capture_output=False, timeout=None, check=False, **kwargs):
#     # Use shell=True if the OS is Linux, otherwise shell=False
#     if is_linux():
#         return subprocess.run(command, shell=True, input=input, capture_output=capture_output, timeout=timeout,
#                               check=check, **kwargs)
#     else:
#         return subprocess.run(command, shell=shell, input=input, capture_output=capture_output, timeout=timeout,
#                               check=check, **kwargs)
def remove_html_and_cloze_tags(text):
    text = re.sub(r'<.*?>', '', re.sub(r'{{c\d+::(.*?)(::.*?)?}}', r'\1', text))
    return text


def combine_dialogue(dialogue_lines, new_lines=None):
    if not dialogue_lines:  # Handle empty input
        return []

    if new_lines is None:
        new_lines = []

    if len(dialogue_lines) == 1 and '「' not in dialogue_lines[0]:
        new_lines.append(dialogue_lines[0])
        return new_lines

    character_name = dialogue_lines[0].split("「")[0]
    text = character_name + "「"

    for i, line in enumerate(dialogue_lines):
        if not line.startswith(character_name + "「"):
            text = text + "」" + get_config().advanced.multi_line_line_break
            new_lines.append(text)
            new_lines.extend(combine_dialogue(dialogue_lines[i:]))
            break
        else:
            text += (get_config().advanced.multi_line_line_break if i > 0 else "") + line.split("「")[1].rstrip("」") + ""
    else:
        text = text + "」"
        new_lines.append(text)

    return new_lines

def wait_for_stable_file(file_path, timeout=10, check_interval=0.1):
    elapsed_time = 0
    last_size = -1

    while elapsed_time < timeout:
        try:
            current_size = os.path.getsize(file_path)
            if current_size == last_size:
                try:
                    with open(file_path, 'rb') as f:
                        return True
                except Exception as e:
                    time.sleep(check_interval)
                    elapsed_time += check_interval
            last_size = current_size
            time.sleep(check_interval)
            elapsed_time += check_interval
        except Exception as e:
            logger.warning(f"Error checking file size, will still try updating Anki Card!: {e}")
            return False
    logger.warning("File size did not stabilize within the timeout period. Continuing...")
    return False


def import_vad_models():
    silero_trim, whisper_helper, vosk_helper = None, None, None
    if get_config().vad.is_silero():
        from GameSentenceMiner.vad import silero_trim
    if get_config().vad.is_whisper():
        from GameSentenceMiner.vad import whisper_helper
    if get_config().vad.is_vosk():
        from GameSentenceMiner.vad import vosk_helper
    return silero_trim, whisper_helper, vosk_helper


def isascii(s: str):
    try:
        return s.isascii()
    except:
        try:
            s.encode("ascii")
            return True
        except:
            return False

def do_text_replacements(text, replacements_json):
    if not text:
        return text

    replacements = {}
    if os.path.exists(replacements_json):
        with open(replacements_json, 'r', encoding='utf-8') as f:
            replacements.update(json.load(f))

    if replacements.get("enabled", False):
        orig_text = text
        filters = replacements.get("args", {}).get("replacements", {})
        for fil, replacement in filters.items():
            if not fil:
                continue
            if fil.startswith("re:"):
                pattern = fil[3:]
                try:
                    text = re.sub(pattern, replacement, text)
                except Exception:
                    logger.error(f"Invalid regex pattern: {pattern}")
                continue
            if isascii(fil):
                text = re.sub(r"\b{}\b".format(re.escape(fil)), replacement, text)
            else:
                text = text.replace(fil, replacement)
        if text != orig_text:
            logger.info(f"Text replaced: '{orig_text}' -> '{text}' using replacements.")
    return text


TEXT_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'text_replacements.json')
OCR_REPLACEMENTS_FILE = os.path.join(get_app_directory(), 'config', 'ocr_replacements.json')
os.makedirs(os.path.dirname(TEXT_REPLACEMENTS_FILE), exist_ok=True)

# if not os.path.exists(OCR_REPLACEMENTS_FILE):
#     url = "https://raw.githubusercontent.com/bpwhelan/GameSentenceMiner/refs/heads/main/electron-src/assets/ocr_replacements.json"
#     try:
#         with urllib.request.urlopen(url) as response:
#             data = response.read().decode('utf-8')
#         with open(OCR_REPLACEMENTS_FILE, 'w', encoding='utf-8') as f:
#             f.write(data)
#     except Exception as e:
#         logger.error(f"Failed to fetch JSON from {url}: {e}")
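
For context on the replacement-rule format the deleted do_text_replacements helper above consumes: a minimal sketch of a config/text_replacements.json payload. Only the key structure ("enabled", "args" → "replacements") comes from the reads in that function; the individual rules are made-up examples.

# Hypothetical contents of config/text_replacements.json, shown as a Python literal.
example_replacements = {
    "enabled": True,
    "args": {
        "replacements": {
            "re:\\s+": " ",    # keys starting with "re:" are applied as regex substitutions
            "Teh": "The",      # ASCII keys are replaced on word boundaries (\b...\b)
            "旧かな": "新かな",  # non-ASCII keys fall back to plain str.replace
        }
    }
}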
GameSentenceMiner/vad/groq_trim.py
DELETED
@@ -1,82 +0,0 @@
import os
import tempfile
import time

from groq import Groq

# Assuming these are available from GameSentenceMiner
from GameSentenceMiner import configuration, ffmpeg
from GameSentenceMiner.configuration import get_config, logger, GROQ  # Import specific functions/objects
from GameSentenceMiner.vad.result import VADResult
from GameSentenceMiner.vad.vad_utils import get_audio_length

# Initialize Groq Client
client = Groq(api_key=get_config().ai.groq_api_key)

def detect_voice_with_groq(input_audio_path):
    """
    Detects voice activity and extracts speech timestamps using the Groq Whisper API.
    """
    try:
        with open(input_audio_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(input_audio_path), file.read()),
                model="whisper-large-v3-turbo",
                response_format="verbose_json",
                language=get_config().vad.language,
                temperature=0.0,
                timestamp_granularities=["segment"],
                prompt=f"Start detecting speech from the first spoken word. If there is music or background noise, ignore it completely. Be very careful to not hallucinate on silence. If the transcription is anything but language:{get_config().vad.language}, ignore it completely. If the end of the audio seems like the start of a new sentence, ignore it completely.",
            )

        logger.debug(transcription)

        # print(transcription)

        speech_segments = transcription.segments if hasattr(transcription, 'segments') else []
        # print(f"Groq speech segments: {speech_segments}")

        audio_length = get_audio_length(input_audio_path)
        # print(f"FFPROBE Length of input audio: {audio_length}")

        return speech_segments, audio_length
    except Exception as e:
        logger.error(f"Error detecting voice with Groq: {e}")
        return [], 0.0

def process_audio_with_groq(input_audio, output_audio, game_line):
    """
    Processes an audio file by detecting voice activity using Groq Whisper API,
    trimming the audio based on detected speech timestamps, and saving the trimmed audio.
    """
    start = time.time()
    voice_activity, audio_length = detect_voice_with_groq(input_audio)
    logger.info(f"Processing time for Groq: {time.time() - start:.2f} seconds")

    if not voice_activity:
        logger.info(f"No voice activity detected in {input_audio}")
        return VADResult(False, 0, 0, GROQ)

    start_time = voice_activity[0]['start']
    end_time = voice_activity[-1]['end']

    # Logic to potentially use the second-to-last timestamp if a next game line is expected
    # and there's a significant pause before the very last segment.
    if (game_line and hasattr(game_line, 'next') and game_line.next and
            len(voice_activity) > 1 and
            (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
        end_time = voice_activity[-2]['end']
        logger.info("Using the second last timestamp for trimming due to game_line.next and significant pause.")

    # Apply offsets from configuration, ensuring times are within valid bounds
    final_start_time = max(0, start_time + get_config().vad.beginning_offset)
    final_end_time = min(audio_length, end_time + get_config().audio.end_offset)

    logger.debug(f"Trimming {input_audio} from {final_start_time:.2f}s to {final_end_time:.2f}s into {output_audio}")

    ffmpeg.trim_audio(input_audio, final_start_time, final_end_time, output_audio)

    return VADResult(True, final_start_time, final_end_time, GROQ)

# Example usage (uncomment and modify with your actual file paths for testing)
# process_audio_with_groq("tmp6x81cy27.opus", "tmp6x81cy27_trimmed_groq.opus", None)
GameSentenceMiner/vad/result.py
DELETED
@@ -1,21 +0,0 @@
from GameSentenceMiner.configuration import get_config


class VADResult:
    def __init__(self, success: bool, start: float, end: float, model: str):
        self.success = success
        self.start = start
        self.end = end
        self.model = model

    def __repr__(self):
        return f"VADResult(success={self.success}, start={self.start}, end={self.end}, model={self.model})"

    def trim_successful_string(self):
        if self.success:
            if get_config().vad.trim_beginning:
                return f"Trimmed audio from {self.start:.2f} to {self.end:.2f} seconds using {self.model}."
            else:
                return f"Trimmed end of audio to {self.end:.2f} seconds using {self.model}."
        else:
            return f"Failed to trim audio using {self.model}."
GameSentenceMiner/vad/silero_trim.py
DELETED
@@ -1,52 +0,0 @@
import tempfile

from silero_vad import load_silero_vad, read_audio, get_speech_timestamps

from GameSentenceMiner import configuration, ffmpeg
from GameSentenceMiner.configuration import *
from GameSentenceMiner.vad.result import VADResult
from GameSentenceMiner.vad.vad_utils import get_audio_length

# Silero VAD setup
vad_model = load_silero_vad()


# Use Silero to detect voice activity with timestamps in the audio
def detect_voice_with_silero(input_audio):
    # Convert the audio to 16kHz mono WAV
    temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
    ffmpeg.convert_audio_to_wav(input_audio, temp_wav)

    # Load the audio and detect speech timestamps
    wav = read_audio(temp_wav)
    speech_timestamps = get_speech_timestamps(wav, vad_model, return_seconds=True)

    logger.debug(speech_timestamps)

    # Return the speech timestamps (start and end in seconds)
    return speech_timestamps, len(wav) / 16000


# Example usage of Silero with trimming
def process_audio_with_silero(input_audio, output_audio, game_line):
    voice_activity, audio_length = detect_voice_with_silero(input_audio)

    if not voice_activity:
        return VADResult(False, 0, 0, SILERO)

    # Trim based on the first and last speech detected
    start_time = voice_activity[0]['start'] if voice_activity else 0
    if game_line and game_line.next and len(voice_activity) > 1 and 0 > audio_length - voice_activity[-1]['start'] + get_config().audio.beginning_offset:
        # and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
        end_time = voice_activity[-2]['end']
        logger.info("Using the second last timestamp for trimming")
    else:
        end_time = voice_activity[-1]['end'] if voice_activity else 0

    # Trim the audio using FFmpeg
    ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
    return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, SILERO)


# process_audio_with_silero("tmp6x81cy27.opus", "tmp6x81cy27_trimmed.opus", None)
# print(detect_voice_with_silero("tmp6x81cy27.opus"))
GameSentenceMiner/vad/vad_utils.py
DELETED
@@ -1,13 +0,0 @@
import subprocess

from GameSentenceMiner.ffmpeg import get_ffprobe_path


def get_audio_length(path):
    result = subprocess.run(
        [get_ffprobe_path(), "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )
    return float(result.stdout.strip())
GameSentenceMiner/vad/vosk_helper.py
DELETED
@@ -1,158 +0,0 @@
import tarfile
import tempfile
import zipfile

import numpy as np
import requests
import soundfile as sf
import vosk

from GameSentenceMiner import configuration, ffmpeg
from GameSentenceMiner.configuration import *
from GameSentenceMiner.vad.result import VADResult

ffmpeg_base_command_list = ["ffmpeg", "-hide_banner", "-loglevel", "error"]
vosk.SetLogLevel(-1)
vosk_model_path = ''
vosk_model = None


# Function to download and cache the Vosk model
def download_and_cache_vosk_model(model_dir="vosk_model_cache"):
    # Ensure the cache directory exists
    if not os.path.exists(os.path.join(get_app_directory(), model_dir)):
        os.makedirs(os.path.join(get_app_directory(), model_dir))

    # Extract the model name from the URL
    model_filename = get_config().vad.vosk_url.split("/")[-1]
    model_path = os.path.join(get_app_directory(), model_dir, model_filename)

    # If the model is already downloaded, skip the download
    if not os.path.exists(model_path):
        logger.info(
            f"Downloading the Vosk model from {get_config().vad.vosk_url}... This will take a while if using large model, ~1G")
        response = requests.get(get_config().vad.vosk_url, stream=True)
        with open(model_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    file.write(chunk)
        logger.info("Download complete.")

    # Extract the model if it's a zip or tar file
    model_extract_path = os.path.join(get_app_directory(), model_dir, "vosk_model")
    if not os.path.exists(model_extract_path):
        logger.info("Extracting the Vosk model...")
        if model_filename.endswith(".zip"):
            with zipfile.ZipFile(model_path, "r") as zip_ref:
                zip_ref.extractall(model_extract_path)
        elif model_filename.endswith(".tar.gz"):
            with tarfile.open(model_path, "r:gz") as tar_ref:
                tar_ref.extractall(model_extract_path)
        else:
            logger.info("Unknown archive format. Model extraction skipped.")
        logger.info(f"Model extracted to {model_extract_path}.")
    else:
        logger.info(f"Model already extracted at {model_extract_path}.")

    # Return the path to the actual model folder inside the extraction directory
    extracted_folders = os.listdir(model_extract_path)
    if extracted_folders:
        actual_model_folder = os.path.join(model_extract_path,
                                           extracted_folders[0])  # Assuming the first folder is the model
        return actual_model_folder
    else:
        return model_extract_path  # In case there's no subfolder, return the extraction path directly


# Use Vosk to detect voice activity with timestamps in the audio
def detect_voice_with_vosk(input_audio):
    global vosk_model_path, vosk_model
    # Convert the audio to 16kHz mono WAV
    temp_wav = tempfile.NamedTemporaryFile(dir=configuration.get_temporary_directory(), suffix='.wav').name
    ffmpeg.convert_audio_to_wav(input_audio, temp_wav)

    if not vosk_model_path or not vosk_model:
        vosk_model_path = download_and_cache_vosk_model()
        vosk_model = vosk.Model(vosk_model_path)

    # Open the audio file
    with sf.SoundFile(temp_wav) as audio_file:
        recognizer = vosk.KaldiRecognizer(vosk_model, audio_file.samplerate)
        voice_activity = []
        total_duration = len(audio_file) / audio_file.samplerate  # Get total duration in seconds

        recognizer.SetWords(True)
        # recognizer.SetPartialWords(True)

        # Process audio in chunks
        while True:
            data = audio_file.buffer_read(4000, dtype='int16')
            if len(data) == 0:
                break

            # Convert buffer to bytes using NumPy
            data_bytes = np.frombuffer(data, dtype='int16').tobytes()

            if recognizer.AcceptWaveform(data_bytes):
                pass

        final_result = json.loads(recognizer.FinalResult())
        if 'result' in final_result:
            should_use = False
            unique_words = set()
            for word in final_result['result']:
                if word['conf'] >= .90:
                    logger.debug(word)
                    should_use = True
                    unique_words.add(word['word'])
            if len(unique_words) == 1 or all(item in ['えー', 'ん'] for item in unique_words):
                should_use = False

            if not should_use:
                return None, 0

            for word in final_result['result']:
                voice_activity.append({
                    'text': word['word'],
                    'start': word['start'],
                    'end': word['end']
                })

        # Return the detected voice activity and the total duration
        return voice_activity, total_duration


# Example usage of Vosk with trimming
def process_audio_with_vosk(input_audio, output_audio, game_line):
    voice_activity, total_duration = detect_voice_with_vosk(input_audio)

    if not voice_activity:
        logger.info("No voice activity detected in the audio.")
        return VADResult(False, 0, 0, VOSK)

    # Trim based on the first and last speech detected
    start_time = voice_activity[0]['start'] if voice_activity else 0
    # if (game_line.next and len(voice_activity) > 1
    #         and voice_activity[-1]['start'] - get_config().audio.beginning_offset > len(input_audio) / 16000
    #         and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 5.0):
    #     end_time = voice_activity[-2]['end']
    #     logger.info("Using the second last timestamp for trimming")
    # else:
    end_time = voice_activity[-1]['end'] if voice_activity else 0

    if get_config().vad.trim_beginning:
        logger.info(f"VAD Trimmed Beginning of Audio to {start_time}")

    # Print detected speech details with timestamps
    logger.info(f"VAD Trimmed End of Audio to {end_time} seconds:")

    # Trim the audio using FFmpeg
    ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
    return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, VOSK)


def get_vosk_model():
    global vosk_model_path, vosk_model
    vosk_model_path = download_and_cache_vosk_model()
    vosk_model = vosk.Model(vosk_model_path)
    logger.info(f"Using Vosk model from {vosk_model_path}")