GameSentenceMiner 2.8.54__py3-none-any.whl → 2.9.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import tempfile
2
2
  import warnings
3
3
 
4
4
  import stable_whisper as whisper
5
+ import torch
5
6
  from stable_whisper import WhisperResult
6
7
 
7
8
  from GameSentenceMiner import configuration, ffmpeg
@@ -16,10 +17,9 @@ whisper_model = None
16
17
  def load_whisper_model():
17
18
  global whisper_model
18
19
  if whisper_model is None:
19
- logger.info(f"Loading Whisper model '{get_config().vad.whisper_model}'... This may take a while.")
20
20
  with warnings.catch_warnings(action="ignore"):
21
21
  whisper_model = whisper.load_model(get_config().vad.whisper_model)
22
- logger.info("Whisper model loaded.")
22
+ logger.info(f"Whisper model '{get_config().vad.whisper_model}' loaded.")
23
23
 
24
24
 
25
25
  # Use Whisper to detect voice activity with timestamps in the audio
@@ -35,8 +35,7 @@ def detect_voice_with_whisper(input_audio):
35
35
 
36
36
  # Transcribe the audio using Whisper
37
37
  with warnings.catch_warnings(action="ignore"):
38
- result: WhisperResult = whisper_model.transcribe(temp_wav, vad=True, language='ja')
39
-
38
+ result: WhisperResult = whisper_model.transcribe(temp_wav, vad=True, language=get_config().vad.language, temperature=0.0)
40
39
  voice_activity = []
41
40
 
42
41
  logger.debug(result.to_dict())
@@ -75,7 +74,7 @@ def process_audio_with_whisper(input_audio, output_audio, game_line):
75
74
 
76
75
  if not voice_activity:
77
76
  logger.info("No voice activity detected in the audio.")
78
- return VADResult(False, 0, 0)
77
+ return VADResult(False, 0, 0, WHISPER)
79
78
 
80
79
  # Trim based on the first and last speech detected
81
80
  start_time = voice_activity[0]['start'] if voice_activity else 0
@@ -95,10 +94,12 @@ def process_audio_with_whisper(input_audio, output_audio, game_line):
95
94
 
96
95
  # Trim the audio using FFmpeg
97
96
  ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
98
- return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset)
97
+ return VADResult(True, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, WHISPER)
99
98
 
100
99
 
101
100
  # Load Whisper model initially
102
101
  def initialize_whisper_model():
103
102
  load_whisper_model()
104
- logger.info(f"Using Whisper model '{get_config().vad.whisper_model}' for Japanese voice detection")
103
+
104
+ # initialize_whisper_model()
105
+ # process_audio_with_whisper("tmp6x81cy27.opus", "tmp6x81cy27_trimmed.opus", None)