ailia-speech 1.3.0.3__tar.gz → 1.3.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ailia-speech might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ailia_speech
3
- Version: 1.3.0.3
3
+ Version: 1.3.0.5
4
4
  Summary: ailia AI Speech
5
5
  Home-page: https://ailia.jp/
6
6
  Author: ax Inc.
@@ -41,7 +41,6 @@ pip3 install ./
41
41
  ## Usage
42
42
 
43
43
  ```python
44
- import ailia
45
44
  import ailia_speech
46
45
 
47
46
  import librosa
@@ -50,19 +49,20 @@ import os
50
49
  import urllib.request
51
50
 
52
51
  # Load target audio
53
- ref_file_path = "demo.wav"
54
- if not os.path.exists(ref_file_path):
52
+ input_file_path = "demo.wav"
53
+ if not os.path.exists(input_file_path):
55
54
  urllib.request.urlretrieve(
56
55
  "https://github.com/axinc-ai/ailia-models/raw/refs/heads/master/audio_processing/whisper/demo.wav",
57
56
  "demo.wav"
58
57
  )
59
- audio_waveform, sampling_rate = librosa.load(ref_file_path, mono=True)
58
+ audio_waveform, sampling_rate = librosa.load(input_file_path, mono=True)
60
59
 
61
60
  # Infer
62
61
  speech = ailia_speech.Whisper()
63
62
  speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL)
64
63
  recognized_text = speech.transcribe(audio_waveform, sampling_rate)
65
- print(recognized_text)
64
+ for text in recognized_text:
65
+ print(text)
66
66
  ```
67
67
 
68
68
  ## API specification
@@ -28,7 +28,6 @@ pip3 install ./
28
28
  ## Usage
29
29
 
30
30
  ```python
31
- import ailia
32
31
  import ailia_speech
33
32
 
34
33
  import librosa
@@ -37,19 +36,20 @@ import os
37
36
  import urllib.request
38
37
 
39
38
  # Load target audio
40
- ref_file_path = "demo.wav"
41
- if not os.path.exists(ref_file_path):
39
+ input_file_path = "demo.wav"
40
+ if not os.path.exists(input_file_path):
42
41
  urllib.request.urlretrieve(
43
42
  "https://github.com/axinc-ai/ailia-models/raw/refs/heads/master/audio_processing/whisper/demo.wav",
44
43
  "demo.wav"
45
44
  )
46
- audio_waveform, sampling_rate = librosa.load(ref_file_path, mono=True)
45
+ audio_waveform, sampling_rate = librosa.load(input_file_path, mono=True)
47
46
 
48
47
  # Infer
49
48
  speech = ailia_speech.Whisper()
50
49
  speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL)
51
50
  recognized_text = speech.transcribe(audio_waveform, sampling_rate)
52
- print(recognized_text)
51
+ for text in recognized_text:
52
+ print(text)
53
53
  ```
54
54
 
55
55
  ## API specification
@@ -475,19 +475,24 @@ class Whisper(AiliaSpeechModel):
475
475
 
476
476
  self._check(dll.ailiaSpeechPushInputData(self._instance, audio_waveform, channels, audio_waveform.shape[0] // channels, sampling_rate))
477
477
  self._check(dll.ailiaSpeechFinalizeInputData(self._instance))
478
- self._check(dll.ailiaSpeechTranscribe(self._instance))
479
478
 
480
- count = ctypes.c_uint(0)
481
- self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
482
- results = []
483
- for i in range(count.value):
484
- text = AILIASpeechText()
485
- self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
486
- results.append({"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence})
479
+ while True:
480
+ complete = ctypes.c_uint(0)
481
+ self._check(dll.ailiaSpeechComplete(self._instance, ctypes.byref(complete)))
482
+ if complete.value == 1:
483
+ break
487
484
 
488
- self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
485
+ self._check(dll.ailiaSpeechTranscribe(self._instance))
486
+
487
+ count = ctypes.c_uint(0)
488
+ self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
489
+ results = []
490
+ for i in range(count.value):
491
+ text = AILIASpeechText()
492
+ self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
493
+ yield {"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence}
489
494
 
490
- return results
495
+ self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
491
496
 
492
497
  def __del__(self):
493
498
  if self._instance:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ailia_speech
3
- Version: 1.3.0.3
3
+ Version: 1.3.0.5
4
4
  Summary: ailia AI Speech
5
5
  Home-page: https://ailia.jp/
6
6
  Author: ax Inc.
@@ -41,7 +41,6 @@ pip3 install ./
41
41
  ## Usage
42
42
 
43
43
  ```python
44
- import ailia
45
44
  import ailia_speech
46
45
 
47
46
  import librosa
@@ -50,19 +49,20 @@ import os
50
49
  import urllib.request
51
50
 
52
51
  # Load target audio
53
- ref_file_path = "demo.wav"
54
- if not os.path.exists(ref_file_path):
52
+ input_file_path = "demo.wav"
53
+ if not os.path.exists(input_file_path):
55
54
  urllib.request.urlretrieve(
56
55
  "https://github.com/axinc-ai/ailia-models/raw/refs/heads/master/audio_processing/whisper/demo.wav",
57
56
  "demo.wav"
58
57
  )
59
- audio_waveform, sampling_rate = librosa.load(ref_file_path, mono=True)
58
+ audio_waveform, sampling_rate = librosa.load(input_file_path, mono=True)
60
59
 
61
60
  # Infer
62
61
  speech = ailia_speech.Whisper()
63
62
  speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL)
64
63
  recognized_text = speech.transcribe(audio_waveform, sampling_rate)
65
- print(recognized_text)
64
+ for text in recognized_text:
65
+ print(text)
66
66
  ```
67
67
 
68
68
  ## API specification
@@ -54,7 +54,7 @@ if __name__ == "__main__":
54
54
  setup(
55
55
  name="ailia_speech",
56
56
  scripts=scripts,
57
- version="1.3.0.3",
57
+ version="1.3.0.5",
58
58
  install_requires=[
59
59
  "ailia",
60
60
  "ailia_tokenizer",
File without changes