ailia-speech 1.3.0.3__py3-none-any.whl → 1.3.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ailia-speech might be problematic. Click here for more details.

ailia_speech/__init__.py CHANGED
@@ -475,19 +475,24 @@ class Whisper(AiliaSpeechModel):
475
475
 
476
476
  self._check(dll.ailiaSpeechPushInputData(self._instance, audio_waveform, channels, audio_waveform.shape[0] // channels, sampling_rate))
477
477
  self._check(dll.ailiaSpeechFinalizeInputData(self._instance))
478
- self._check(dll.ailiaSpeechTranscribe(self._instance))
479
478
 
480
- count = ctypes.c_uint(0)
481
- self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
482
- results = []
483
- for i in range(count.value):
484
- text = AILIASpeechText()
485
- self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
486
- results.append({"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence})
479
+ while True:
480
+ complete = ctypes.c_uint(0)
481
+ self._check(dll.ailiaSpeechComplete(self._instance, ctypes.byref(complete)))
482
+ if complete.value == 1:
483
+ break
487
484
 
488
- self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
485
+ self._check(dll.ailiaSpeechTranscribe(self._instance))
486
+
487
+ count = ctypes.c_uint(0)
488
+ self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
489
+ results = []
490
+ for i in range(count.value):
491
+ text = AILIASpeechText()
492
+ self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
493
+ yield {"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence}
489
494
 
490
- return results
495
+ self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
491
496
 
492
497
  def __del__(self):
493
498
  if self._instance:
@@ -475,19 +475,24 @@ class Whisper(AiliaSpeechModel):
475
475
 
476
476
  self._check(dll.ailiaSpeechPushInputData(self._instance, audio_waveform, channels, audio_waveform.shape[0] // channels, sampling_rate))
477
477
  self._check(dll.ailiaSpeechFinalizeInputData(self._instance))
478
- self._check(dll.ailiaSpeechTranscribe(self._instance))
479
478
 
480
- count = ctypes.c_uint(0)
481
- self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
482
- results = []
483
- for i in range(count.value):
484
- text = AILIASpeechText()
485
- self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
486
- results.append({"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence})
479
+ while True:
480
+ complete = ctypes.c_uint(0)
481
+ self._check(dll.ailiaSpeechComplete(self._instance, ctypes.byref(complete)))
482
+ if complete.value == 1:
483
+ break
487
484
 
488
- self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
485
+ self._check(dll.ailiaSpeechTranscribe(self._instance))
486
+
487
+ count = ctypes.c_uint(0)
488
+ self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
489
+ results = []
490
+ for i in range(count.value):
491
+ text = AILIASpeechText()
492
+ self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
493
+ yield {"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence}
489
494
 
490
- return results
495
+ self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
491
496
 
492
497
  def __del__(self):
493
498
  if self._instance:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ailia_speech
3
- Version: 1.3.0.3
3
+ Version: 1.3.0.5
4
4
  Summary: ailia AI Speech
5
5
  Home-page: https://ailia.jp/
6
6
  Author: ax Inc.
@@ -41,7 +41,6 @@ pip3 install ./
41
41
  ## Usage
42
42
 
43
43
  ```python
44
- import ailia
45
44
  import ailia_speech
46
45
 
47
46
  import librosa
@@ -50,19 +49,20 @@ import os
50
49
  import urllib.request
51
50
 
52
51
  # Load target audio
53
- ref_file_path = "demo.wav"
54
- if not os.path.exists(ref_file_path):
52
+ input_file_path = "demo.wav"
53
+ if not os.path.exists(input_file_path):
55
54
  urllib.request.urlretrieve(
56
55
  "https://github.com/axinc-ai/ailia-models/raw/refs/heads/master/audio_processing/whisper/demo.wav",
57
56
  "demo.wav"
58
57
  )
59
- audio_waveform, sampling_rate = librosa.load(ref_file_path, mono=True)
58
+ audio_waveform, sampling_rate = librosa.load(input_file_path, mono=True)
60
59
 
61
60
  # Infer
62
61
  speech = ailia_speech.Whisper()
63
62
  speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL)
64
63
  recognized_text = speech.transcribe(audio_waveform, sampling_rate)
65
- print(recognized_text)
64
+ for text in recognized_text:
65
+ print(text)
66
66
  ```
67
67
 
68
68
  ## API specification
@@ -1,12 +1,12 @@
1
1
  ailia_speech/LICENSE_AILIA_EN.pdf,sha256=1DzVViPnw1uAS8gJ5a8uN3iZNNR5I1ItIXmezHfUpeM,70149
2
2
  ailia_speech/LICENSE_AILIA_JA.pdf,sha256=s628QN47S2bNqIfuSjm2LBf0vIluv2df6MSemn6Ksmw,174134
3
- ailia_speech/__init__.py,sha256=ooZ5u_Iv8SDEHnqqUR9B9FcYjJRvHt1GjaZdIqUMfRk,25430
3
+ ailia_speech/__init__.py,sha256=owi3WkmDlaYf0P1TJF_CVyVSW-cLMj3kp9RoVH9Rd3c,25637
4
4
  ailia_speech/linux/arm64-v8a/libailia_speech.so,sha256=JAOwnBr7lbiMZmPCM99pd4vJQ08ZuXDPpq-FurrXSnE,166096
5
5
  ailia_speech/linux/x64/libailia_speech.so,sha256=WbFvA5wKTgS_Zx8ErT7WBKJbzOUexavr4nP4EkLNawQ,171360
6
6
  ailia_speech/mac/libailia_speech.dylib,sha256=-JAC40yLslAVMvfh6LhDvP3Zyt3hIT3WZc7wa9-07zU,317112
7
7
  ailia_speech/windows/x64/ailia_speech.dll,sha256=WJCOHi0Na4tdMG1RT7dA7yAoWumiGSWeW1vxUtiXDS8,126464
8
- ailia_speech-1.3.0.3.data/scripts/__init__.py,sha256=ooZ5u_Iv8SDEHnqqUR9B9FcYjJRvHt1GjaZdIqUMfRk,25430
9
- ailia_speech-1.3.0.3.dist-info/METADATA,sha256=Ry_r7KNBQtutbZEnwrdKw-uYdl8Zw6vgfQf9WkiietE,1920
10
- ailia_speech-1.3.0.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
- ailia_speech-1.3.0.3.dist-info/top_level.txt,sha256=Ou9XeJ9AvdK8eutw07oosCthftD1tRYzAgNY2BrYhDc,13
12
- ailia_speech-1.3.0.3.dist-info/RECORD,,
8
+ ailia_speech-1.3.0.5.data/scripts/__init__.py,sha256=owi3WkmDlaYf0P1TJF_CVyVSW-cLMj3kp9RoVH9Rd3c,25637
9
+ ailia_speech-1.3.0.5.dist-info/METADATA,sha256=webGrA8CrUKo8WSk_vIvqb_aDekuX7yzXlelkeOrP6M,1932
10
+ ailia_speech-1.3.0.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
+ ailia_speech-1.3.0.5.dist-info/top_level.txt,sha256=Ou9XeJ9AvdK8eutw07oosCthftD1tRYzAgNY2BrYhDc,13
12
+ ailia_speech-1.3.0.5.dist-info/RECORD,,