ailia-speech 1.3.0.4__py3-none-any.whl → 1.3.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ailia-speech might be problematic. Click here for more details.

ailia_speech/__init__.py CHANGED
@@ -475,19 +475,24 @@ class Whisper(AiliaSpeechModel):
475
475
 
476
476
  self._check(dll.ailiaSpeechPushInputData(self._instance, audio_waveform, channels, audio_waveform.shape[0] // channels, sampling_rate))
477
477
  self._check(dll.ailiaSpeechFinalizeInputData(self._instance))
478
- self._check(dll.ailiaSpeechTranscribe(self._instance))
479
478
 
480
- count = ctypes.c_uint(0)
481
- self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
482
- results = []
483
- for i in range(count.value):
484
- text = AILIASpeechText()
485
- self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
486
- results.append({"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence})
479
+ while True:
480
+ complete = ctypes.c_uint(0)
481
+ self._check(dll.ailiaSpeechComplete(self._instance, ctypes.byref(complete)))
482
+ if complete.value == 1:
483
+ break
487
484
 
488
- self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
485
+ self._check(dll.ailiaSpeechTranscribe(self._instance))
486
+
487
+ count = ctypes.c_uint(0)
488
+ self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
489
+ results = []
490
+ for i in range(count.value):
491
+ text = AILIASpeechText()
492
+ self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
493
+ yield {"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence}
489
494
 
490
- return results
495
+ self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
491
496
 
492
497
  def __del__(self):
493
498
  if self._instance:
@@ -475,19 +475,24 @@ class Whisper(AiliaSpeechModel):
475
475
 
476
476
  self._check(dll.ailiaSpeechPushInputData(self._instance, audio_waveform, channels, audio_waveform.shape[0] // channels, sampling_rate))
477
477
  self._check(dll.ailiaSpeechFinalizeInputData(self._instance))
478
- self._check(dll.ailiaSpeechTranscribe(self._instance))
479
478
 
480
- count = ctypes.c_uint(0)
481
- self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
482
- results = []
483
- for i in range(count.value):
484
- text = AILIASpeechText()
485
- self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
486
- results.append({"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence})
479
+ while True:
480
+ complete = ctypes.c_uint(0)
481
+ self._check(dll.ailiaSpeechComplete(self._instance, ctypes.byref(complete)))
482
+ if complete.value == 1:
483
+ break
487
484
 
488
- self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
485
+ self._check(dll.ailiaSpeechTranscribe(self._instance))
486
+
487
+ count = ctypes.c_uint(0)
488
+ self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
489
+ results = []
490
+ for i in range(count.value):
491
+ text = AILIASpeechText()
492
+ self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
493
+ yield {"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence}
489
494
 
490
- return results
495
+ self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
491
496
 
492
497
  def __del__(self):
493
498
  if self._instance:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ailia_speech
3
- Version: 1.3.0.4
3
+ Version: 1.3.0.5
4
4
  Summary: ailia AI Speech
5
5
  Home-page: https://ailia.jp/
6
6
  Author: ax Inc.
@@ -49,19 +49,20 @@ import os
49
49
  import urllib.request
50
50
 
51
51
  # Load target audio
52
- ref_file_path = "demo.wav"
53
- if not os.path.exists(ref_file_path):
52
+ input_file_path = "demo.wav"
53
+ if not os.path.exists(input_file_path):
54
54
  urllib.request.urlretrieve(
55
55
  "https://github.com/axinc-ai/ailia-models/raw/refs/heads/master/audio_processing/whisper/demo.wav",
56
56
  "demo.wav"
57
57
  )
58
- audio_waveform, sampling_rate = librosa.load(ref_file_path, mono=True)
58
+ audio_waveform, sampling_rate = librosa.load(input_file_path, mono=True)
59
59
 
60
60
  # Infer
61
61
  speech = ailia_speech.Whisper()
62
62
  speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL)
63
63
  recognized_text = speech.transcribe(audio_waveform, sampling_rate)
64
- print(recognized_text)
64
+ for text in recognized_text:
65
+ print(text)
65
66
  ```
66
67
 
67
68
  ## API specification
@@ -1,12 +1,12 @@
1
1
  ailia_speech/LICENSE_AILIA_EN.pdf,sha256=1DzVViPnw1uAS8gJ5a8uN3iZNNR5I1ItIXmezHfUpeM,70149
2
2
  ailia_speech/LICENSE_AILIA_JA.pdf,sha256=s628QN47S2bNqIfuSjm2LBf0vIluv2df6MSemn6Ksmw,174134
3
- ailia_speech/__init__.py,sha256=ooZ5u_Iv8SDEHnqqUR9B9FcYjJRvHt1GjaZdIqUMfRk,25430
3
+ ailia_speech/__init__.py,sha256=owi3WkmDlaYf0P1TJF_CVyVSW-cLMj3kp9RoVH9Rd3c,25637
4
4
  ailia_speech/linux/arm64-v8a/libailia_speech.so,sha256=JAOwnBr7lbiMZmPCM99pd4vJQ08ZuXDPpq-FurrXSnE,166096
5
5
  ailia_speech/linux/x64/libailia_speech.so,sha256=WbFvA5wKTgS_Zx8ErT7WBKJbzOUexavr4nP4EkLNawQ,171360
6
6
  ailia_speech/mac/libailia_speech.dylib,sha256=-JAC40yLslAVMvfh6LhDvP3Zyt3hIT3WZc7wa9-07zU,317112
7
7
  ailia_speech/windows/x64/ailia_speech.dll,sha256=WJCOHi0Na4tdMG1RT7dA7yAoWumiGSWeW1vxUtiXDS8,126464
8
- ailia_speech-1.3.0.4.data/scripts/__init__.py,sha256=ooZ5u_Iv8SDEHnqqUR9B9FcYjJRvHt1GjaZdIqUMfRk,25430
9
- ailia_speech-1.3.0.4.dist-info/METADATA,sha256=OrP0OpJWFywZVcdzchCDDu_UdBXSABFtvHk7M4Pmtx4,1907
10
- ailia_speech-1.3.0.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
- ailia_speech-1.3.0.4.dist-info/top_level.txt,sha256=Ou9XeJ9AvdK8eutw07oosCthftD1tRYzAgNY2BrYhDc,13
12
- ailia_speech-1.3.0.4.dist-info/RECORD,,
8
+ ailia_speech-1.3.0.5.data/scripts/__init__.py,sha256=owi3WkmDlaYf0P1TJF_CVyVSW-cLMj3kp9RoVH9Rd3c,25637
9
+ ailia_speech-1.3.0.5.dist-info/METADATA,sha256=webGrA8CrUKo8WSk_vIvqb_aDekuX7yzXlelkeOrP6M,1932
10
+ ailia_speech-1.3.0.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
11
+ ailia_speech-1.3.0.5.dist-info/top_level.txt,sha256=Ou9XeJ9AvdK8eutw07oosCthftD1tRYzAgNY2BrYhDc,13
12
+ ailia_speech-1.3.0.5.dist-info/RECORD,,