ailia-speech 1.3.0.3__py3-none-any.whl → 1.3.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ailia-speech might be problematic. Click here for more details.
- ailia_speech/__init__.py +15 -10
- {ailia_speech-1.3.0.3.data → ailia_speech-1.3.0.5.data}/scripts/__init__.py +15 -10
- {ailia_speech-1.3.0.3.dist-info → ailia_speech-1.3.0.5.dist-info}/METADATA +6 -6
- {ailia_speech-1.3.0.3.dist-info → ailia_speech-1.3.0.5.dist-info}/RECORD +6 -6
- {ailia_speech-1.3.0.3.dist-info → ailia_speech-1.3.0.5.dist-info}/WHEEL +0 -0
- {ailia_speech-1.3.0.3.dist-info → ailia_speech-1.3.0.5.dist-info}/top_level.txt +0 -0
ailia_speech/__init__.py
CHANGED
|
@@ -475,19 +475,24 @@ class Whisper(AiliaSpeechModel):
|
|
|
475
475
|
|
|
476
476
|
self._check(dll.ailiaSpeechPushInputData(self._instance, audio_waveform, channels, audio_waveform.shape[0] // channels, sampling_rate))
|
|
477
477
|
self._check(dll.ailiaSpeechFinalizeInputData(self._instance))
|
|
478
|
-
self._check(dll.ailiaSpeechTranscribe(self._instance))
|
|
479
478
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
|
|
486
|
-
results.append({"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence})
|
|
479
|
+
while True:
|
|
480
|
+
complete = ctypes.c_uint(0)
|
|
481
|
+
self._check(dll.ailiaSpeechComplete(self._instance, ctypes.byref(complete)))
|
|
482
|
+
if complete.value == 1:
|
|
483
|
+
break
|
|
487
484
|
|
|
488
|
-
|
|
485
|
+
self._check(dll.ailiaSpeechTranscribe(self._instance))
|
|
486
|
+
|
|
487
|
+
count = ctypes.c_uint(0)
|
|
488
|
+
self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
|
|
489
|
+
results = []
|
|
490
|
+
for i in range(count.value):
|
|
491
|
+
text = AILIASpeechText()
|
|
492
|
+
self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
|
|
493
|
+
yield {"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence}
|
|
489
494
|
|
|
490
|
-
|
|
495
|
+
self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
|
|
491
496
|
|
|
492
497
|
def __del__(self):
|
|
493
498
|
if self._instance:
|
|
@@ -475,19 +475,24 @@ class Whisper(AiliaSpeechModel):
|
|
|
475
475
|
|
|
476
476
|
self._check(dll.ailiaSpeechPushInputData(self._instance, audio_waveform, channels, audio_waveform.shape[0] // channels, sampling_rate))
|
|
477
477
|
self._check(dll.ailiaSpeechFinalizeInputData(self._instance))
|
|
478
|
-
self._check(dll.ailiaSpeechTranscribe(self._instance))
|
|
479
478
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
|
|
486
|
-
results.append({"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence})
|
|
479
|
+
while True:
|
|
480
|
+
complete = ctypes.c_uint(0)
|
|
481
|
+
self._check(dll.ailiaSpeechComplete(self._instance, ctypes.byref(complete)))
|
|
482
|
+
if complete.value == 1:
|
|
483
|
+
break
|
|
487
484
|
|
|
488
|
-
|
|
485
|
+
self._check(dll.ailiaSpeechTranscribe(self._instance))
|
|
486
|
+
|
|
487
|
+
count = ctypes.c_uint(0)
|
|
488
|
+
self._check(dll.ailiaSpeechGetTextCount(self._instance, ctypes.byref(count)))
|
|
489
|
+
results = []
|
|
490
|
+
for i in range(count.value):
|
|
491
|
+
text = AILIASpeechText()
|
|
492
|
+
self._check(dll.ailiaSpeechGetText(self._instance, ctypes.byref(text), AILIA_SPEECH_TEXT_VERSION, i))
|
|
493
|
+
yield {"text" : text.text.decode(), "time_stamp_begin" : text.time_stamp_begin, "time_stamp_end" : text.time_stamp_end, "person_id" : text.person_id, "language" : text.language.decode(), "confidence" : text.confidence}
|
|
489
494
|
|
|
490
|
-
|
|
495
|
+
self._check(dll.ailiaSpeechResetTranscribeState(self._instance))
|
|
491
496
|
|
|
492
497
|
def __del__(self):
|
|
493
498
|
if self._instance:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ailia_speech
|
|
3
|
-
Version: 1.3.0.3
|
|
3
|
+
Version: 1.3.0.5
|
|
4
4
|
Summary: ailia AI Speech
|
|
5
5
|
Home-page: https://ailia.jp/
|
|
6
6
|
Author: ax Inc.
|
|
@@ -41,7 +41,6 @@ pip3 install ./
|
|
|
41
41
|
## Usage
|
|
42
42
|
|
|
43
43
|
```python
|
|
44
|
-
import ailia
|
|
45
44
|
import ailia_speech
|
|
46
45
|
|
|
47
46
|
import librosa
|
|
@@ -50,19 +49,20 @@ import os
|
|
|
50
49
|
import urllib.request
|
|
51
50
|
|
|
52
51
|
# Load target audio
|
|
53
|
-
|
|
54
|
-
if not os.path.exists(
|
|
52
|
+
input_file_path = "demo.wav"
|
|
53
|
+
if not os.path.exists(input_file_path):
|
|
55
54
|
urllib.request.urlretrieve(
|
|
56
55
|
"https://github.com/axinc-ai/ailia-models/raw/refs/heads/master/audio_processing/whisper/demo.wav",
|
|
57
56
|
"demo.wav"
|
|
58
57
|
)
|
|
59
|
-
audio_waveform, sampling_rate = librosa.load(
|
|
58
|
+
audio_waveform, sampling_rate = librosa.load(input_file_path, mono=True)
|
|
60
59
|
|
|
61
60
|
# Infer
|
|
62
61
|
speech = ailia_speech.Whisper()
|
|
63
62
|
speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL)
|
|
64
63
|
recognized_text = speech.transcribe(audio_waveform, sampling_rate)
|
|
65
|
-
|
|
64
|
+
for text in recognized_text:
|
|
65
|
+
print(text)
|
|
66
66
|
```
|
|
67
67
|
|
|
68
68
|
## API specification
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
ailia_speech/LICENSE_AILIA_EN.pdf,sha256=1DzVViPnw1uAS8gJ5a8uN3iZNNR5I1ItIXmezHfUpeM,70149
|
|
2
2
|
ailia_speech/LICENSE_AILIA_JA.pdf,sha256=s628QN47S2bNqIfuSjm2LBf0vIluv2df6MSemn6Ksmw,174134
|
|
3
|
-
ailia_speech/__init__.py,sha256=
|
|
3
|
+
ailia_speech/__init__.py,sha256=owi3WkmDlaYf0P1TJF_CVyVSW-cLMj3kp9RoVH9Rd3c,25637
|
|
4
4
|
ailia_speech/linux/arm64-v8a/libailia_speech.so,sha256=JAOwnBr7lbiMZmPCM99pd4vJQ08ZuXDPpq-FurrXSnE,166096
|
|
5
5
|
ailia_speech/linux/x64/libailia_speech.so,sha256=WbFvA5wKTgS_Zx8ErT7WBKJbzOUexavr4nP4EkLNawQ,171360
|
|
6
6
|
ailia_speech/mac/libailia_speech.dylib,sha256=-JAC40yLslAVMvfh6LhDvP3Zyt3hIT3WZc7wa9-07zU,317112
|
|
7
7
|
ailia_speech/windows/x64/ailia_speech.dll,sha256=WJCOHi0Na4tdMG1RT7dA7yAoWumiGSWeW1vxUtiXDS8,126464
|
|
8
|
-
ailia_speech-1.3.0.
|
|
9
|
-
ailia_speech-1.3.0.
|
|
10
|
-
ailia_speech-1.3.0.
|
|
11
|
-
ailia_speech-1.3.0.
|
|
12
|
-
ailia_speech-1.3.0.
|
|
8
|
+
ailia_speech-1.3.0.5.data/scripts/__init__.py,sha256=owi3WkmDlaYf0P1TJF_CVyVSW-cLMj3kp9RoVH9Rd3c,25637
|
|
9
|
+
ailia_speech-1.3.0.5.dist-info/METADATA,sha256=webGrA8CrUKo8WSk_vIvqb_aDekuX7yzXlelkeOrP6M,1932
|
|
10
|
+
ailia_speech-1.3.0.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
11
|
+
ailia_speech-1.3.0.5.dist-info/top_level.txt,sha256=Ou9XeJ9AvdK8eutw07oosCthftD1tRYzAgNY2BrYhDc,13
|
|
12
|
+
ailia_speech-1.3.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|