ailia-speech 1.4.0__tar.gz → 1.4.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ailia-speech might be problematic. Click here for more details.
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/PKG-INFO +2 -2
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/README.md +1 -1
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech/__init__.py +6 -6
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech.egg-info/PKG-INFO +2 -2
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/setup.py +1 -1
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech/LICENSE_AILIA_EN.pdf +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech/LICENSE_AILIA_JA.pdf +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech/linux/arm64-v8a/libailia_speech.so +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech/linux/x64/libailia_speech.so +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech/mac/libailia_speech.dylib +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech/windows/x64/ailia_speech.dll +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech.egg-info/SOURCES.txt +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech.egg-info/dependency_links.txt +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech.egg-info/requires.txt +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/ailia_speech.egg-info/top_level.txt +0 -0
- {ailia_speech-1.4.0 → ailia_speech-1.4.0.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ailia_speech
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.0.1
|
|
4
4
|
Summary: ailia AI Speech
|
|
5
5
|
Home-page: https://ailia.jp/
|
|
6
6
|
Author: ax Inc.
|
|
@@ -117,7 +117,7 @@ for i in range(0, audio_waveform.shape[0], sampling_rate):
|
|
|
117
117
|
By specifying diarization_type, speaker diarization can be performed. When speaker diarization is enabled, speaker_id becomes valid.
|
|
118
118
|
|
|
119
119
|
```
|
|
120
|
-
speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO,
|
|
120
|
+
speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO, diarization_type = ailia_speech.AILIA_SPEECH_DIARIZATION_TYPE_PYANNOTE_AUDIO)
|
|
121
121
|
```
|
|
122
122
|
|
|
123
123
|
### Available model types
|
|
@@ -95,7 +95,7 @@ for i in range(0, audio_waveform.shape[0], sampling_rate):
|
|
|
95
95
|
By specifying diarization_type, speaker diarization can be performed. When speaker diarization is enabled, speaker_id becomes valid.
|
|
96
96
|
|
|
97
97
|
```
|
|
98
|
-
speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO,
|
|
98
|
+
speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO, diarization_type = ailia_speech.AILIA_SPEECH_DIARIZATION_TYPE_PYANNOTE_AUDIO)
|
|
99
99
|
```
|
|
100
100
|
|
|
101
101
|
### Available model types
|
|
@@ -409,7 +409,7 @@ class Whisper(AiliaSpeechModel):
|
|
|
409
409
|
intermediate_callback_cnt = intermediate_callback_cnt + 1
|
|
410
410
|
|
|
411
411
|
|
|
412
|
-
def initialize_model(self, model_path = "./", model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY, vad_type = AILIA_SPEECH_VAD_TYPE_SILERO,
|
|
412
|
+
def initialize_model(self, model_path = "./", model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY, vad_type = AILIA_SPEECH_VAD_TYPE_SILERO, diarization_type = None):
|
|
413
413
|
if "time_license" in ailia.get_version():
|
|
414
414
|
ailia.check_and_download_license()
|
|
415
415
|
if model_type == AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY:
|
|
@@ -448,15 +448,15 @@ class Whisper(AiliaSpeechModel):
|
|
|
448
448
|
encoder_pb_path = "encoder_turbo_weights.opt.pb"
|
|
449
449
|
decoder_pb_path = None
|
|
450
450
|
model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3
|
|
451
|
-
self._download_model(model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type,
|
|
451
|
+
self._download_model(model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, diarization_type)
|
|
452
452
|
self._open_model(model_path + encoder_path, model_path + decoder_path, model_type)
|
|
453
453
|
if vad_type is not None:
|
|
454
454
|
self._open_vad(model_path + "silero_vad.onnx", vad_type)
|
|
455
|
-
if
|
|
456
|
-
self._open_diarization(model_path + "segmentation.onnx", model_path + "speaker-embedding.onnx",
|
|
455
|
+
if diarization_type is not None:
|
|
456
|
+
self._open_diarization(model_path + "segmentation.onnx", model_path + "speaker-embedding.onnx", diarization_type)
|
|
457
457
|
|
|
458
458
|
|
|
459
|
-
def _download_model(self, model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type,
|
|
459
|
+
def _download_model(self, model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, diarization_type):
|
|
460
460
|
REMOTE_PATH = "https://storage.googleapis.com/ailia-models/whisper/"
|
|
461
461
|
os.makedirs(model_path, exist_ok = True)
|
|
462
462
|
check_and_download_file(model_path + encoder_path, REMOTE_PATH)
|
|
@@ -470,7 +470,7 @@ class Whisper(AiliaSpeechModel):
|
|
|
470
470
|
REMOTE_PATH = "https://storage.googleapis.com/ailia-models/silero-vad/"
|
|
471
471
|
check_and_download_file(model_path + "silero_vad.onnx", REMOTE_PATH)
|
|
472
472
|
|
|
473
|
-
if
|
|
473
|
+
if diarization_type is not None:
|
|
474
474
|
REMOTE_PATH = "https://storage.googleapis.com/ailia-models/pyannote-audio/"
|
|
475
475
|
check_and_download_file(model_path + "segmentation.onnx", REMOTE_PATH)
|
|
476
476
|
check_and_download_file(model_path + "speaker-embedding.onnx", REMOTE_PATH)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ailia_speech
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.0.1
|
|
4
4
|
Summary: ailia AI Speech
|
|
5
5
|
Home-page: https://ailia.jp/
|
|
6
6
|
Author: ax Inc.
|
|
@@ -117,7 +117,7 @@ for i in range(0, audio_waveform.shape[0], sampling_rate):
|
|
|
117
117
|
By specifying diarization_type, speaker diarization can be performed. When speaker diarization is enabled, speaker_id becomes valid.
|
|
118
118
|
|
|
119
119
|
```
|
|
120
|
-
speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO,
|
|
120
|
+
speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO, diarization_type = ailia_speech.AILIA_SPEECH_DIARIZATION_TYPE_PYANNOTE_AUDIO)
|
|
121
121
|
```
|
|
122
122
|
|
|
123
123
|
### Available model types
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|