ailia-speech 1.4.0__py3-none-any.whl → 1.4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ailia-speech might be problematic. Click here for more details.

ailia_speech/__init__.py CHANGED
@@ -409,7 +409,7 @@ class Whisper(AiliaSpeechModel):
409
409
  intermediate_callback_cnt = intermediate_callback_cnt + 1
410
410
 
411
411
 
412
- def initialize_model(self, model_path = "./", model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY, vad_type = AILIA_SPEECH_VAD_TYPE_SILERO, dialization_type = None):
412
+ def initialize_model(self, model_path = "./", model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY, vad_type = AILIA_SPEECH_VAD_TYPE_SILERO, diarization_type = None):
413
413
  if "time_license" in ailia.get_version():
414
414
  ailia.check_and_download_license()
415
415
  if model_type == AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY:
@@ -448,15 +448,15 @@ class Whisper(AiliaSpeechModel):
448
448
  encoder_pb_path = "encoder_turbo_weights.opt.pb"
449
449
  decoder_pb_path = None
450
450
  model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3
451
- self._download_model(model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, dialization_type)
451
+ self._download_model(model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, diarization_type)
452
452
  self._open_model(model_path + encoder_path, model_path + decoder_path, model_type)
453
453
  if vad_type is not None:
454
454
  self._open_vad(model_path + "silero_vad.onnx", vad_type)
455
- if dialization_type is not None:
456
- self._open_diarization(model_path + "segmentation.onnx", model_path + "speaker-embedding.onnx", dialization_type)
455
+ if diarization_type is not None:
456
+ self._open_diarization(model_path + "segmentation.onnx", model_path + "speaker-embedding.onnx", diarization_type)
457
457
 
458
458
 
459
- def _download_model(self, model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, dialization_type):
459
+ def _download_model(self, model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, diarization_type):
460
460
  REMOTE_PATH = "https://storage.googleapis.com/ailia-models/whisper/"
461
461
  os.makedirs(model_path, exist_ok = True)
462
462
  check_and_download_file(model_path + encoder_path, REMOTE_PATH)
@@ -470,7 +470,7 @@ class Whisper(AiliaSpeechModel):
470
470
  REMOTE_PATH = "https://storage.googleapis.com/ailia-models/silero-vad/"
471
471
  check_and_download_file(model_path + "silero_vad.onnx", REMOTE_PATH)
472
472
 
473
- if dialization_type is not None:
473
+ if diarization_type is not None:
474
474
  REMOTE_PATH = "https://storage.googleapis.com/ailia-models/pyannote-audio/"
475
475
  check_and_download_file(model_path + "segmentation.onnx", REMOTE_PATH)
476
476
  check_and_download_file(model_path + "speaker-embedding.onnx", REMOTE_PATH)
@@ -409,7 +409,7 @@ class Whisper(AiliaSpeechModel):
409
409
  intermediate_callback_cnt = intermediate_callback_cnt + 1
410
410
 
411
411
 
412
- def initialize_model(self, model_path = "./", model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY, vad_type = AILIA_SPEECH_VAD_TYPE_SILERO, dialization_type = None):
412
+ def initialize_model(self, model_path = "./", model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY, vad_type = AILIA_SPEECH_VAD_TYPE_SILERO, diarization_type = None):
413
413
  if "time_license" in ailia.get_version():
414
414
  ailia.check_and_download_license()
415
415
  if model_type == AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY:
@@ -448,15 +448,15 @@ class Whisper(AiliaSpeechModel):
448
448
  encoder_pb_path = "encoder_turbo_weights.opt.pb"
449
449
  decoder_pb_path = None
450
450
  model_type = AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3
451
- self._download_model(model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, dialization_type)
451
+ self._download_model(model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, diarization_type)
452
452
  self._open_model(model_path + encoder_path, model_path + decoder_path, model_type)
453
453
  if vad_type is not None:
454
454
  self._open_vad(model_path + "silero_vad.onnx", vad_type)
455
- if dialization_type is not None:
456
- self._open_diarization(model_path + "segmentation.onnx", model_path + "speaker-embedding.onnx", dialization_type)
455
+ if diarization_type is not None:
456
+ self._open_diarization(model_path + "segmentation.onnx", model_path + "speaker-embedding.onnx", diarization_type)
457
457
 
458
458
 
459
- def _download_model(self, model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, dialization_type):
459
+ def _download_model(self, model_path, encoder_path, decoder_path, encoder_pb_path, decoder_pb_path, vad_type, diarization_type):
460
460
  REMOTE_PATH = "https://storage.googleapis.com/ailia-models/whisper/"
461
461
  os.makedirs(model_path, exist_ok = True)
462
462
  check_and_download_file(model_path + encoder_path, REMOTE_PATH)
@@ -470,7 +470,7 @@ class Whisper(AiliaSpeechModel):
470
470
  REMOTE_PATH = "https://storage.googleapis.com/ailia-models/silero-vad/"
471
471
  check_and_download_file(model_path + "silero_vad.onnx", REMOTE_PATH)
472
472
 
473
- if dialization_type is not None:
473
+ if diarization_type is not None:
474
474
  REMOTE_PATH = "https://storage.googleapis.com/ailia-models/pyannote-audio/"
475
475
  check_and_download_file(model_path + "segmentation.onnx", REMOTE_PATH)
476
476
  check_and_download_file(model_path + "speaker-embedding.onnx", REMOTE_PATH)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ailia_speech
3
- Version: 1.4.0
3
+ Version: 1.4.0.1
4
4
  Summary: ailia AI Speech
5
5
  Home-page: https://ailia.jp/
6
6
  Author: ax Inc.
@@ -117,7 +117,7 @@ for i in range(0, audio_waveform.shape[0], sampling_rate):
117
117
  By specifying diarization_type, speaker diarization can be performed. When speaker diarization is enabled, speaker_id becomes valid.
118
118
 
119
119
  ```
120
- speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO, dialization_type = AILIA_SPEECH_DIARIZATION_TYPE_PYANNOTE_AUDIO)
120
+ speech.initialize_model(model_path = "./models/", model_type = ailia_speech.AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3_TURBO, diarization_type = ailia_speech.AILIA_SPEECH_DIARIZATION_TYPE_PYANNOTE_AUDIO)
121
121
  ```
122
122
 
123
123
  ### Available model types
@@ -1,12 +1,12 @@
1
1
  ailia_speech/LICENSE_AILIA_EN.pdf,sha256=1DzVViPnw1uAS8gJ5a8uN3iZNNR5I1ItIXmezHfUpeM,70149
2
2
  ailia_speech/LICENSE_AILIA_JA.pdf,sha256=s628QN47S2bNqIfuSjm2LBf0vIluv2df6MSemn6Ksmw,174134
3
- ailia_speech/__init__.py,sha256=Hq9QUW7VlSlq3Lxkghc7Fv-pidM2PwG-7LuM5QRX6Gk,29965
3
+ ailia_speech/__init__.py,sha256=hlo2aSAwFjI87mijd56TMLEas4HKjF1AClzt5Md1Ib8,29965
4
4
  ailia_speech/linux/arm64-v8a/libailia_speech.so,sha256=8_b0D4tkTij8yryjSQciVIyklCVQcKe8VmnF_6S5iLI,515664
5
5
  ailia_speech/linux/x64/libailia_speech.so,sha256=VbAg7uLSPT3rqOr9Io1uVcaeaIp7Jf1TfjLcf5LYgGw,589056
6
6
  ailia_speech/mac/libailia_speech.dylib,sha256=CTHndHt7XoWC9tddyLdy9I-CCJiQekiFhLd69nPLJC0,867128
7
7
  ailia_speech/windows/x64/ailia_speech.dll,sha256=q2uKfwI0dtWwvA7ejJmwkCHE_G1526mUEzRo_xoHGWI,345600
8
- ailia_speech-1.4.0.data/scripts/__init__.py,sha256=Hq9QUW7VlSlq3Lxkghc7Fv-pidM2PwG-7LuM5QRX6Gk,29965
9
- ailia_speech-1.4.0.dist-info/METADATA,sha256=lLz8DPVhS49okRk68aTAK-FhSp2RJIa7XPwIkQ4Z2so,4263
10
- ailia_speech-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
- ailia_speech-1.4.0.dist-info/top_level.txt,sha256=Ou9XeJ9AvdK8eutw07oosCthftD1tRYzAgNY2BrYhDc,13
12
- ailia_speech-1.4.0.dist-info/RECORD,,
8
+ ailia_speech-1.4.0.1.data/scripts/__init__.py,sha256=hlo2aSAwFjI87mijd56TMLEas4HKjF1AClzt5Md1Ib8,29965
9
+ ailia_speech-1.4.0.1.dist-info/METADATA,sha256=n7dwyazi2iDekow2sMQbCJQw4k509OSg3SMBaXwbvLM,4278
10
+ ailia_speech-1.4.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ ailia_speech-1.4.0.1.dist-info/top_level.txt,sha256=Ou9XeJ9AvdK8eutw07oosCthftD1tRYzAgNY2BrYhDc,13
12
+ ailia_speech-1.4.0.1.dist-info/RECORD,,