sinapsis-speech 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@
2
2
  """Base template for ElevenLabs speech synthesis"""
3
3
 
4
4
  import abc
5
- import os
6
5
  from typing import Generator, Iterable, Iterator, Literal
7
6
 
8
7
  import numpy as np
@@ -18,7 +17,6 @@ from sinapsis_core.template_base.base_models import (
18
17
  UIPropertiesMetadata,
19
18
  )
20
19
  from sinapsis_core.template_base.template import Template
21
- from sinapsis_core.utils.env_var_keys import WORKING_DIR
22
20
  from sinapsis_generic_data_tools.helpers.audio_encoder import audio_bytes_to_numpy
23
21
 
24
22
  from sinapsis_elevenlabs.helpers.env_var_keys import ELEVENLABS_API_KEY
@@ -47,8 +45,7 @@ class ElevenLabsBase(Template, abc.ABC):
47
45
  Args:
48
46
  api_key (str): The API key used to authenticate with ElevenLabs' API.
49
47
  model (Literal): The model identifier to use for speech synthesis.
50
- output_file_name (str | None): Optional name for saved audio file.
51
- If not provided a random UUI will be used as file name. Defaults to None.
48
+
52
49
  output_format (OutputFormat): The output audio format and quality. Options include:
53
50
  ["mp3_22050_32", "mp3_44100_32", "mp3_44100_64", "mp3_44100_96", "mp3_44100_128",
54
51
  "mp3_44100_192", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "ulaw_8000"]
@@ -71,9 +68,7 @@ class ElevenLabsBase(Template, abc.ABC):
71
68
  "eleven_english_sts_v2",
72
69
  "eleven_multilingual_sts_v2",
73
70
  ] = "eleven_turbo_v2_5"
74
- output_file_name: str | None = None
75
71
  output_format: OutputFormat = "mp3_44100_128"
76
- output_folder: str = os.path.join(WORKING_DIR, "elevenlabs", "audios")
77
72
  stream: bool = False
78
73
  voice: str | Voice | None = None
79
74
  voice_settings: VoiceSettings = Field(default_factory=dict) # type: ignore[arg-type]
@@ -33,9 +33,7 @@ class ElevenLabsSTS(ElevenLabsBase):
33
33
  attributes:
34
34
  api_key: null
35
35
  model: eleven_multilingual_sts_v2
36
- output_file_name: null
37
36
  output_format: mp3_44100_128
38
- output_folder: <WORKING_DIR>/elevenlabs/audios
39
37
  stream: false
40
38
  voice: null
41
39
  voice_settings:
@@ -40,7 +40,6 @@ class ElevenLabsTTS(ElevenLabsBase):
40
40
  voice_settings: null
41
41
  model: eleven_turbo_v2_5
42
42
  output_format: mp3_44100_128
43
- output_folder: <WORKING_DIR>/elevenlabs/audios
44
43
  stream: false
45
44
 
46
45
  """
@@ -32,9 +32,7 @@ class ElevenLabsVoiceClone(ElevenLabsTTS):
32
32
  attributes:
33
33
  api_key: null
34
34
  model: eleven_turbo_v2_5
35
- output_file_name: null
36
35
  output_format: mp3_44100_128
37
- output_folder: <WORKING_DIR>/elevenlabs/audios
38
36
  stream: false
39
37
  voice: null
40
38
  voice_settings:
@@ -37,7 +37,6 @@ class ElevenLabsVoiceGeneration(ElevenLabsBase):
37
37
  voice_settings: null
38
38
  model: eleven_turbo_v2_5
39
39
  output_format: mp3_44100_128
40
- output_folder: <WORKING_DIR>/elevenlabs/audios
41
40
  stream: false
42
41
  voice_description: An old British male with a raspy, deep voice. Professional,
43
42
  relaxed and assertive
@@ -14,7 +14,13 @@ from sinapsis_core.data_containers.data_packet import (
14
14
  DataContainer,
15
15
  )
16
16
  from sinapsis_core.template_base import Template
17
- from sinapsis_core.template_base.base_models import OutputTypes, TemplateAttributes, UIPropertiesMetadata
17
+ from sinapsis_core.template_base.base_models import (
18
+ OutputTypes,
19
+ TemplateAttributes,
20
+ TemplateAttributeType,
21
+ UIPropertiesMetadata,
22
+ )
23
+ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
18
24
 
19
25
  from sinapsis_f5_tts.helpers.tags import Tags
20
26
 
@@ -107,6 +113,8 @@ class F5TTSInferenceAttributes(TemplateAttributes):
107
113
 
108
114
  device: str | None = Field(default=None, json_schema_extra={F5CliKeys.cli_param: "--device"})
109
115
 
116
+ root_dir: str | None = None
117
+
110
118
 
111
119
  class F5TTSInference(Template):
112
120
  """Template for performing text-to-speech synthesis using the F5TTS model.
@@ -155,6 +163,11 @@ class F5TTSInference(Template):
155
163
  tags=[Tags.AUDIO, Tags.AUDIO_GENERATION, Tags.F5TTS, Tags.SPEECH, Tags.TEXT_TO_SPEECH],
156
164
  )
157
165
 
166
+ def __init__(self, attributes: TemplateAttributeType) -> None:
167
+ super().__init__(attributes)
168
+ self.attributes.root_dir = self.attributes.root_dir or SINAPSIS_CACHE_DIR
169
+ self.attributes.ref_audio = os.path.join(self.attributes.root_dir, self.attributes.ref_audio)
170
+
158
171
  def _add_attribute_to_command(self, cli_command: list[str], field_name: str, field: Any) -> None:
159
172
  """
160
173
  This method examines each attribute field's metadata to determine if and how
@@ -15,6 +15,7 @@ from sinapsis_core.template_base.base_models import (
15
15
  UIPropertiesMetadata,
16
16
  )
17
17
  from sinapsis_core.template_base.template import Template
18
+ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
18
19
 
19
20
  from sinapsis_parakeet_tdt.helpers.tags import Tags
20
21
 
@@ -36,6 +37,7 @@ class ParakeetTDTInferenceAttributes(TemplateAttributes):
36
37
 
37
38
  model_name: str = "nvidia/parakeet-tdt-0.6b-v2"
38
39
  audio_paths: list[str] | None = None
40
+ root_dir: str | None = None
39
41
  enable_timestamps: bool = False
40
42
  timestamp_level: Literal["char", "word", "segment"] = "word"
41
43
  device: Literal["cpu", "cuda"] = "cuda"
@@ -88,6 +90,7 @@ class ParakeetTDTInference(Template):
88
90
 
89
91
  def __init__(self, attributes: TemplateAttributes) -> None:
90
92
  super().__init__(attributes)
93
+ self.attributes.root_dir = self.attributes.root_dir or SINAPSIS_CACHE_DIR
91
94
  self._load_model()
92
95
 
93
96
  def _load_model(self) -> None:
@@ -131,9 +134,10 @@ class ParakeetTDTInference(Template):
131
134
  """
132
135
  sources = []
133
136
  for path in paths:
134
- if not os.path.exists(path):
135
- self.logger.warning(f"Audio file not found: {path}")
136
- sources.append(path)
137
+ full_path = os.path.join(self.attributes.root_dir, path)
138
+ if not os.path.exists(full_path):
139
+ self.logger.warning(f"Audio file not found: {full_path}")
140
+ sources.append(full_path)
137
141
  return sources
138
142
 
139
143
  def get_audio_sources(self, container: DataContainer) -> list[str]:
@@ -156,7 +160,6 @@ class ParakeetTDTInference(Template):
156
160
 
157
161
  if not sources and self.attributes.audio_paths:
158
162
  sources = self.get_sources_from_paths(self.attributes.audio_paths)
159
-
160
163
  return sources
161
164
 
162
165
  @staticmethod
@@ -220,6 +223,7 @@ class ParakeetTDTInference(Template):
220
223
  Returns:
221
224
  list[Any]: List of transcription results from the ASR model.
222
225
  """
226
+
223
227
  return self.model.transcribe(
224
228
  sources,
225
229
  timestamps=self.attributes.enable_timestamps,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-speech
3
- Version: 0.4.3
3
+ Version: 0.4.4
4
4
  Summary: Generate speech using various libraries.
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -4,16 +4,16 @@ sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/env_var_keys.py,sha256=j8J64
4
4
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/tags.py,sha256=EzEwYJqwPFhSzJB7K8g1HGm3xiy6M_kE1j19TYQAfS8,402
5
5
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/helpers/voice_utils.py,sha256=2Ym4suCk8wy-Nj2Hmk0uu3_-3nu1QlSs_KubDydm5wY,3383
6
6
  sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/__init__.py,sha256=UG35_hown3HITVR42iK_e3yVsUbuq2oYTLpCGwJ89L4,708
7
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py,sha256=Gb0wTth3GAAN_22RLm0mPPXtw3eUd2DJQVRc4itEqAM,7900
8
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py,sha256=72WKvjxcro8PD234iFBV9kiYIBLTQUPU25xzGwbPjv8,3799
9
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py,sha256=CePqHzLHrsgKF87nhBQfZE2htB6DEE2iP7_LHjFUQ_E,3047
10
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py,sha256=pLq3U1ZZSW1NUOJEhvUJHhSxjEQiEKFiqHJikhOjye8,4967
11
- sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py,sha256=CvPOrdLBA4XNA-G1XYscT2eUvPa-elJNuNHvm8bzhOk,3078
7
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_base.py,sha256=dry8bhtdn1KBZroPoUAqDHTuXlSqrmD549DEe-HcEXw,7542
8
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_sts.py,sha256=PaWfAPyo0eTjHxE3plEzQ4Rk5m88Q_2Nqp21yyJLHTQ,3713
9
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_tts.py,sha256=8yJwh-COxotjMTrQln85HpE7-znq6GSs4_qHlod9U_g,2992
10
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_clone.py,sha256=YEtT-z6qv8hwAAwrNIZHBpV3vhvgUVaSbgpf3bm8_ac,4881
11
+ sinapsis_elevenlabs/src/sinapsis_elevenlabs/templates/elevenlabs_voice_generation.py,sha256=VTgquS9l9jimVtaDG62TCKYC8xXCFqwcJVueeCQXSpQ,3023
12
12
  sinapsis_f5_tts/src/sinapsis_f5_tts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  sinapsis_f5_tts/src/sinapsis_f5_tts/helpers/tags.py,sha256=T9RbgsPgq7Wc-0Lu3W8Si4bxjQsyWbX6hPrc-dakdCs,208
15
15
  sinapsis_f5_tts/src/sinapsis_f5_tts/templates/__init__.py,sha256=28BOPAr9GG1jYcrXi45ZWO1n2FAZJOdDcmRkOXdEYmk,496
16
- sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py,sha256=HooFxYB1lqRKuzGjMDiw48Xzm34YI81dE0uD3WWm89A,16344
16
+ sinapsis_f5_tts/src/sinapsis_f5_tts/templates/f5_tts_inference.py,sha256=OBAWVOg_QId14hbftEa_oJHap6jpqAZeVj8ZnYN0Vsk,16774
17
17
  sinapsis_kokoro/src/sinapsis_kokoro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  sinapsis_kokoro/src/sinapsis_kokoro/helpers/kokoro_utils.py,sha256=2IMJuwURPKK7keIkgS-rpGD28REG5M1FwW0COGcm3nI,1573
19
19
  sinapsis_kokoro/src/sinapsis_kokoro/helpers/tags.py,sha256=AiHIvqmujKX6tYQ4lEXjRGhq8Ujst8gZwVmcAjS7u3k,210
@@ -29,8 +29,8 @@ sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/__init__.py,sha256=47DEQpj8HBSa-
29
29
  sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
30
  sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/helpers/tags.py,sha256=OKZbq4zIL6XWM7eG5WuQ3dWYkmYNWjuCnlseXmjR_j0,262
31
31
  sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/__init__.py,sha256=3LppgbS6v70Rmx__yXXQgnoZ2ZBHcXkXeWZYQQf6Zwg,504
32
- sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py,sha256=ECNDd_zxkkop3WikQpkRW_bjLAMjROGpdWbChvaJdBE,9948
33
- sinapsis_speech-0.4.3.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
32
+ sinapsis_parakeet_tdt/src/sinapsis_parakeet_tdt/templates/parakeet_tdt.py,sha256=Tw9S8Nqf74lXwUxBodaLK_JaQvh9ITt8cWFQJ2QNP6s,10210
33
+ sinapsis_speech-0.4.4.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
34
34
  sinapsis_zonos/src/sinapsis_zonos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  sinapsis_zonos/src/sinapsis_zonos/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  sinapsis_zonos/src/sinapsis_zonos/helpers/tags.py,sha256=Y7MKQNx1E2k7ebF6r_1l1nBeS5k8hO424yFTT9NI7Rg,244
@@ -38,7 +38,7 @@ sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_keys.py,sha256=m1GdOYfzP73JGmtxH
38
38
  sinapsis_zonos/src/sinapsis_zonos/helpers/zonos_tts_utils.py,sha256=bwu88wsJGzEqbssgb-wpS_7lFscJ74J8cgyca-hX_Qw,6422
39
39
  sinapsis_zonos/src/sinapsis_zonos/templates/__init__.py,sha256=A-_F0K3hbEFqeWWAh4YftgU9CFX-WHrauSiCAww9yp8,482
40
40
  sinapsis_zonos/src/sinapsis_zonos/templates/zonos_tts.py,sha256=h5EToXoJgAgjqvz9WLDfSjhCsV5zgBwZrX5cTJ4VnhM,7679
41
- sinapsis_speech-0.4.3.dist-info/METADATA,sha256=UZXiSYdXx8deyu9p28aaUkIDyxtfm1REJYrvV2JSj6E,12783
42
- sinapsis_speech-0.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- sinapsis_speech-0.4.3.dist-info/top_level.txt,sha256=KvdwXupt5wnqb_4XGRcuJaL9Glgdw-DBvRkNzhgl_Ds,110
44
- sinapsis_speech-0.4.3.dist-info/RECORD,,
41
+ sinapsis_speech-0.4.4.dist-info/METADATA,sha256=dWpD72J-S4yNHvKEZY0dECGMfQ3wqX63x18iOf4bupw,12783
42
+ sinapsis_speech-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ sinapsis_speech-0.4.4.dist-info/top_level.txt,sha256=KvdwXupt5wnqb_4XGRcuJaL9Glgdw-DBvRkNzhgl_Ds,110
44
+ sinapsis_speech-0.4.4.dist-info/RECORD,,