smallestai 1.3.4__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. Click here for more details.

smallest/async_tts.py CHANGED
@@ -1,26 +1,25 @@
1
1
  import os
2
2
  import copy
3
+ import json
3
4
  import aiohttp
4
5
  import aiofiles
6
+ import requests
5
7
  from typing import Optional, Union, List
6
8
 
7
- from smallest.models import TTSModels, TTSVoices
8
9
  from smallest.exceptions import TTSError, APIError
9
- from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
10
+ from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
11
+ get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
11
12
 
12
13
 
13
14
  class AsyncSmallest:
14
15
  def __init__(
15
16
  self,
16
17
  api_key: str = None,
17
- model: TTSModels = "lightning",
18
- sample_rate: int = 24000,
19
- voice: TTSVoices = "emily",
18
+ model: Optional[str] = "lightning",
19
+ sample_rate: Optional[int] = 24000,
20
+ voice_id: Optional[str] = "emily",
20
21
  speed: Optional[float] = 1.0,
21
- add_wav_header: Optional[bool] = True,
22
- transliterate: Optional[bool] = False,
23
- remove_extra_silence: Optional[bool] = False
22
+ add_wav_header: Optional[bool] = True
24
23
  ) -> None:
25
24
  """
26
25
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -33,11 +32,9 @@ class AsyncSmallest:
33
32
  - api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
34
33
  - model (TTSModels): The model to be used for synthesis.
35
34
  - sample_rate (int): The sample rate for the audio output.
36
- - voice (TTSVoices): The voice to be used for synthesis.
35
+ - voice_id (TTSVoices): The voice to be used for synthesis.
37
36
  - speed (float): The speed of the speech synthesis.
38
37
  - add_wav_header (bool): Whether to add a WAV header to the output audio.
39
- - transliterate (bool): Whether to transliterate the text.
40
- - remove_extra_silence (bool): Whether to remove extra silence from the synthesized audio.
41
38
 
42
39
  Methods:
43
40
  - get_languages: Returns a list of available languages for synthesis.
@@ -48,45 +45,85 @@ class AsyncSmallest:
48
45
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
49
46
  if not self.api_key:
50
47
  raise TTSError()
48
+ if model == "lightning-large":
49
+ voice_id = "lakshya"
50
+
51
51
  self.chunk_size = 250
52
52
 
53
53
  self.opts = TTSOptions(
54
54
  model=model,
55
55
  sample_rate=sample_rate,
56
- voice=voice,
56
+ voice_id=voice_id,
57
57
  api_key=self.api_key,
58
58
  add_wav_header=add_wav_header,
59
- speed=speed,
60
- transliterate=transliterate,
61
- remove_extra_silence=remove_extra_silence,
59
+ speed=speed
62
60
  )
63
61
  self.session = None
64
62
 
63
+
65
64
  async def __aenter__(self):
66
65
  if self.session is None:
67
66
  self.session = aiohttp.ClientSession()
68
67
  return self
69
68
 
69
+
70
70
  async def __aexit__(self, exc_type, exc_val, exc_tb):
71
71
  if self.session:
72
72
  await self.session.close()
73
73
 
74
74
 
75
+ async def _ensure_session(self):
76
+ """Ensure session exists for direct calls"""
77
+ if not self.session:
78
+ self.session = aiohttp.ClientSession()
79
+ return True
80
+ return False
81
+
82
+
75
83
  def get_languages(self) -> List[str]:
76
84
  """Returns a list of available languages."""
77
85
  return get_smallest_languages()
78
86
 
79
- def get_voices(self) -> List[str]:
87
+ def get_cloned_voices(self) -> str:
88
+ """Returns a list of your cloned voices."""
89
+ headers = {
90
+ "Authorization": f"Bearer {self.api_key}",
91
+ }
92
+
93
+ res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
94
+ if res.status_code != 200:
95
+ raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")
96
+
97
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
98
+
99
+
100
+ def get_voices(
101
+ self,
102
+ model: Optional[str] = "lightning"
103
+ ) -> str:
80
104
  """Returns a list of available voices."""
81
- return get_smallest_voices()
105
+ headers = {
106
+ "Authorization": f"Bearer {self.api_key}",
107
+ }
108
+
109
+ res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers)
110
+ if res.status_code != 200:
111
+ raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")
112
+
113
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
114
+
82
115
 
83
116
  def get_models(self) -> List[str]:
84
117
  """Returns a list of available models."""
85
118
  return get_smallest_models()
86
119
 
120
+
87
121
  async def synthesize(
88
122
  self,
89
123
  text: str,
124
+ consistency: Optional[float] = 0.5,
125
+ similarity: Optional[float] = 0,
126
+ enhancement: Optional[bool] = False,
90
127
  save_as: Optional[str] = None,
91
128
  **kwargs
92
129
  ) -> Union[bytes, None]:
@@ -97,6 +134,9 @@ class AsyncSmallest:
97
134
  - text (str): The text to be converted to speech.
98
135
  - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
99
136
  The file must have a .wav extension.
137
+ - consistency (Optional[float]): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
138
+ - similarity (Optional[float]): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
139
+ - enhancement (Optional[bool]): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
100
140
  - kwargs: Additional optional parameters to override `__init__` options for this call.
101
141
 
102
142
  Returns:
@@ -106,52 +146,166 @@ class AsyncSmallest:
106
146
  Raises:
107
147
  - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
108
148
  - APIError: If the API request fails or returns an error.
149
+ - ValueError: If an unexpected parameter is passed in `kwargs`.
109
150
  """
110
- opts = copy.deepcopy(self.opts)
111
- for key, value in kwargs.items():
112
- setattr(opts, key, value)
113
-
114
- validate_input(preprocess_text(text), opts.voice, opts.model, opts.sample_rate, opts.speed)
115
-
116
- chunks = split_into_chunks(text)
117
- audio_content = b""
118
-
119
- for chunk in chunks:
120
- payload = {
121
- "text": preprocess_text(chunk),
122
- "sample_rate": opts.sample_rate,
123
- "voice_id": opts.voice,
124
- "add_wav_header": False,
125
- "speed": opts.speed,
126
- "model": opts.model,
127
- "transliterate": opts.transliterate,
128
- "remove_extra_silence": opts.remove_extra_silence
129
- }
130
-
131
- headers = {
132
- "Authorization": f"Bearer {self.api_key}",
133
- "Content-Type": "application/json",
134
- }
135
-
136
- if not self.session:
137
- self.session = aiohttp.ClientSession()
138
-
139
- async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
151
+ should_cleanup = False
152
+
153
+ if self.session is None or self.session.closed:
154
+ self.session = aiohttp.ClientSession()
155
+ should_cleanup = True # Cleanup only if we created a new session
156
+
157
+ try:
158
+ opts = copy.deepcopy(self.opts)
159
+ valid_keys = set(vars(opts).keys())
160
+
161
+ invalid_keys = [key for key in kwargs if key not in valid_keys]
162
+ if invalid_keys:
163
+ raise ValueError(f"Invalid parameter(s) in kwargs: {', '.join(invalid_keys)}. Allowed parameters are: {', '.join(valid_keys)}")
164
+
165
+ for key, value in kwargs.items():
166
+ setattr(opts, key, value)
167
+
168
+ validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed, consistency, similarity, enhancement)
169
+
170
+ self.chunk_size = 250
171
+ if opts.model == 'lightning-large':
172
+ self.chunk_size = 140
173
+
174
+ chunks = chunk_text(text, self.chunk_size)
175
+ audio_content = b""
176
+
177
+ for chunk in chunks:
178
+ payload = {
179
+ "text": preprocess_text(chunk),
180
+ "sample_rate": opts.sample_rate,
181
+ "voice_id": opts.voice_id,
182
+ "add_wav_header": False,
183
+ "speed": opts.speed,
184
+ "model": opts.model
185
+ }
186
+
187
+ if opts.model == "lightning-large":
188
+ if consistency:
189
+ payload["consistency"] = consistency
190
+ if similarity:
191
+ payload["similarity"] = similarity
192
+ if enhancement:
193
+ payload["enhancement"] = enhancement
194
+
195
+
196
+ headers = {
197
+ "Authorization": f"Bearer {self.api_key}",
198
+ "Content-Type": "application/json",
199
+ }
200
+
201
+ async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
202
+ if res.status != 200:
203
+ raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
204
+
205
+ audio_content += await res.read()
206
+
207
+ if save_as:
208
+ if not save_as.endswith(".wav"):
209
+ raise TTSError("Invalid file name. Extension must be .wav")
210
+
211
+ async with aiofiles.open(save_as, mode='wb') as f:
212
+ await f.write(add_wav_header(audio_content, opts.sample_rate))
213
+
214
+ return None
215
+
216
+ if opts.add_wav_header:
217
+ return add_wav_header(audio_content, opts.sample_rate)
218
+
219
+ return audio_content
220
+
221
+ finally:
222
+ if should_cleanup and self.session:
223
+ await self.session.close()
224
+ self.session = None
225
+
226
+
227
+ async def add_voice(self, display_name: str, file_path: str) -> str:
228
+ """
229
+ Instantly clone your voice asynchronously.
230
+
231
+ Args:
232
+ - display_name (str): The display name for the new voice.
233
+ - file_path (str): The path to the reference audio file to be cloned.
234
+
235
+ Returns:
236
+ - str: The response from the API as a formatted JSON string.
237
+
238
+ Raises:
239
+ - TTSError: If the file does not exist or is not a valid audio file.
240
+ - APIError: If the API request fails or returns an error.
241
+ """
242
+ url = f"{API_BASE_URL}/lightning-large/add_voice"
243
+
244
+ if not os.path.exists(file_path):
245
+ raise TTSError("Invalid file path. File does not exist.")
246
+
247
+ file_extension = os.path.splitext(file_path)[1].lower()
248
+ if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
249
+ raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
250
+
251
+ headers = {
252
+ 'Authorization': f"Bearer {self.api_key}",
253
+ }
254
+
255
+ should_cleanup = await self._ensure_session()
256
+
257
+ try:
258
+ async with aiofiles.open(file_path, 'rb') as f:
259
+ file_data = await f.read()
260
+
261
+ data = aiohttp.FormData()
262
+ content_type = file_extension[1:]
263
+
264
+ data.add_field('displayName', display_name)
265
+ data.add_field('file', file_data, filename=file_path, content_type=f"audio/{content_type}")
266
+
267
+ async with self.session.post(url, headers=headers, data=data) as res:
140
268
  if res.status != 200:
141
- raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
269
+ raise APIError(f"Failed to add voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
270
+
271
+ return json.dumps(await res.json(), indent=4, ensure_ascii=False)
272
+
273
+ finally:
274
+ if should_cleanup and self.session:
275
+ await self.session.close()
276
+ self.session = None
277
+
278
+
279
+ async def delete_voice(self, voice_id: str) -> str:
280
+ """
281
+ Delete a cloned voice asynchronously.
142
282
 
143
- audio_content += await res.read()
283
+ Args:
284
+ - voice_id (str): The ID of the voice to be deleted.
144
285
 
145
- if save_as:
146
- if not save_as.endswith(".wav"):
147
- raise TTSError("Invalid file name. Extension must be .wav")
286
+ Returns:
287
+ - str: The response from the API.
148
288
 
149
- async with aiofiles.open(save_as, mode='wb') as f:
150
- await f.write(add_wav_header(audio_content, self.opts.sample_rate))
289
+ Raises:
290
+ - APIError: If the API request fails or returns an error.
291
+ """
292
+ url = f"{API_BASE_URL}/lightning-large"
293
+ payload = {'voiceId': voice_id}
294
+
295
+ headers = {
296
+ "Authorization": f"Bearer {self.api_key}",
297
+ }
151
298
 
152
- return None
299
+ should_cleanup = await self._ensure_session()
153
300
 
154
- if opts.add_wav_header:
155
- return add_wav_header(audio_content, self.opts.sample_rate)
301
+ try:
302
+ async with self.session.delete(url, headers=headers, json=payload) as res:
303
+ if res.status != 200:
304
+ raise APIError(f"Failed to delete voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
156
305
 
157
- return audio_content
306
+ return await res.text()
307
+
308
+ finally:
309
+ if should_cleanup and self.session:
310
+ await self.session.close()
311
+ self.session = None
smallest/models.py CHANGED
@@ -1,23 +1,5 @@
1
- from typing import Literal, List, Tuple, cast
2
- import aiohttp
3
- import asyncio
4
-
5
- API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
6
-
7
- async def _fetch_voice_and_model() -> Tuple[List[str], List[str]]:
8
- async with aiohttp.ClientSession() as session:
9
- async with session.get(f"{API_BASE_URL}/voice/get-all-models") as response:
10
- api_response = await response.json()
11
-
12
- voices = []
13
- for model in api_response:
14
- for voice in model['voiceIds']:
15
- voices.append(voice['voiceId'])
16
- models = [model['modelName'] for model in api_response]
17
- return models, voices
18
-
19
- models, voices = asyncio.run(_fetch_voice_and_model())
20
-
21
1
  TTSLanguages = ["en", "hi"]
22
- TTSModels = models
23
- TTSVoices = voices
2
+ TTSModels = [
3
+ "lightning",
4
+ "lightning-large"
5
+ ]
smallest/stream_tts.py CHANGED
@@ -12,8 +12,8 @@ class TextToAudioStream:
12
12
  def __init__(
13
13
  self,
14
14
  tts_instance: Union[Smallest, AsyncSmallest],
15
- queue_timeout: float = 5.0,
16
- max_retries: int = 3
15
+ queue_timeout: Optional[float] = 5.0,
16
+ max_retries: Optional[int] = 3
17
17
  ):
18
18
  """
19
19
  A real-time text-to-speech processor that converts streaming text into audio output.
@@ -30,12 +30,11 @@ class TextToAudioStream:
30
30
 
31
31
  Args:
32
32
  tts_instance: The text-to-speech engine to use (Smallest or AsyncSmallest)
33
- queue_timeout: How long to wait for new text (seconds, default: 1.0)
33
+ queue_timeout: How long to wait for new text (seconds, default: 5.0)
34
34
  max_retries: Number of retry attempts for failed synthesis (default: 3)
35
35
  """
36
36
  self.tts_instance = tts_instance
37
37
  self.tts_instance.opts.add_wav_header = False
38
-
39
38
  self.sentence_end_regex = SENTENCE_END_REGEX
40
39
  self.queue_timeout = queue_timeout
41
40
  self.max_retries = max_retries
@@ -43,69 +42,72 @@ class TextToAudioStream:
43
42
  self.buffer_size = 250
44
43
  self.stop_flag = False
45
44
 
45
+ if self.tts_instance.opts.model == 'lightning-large':
46
+ self.buffer_size = 140
47
+
46
48
 
47
49
  async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
48
50
  """
49
- Streams the LLM output, splitting it into sentences based on the regex
50
- and chunk size, and adding each chunk to the queue.
51
+ Streams the LLM output, splitting it into chunks based on sentence boundaries
52
+ or space characters if no sentence boundary is found before reaching buffer_size.
51
53
 
52
54
  Parameters:
53
55
  - llm_output (AsyncGenerator[str, None]): An async generator yielding LLM output.
54
56
  """
55
57
  buffer = ""
56
- last_break_index = 0
57
58
 
58
59
  async for chunk in llm_output:
59
60
  buffer += chunk
60
- i = 0
61
-
62
- while i < len(buffer):
63
- current_chunk = buffer[:i + 1]
64
- if self.sentence_end_regex.match(current_chunk):
65
- last_break_index = i
66
61
 
67
- if len(current_chunk) >= self.buffer_size:
68
- if last_break_index > 0:
69
- self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
70
- buffer = buffer[last_break_index + 1:]
62
+ while len(buffer) > self.buffer_size:
63
+ chunk_text = buffer[:self.buffer_size]
64
+ last_break_index = -1
65
+
66
+ # Find last sentence boundary using regex
67
+ for i in range(len(chunk_text) - 1, -1, -1):
68
+ if self.sentence_end_regex.match(chunk_text[:i + 1]):
69
+ last_break_index = i
70
+ break
71
+
72
+ if last_break_index == -1:
73
+ # Fallback to space if no sentence boundary found
74
+ last_space = chunk_text.rfind(' ')
75
+ if last_space != -1:
76
+ last_break_index = last_space
71
77
  else:
72
- # No sentence boundary, split at max chunk size
73
- self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
74
- buffer = buffer[self.buffer_size:]
78
+ last_break_index = self.buffer_size - 1
75
79
 
76
- last_break_index = 0
77
- i = -1
78
-
79
- i += 1
80
+ # Add chunk to queue and update buffer
81
+ self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ')
82
+ buffer = buffer[last_break_index + 1:].strip()
80
83
 
84
+ # Don't forget the remaining text
81
85
  if buffer:
82
- self.queue.put(buffer.replace("—", " ").strip())
86
+ self.queue.put(f'{buffer.replace("—", " ").strip()} ')
83
87
 
84
- self.stop_flag = True # completion flag when LLM output ends
88
+ self.stop_flag = True
85
89
 
86
90
 
87
- async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
88
- """Asynchronously synthesizes a given sentence."""
91
+ def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
92
+ """Synchronously synthesizes a given sentence."""
89
93
  try:
90
- return await self.tts_instance.synthesize(sentence)
94
+ return self.tts_instance.synthesize(sentence)
91
95
  except APIError as e:
92
96
  if retries < self.max_retries:
93
- return await self._synthesize_async(sentence, retries + 1)
97
+ return self._synthesize_sync(sentence, retries + 1)
94
98
  else:
95
- print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
96
- return None
97
-
99
+ raise APIError(f"Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
100
+
98
101
 
99
- def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
100
- """Synchronously synthesizes a given sentence."""
102
+ async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
103
+ """Asynchronously synthesizes a given sentence."""
101
104
  try:
102
- return self.tts_instance.synthesize(sentence)
105
+ return await self.tts_instance.synthesize(sentence)
103
106
  except APIError as e:
104
107
  if retries < self.max_retries:
105
- return self._synthesize_sync(sentence, retries + 1)
108
+ return await self._synthesize_async(sentence, retries + 1)
106
109
  else:
107
- print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
108
- return None
110
+ raise APIError(f"Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
109
111
 
110
112
 
111
113
  async def _run_synthesis(self) -> AsyncGenerator[bytes, None]:
@@ -147,10 +149,13 @@ class TextToAudioStream:
147
149
  - Streamed over a network
148
150
  - Further processed as needed
149
151
  """
150
- llm_thread = Thread(target=asyncio.run, args=(self._stream_llm_output(llm_output),))
151
- llm_thread.start()
152
+ stream_task = asyncio.create_task(self._stream_llm_output(llm_output))
152
153
 
153
- async for audio_content in self._run_synthesis():
154
- yield audio_content
154
+ try:
155
+ async for audio_content in self._run_synthesis():
156
+ yield audio_content
157
+ except Exception as e:
158
+ raise APIError(f"Error during synthesis processing: {e}")
155
159
 
156
- llm_thread.join()
160
+ finally:
161
+ await stream_task
smallest/tts.py CHANGED
@@ -1,25 +1,23 @@
1
1
  import os
2
+ import json
2
3
  import wave
3
4
  import copy
4
5
  import requests
5
6
  from typing import Optional, Union, List
6
7
 
7
- from smallest.models import TTSModels, TTSVoices
8
8
  from smallest.exceptions import TTSError, APIError
9
- from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
9
+ from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
10
+ get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
11
11
 
12
12
  class Smallest:
13
13
  def __init__(
14
14
  self,
15
15
  api_key: str = None,
16
- model: TTSModels = "lightning",
17
- sample_rate: int = 24000,
18
- voice: TTSVoices = "emily",
16
+ model: Optional[str] = "lightning",
17
+ sample_rate: Optional[int] = 24000,
18
+ voice_id: Optional[str] = "emily",
19
19
  speed: Optional[float] = 1.0,
20
- add_wav_header: Optional[bool] = True,
21
- transliterate: Optional[bool] = False,
22
- remove_extra_silence: Optional[bool] = True
20
+ add_wav_header: Optional[bool] = True
23
21
  ) -> None:
24
22
  """
25
23
  Smallest Instance for text-to-speech synthesis.
@@ -31,11 +29,9 @@ class Smallest:
31
29
  - api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
32
30
  - model (TTSModels): The model to be used for synthesis.
33
31
  - sample_rate (int): The sample rate for the audio output.
34
- - voice (TTSVoices): The voice to be used for synthesis.
32
+ - voice_id (TTSVoices): The voice to be used for synthesis.
35
33
  - speed (float): The speed of the speech synthesis.
36
34
  - add_wav_header (bool): Whether to add a WAV header to the output audio.
37
- - transliterate (bool): Whether to transliterate the text.
38
- - remove_extra_silence (bool): Whether to remove extra silence from the synthesized audio.
39
35
 
40
36
  Methods:
41
37
  - get_languages: Returns a list of available languages for synthesis.
@@ -46,18 +42,18 @@ class Smallest:
46
42
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
47
43
  if not self.api_key:
48
44
  raise TTSError()
49
-
45
+ if model == "lightning-large":
46
+ voice_id = "lakshya"
47
+
50
48
  self.chunk_size = 250
51
49
 
52
50
  self.opts = TTSOptions(
53
51
  model=model,
54
52
  sample_rate=sample_rate,
55
- voice=voice,
53
+ voice_id=voice_id,
56
54
  api_key=self.api_key,
57
55
  add_wav_header=add_wav_header,
58
- speed=speed,
59
- transliterate=transliterate,
60
- remove_extra_silence=remove_extra_silence
56
+ speed=speed
61
57
  )
62
58
 
63
59
 
@@ -65,17 +61,46 @@ class Smallest:
65
61
  """Returns a list of available languages."""
66
62
  return get_smallest_languages()
67
63
 
68
- def get_voices(self) -> List[str]:
64
+ def get_cloned_voices(self) -> str:
65
+ """Returns a list of your cloned voices."""
66
+ headers = {
67
+ "Authorization": f"Bearer {self.api_key}",
68
+ }
69
+
70
+ res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
71
+ if res.status_code != 200:
72
+ raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")
73
+
74
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
75
+
76
+
77
+ def get_voices(
78
+ self,
79
+ model: Optional[str] = "lightning"
80
+ ) -> str:
69
81
  """Returns a list of available voices."""
70
- return get_smallest_voices()
82
+ headers = {
83
+ "Authorization": f"Bearer {self.api_key}",
84
+ }
85
+
86
+ res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers)
87
+ if res.status_code != 200:
88
+ raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")
89
+
90
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
91
+
71
92
 
72
93
  def get_models(self) -> List[str]:
73
94
  """Returns a list of available models."""
74
95
  return get_smallest_models()
75
96
 
97
+
76
98
  def synthesize(
77
99
  self,
78
100
  text: str,
101
+ consistency: Optional[float] = 0.5,
102
+ similarity: Optional[float] = 0,
103
+ enhancement: Optional[bool] = False,
79
104
  save_as: Optional[str] = None,
80
105
  **kwargs
81
106
  ) -> Union[bytes, None]:
@@ -86,6 +111,9 @@ class Smallest:
86
111
  - text (str): The text to be converted to speech.
87
112
  - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
88
113
  The file must have a .wav extension.
114
+ - consistency (Optional[float]): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
115
+ - similarity (Optional[float]): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
116
+ - enhancement (Optional[bool]): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
89
117
  - kwargs: Additional optional parameters to override `__init__` options for this call.
90
118
 
91
119
  Returns:
@@ -97,26 +125,41 @@ class Smallest:
97
125
  - APIError: If the API request fails or returns an error.
98
126
  """
99
127
  opts = copy.deepcopy(self.opts)
128
+ valid_keys = set(vars(opts).keys())
129
+
130
+ invalid_keys = [key for key in kwargs if key not in valid_keys]
131
+ if invalid_keys:
132
+ raise ValueError(f"Invalid parameter(s) in kwargs: {', '.join(invalid_keys)}. Allowed parameters are: {', '.join(valid_keys)}")
133
+
100
134
  for key, value in kwargs.items():
101
135
  setattr(opts, key, value)
102
136
 
103
- validate_input(preprocess_text(text), opts.voice, opts.model, opts.sample_rate, opts.speed)
137
+ validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed, consistency, similarity, enhancement)
138
+
139
+ self.chunk_size = 250
140
+ if opts.model == "lightning-large":
141
+ self.chunk_size = 140
104
142
 
105
- chunks = split_into_chunks(text)
143
+ chunks = chunk_text(text, self.chunk_size)
106
144
  audio_content = b""
107
145
 
108
146
  for chunk in chunks:
109
147
  payload = {
110
148
  "text": preprocess_text(chunk),
111
149
  "sample_rate": opts.sample_rate,
112
- "voice_id": opts.voice,
150
+ "voice_id": opts.voice_id,
113
151
  "add_wav_header": False,
114
152
  "speed": opts.speed,
115
- "model": opts.model,
116
- "transliterate": opts.transliterate,
117
- "remove_extra_silence": opts.remove_extra_silence,
118
153
  }
119
154
 
155
+ if opts.model == "lightning-large":
156
+ if consistency:
157
+ payload["consistency"] = consistency
158
+ if similarity:
159
+ payload["similarity"] = similarity
160
+ if enhancement:
161
+ payload["enhancement"] = enhancement
162
+
120
163
  headers = {
121
164
  "Authorization": f"Bearer {self.api_key}",
122
165
  "Content-Type": "application/json",
@@ -124,15 +167,10 @@ class Smallest:
124
167
 
125
168
  res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
126
169
  if res.status_code != 200:
127
- raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
170
+ raise APIError(f"Failed to synthesize speech: {res.text}. For more information, visit https://waves.smallest.ai/")
128
171
 
129
172
  audio_content += res.content
130
173
 
131
-
132
- res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
133
- if res.status_code != 200:
134
- raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
135
-
136
174
  if save_as:
137
175
  if not save_as.endswith(".wav"):
138
176
  raise TTSError("Invalid file name. Extension must be .wav")
@@ -140,11 +178,76 @@ class Smallest:
140
178
  with wave.open(save_as, "wb") as wf:
141
179
  wf.setnchannels(1)
142
180
  wf.setsampwidth(2)
143
- wf.setframerate(self.opts.sample_rate)
181
+ wf.setframerate(opts.sample_rate)
144
182
  wf.writeframes(audio_content)
145
183
  return None
146
184
 
147
- if self.opts.add_wav_header:
148
- return add_wav_header(audio_content, self.opts.sample_rate)
185
+ if opts.add_wav_header:
186
+ return add_wav_header(audio_content, opts.sample_rate)
149
187
 
150
188
  return audio_content
189
+
190
+
191
+ def add_voice(self, display_name: str, file_path: str) -> str:
192
+ """
193
+ Instantly clone your voice synchronously.
194
+
195
+ Args:
196
+ - display_name (str): The display name for the new voice.
197
+ - file_path (str): The path to the reference audio file to be cloned.
198
+
199
+ Returns:
200
+ - str: The response from the API as a formatted JSON string.
201
+
202
+ Raises:
203
+ - TTSError: If the file does not exist or is not a valid audio file.
204
+ - APIError: If the API request fails or returns an error.
205
+ """
206
+ if not os.path.isfile(file_path):
207
+ raise TTSError("Invalid file path. File does not exist.")
208
+
209
+ file_extension = os.path.splitext(file_path)[1].lower()
210
+ if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
211
+ raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
212
+
213
+ url = f"{API_BASE_URL}/lightning-large/add_voice"
214
+ payload = {'displayName': display_name}
215
+
216
+ files = [('file', (os.path.basename(file_path), open(file_path, 'rb'), 'audio/wav'))]
217
+
218
+ headers = {
219
+ 'Authorization': f"Bearer {self.api_key}",
220
+ }
221
+
222
+ response = requests.post(url, headers=headers, data=payload, files=files)
223
+ if response.status_code != 200:
224
+ raise APIError(f"Failed to add voice: {response.text}. For more information, visit https://waves.smallest.ai/")
225
+
226
+ return json.dumps(response.json(), indent=4, ensure_ascii=False)
227
+
228
+
229
+ def delete_voice(self, voice_id: str) -> str:
230
+ """
231
+ Delete a cloned voice synchronously.
232
+
233
+ Args:
234
+ - voice_id (str): The ID of the voice to be deleted.
235
+
236
+ Returns:
237
+ - str: The response from the API.
238
+
239
+ Raises:
240
+ - APIError: If the API request fails or returns an error.
241
+ """
242
+ url = f"{API_BASE_URL}/lightning-large"
243
+ payload = {'voiceId': voice_id}
244
+
245
+ headers = {
246
+ 'Authorization': f"Bearer {self.api_key}",
247
+ }
248
+
249
+ response = requests.delete(url, headers=headers, json=payload)
250
+ if response.status_code != 200:
251
+ raise APIError(f"Failed to delete voice: {response.text}. For more information, visit https://waves.smallest.ai/")
252
+
253
+ return json.dumps(response.json(), indent=4, ensure_ascii=False)
smallest/utils.py CHANGED
@@ -1,109 +1,97 @@
1
1
  import re
2
2
  import io
3
- import unicodedata
4
3
  from typing import List
4
+ from typing import Optional
5
5
  from pydub import AudioSegment
6
6
  from dataclasses import dataclass
7
7
  from sacremoses import MosesPunctNormalizer
8
8
 
9
9
  from smallest.exceptions import ValidationError
10
- from smallest.models import TTSModels, TTSLanguages, TTSVoices
10
+ from smallest.models import TTSModels, TTSLanguages
11
11
 
12
12
 
13
13
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
14
- SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
15
- CHUNK_SIZE = 250
14
+ SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$')
15
+ mpn = MosesPunctNormalizer()
16
16
  SAMPLE_WIDTH = 2
17
17
  CHANNELS = 1
18
+ ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
18
19
 
19
20
 
20
21
  @dataclass
21
22
  class TTSOptions:
22
- model: TTSModels
23
+ model: str
23
24
  sample_rate: int
24
- voice: TTSVoices
25
+ voice_id: str
25
26
  api_key: str
26
27
  add_wav_header: bool
27
28
  speed: float
28
- transliterate: bool
29
- remove_extra_silence: bool
30
29
 
31
30
 
32
- def validate_input(text: str, voice: TTSVoices, model: TTSModels, sample_rate: int, speed: float):
31
+ def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[bool] = None):
33
32
  if not text:
34
- raise ValidationError("Text cannot be empty")
35
- if voice not in TTSVoices:
36
- raise ValidationError(f"Invalid voice: {voice}")
33
+ raise ValidationError("Text cannot be empty.")
37
34
  if model not in TTSModels:
38
- raise ValidationError(f"Invalid model: {model}")
35
+ raise ValidationError(f"Invalid model: {model}. Must be one of {TTSModels}")
39
36
  if not 8000 <= sample_rate <= 24000:
40
37
  raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 24000")
41
38
  if not 0.5 <= speed <= 2.0:
42
39
  raise ValidationError(f"Invalid speed: {speed}. Must be between 0.5 and 2.0")
40
+ if consistency is not None and not 0.0 <= consistency <= 1.0:
41
+ raise ValidationError(f"Invalid consistency: {consistency}. Must be between 0.0 and 1.0")
42
+ if similarity is not None and not 0.0 <= similarity <= 1.0:
43
+ raise ValidationError(f"Invalid similarity: {similarity}. Must be between 0.0 and 1.0")
44
+ if enhancement is not None and not isinstance(enhancement, bool):
45
+ raise ValidationError(f"Invalid enhancement: {enhancement}. Must be a boolean value.")
43
46
 
44
47
 
45
48
  def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: int = 2, channels: int = 1) -> bytes:
46
- audio = AudioSegment(data=frame_input, sample_width=sample_width, frame_rate=sample_rate, channels=channels)
47
- wav_buf = io.BytesIO()
48
- audio.export(wav_buf, format="wav")
49
- wav_buf.seek(0)
50
- return wav_buf.read()
49
+ audio = AudioSegment(data=frame_input, sample_width=sample_width, frame_rate=sample_rate, channels=channels)
50
+ wav_buf = io.BytesIO()
51
+ audio.export(wav_buf, format="wav")
52
+ wav_buf.seek(0)
53
+ return wav_buf.read()
51
54
 
52
55
 
53
56
  def preprocess_text(text: str) -> str:
54
- text = text.replace("\n", " ").replace("\t", " ").replace("—", " ")
57
+ text = text.replace("\n", " ").replace("\t", " ").replace("—", " ").replace("-", " ").replace("–", " ")
55
58
  text = re.sub(r'\s+', ' ', text)
56
- mpn = MosesPunctNormalizer()
57
59
  text = mpn.normalize(text)
58
60
  return text.strip()
59
61
 
60
62
 
61
- def split_into_chunks(text: str) -> List[str]:
62
- """
63
- Splits the input text into chunks based on sentence boundaries
64
- defined by SENTENCE_END_REGEX and the maximum chunk size.
65
- Only splits at valid sentence boundaries to avoid breaking words.
66
- """
63
+ def chunk_text(text: str, chunk_size: int = 250) -> List[str]:
67
64
  chunks = []
68
65
  while text:
69
- # If the remaining text is shorter than chunk size, add it as final chunk
70
- if len(text) <= CHUNK_SIZE:
66
+ if len(text) <= chunk_size:
71
67
  chunks.append(text.strip())
72
68
  break
73
69
 
74
- # Find the last sentence boundary within CHUNK_SIZE
75
- chunk_text = text[:CHUNK_SIZE]
70
+ chunk_text = text[:chunk_size]
76
71
  last_break_index = -1
77
72
 
78
- # Check each character in reverse order to find last punctuation
73
+ # Find last sentence boundary using regex
79
74
  for i in range(len(chunk_text) - 1, -1, -1):
80
- if chunk_text[i] in '-.—!?;:…\n':
75
+ if SENTENCE_END_REGEX.match(chunk_text[:i + 1]):
81
76
  last_break_index = i
82
77
  break
83
78
 
84
79
  if last_break_index == -1:
85
- # If no punctuation found in chunk, look for the last space
86
- # to avoid breaking words
80
+ # Fallback to space if no sentence boundary found
87
81
  last_space = chunk_text.rfind(' ')
88
82
  if last_space != -1:
89
- last_break_index = last_space
83
+ last_break_index = last_space
90
84
  else:
91
- # If no space found, use the full chunk size
92
- last_break_index = CHUNK_SIZE - 1
85
+ last_break_index = chunk_size - 1
93
86
 
94
- # Add the chunk up to the break point
95
87
  chunks.append(text[:last_break_index + 1].strip())
96
- # Continue with remaining text
97
88
  text = text[last_break_index + 1:].strip()
98
89
 
99
90
  return chunks
100
91
 
101
92
 
102
93
  def get_smallest_languages() -> List[str]:
103
- return list(TTSLanguages)
104
-
105
- def get_smallest_voices() -> List[str]:
106
- return list(TTSVoices)
94
+ return TTSLanguages
107
95
 
108
96
  def get_smallest_models() -> List[str]:
109
- return ["lightning"]
97
+ return TTSModels
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: smallestai
3
- Version: 1.3.4
3
+ Version: 2.1.0
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <support@smallest.ai>
6
6
  License: MIT
@@ -55,9 +55,15 @@ Currently, the library supports direct synthesis and the ability to synthesize s
55
55
  - [Get the API Key](#get-the-api-key)
56
56
  - [Best Practices for Input Text](#best-practices-for-input-text)
57
57
  - [Examples](#examples)
58
- - [Sync](#sync)
59
- - [Async](#async)
58
+ - [Synchronous](#synchronous)
59
+ - [Asynchronous](#asynchronous)
60
60
  - [LLM to Speech](#llm-to-speech)
61
+ - [Add your Voice](#add-your-voice)
62
+ - [Synchronously](#add-synchronously)
63
+ - [Asynchronously](#add-asynchronously)
64
+ - [Delete your Voice](#delete-your-voice)
65
+ - [Synchronously](#delete-synchronously)
66
+ - [Asynchronously](#delete-asynchronously)
61
67
  - [Available Methods](#available-methods)
62
68
  - [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
63
69
 
@@ -77,28 +83,22 @@ When using an SDK in your application, make sure to pin to at least the major ve
77
83
  3. Create a new API Key and copy it.
78
84
  4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
79
85
 
80
- ## Best Practices for Input Text
81
- While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
82
-
83
- For optimal voice generation results:
84
-
85
- 1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
86
- 2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
87
- 3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
88
86
 
89
87
  ## Examples
90
88
 
91
- ### Sync
89
+ ### Synchronous
92
90
  A synchronous text-to-speech synthesis client.
93
91
 
94
92
  **Basic Usage:**
95
93
  ```python
96
- import os
97
94
  from smallest import Smallest
98
95
 
99
96
  def main():
100
- client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
101
- client.synthesize("Hello, this is a test for sync synthesis function.", save_as="sync_synthesize.wav")
97
+ client = Smallest(api_key="SMALLEST_API_KEY")
98
+ client.synthesize(
99
+ text="Hello, this is a test for sync synthesis function.",
100
+ save_as="sync_synthesize.wav"
101
+ )
102
102
 
103
103
  if __name__ == "__main__":
104
104
  main()
@@ -108,11 +108,12 @@ if __name__ == "__main__":
108
108
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
109
109
  - `model`: TTS model to use (default: "lightning")
110
110
  - `sample_rate`: Audio sample rate (default: 24000)
111
- - `voice`: Voice ID (default: "emily")
111
+ - `voice_id`: Voice ID (default: "emily")
112
112
  - `speed`: Speech speed multiplier (default: 1.0)
113
- - `add_wav_header`: Include WAV header in output (default: True)
114
- - `transliterate`: Enable text transliteration (default: False)
115
- - `remove_extra_silence`: Remove additional silence (default: True)
113
+ - `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. (default: 0.5)
114
+ - `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. (default: 0)
115
+ - `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. (default: False)
116
+ - `add_wav_header`: Whether to add a WAV header to the output audio.
116
117
 
117
118
  These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
118
119
 
@@ -127,19 +128,17 @@ client.synthesize(
127
128
  ```
128
129
 
129
130
 
130
- ### Async
131
+ ### Asynchronous
131
132
  Asynchronous text-to-speech synthesis client.
132
133
 
133
134
  **Basic Usage:**
134
135
  ```python
135
- import os
136
136
  import asyncio
137
137
  import aiofiles
138
138
  from smallest import AsyncSmallest
139
139
 
140
- client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
141
-
142
140
  async def main():
141
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
143
142
  async with client as tts:
144
143
  audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
145
144
  async with aiofiles.open("async_synthesize.wav", "wb") as f:
@@ -149,15 +148,33 @@ if __name__ == "__main__":
149
148
  asyncio.run(main())
150
149
  ```
151
150
 
151
+ **Running Asynchronously in a Jupyter Notebook**
152
+ If you are using a Jupyter Notebook, use the following approach to execute the asynchronous function within an existing event loop:
153
+ ```python
154
+ import asyncio
155
+ import aiofiles
156
+ from smallest import AsyncSmallest
157
+
158
+ async def main():
159
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
160
+ async with client as tts:
161
+ audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
162
+ async with aiofiles.open("async_synthesize.wav", "wb") as f:
163
+ await f.write(audio_bytes) # alternatively you can use the `save_as` parameter.
164
+
165
+ await main()
166
+ ```
167
+
152
168
  **Parameters:**
153
169
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
154
170
  - `model`: TTS model to use (default: "lightning")
155
171
  - `sample_rate`: Audio sample rate (default: 24000)
156
- - `voice`: Voice ID (default: "emily")
172
+ - `voice_id`: Voice ID (default: "emily")
157
173
  - `speed`: Speech speed multiplier (default: 1.0)
158
- - `add_wav_header`: Include WAV header in output (default: True)
159
- - `transliterate`: Enable text transliteration (default: False)
160
- - `remove_extra_silence`: Remove additional silence (default: True)
174
+ - `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
175
+ - `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
176
+ - `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
177
+ - `add_wav_header`: Whether to add a WAV header to the output audio.
161
178
 
162
179
  These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.
163
180
 
@@ -174,16 +191,66 @@ audio_bytes = await tts.synthesize(
174
191
 
175
192
  The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
176
193
 
194
+ #### Stream through a WebSocket
195
+
196
+ ```python
197
+ import asyncio
198
+ import websockets
199
+ from groq import Groq
200
+ from smallest import Smallest, TextToAudioStream
201
+
202
+ # Initialize Groq (LLM) and Smallest (TTS) instances
203
+ llm = Groq(api_key="GROQ_API_KEY")
204
+ tts = Smallest(api_key="SMALLEST_API_KEY")
205
+ WEBSOCKET_URL = "wss://echo.websocket.events" # Mock WebSocket server
206
+
207
+ # Async function to stream text generation from LLM
208
+ async def generate_text(prompt):
209
+ completion = llm.chat.completions.create(
210
+ messages=[{"role": "user", "content": prompt}],
211
+ model="llama3-8b-8192",
212
+ stream=True,
213
+ )
214
+
215
+ # Yield text as it is generated
216
+ for chunk in completion:
217
+ text = chunk.choices[0].delta.content
218
+ if text:
219
+ yield text
220
+
221
+ # Main function to run the process
222
+ async def main():
223
+ # Initialize the TTS processor
224
+ processor = TextToAudioStream(tts_instance=tts)
225
+
226
+ # Generate text from LLM
227
+ llm_output = generate_text("Explain text to speech like I am five in 5 sentences.")
228
+
229
+ # Stream the generated speech through a websocket
230
+ async with websockets.connect(WEBSOCKET_URL) as ws:
231
+ print("Connected to WebSocket server.")
232
+
233
+ # Stream the generated speech
234
+ async for audio_chunk in processor.process(llm_output):
235
+ await ws.send(audio_chunk) # Send audio chunk
236
+ echoed_data = await ws.recv() # Receive the echoed message
237
+ print("Received from server:", echoed_data[:20], "...") # Print first 20 bytes
238
+
239
+ print("WebSocket connection closed.")
240
+
241
+ if __name__ == "__main__":
242
+ asyncio.run(main())
243
+ ```
244
+
245
+ #### Save to a File
177
246
  ```python
178
- import os
179
247
  import wave
180
248
  import asyncio
181
249
  from groq import Groq
182
- from smallest import Smallest
183
- from smallest import TextToAudioStream
250
+ from smallest import Smallest, TextToAudioStream
184
251
 
185
- llm = Groq(api_key=os.environ.get("GROQ_API_KEY"))
186
- tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
252
+ llm = Groq(api_key="GROQ_API_KEY")
253
+ tts = Smallest(api_key="SMALLEST_API_KEY")
187
254
 
188
255
  async def generate_text(prompt):
189
256
  """Async generator for streaming text from Groq. You can use any LLM"""
@@ -240,16 +307,76 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
240
307
  - Streamed over a network
241
308
  - Further processed as needed
242
309
 
310
+ ## Add your Voice
311
+ The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
312
+
313
+ ### Add Synchronously
314
+ ```python
315
+ from smallest import Smallest
316
+
317
+ def main():
318
+ client = Smallest(api_key="SMALLEST_API_KEY")
319
+ res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
320
+ print(res)
321
+
322
+ if __name__ == "__main__":
323
+ main()
324
+ ```
325
+
326
+ ### Add Asynchronously
327
+ ```python
328
+ import asyncio
329
+ from smallest import AsyncSmallest
330
+
331
+ async def main():
332
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
333
+ res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
334
+ print(res)
335
+
336
+ if __name__ == "__main__":
337
+ asyncio.run(main())
338
+ ```
339
+
340
+ ## Delete your Voice
341
+ The Smallest AI SDK allows you to delete your cloned voice. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
342
+
343
+ ### Delete Synchronously
344
+ ```python
345
+ from smallest import Smallest
346
+
347
+ def main():
348
+ client = Smallest(api_key="SMALLEST_API_KEY")
349
+ res = client.delete_voice(voice_id="voice_id")
350
+ print(res)
351
+
352
+ if __name__ == "__main__":
353
+ main()
354
+ ```
355
+
356
+ ### Delete Asynchronously
357
+ ```python
358
+ import asyncio
359
+ from smallest import AsyncSmallest
360
+
361
+ async def main():
362
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
363
+ res = await client.delete_voice(voice_id="voice_id")
364
+ print(res)
365
+
366
+ if __name__ == "__main__":
367
+ asyncio.run(main())
368
+ ```
243
369
 
244
370
  ## Available Methods
245
371
 
246
372
  ```python
247
- from smallest.tts import Smallest
373
+ from smallest import Smallest
248
374
 
249
- client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
375
+ client = Smallest(api_key="SMALLEST_API_KEY")
250
376
 
251
- print(f"Avalaible Languages: {client.get_languages()}")
252
- print(f"Available Voices: {client.get_voices()}")
377
+ print(f"Available Languages: {client.get_languages()}")
378
+ print(f"Available Voices: {client.get_voices(model='lightning')}")
379
+ print(f"Available Cloned Voices: {client.get_cloned_voices()}")
253
380
  print(f"Available Models: {client.get_models()}")
254
381
  ```
255
382
 
@@ -0,0 +1,12 @@
1
+ smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
+ smallest/async_tts.py,sha256=Jr7IID5tJrnMx_d2217foUJqfFvAFsddvy_0HG5tKGc,11905
3
+ smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
+ smallest/models.py,sha256=g2e_4nU5P48vyXZandKLWqZC1TkoEGeLvYKqJIqurSI,83
5
+ smallest/stream_tts.py,sha256=dUxoY0VkXecsMZ41QA8RkX4t_pD5-7mMIJhaB01tQrk,6512
6
+ smallest/tts.py,sha256=bSL7EYmLpd5yT42dbUXVb-IgZ_xIcXpyHvCu2-hHtMs,10024
7
+ smallest/utils.py,sha256=HDpDjPkUeeQLqDhrV-zPTLtOH9hJueae0q9SNq486GQ,3396
8
+ smallestai-2.1.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
+ smallestai-2.1.0.dist-info/METADATA,sha256=BwCUFiVZTRActimZBQcPJg8vHJy0M-6vYA_yHvaFpDk,14904
10
+ smallestai-2.1.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
11
+ smallestai-2.1.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
+ smallestai-2.1.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
- smallest/async_tts.py,sha256=2BrNMxq0PDtF7CCZqYPnrp9D0qxZndCgT31EbdrnV-E,6084
3
- smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
- smallest/models.py,sha256=Ndmek9f5VWDjxaNPfSmNk-xP55Y6uXzkzI5V54FnuvU,771
5
- smallest/stream_tts.py,sha256=9sSGR9F_BiSSB1IsiUJP-How0t4-3qdYyTJ-H7ESkMk,6230
6
- smallest/tts.py,sha256=Km3-rFf4D_-XXLi8CAVsiYrw5D-OQRLDHl-LTUh83ec,6030
7
- smallest/utils.py,sha256=kIlS3wQaICT3R4B8R3HpywmXMABJUkCgbvFziStfno8,3527
8
- smallestai-1.3.4.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
- smallestai-1.3.4.dist-info/METADATA,sha256=yfREoK1kPNxKBTWcE_aRp8ByEF-m86nkdyiiBpF2Q4k,10584
10
- smallestai-1.3.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
- smallestai-1.3.4.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
- smallestai-1.3.4.dist-info/RECORD,,