smallestai 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. Click here for more details.

smallest/async_tts.py CHANGED
@@ -8,7 +8,7 @@ from typing import Optional, Union, List
8
8
 
9
9
  from smallest.exceptions import TTSError, APIError
10
10
  from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
11
- get_smallest_languages, get_smallest_models, API_BASE_URL)
11
+ get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
12
12
 
13
13
 
14
14
  class AsyncSmallest:
@@ -19,9 +19,7 @@ class AsyncSmallest:
19
19
  sample_rate: Optional[int] = 24000,
20
20
  voice_id: Optional[str] = "emily",
21
21
  speed: Optional[float] = 1.0,
22
- add_wav_header: Optional[bool] = True,
23
- transliterate: Optional[bool] = False,
24
- remove_extra_silence: Optional[bool] = False
22
+ add_wav_header: Optional[bool] = True
25
23
  ) -> None:
26
24
  """
27
25
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -37,8 +35,6 @@ class AsyncSmallest:
37
35
  - voice_id (TTSVoices): The voice to be used for synthesis.
38
36
  - speed (float): The speed of the speech synthesis.
39
37
  - add_wav_header (bool): Whether to add a WAV header to the output audio.
40
- - transliterate (bool): Whether to transliterate the text.
41
- - remove_extra_silence (bool): Whether to remove extra silence from the synthesized audio.
42
38
 
43
39
  Methods:
44
40
  - get_languages: Returns a list of available languages for synthesis.
@@ -49,6 +45,9 @@ class AsyncSmallest:
49
45
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
50
46
  if not self.api_key:
51
47
  raise TTSError()
48
+ if model == "lightning-large":
49
+ voice_id = "lakshya"
50
+
52
51
  self.chunk_size = 250
53
52
 
54
53
  self.opts = TTSOptions(
@@ -57,9 +56,7 @@ class AsyncSmallest:
57
56
  voice_id=voice_id,
58
57
  api_key=self.api_key,
59
58
  add_wav_header=add_wav_header,
60
- speed=speed,
61
- transliterate=transliterate,
62
- remove_extra_silence=remove_extra_silence,
59
+ speed=speed
63
60
  )
64
61
  self.session = None
65
62
 
@@ -124,6 +121,9 @@ class AsyncSmallest:
124
121
  async def synthesize(
125
122
  self,
126
123
  text: str,
124
+ consistency: Optional[float] = 0.5,
125
+ similarity: Optional[float] = 0,
126
+ enhancement: Optional[bool] = False,
127
127
  save_as: Optional[str] = None,
128
128
  **kwargs
129
129
  ) -> Union[bytes, None]:
@@ -134,6 +134,9 @@ class AsyncSmallest:
134
134
  - text (str): The text to be converted to speech.
135
135
  - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
136
136
  The file must have a .wav extension.
137
+ - consistency (Optional[float]): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
138
+ - similarity (Optional[float]): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
139
+ - enhancement (Optional[bool]): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
137
140
  - kwargs: Additional optional parameters to override `__init__` options for this call.
138
141
 
139
142
  Returns:
@@ -143,18 +146,29 @@ class AsyncSmallest:
143
146
  Raises:
144
147
  - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
145
148
  - APIError: If the API request fails or returns an error.
149
+ - ValueError: If an unexpected parameter is passed in `kwargs`.
146
150
  """
147
- should_cleanup = await self._ensure_session()
151
+ should_cleanup = False
152
+
153
+ if self.session is None or self.session.closed:
154
+ self.session = aiohttp.ClientSession()
155
+ should_cleanup = True # Cleanup only if we created a new session
148
156
 
149
157
  try:
150
158
  opts = copy.deepcopy(self.opts)
159
+ valid_keys = set(vars(opts).keys())
160
+
161
+ invalid_keys = [key for key in kwargs if key not in valid_keys]
162
+ if invalid_keys:
163
+ raise ValueError(f"Invalid parameter(s) in kwargs: {', '.join(invalid_keys)}. Allowed parameters are: {', '.join(valid_keys)}")
164
+
151
165
  for key, value in kwargs.items():
152
166
  setattr(opts, key, value)
153
167
 
154
- validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)
168
+ validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed, consistency, similarity, enhancement)
155
169
 
156
170
  self.chunk_size = 250
157
- if opts.model == 'ligtning-large':
171
+ if opts.model == 'lightning-large':
158
172
  self.chunk_size = 140
159
173
 
160
174
  chunks = chunk_text(text, self.chunk_size)
@@ -167,19 +181,23 @@ class AsyncSmallest:
167
181
  "voice_id": opts.voice_id,
168
182
  "add_wav_header": False,
169
183
  "speed": opts.speed,
170
- "model": opts.model,
171
- "transliterate": opts.transliterate,
172
- "remove_extra_silence": opts.remove_extra_silence
184
+ "model": opts.model
173
185
  }
186
+
187
+ if opts.model == "lightning-large":
188
+ if consistency:
189
+ payload["consistency"] = consistency
190
+ if similarity:
191
+ payload["similarity"] = similarity
192
+ if enhancement:
193
+ payload["enhancement"] = enhancement
194
+
174
195
 
175
196
  headers = {
176
197
  "Authorization": f"Bearer {self.api_key}",
177
198
  "Content-Type": "application/json",
178
199
  }
179
200
 
180
- if not self.session:
181
- self.session = aiohttp.ClientSession()
182
-
183
201
  async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
184
202
  if res.status != 200:
185
203
  raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
@@ -199,7 +217,7 @@ class AsyncSmallest:
199
217
  return add_wav_header(audio_content, opts.sample_rate)
200
218
 
201
219
  return audio_content
202
-
220
+
203
221
  finally:
204
222
  if should_cleanup and self.session:
205
223
  await self.session.close()
@@ -226,7 +244,6 @@ class AsyncSmallest:
226
244
  if not os.path.exists(file_path):
227
245
  raise TTSError("Invalid file path. File does not exist.")
228
246
 
229
- ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
230
247
  file_extension = os.path.splitext(file_path)[1].lower()
231
248
  if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
232
249
  raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
@@ -257,4 +274,38 @@ class AsyncSmallest:
257
274
  if should_cleanup and self.session:
258
275
  await self.session.close()
259
276
  self.session = None
277
+
278
+
279
+ async def delete_voice(self, voice_id: str) -> str:
280
+ """
281
+ Delete a cloned voice asynchronously.
260
282
 
283
+ Args:
284
+ - voice_id (str): The ID of the voice to be deleted.
285
+
286
+ Returns:
287
+ - str: The response from the API.
288
+
289
+ Raises:
290
+ - APIError: If the API request fails or returns an error.
291
+ """
292
+ url = f"{API_BASE_URL}/lightning-large"
293
+ payload = {'voiceId': voice_id}
294
+
295
+ headers = {
296
+ "Authorization": f"Bearer {self.api_key}",
297
+ }
298
+
299
+ should_cleanup = await self._ensure_session()
300
+
301
+ try:
302
+ async with self.session.delete(url, headers=headers, json=payload) as res:
303
+ if res.status != 200:
304
+ raise APIError(f"Failed to delete voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
305
+
306
+ return await res.text()
307
+
308
+ finally:
309
+ if should_cleanup and self.session:
310
+ await self.session.close()
311
+ self.session = None
smallest/stream_tts.py CHANGED
@@ -30,7 +30,7 @@ class TextToAudioStream:
30
30
 
31
31
  Args:
32
32
  tts_instance: The text-to-speech engine to use (Smallest or AsyncSmallest)
33
- queue_timeout: How long to wait for new text (seconds, default: 1.0)
33
+ queue_timeout: How long to wait for new text (seconds, default: 5.0)
34
34
  max_retries: Number of retry attempts for failed synthesis (default: 3)
35
35
  """
36
36
  self.tts_instance = tts_instance
@@ -48,36 +48,43 @@ class TextToAudioStream:
48
48
 
49
49
  async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
50
50
  """
51
- Streams the LLM output, splitting it into sentences based on the regex
52
- and chunk size, and adding each chunk to the queue.
51
+ Streams the LLM output, splitting it into chunks based on sentence boundaries
52
+ or space characters if no sentence boundary is found before reaching buffer_size.
53
53
 
54
54
  Parameters:
55
55
  - llm_output (AsyncGenerator[str, None]): An async generator yielding LLM output.
56
56
  """
57
57
  buffer = ""
58
- last_break_index = 0
59
58
 
60
59
  async for chunk in llm_output:
61
60
  buffer += chunk
62
- i = 0
63
- while i < len(buffer):
64
- current_chunk = buffer[:i + 1]
65
- if self.sentence_end_regex.match(current_chunk):
66
- last_break_index = i
67
- if len(current_chunk) >= self.buffer_size:
68
- if last_break_index > 0:
69
- self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ')
70
- buffer = buffer[last_break_index + 1:]
61
+
62
+ while len(buffer) > self.buffer_size:
63
+ chunk_text = buffer[:self.buffer_size]
64
+ last_break_index = -1
65
+
66
+ # Find last sentence boundary using regex
67
+ for i in range(len(chunk_text) - 1, -1, -1):
68
+ if self.sentence_end_regex.match(chunk_text[:i + 1]):
69
+ last_break_index = i
70
+ break
71
+
72
+ if last_break_index == -1:
73
+ # Fallback to space if no sentence boundary found
74
+ last_space = chunk_text.rfind(' ')
75
+ if last_space != -1:
76
+ last_break_index = last_space
71
77
  else:
72
- # No sentence boundary, split at max chunk size
73
- self.queue.put(f'{buffer[:self.buffer_size].replace("—", " ").strip()} ')
74
- buffer = buffer[self.buffer_size:]
75
- last_break_index = 0
76
- i = -1
77
- i += 1
78
-
78
+ last_break_index = self.buffer_size - 1
79
+
80
+ # Add chunk to queue and update buffer
81
+ self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ')
82
+ buffer = buffer[last_break_index + 1:].strip()
83
+
84
+ # Don't forget the remaining text
79
85
  if buffer:
80
86
  self.queue.put(f'{buffer.replace("—", " ").strip()} ')
87
+
81
88
  self.stop_flag = True
82
89
 
83
90
 
@@ -89,8 +96,7 @@ class TextToAudioStream:
89
96
  if retries < self.max_retries:
90
97
  return self._synthesize_sync(sentence, retries + 1)
91
98
  else:
92
- print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
93
- return None
99
+ raise APIError(f"Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
94
100
 
95
101
 
96
102
  async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
@@ -101,8 +107,7 @@ class TextToAudioStream:
101
107
  if retries < self.max_retries:
102
108
  return await self._synthesize_async(sentence, retries + 1)
103
109
  else:
104
- print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
105
- return None
110
+ raise APIError(f"Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
106
111
 
107
112
 
108
113
  async def _run_synthesis(self) -> AsyncGenerator[bytes, None]:
@@ -144,10 +149,13 @@ class TextToAudioStream:
144
149
  - Streamed over a network
145
150
  - Further processed as needed
146
151
  """
147
- llm_thread = Thread(target=asyncio.run, args=(self._stream_llm_output(llm_output),))
148
- llm_thread.start()
152
+ stream_task = asyncio.create_task(self._stream_llm_output(llm_output))
149
153
 
150
- async for audio_content in self._run_synthesis():
151
- yield audio_content
154
+ try:
155
+ async for audio_content in self._run_synthesis():
156
+ yield audio_content
157
+ except Exception as e:
158
+ raise APIError(f"Error during synthesis processing: {e}")
152
159
 
153
- llm_thread.join()
160
+ finally:
161
+ await stream_task
smallest/tts.py CHANGED
@@ -7,7 +7,7 @@ from typing import Optional, Union, List
7
7
 
8
8
  from smallest.exceptions import TTSError, APIError
9
9
  from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
10
- get_smallest_languages, get_smallest_models, API_BASE_URL)
10
+ get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
11
11
 
12
12
  class Smallest:
13
13
  def __init__(
@@ -17,9 +17,7 @@ class Smallest:
17
17
  sample_rate: Optional[int] = 24000,
18
18
  voice_id: Optional[str] = "emily",
19
19
  speed: Optional[float] = 1.0,
20
- add_wav_header: Optional[bool] = True,
21
- transliterate: Optional[bool] = False,
22
- remove_extra_silence: Optional[bool] = True
20
+ add_wav_header: Optional[bool] = True
23
21
  ) -> None:
24
22
  """
25
23
  Smallest Instance for text-to-speech synthesis.
@@ -34,8 +32,6 @@ class Smallest:
34
32
  - voice_id (TTSVoices): The voice to be used for synthesis.
35
33
  - speed (float): The speed of the speech synthesis.
36
34
  - add_wav_header (bool): Whether to add a WAV header to the output audio.
37
- - transliterate (bool): Whether to transliterate the text.
38
- - remove_extra_silence (bool): Whether to remove extra silence from the synthesized audio.
39
35
 
40
36
  Methods:
41
37
  - get_languages: Returns a list of available languages for synthesis.
@@ -46,7 +42,9 @@ class Smallest:
46
42
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
47
43
  if not self.api_key:
48
44
  raise TTSError()
49
-
45
+ if model == "lightning-large":
46
+ voice_id = "lakshya"
47
+
50
48
  self.chunk_size = 250
51
49
 
52
50
  self.opts = TTSOptions(
@@ -55,9 +53,7 @@ class Smallest:
55
53
  voice_id=voice_id,
56
54
  api_key=self.api_key,
57
55
  add_wav_header=add_wav_header,
58
- speed=speed,
59
- transliterate=transliterate,
60
- remove_extra_silence=remove_extra_silence
56
+ speed=speed
61
57
  )
62
58
 
63
59
 
@@ -102,6 +98,9 @@ class Smallest:
102
98
  def synthesize(
103
99
  self,
104
100
  text: str,
101
+ consistency: Optional[float] = 0.5,
102
+ similarity: Optional[float] = 0,
103
+ enhancement: Optional[bool] = False,
105
104
  save_as: Optional[str] = None,
106
105
  **kwargs
107
106
  ) -> Union[bytes, None]:
@@ -112,6 +111,9 @@ class Smallest:
112
111
  - text (str): The text to be converted to speech.
113
112
  - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
114
113
  The file must have a .wav extension.
114
+ - consistency (Optional[float]): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
115
+ - similarity (Optional[float]): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
116
+ - enhancement (Optional[bool]): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
115
117
  - kwargs: Additional optional parameters to override `__init__` options for this call.
116
118
 
117
119
  Returns:
@@ -123,10 +125,16 @@ class Smallest:
123
125
  - APIError: If the API request fails or returns an error.
124
126
  """
125
127
  opts = copy.deepcopy(self.opts)
128
+ valid_keys = set(vars(opts).keys())
129
+
130
+ invalid_keys = [key for key in kwargs if key not in valid_keys]
131
+ if invalid_keys:
132
+ raise ValueError(f"Invalid parameter(s) in kwargs: {', '.join(invalid_keys)}. Allowed parameters are: {', '.join(valid_keys)}")
133
+
126
134
  for key, value in kwargs.items():
127
135
  setattr(opts, key, value)
128
136
 
129
- validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)
137
+ validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed, consistency, similarity, enhancement)
130
138
 
131
139
  self.chunk_size = 250
132
140
  if opts.model == "lightning-large":
@@ -142,11 +150,16 @@ class Smallest:
142
150
  "voice_id": opts.voice_id,
143
151
  "add_wav_header": False,
144
152
  "speed": opts.speed,
145
- "model": opts.model,
146
- "transliterate": opts.transliterate,
147
- "remove_extra_silence": opts.remove_extra_silence,
148
153
  }
149
154
 
155
+ if opts.model == "lightning-large":
156
+ if consistency:
157
+ payload["consistency"] = consistency
158
+ if similarity:
159
+ payload["similarity"] = similarity
160
+ if enhancement:
161
+ payload["enhancement"] = enhancement
162
+
150
163
  headers = {
151
164
  "Authorization": f"Bearer {self.api_key}",
152
165
  "Content-Type": "application/json",
@@ -154,7 +167,7 @@ class Smallest:
154
167
 
155
168
  res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
156
169
  if res.status_code != 200:
157
- raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
170
+ raise APIError(f"Failed to synthesize speech: {res.text}. For more information, visit https://waves.smallest.ai/")
158
171
 
159
172
  audio_content += res.content
160
173
 
@@ -193,7 +206,6 @@ class Smallest:
193
206
  if not os.path.isfile(file_path):
194
207
  raise TTSError("Invalid file path. File does not exist.")
195
208
 
196
- ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
197
209
  file_extension = os.path.splitext(file_path)[1].lower()
198
210
  if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
199
211
  raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
@@ -212,3 +224,30 @@ class Smallest:
212
224
  raise APIError(f"Failed to add voice: {response.text}. For more information, visit https://waves.smallest.ai/")
213
225
 
214
226
  return json.dumps(response.json(), indent=4, ensure_ascii=False)
227
+
228
+
229
+ def delete_voice(self, voice_id: str) -> str:
230
+ """
231
+ Delete a cloned voice synchronously.
232
+
233
+ Args:
234
+ - voice_id (str): The ID of the voice to be deleted.
235
+
236
+ Returns:
237
+ - str: The response from the API.
238
+
239
+ Raises:
240
+ - APIError: If the API request fails or returns an error.
241
+ """
242
+ url = f"{API_BASE_URL}/lightning-large"
243
+ payload = {'voiceId': voice_id}
244
+
245
+ headers = {
246
+ 'Authorization': f"Bearer {self.api_key}",
247
+ }
248
+
249
+ response = requests.delete(url, headers=headers, json=payload)
250
+ if response.status_code != 200:
251
+ raise APIError(f"Failed to delete voice: {response.text}. For more information, visit https://waves.smallest.ai/")
252
+
253
+ return json.dumps(response.json(), indent=4, ensure_ascii=False)
smallest/utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import re
2
2
  import io
3
3
  from typing import List
4
+ from typing import Optional
4
5
  from pydub import AudioSegment
5
6
  from dataclasses import dataclass
6
7
  from sacremoses import MosesPunctNormalizer
@@ -14,6 +15,7 @@ SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$')
14
15
  mpn = MosesPunctNormalizer()
15
16
  SAMPLE_WIDTH = 2
16
17
  CHANNELS = 1
18
+ ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
17
19
 
18
20
 
19
21
  @dataclass
@@ -24,11 +26,9 @@ class TTSOptions:
24
26
  api_key: str
25
27
  add_wav_header: bool
26
28
  speed: float
27
- transliterate: bool
28
- remove_extra_silence: bool
29
29
 
30
30
 
31
- def validate_input(text: str, model: str, sample_rate: int, speed: float):
31
+ def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[bool] = None):
32
32
  if not text:
33
33
  raise ValidationError("Text cannot be empty.")
34
34
  if model not in TTSModels:
@@ -37,14 +37,20 @@ def validate_input(text: str, model: str, sample_rate: int, speed: float):
37
37
  raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 24000")
38
38
  if not 0.5 <= speed <= 2.0:
39
39
  raise ValidationError(f"Invalid speed: {speed}. Must be between 0.5 and 2.0")
40
+ if consistency is not None and not 0.0 <= consistency <= 1.0:
41
+ raise ValidationError(f"Invalid consistency: {consistency}. Must be between 0.0 and 1.0")
42
+ if similarity is not None and not 0.0 <= similarity <= 1.0:
43
+ raise ValidationError(f"Invalid similarity: {similarity}. Must be between 0.0 and 1.0")
44
+ if enhancement is not None and not isinstance(enhancement, bool):
45
+ raise ValidationError(f"Invalid enhancement: {enhancement}. Must be a boolean value.")
40
46
 
41
47
 
42
48
  def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: int = 2, channels: int = 1) -> bytes:
43
- audio = AudioSegment(data=frame_input, sample_width=sample_width, frame_rate=sample_rate, channels=channels)
44
- wav_buf = io.BytesIO()
45
- audio.export(wav_buf, format="wav")
46
- wav_buf.seek(0)
47
- return wav_buf.read()
49
+ audio = AudioSegment(data=frame_input, sample_width=sample_width, frame_rate=sample_rate, channels=channels)
50
+ wav_buf = io.BytesIO()
51
+ audio.export(wav_buf, format="wav")
52
+ wav_buf.seek(0)
53
+ return wav_buf.read()
48
54
 
49
55
 
50
56
  def preprocess_text(text: str) -> str:
@@ -55,11 +61,6 @@ def preprocess_text(text: str) -> str:
55
61
 
56
62
 
57
63
  def chunk_text(text: str, chunk_size: int = 250) -> List[str]:
58
- """
59
- Splits the input text into chunks based on sentence boundaries
60
- defined by SENTENCE_END_REGEX and the maximum chunk size.
61
- Only splits at valid sentence boundaries to avoid breaking words.
62
- """
63
64
  chunks = []
64
65
  while text:
65
66
  if len(text) <= chunk_size:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: smallestai
3
- Version: 2.0.0
3
+ Version: 2.1.0
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <support@smallest.ai>
6
6
  License: MIT
@@ -59,8 +59,11 @@ Currently, the library supports direct synthesis and the ability to synthesize s
59
59
  - [Asynchronous](#Synchronous)
60
60
  - [LLM to Speech](#llm-to-speech)
61
61
  - [Add your Voice](#add-your-voice)
62
- - [Synchronously](#synchronously)
63
- - [Asynchronously](#asynchronously)
62
+ - [Synchronously](#add-synchronously)
63
+ - [Asynchronously](#add-asynchronously)
64
+ - [Delete your Voice](#delete-your-voice)
65
+ - [Synchronously](#delete-synchronously)
66
+ - [Asynchronously](#delete-asynchronously)
64
67
  - [Available Methods](#available-methods)
65
68
  - [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
66
69
 
@@ -80,14 +83,6 @@ When using an SDK in your application, make sure to pin to at least the major ve
80
83
  3. Create a new API Key and copy it.
81
84
  4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
82
85
 
83
- ## Best Practices for Input Text
84
- While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
85
-
86
- For optimal voice generation results:
87
-
88
- 1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
89
- 2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
90
- 3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
91
86
 
92
87
  ## Examples
93
88
 
@@ -115,9 +110,10 @@ if __name__ == "__main__":
115
110
  - `sample_rate`: Audio sample rate (default: 24000)
116
111
  - `voice_id`: Voice ID (default: "emily")
117
112
  - `speed`: Speech speed multiplier (default: 1.0)
118
- - `add_wav_header`: Include WAV header in output (default: True)
119
- - `transliterate`: Enable text transliteration (default: False)
120
- - `remove_extra_silence`: Remove additional silence (default: True)
113
+ - `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. (default: 0.5)
114
+ - `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. (default: 0)
115
+ - `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. (default: False)
116
+ - `add_wav_header`: Whether to add a WAV header to the output audio.
121
117
 
122
118
  These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
123
119
 
@@ -141,9 +137,8 @@ import asyncio
141
137
  import aiofiles
142
138
  from smallest import AsyncSmallest
143
139
 
144
- client = AsyncSmallest(api_key="SMALLEST_API_KEY")
145
-
146
140
  async def main():
141
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
147
142
  async with client as tts:
148
143
  audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
149
144
  async with aiofiles.open("async_synthesize.wav", "wb") as f:
@@ -153,15 +148,33 @@ if __name__ == "__main__":
153
148
  asyncio.run(main())
154
149
  ```
155
150
 
151
+ **Running Asynchronously in a Jupyter Notebook**
152
+ If you are using a Jupyter Notebook, use the following approach to execute the asynchronous function within an existing event loop:
153
+ ```python
154
+ import asyncio
155
+ import aiofiles
156
+ from smallest import AsyncSmallest
157
+
158
+ async def main():
159
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
160
+ async with client as tts:
161
+ audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
162
+ async with aiofiles.open("async_synthesize.wav", "wb") as f:
163
+ await f.write(audio_bytes) # alternatively you can use the `save_as` parameter.
164
+
165
+ await main()
166
+ ```
167
+
156
168
  **Parameters:**
157
169
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
158
170
  - `model`: TTS model to use (default: "lightning")
159
171
  - `sample_rate`: Audio sample rate (default: 24000)
160
172
  - `voice_id`: Voice ID (default: "emily")
161
173
  - `speed`: Speech speed multiplier (default: 1.0)
162
- - `add_wav_header`: Include WAV header in output (default: True)
163
- - `transliterate`: Enable text transliteration (default: False)
164
- - `remove_extra_silence`: Remove additional silence (default: True)
174
+ - `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
175
+ - `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
176
+ - `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
177
+ - `add_wav_header`: Whether to add a WAV header to the output audio.
165
178
 
166
179
  These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.
167
180
 
@@ -178,6 +191,58 @@ audio_bytes = await tts.synthesize(
178
191
 
179
192
  The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
180
193
 
194
+ #### Stream through a WebSocket
195
+
196
+ ```python
197
+ import asyncio
198
+ import websockets
199
+ from groq import Groq
200
+ from smallest import Smallest, TextToAudioStream
201
+
202
+ # Initialize Groq (LLM) and Smallest (TTS) instances
203
+ llm = Groq(api_key="GROQ_API_KEY")
204
+ tts = Smallest(api_key="SMALLEST_API_KEY")
205
+ WEBSOCKET_URL = "wss://echo.websocket.events" # Mock WebSocket server
206
+
207
+ # Async function to stream text generation from LLM
208
+ async def generate_text(prompt):
209
+ completion = llm.chat.completions.create(
210
+ messages=[{"role": "user", "content": prompt}],
211
+ model="llama3-8b-8192",
212
+ stream=True,
213
+ )
214
+
215
+ # Yield text as it is generated
216
+ for chunk in completion:
217
+ text = chunk.choices[0].delta.content
218
+ if text:
219
+ yield text
220
+
221
+ # Main function to run the process
222
+ async def main():
223
+ # Initialize the TTS processor
224
+ processor = TextToAudioStream(tts_instance=tts)
225
+
226
+ # Generate text from LLM
227
+ llm_output = generate_text("Explain text to speech like I am five in 5 sentences.")
228
+
229
+ # Stream the generated speech through a websocket
230
+ async with websockets.connect(WEBSOCKET_URL) as ws:
231
+ print("Connected to WebSocket server.")
232
+
233
+ # Stream the generated speech
234
+ async for audio_chunk in processor.process(llm_output):
235
+ await ws.send(audio_chunk) # Send audio chunk
236
+ echoed_data = await ws.recv() # Receive the echoed message
237
+ print("Received from server:", echoed_data[:20], "...") # Print first 20 bytes
238
+
239
+ print("WebSocket connection closed.")
240
+
241
+ if __name__ == "__main__":
242
+ asyncio.run(main())
243
+ ```
244
+
245
+ #### Save to a File
181
246
  ```python
182
247
  import wave
183
248
  import asyncio
@@ -245,12 +310,12 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
245
310
  ## Add your Voice
246
311
  The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
247
312
 
248
- ### Synchronously
313
+ ### Add Synchronously
249
314
  ```python
250
315
  from smallest import Smallest
251
316
 
252
317
  def main():
253
- client = Smallest(api_key="YOUR_API_KEY")
318
+ client = Smallest(api_key="SMALLEST_API_KEY")
254
319
  res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
255
320
  print(res)
256
321
 
@@ -258,13 +323,13 @@ if __name__ == "__main__":
258
323
  main()
259
324
  ```
260
325
 
261
- ### Asynchronously
326
+ ### Add Asynchronously
262
327
  ```python
263
328
  import asyncio
264
329
  from smallest import AsyncSmallest
265
330
 
266
331
  async def main():
267
- client = AsyncSmallest(api_key="YOUR_API_KEY")
332
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
268
333
  res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
269
334
  print(res)
270
335
 
@@ -272,6 +337,36 @@ if __name__ == "__main__":
272
337
  asyncio.run(main())
273
338
  ```
274
339
 
340
+ ## Delete your Voice
341
+ The Smallest AI SDK allows you to delete your cloned voice. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
342
+
343
+ ### Delete Synchronously
344
+ ```python
345
+ from smallest import Smallest
346
+
347
+ def main():
348
+ client = Smallest(api_key="SMALLEST_API_KEY")
349
+ res = client.delete_voice(voice_id="voice_id")
350
+ print(res)
351
+
352
+ if __name__ == "__main__":
353
+ main()
354
+ ```
355
+
356
+ ### Delete Asynchronously
357
+ ```python
358
+ import asyncio
359
+ from smallest import AsyncSmallest
360
+
361
+ async def main():
362
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
363
+ res = await client.delete_voice(voice_id="voice_id")
364
+ print(res)
365
+
366
+ if __name__ == "__main__":
367
+ asyncio.run(main())
368
+ ```
369
+
275
370
  ## Available Methods
276
371
 
277
372
  ```python
@@ -0,0 +1,12 @@
1
+ smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
+ smallest/async_tts.py,sha256=Jr7IID5tJrnMx_d2217foUJqfFvAFsddvy_0HG5tKGc,11905
3
+ smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
+ smallest/models.py,sha256=g2e_4nU5P48vyXZandKLWqZC1TkoEGeLvYKqJIqurSI,83
5
+ smallest/stream_tts.py,sha256=dUxoY0VkXecsMZ41QA8RkX4t_pD5-7mMIJhaB01tQrk,6512
6
+ smallest/tts.py,sha256=bSL7EYmLpd5yT42dbUXVb-IgZ_xIcXpyHvCu2-hHtMs,10024
7
+ smallest/utils.py,sha256=HDpDjPkUeeQLqDhrV-zPTLtOH9hJueae0q9SNq486GQ,3396
8
+ smallestai-2.1.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
+ smallestai-2.1.0.dist-info/METADATA,sha256=BwCUFiVZTRActimZBQcPJg8vHJy0M-6vYA_yHvaFpDk,14904
10
+ smallestai-2.1.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
11
+ smallestai-2.1.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
+ smallestai-2.1.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.8.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
- smallest/async_tts.py,sha256=5qW7owlMeSWFx0rpn9dYfbO76mmNY0DXcytNjLfbbz8,9727
3
- smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
- smallest/models.py,sha256=g2e_4nU5P48vyXZandKLWqZC1TkoEGeLvYKqJIqurSI,83
5
- smallest/stream_tts.py,sha256=SeP9A9zXJWiV62Eezv0L1J5sRIR304Llc_mwVtOOSUI,6348
6
- smallest/tts.py,sha256=xBBEk_byRPGT6SYkE6qvhfEupgHl6XBdAqtxmzw2rF8,8311
7
- smallest/utils.py,sha256=FCZkvbbHJBoN0jpBSqmt1hJjvks56t8i82we4XnqjYk,3016
8
- smallestai-2.0.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
- smallestai-2.0.0.dist-info/METADATA,sha256=EIyZZqzAvHgQ7jfEs5x5LUx3HjzoCUhzJoXfkb3CuoI,11538
10
- smallestai-2.0.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
- smallestai-2.0.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
- smallestai-2.0.0.dist-info/RECORD,,