smallestai 1.3.3__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. Click here for more details.

smallest/async_tts.py CHANGED
@@ -1,22 +1,23 @@
1
1
  import os
2
2
  import copy
3
+ import json
3
4
  import aiohttp
4
5
  import aiofiles
6
+ import requests
5
7
  from typing import Optional, Union, List
6
8
 
7
- from .models import TTSModels, TTSVoices
8
- from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
9
+ from smallest.exceptions import TTSError, APIError
10
+ from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
11
+ get_smallest_languages, get_smallest_models, API_BASE_URL)
11
12
 
12
13
 
13
14
  class AsyncSmallest:
14
15
  def __init__(
15
16
  self,
16
- api_key: Optional[str] = None,
17
- model: TTSModels = "lightning",
18
- sample_rate: int = 24000,
19
- voice: TTSVoices = "emily",
17
+ api_key: str = None,
18
+ model: Optional[str] = "lightning",
19
+ sample_rate: Optional[int] = 24000,
20
+ voice_id: Optional[str] = "emily",
20
21
  speed: Optional[float] = 1.0,
21
22
  add_wav_header: Optional[bool] = True,
22
23
  transliterate: Optional[bool] = False,
@@ -25,15 +26,15 @@ class AsyncSmallest:
25
26
  """
26
27
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
27
28
 
28
- This class provides an asynchronous implementation of the text-to-speech functionality.
29
- It allows for non-blocking synthesis of speech from text, making it suitable for applications
29
+ This class provides an asynchronous implementation of the text-to-speech functionality.
30
+ It allows for non-blocking synthesis of speech from text, making it suitable for applications
30
31
  that require async processing.
31
32
 
32
33
  Args:
33
34
  - api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
34
35
  - model (TTSModels): The model to be used for synthesis.
35
36
  - sample_rate (int): The sample rate for the audio output.
36
- - voice (TTSVoices): The voice to be used for synthesis.
37
+ - voice_id (TTSVoices): The voice to be used for synthesis.
37
38
  - speed (float): The speed of the speech synthesis.
38
39
  - add_wav_header (bool): Whether to add a WAV header to the output audio.
39
40
  - transliterate (bool): Whether to transliterate the text.
@@ -49,11 +50,11 @@ class AsyncSmallest:
49
50
  if not self.api_key:
50
51
  raise TTSError()
51
52
  self.chunk_size = 250
52
-
53
+
53
54
  self.opts = TTSOptions(
54
55
  model=model,
55
56
  sample_rate=sample_rate,
56
- voice=voice,
57
+ voice_id=voice_id,
57
58
  api_key=self.api_key,
58
59
  add_wav_header=add_wav_header,
59
60
  speed=speed,
@@ -61,29 +62,65 @@ class AsyncSmallest:
61
62
  remove_extra_silence=remove_extra_silence,
62
63
  )
63
64
  self.session = None
64
-
65
+
66
+
65
67
  async def __aenter__(self):
66
68
  if self.session is None:
67
69
  self.session = aiohttp.ClientSession()
68
70
  return self
69
71
 
72
+
70
73
  async def __aexit__(self, exc_type, exc_val, exc_tb):
71
74
  if self.session:
72
75
  await self.session.close()
73
76
 
74
77
 
78
+ async def _ensure_session(self):
79
+ """Ensure session exists for direct calls"""
80
+ if not self.session:
81
+ self.session = aiohttp.ClientSession()
82
+ return True
83
+ return False
84
+
85
+
75
86
  def get_languages(self) -> List[str]:
76
87
  """Returns a list of available languages."""
77
88
  return get_smallest_languages()
89
+
90
+ def get_cloned_voices(self) -> str:
91
+ """Returns a list of your cloned voices."""
92
+ headers = {
93
+ "Authorization": f"Bearer {self.api_key}",
94
+ }
95
+
96
+ res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
97
+ if res.status_code != 200:
98
+ raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")
99
+
100
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
78
101
 
79
- def get_voices(self) -> List[str]:
102
+
103
+ def get_voices(
104
+ self,
105
+ model: Optional[str] = "lightning"
106
+ ) -> str:
80
107
  """Returns a list of available voices."""
81
- return get_smallest_voices()
108
+ headers = {
109
+ "Authorization": f"Bearer {self.api_key}",
110
+ }
111
+
112
+ res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers)
113
+ if res.status_code != 200:
114
+ raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")
115
+
116
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
117
+
82
118
 
83
119
  def get_models(self) -> List[str]:
84
120
  """Returns a list of available models."""
85
121
  return get_smallest_models()
86
-
122
+
123
+
87
124
  async def synthesize(
88
125
  self,
89
126
  text: str,
@@ -95,62 +132,129 @@ class AsyncSmallest:
95
132
 
96
133
  Args:
97
134
  - text (str): The text to be converted to speech.
98
- - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
135
+ - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
99
136
  The file must have a .wav extension.
100
137
  - kwargs: Additional optional parameters to override `__init__` options for this call.
101
138
 
102
139
  Returns:
103
- - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
140
+ - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
104
141
  otherwise, returns None after saving the audio to the specified file.
105
142
 
106
143
  Raises:
107
144
  - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
108
145
  - APIError: If the API request fails or returns an error.
109
146
  """
110
- opts = copy.deepcopy(self.opts)
111
- for key, value in kwargs.items():
112
- setattr(opts, key, value)
113
-
114
- validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
115
- chunks = split_into_chunks(text)
116
- audio_content = b""
117
-
118
- for chunk in chunks:
119
- payload = {
120
- "text": preprocess_text(chunk),
121
- "sample_rate": opts.sample_rate,
122
- "voice_id": opts.voice,
123
- "add_wav_header": False,
124
- "speed": opts.speed,
125
- "model": opts.model,
126
- "transliterate": opts.transliterate,
127
- "remove_extra_silence": opts.remove_extra_silence
128
- }
129
-
130
- headers = {
131
- "Authorization": f"Bearer {self.api_key}",
132
- "Content-Type": "application/json",
133
- }
134
-
135
- if not self.session:
136
- self.session = aiohttp.ClientSession()
147
+ should_cleanup = await self._ensure_session()
148
+
149
+ try:
150
+ opts = copy.deepcopy(self.opts)
151
+ for key, value in kwargs.items():
152
+ setattr(opts, key, value)
153
+
154
+ validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)
155
+
156
+ self.chunk_size = 250
157
+ if opts.model == 'ligtning-large':
158
+ self.chunk_size = 140
159
+
160
+ chunks = chunk_text(text, self.chunk_size)
161
+ audio_content = b""
162
+
163
+ for chunk in chunks:
164
+ payload = {
165
+ "text": preprocess_text(chunk),
166
+ "sample_rate": opts.sample_rate,
167
+ "voice_id": opts.voice_id,
168
+ "add_wav_header": False,
169
+ "speed": opts.speed,
170
+ "model": opts.model,
171
+ "transliterate": opts.transliterate,
172
+ "remove_extra_silence": opts.remove_extra_silence
173
+ }
174
+
175
+ headers = {
176
+ "Authorization": f"Bearer {self.api_key}",
177
+ "Content-Type": "application/json",
178
+ }
179
+
180
+ if not self.session:
181
+ self.session = aiohttp.ClientSession()
182
+
183
+ async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
184
+ if res.status != 200:
185
+ raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
186
+
187
+ audio_content += await res.read()
188
+
189
+ if save_as:
190
+ if not save_as.endswith(".wav"):
191
+ raise TTSError("Invalid file name. Extension must be .wav")
192
+
193
+ async with aiofiles.open(save_as, mode='wb') as f:
194
+ await f.write(add_wav_header(audio_content, opts.sample_rate))
195
+
196
+ return None
197
+
198
+ if opts.add_wav_header:
199
+ return add_wav_header(audio_content, opts.sample_rate)
200
+
201
+ return audio_content
137
202
 
138
- async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
139
- if res.status != 200:
140
- raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
141
-
142
- audio_content += await res.read()
203
+ finally:
204
+ if should_cleanup and self.session:
205
+ await self.session.close()
206
+ self.session = None
207
+
208
+
209
+ async def add_voice(self, display_name: str, file_path: str) -> str:
210
+ """
211
+ Instantly clone your voice asynchronously.
212
+
213
+ Args:
214
+ - display_name (str): The display name for the new voice.
215
+ - file_path (str): The path to the reference audio file to be cloned.
143
216
 
144
- if save_as:
145
- if not save_as.endswith(".wav"):
146
- raise TTSError("Invalid file name. Extension must be .wav")
217
+ Returns:
218
+ - str: The response from the API as a formatted JSON string.
219
+
220
+ Raises:
221
+ - TTSError: If the file does not exist or is not a valid audio file.
222
+ - APIError: If the API request fails or returns an error.
223
+ """
224
+ url = f"{API_BASE_URL}/lightning-large/add_voice"
225
+
226
+ if not os.path.exists(file_path):
227
+ raise TTSError("Invalid file path. File does not exist.")
228
+
229
+ ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
230
+ file_extension = os.path.splitext(file_path)[1].lower()
231
+ if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
232
+ raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
233
+
234
+ headers = {
235
+ 'Authorization': f"Bearer {self.api_key}",
236
+ }
237
+
238
+ should_cleanup = await self._ensure_session()
239
+
240
+ try:
241
+ async with aiofiles.open(file_path, 'rb') as f:
242
+ file_data = await f.read()
243
+
244
+ data = aiohttp.FormData()
245
+ content_type = file_extension[1:]
147
246
 
148
- async with aiofiles.open(save_as, mode='wb') as f:
149
- await f.write(add_wav_header(audio_content, self.opts.sample_rate))
247
+ data.add_field('displayName', display_name)
248
+ data.add_field('file', file_data, filename=file_path, content_type=f"audio/{content_type}")
150
249
 
151
- return None
250
+ async with self.session.post(url, headers=headers, data=data) as res:
251
+ if res.status != 200:
252
+ raise APIError(f"Failed to add voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
152
253
 
153
- if opts.add_wav_header:
154
- return add_wav_header(audio_content, self.opts.sample_rate)
254
+ return json.dumps(await res.json(), indent=4, ensure_ascii=False)
155
255
 
156
- return audio_content
256
+ finally:
257
+ if should_cleanup and self.session:
258
+ await self.session.close()
259
+ self.session = None
260
+
smallest/models.py CHANGED
@@ -1,7 +1,5 @@
1
- from typing import Literal
2
-
3
- TTSModels = Literal["lightning"]
4
- TTSLanguages = Literal["en", "hi"]
5
- TTSVoices = Literal["emily", "jasmine", "arman", "james", "mithali", "aravind", "raj",
6
- "arjun", "sanya", "saina", "pooja", "saurabh", "nisha", "mansi", "radhika", "kajal",
7
- "raghav", "deepika", "niharika", "monika", "raman", "diya", "ananya", "william"]
1
+ TTSLanguages = ["en", "hi"]
2
+ TTSModels = [
3
+ "lightning",
4
+ "lightning-large"
5
+ ]
smallest/stream_tts.py CHANGED
@@ -3,17 +3,17 @@ from threading import Thread
3
3
  from queue import Queue, Empty
4
4
  from typing import AsyncGenerator, Optional, Union
5
5
 
6
- from .tts import Smallest
7
- from .exceptions import APIError
8
- from .async_tts import AsyncSmallest
9
- from .utils import SENTENCE_END_REGEX
6
+ from smallest.tts import Smallest
7
+ from smallest.exceptions import APIError
8
+ from smallest.async_tts import AsyncSmallest
9
+ from smallest.utils import SENTENCE_END_REGEX
10
10
 
11
11
  class TextToAudioStream:
12
12
  def __init__(
13
13
  self,
14
14
  tts_instance: Union[Smallest, AsyncSmallest],
15
- queue_timeout: float = 5.0,
16
- max_retries: int = 3
15
+ queue_timeout: Optional[float] = 5.0,
16
+ max_retries: Optional[int] = 3
17
17
  ):
18
18
  """
19
19
  A real-time text-to-speech processor that converts streaming text into audio output.
@@ -35,7 +35,6 @@ class TextToAudioStream:
35
35
  """
36
36
  self.tts_instance = tts_instance
37
37
  self.tts_instance.opts.add_wav_header = False
38
-
39
38
  self.sentence_end_regex = SENTENCE_END_REGEX
40
39
  self.queue_timeout = queue_timeout
41
40
  self.max_retries = max_retries
@@ -43,6 +42,9 @@ class TextToAudioStream:
43
42
  self.buffer_size = 250
44
43
  self.stop_flag = False
45
44
 
45
+ if self.tts_instance.opts.model == 'lightning-large':
46
+ self.buffer_size = 140
47
+
46
48
 
47
49
  async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
48
50
  """
@@ -58,51 +60,46 @@ class TextToAudioStream:
58
60
  async for chunk in llm_output:
59
61
  buffer += chunk
60
62
  i = 0
61
-
62
63
  while i < len(buffer):
63
64
  current_chunk = buffer[:i + 1]
64
65
  if self.sentence_end_regex.match(current_chunk):
65
66
  last_break_index = i
66
-
67
67
  if len(current_chunk) >= self.buffer_size:
68
68
  if last_break_index > 0:
69
- self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
69
+ self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ')
70
70
  buffer = buffer[last_break_index + 1:]
71
71
  else:
72
72
  # No sentence boundary, split at max chunk size
73
- self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
73
+ self.queue.put(f'{buffer[:self.buffer_size].replace("—", " ").strip()} ')
74
74
  buffer = buffer[self.buffer_size:]
75
-
76
75
  last_break_index = 0
77
76
  i = -1
78
-
79
77
  i += 1
80
-
78
+
81
79
  if buffer:
82
- self.queue.put(buffer.replace("—", " ").strip())
83
-
84
- self.stop_flag = True # completion flag when LLM output ends
80
+ self.queue.put(f'{buffer.replace("—", " ").strip()} ')
81
+ self.stop_flag = True
85
82
 
86
83
 
87
- async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
88
- """Asynchronously synthesizes a given sentence."""
84
+ def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
85
+ """Synchronously synthesizes a given sentence."""
89
86
  try:
90
- return await self.tts_instance.synthesize(sentence)
87
+ return self.tts_instance.synthesize(sentence)
91
88
  except APIError as e:
92
89
  if retries < self.max_retries:
93
- return await self._synthesize_async(sentence, retries + 1)
90
+ return self._synthesize_sync(sentence, retries + 1)
94
91
  else:
95
92
  print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
96
93
  return None
94
+
97
95
 
98
-
99
- def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
100
- """Synchronously synthesizes a given sentence."""
96
+ async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
97
+ """Asynchronously synthesizes a given sentence."""
101
98
  try:
102
- return self.tts_instance.synthesize(sentence)
99
+ return await self.tts_instance.synthesize(sentence)
103
100
  except APIError as e:
104
101
  if retries < self.max_retries:
105
- return self._synthesize_sync(sentence, retries + 1)
102
+ return await self._synthesize_async(sentence, retries + 1)
106
103
  else:
107
104
  print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
108
105
  return None
smallest/tts.py CHANGED
@@ -1,21 +1,21 @@
1
1
  import os
2
+ import json
2
3
  import wave
3
4
  import copy
4
5
  import requests
5
6
  from typing import Optional, Union, List
6
7
 
7
- from .models import TTSModels, TTSVoices
8
- from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
8
+ from smallest.exceptions import TTSError, APIError
9
+ from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
10
+ get_smallest_languages, get_smallest_models, API_BASE_URL)
11
11
 
12
12
  class Smallest:
13
13
  def __init__(
14
14
  self,
15
- api_key: Optional[str] = None,
16
- model: TTSModels = "lightning",
17
- sample_rate: int = 24000,
18
- voice: TTSVoices = "emily",
15
+ api_key: str = None,
16
+ model: Optional[str] = "lightning",
17
+ sample_rate: Optional[int] = 24000,
18
+ voice_id: Optional[str] = "emily",
19
19
  speed: Optional[float] = 1.0,
20
20
  add_wav_header: Optional[bool] = True,
21
21
  transliterate: Optional[bool] = False,
@@ -31,7 +31,7 @@ class Smallest:
31
31
  - api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
32
32
  - model (TTSModels): The model to be used for synthesis.
33
33
  - sample_rate (int): The sample rate for the audio output.
34
- - voice (TTSVoices): The voice to be used for synthesis.
34
+ - voice_id (TTSVoices): The voice to be used for synthesis.
35
35
  - speed (float): The speed of the speech synthesis.
36
36
  - add_wav_header (bool): Whether to add a WAV header to the output audio.
37
37
  - transliterate (bool): Whether to transliterate the text.
@@ -52,7 +52,7 @@ class Smallest:
52
52
  self.opts = TTSOptions(
53
53
  model=model,
54
54
  sample_rate=sample_rate,
55
- voice=voice,
55
+ voice_id=voice_id,
56
56
  api_key=self.api_key,
57
57
  add_wav_header=add_wav_header,
58
58
  speed=speed,
@@ -65,14 +65,40 @@ class Smallest:
65
65
  """Returns a list of available languages."""
66
66
  return get_smallest_languages()
67
67
 
68
- def get_voices(self) -> List[str]:
68
+ def get_cloned_voices(self) -> str:
69
+ """Returns a list of your cloned voices."""
70
+ headers = {
71
+ "Authorization": f"Bearer {self.api_key}",
72
+ }
73
+
74
+ res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
75
+ if res.status_code != 200:
76
+ raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")
77
+
78
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
79
+
80
+
81
+ def get_voices(
82
+ self,
83
+ model: Optional[str] = "lightning"
84
+ ) -> str:
69
85
  """Returns a list of available voices."""
70
- return get_smallest_voices()
86
+ headers = {
87
+ "Authorization": f"Bearer {self.api_key}",
88
+ }
89
+
90
+ res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers)
91
+ if res.status_code != 200:
92
+ raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")
93
+
94
+ return json.dumps(res.json(), indent=4, ensure_ascii=False)
95
+
71
96
 
72
97
  def get_models(self) -> List[str]:
73
98
  """Returns a list of available models."""
74
99
  return get_smallest_models()
75
100
 
101
+
76
102
  def synthesize(
77
103
  self,
78
104
  text: str,
@@ -100,16 +126,20 @@ class Smallest:
100
126
  for key, value in kwargs.items():
101
127
  setattr(opts, key, value)
102
128
 
103
- validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
129
+ validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)
130
+
131
+ self.chunk_size = 250
132
+ if opts.model == "lightning-large":
133
+ self.chunk_size = 140
104
134
 
105
- chunks = split_into_chunks(text)
135
+ chunks = chunk_text(text, self.chunk_size)
106
136
  audio_content = b""
107
137
 
108
138
  for chunk in chunks:
109
139
  payload = {
110
140
  "text": preprocess_text(chunk),
111
141
  "sample_rate": opts.sample_rate,
112
- "voice_id": opts.voice,
142
+ "voice_id": opts.voice_id,
113
143
  "add_wav_header": False,
114
144
  "speed": opts.speed,
115
145
  "model": opts.model,
@@ -128,11 +158,6 @@ class Smallest:
128
158
 
129
159
  audio_content += res.content
130
160
 
131
-
132
- res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
133
- if res.status_code != 200:
134
- raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
135
-
136
161
  if save_as:
137
162
  if not save_as.endswith(".wav"):
138
163
  raise TTSError("Invalid file name. Extension must be .wav")
@@ -140,11 +165,50 @@ class Smallest:
140
165
  with wave.open(save_as, "wb") as wf:
141
166
  wf.setnchannels(1)
142
167
  wf.setsampwidth(2)
143
- wf.setframerate(self.opts.sample_rate)
168
+ wf.setframerate(opts.sample_rate)
144
169
  wf.writeframes(audio_content)
145
170
  return None
146
171
 
147
- if self.opts.add_wav_header:
148
- return add_wav_header(audio_content, self.opts.sample_rate)
172
+ if opts.add_wav_header:
173
+ return add_wav_header(audio_content, opts.sample_rate)
149
174
 
150
175
  return audio_content
176
+
177
+
178
+ def add_voice(self, display_name: str, file_path: str) -> str:
179
+ """
180
+ Instantly clone your voice synchronously.
181
+
182
+ Args:
183
+ - display_name (str): The display name for the new voice.
184
+ - file_path (str): The path to the reference audio file to be cloned.
185
+
186
+ Returns:
187
+ - str: The response from the API as a formatted JSON string.
188
+
189
+ Raises:
190
+ - TTSError: If the file does not exist or is not a valid audio file.
191
+ - APIError: If the API request fails or returns an error.
192
+ """
193
+ if not os.path.isfile(file_path):
194
+ raise TTSError("Invalid file path. File does not exist.")
195
+
196
+ ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
197
+ file_extension = os.path.splitext(file_path)[1].lower()
198
+ if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
199
+ raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
200
+
201
+ url = f"{API_BASE_URL}/lightning-large/add_voice"
202
+ payload = {'displayName': display_name}
203
+
204
+ files = [('file', (os.path.basename(file_path), open(file_path, 'rb'), 'audio/wav'))]
205
+
206
+ headers = {
207
+ 'Authorization': f"Bearer {self.api_key}",
208
+ }
209
+
210
+ response = requests.post(url, headers=headers, data=payload, files=files)
211
+ if response.status_code != 200:
212
+ raise APIError(f"Failed to add voice: {response.text}. For more information, visit https://waves.smallest.ai/")
213
+
214
+ return json.dumps(response.json(), indent=4, ensure_ascii=False)
smallest/utils.py CHANGED
@@ -1,27 +1,26 @@
1
1
  import re
2
2
  import io
3
- import unicodedata
4
3
  from typing import List
5
4
  from pydub import AudioSegment
6
5
  from dataclasses import dataclass
7
6
  from sacremoses import MosesPunctNormalizer
8
7
 
9
- from .exceptions import ValidationError
10
- from .models import TTSModels, TTSLanguages, TTSVoices
8
+ from smallest.exceptions import ValidationError
9
+ from smallest.models import TTSModels, TTSLanguages
11
10
 
12
11
 
13
12
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
14
- SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
15
- CHUNK_SIZE = 250
13
+ SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$')
14
+ mpn = MosesPunctNormalizer()
16
15
  SAMPLE_WIDTH = 2
17
16
  CHANNELS = 1
18
17
 
19
18
 
20
19
  @dataclass
21
20
  class TTSOptions:
22
- model: TTSModels
21
+ model: str
23
22
  sample_rate: int
24
- voice: TTSVoices
23
+ voice_id: str
25
24
  api_key: str
26
25
  add_wav_header: bool
27
26
  speed: float
@@ -29,15 +28,13 @@ class TTSOptions:
29
28
  remove_extra_silence: bool
30
29
 
31
30
 
32
- def validate_input(text: str, voice: TTSVoices, model: TTSModels, sample_rate: int, speed: float):
31
+ def validate_input(text: str, model: str, sample_rate: int, speed: float):
33
32
  if not text:
34
- raise ValidationError("Text cannot be empty")
35
- if voice not in TTSVoices.__args__:
36
- raise ValidationError(f"Invalid voice: {voice}")
37
- if model not in ['lightning']:
38
- raise ValidationError(f"Invalid model: {model}")
39
- if not 8000 <= sample_rate <= 48000:
40
- raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 48000")
33
+ raise ValidationError("Text cannot be empty.")
34
+ if model not in TTSModels:
35
+ raise ValidationError(f"Invalid model: {model}. Must be one of {TTSModels}")
36
+ if not 8000 <= sample_rate <= 24000:
37
+ raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 24000")
41
38
  if not 0.5 <= speed <= 2.0:
42
39
  raise ValidationError(f"Invalid speed: {speed}. Must be between 0.5 and 2.0")
43
40
 
@@ -51,65 +48,49 @@ def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: i
51
48
 
52
49
 
53
50
  def preprocess_text(text: str) -> str:
54
- # Replace special characters with their normal form
55
- text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII')
56
- text = text.lower()
57
- text = text.replace("—", " ")
58
- # Normalize punctuation using Moses punct normalizer
59
- mpn = MosesPunctNormalizer()
51
+ text = text.replace("\n", " ").replace("\t", " ").replace("—", " ").replace("-", " ").replace("–", " ")
52
+ text = re.sub(r'\s+', ' ', text)
60
53
  text = mpn.normalize(text)
61
54
  return text.strip()
62
55
 
63
- def split_into_chunks(text: str) -> List[str]:
64
- """
65
- Splits the input text into chunks based on sentence boundaries
66
- defined by SENTENCE_END_REGEX and the maximum chunk size.
67
- """
68
- chunks = []
69
- current_chunk = ""
70
- last_break_index = 0
71
-
72
- i = 0
73
- while i < len(text):
74
- current_chunk += text[i]
75
-
76
- # Check for sentence boundary using regex
77
- if SENTENCE_END_REGEX.match(current_chunk):
78
- last_break_index = i
79
56
 
80
- if len(current_chunk) >= CHUNK_SIZE:
81
- if last_break_index > 0:
82
- # Split at the last valid sentence boundary
83
- chunk = text[:last_break_index + 1].strip()
84
- chunk = chunk.replace("—", " ")
85
- chunks.append(chunk)
86
-
87
- text = text[last_break_index + 1:]
88
- i = -1 # Reset index to process the remaining text
89
- current_chunk = ""
90
- last_break_index = 0
91
- else:
92
- # No sentence boundary found, split at max length
93
- current_chunk = current_chunk.replace("—", " ")
94
- chunks.append(current_chunk.strip())
95
- text = text[CHUNK_SIZE:]
96
- i = -1 # Reset index to process the remaining text
97
- current_chunk = ""
98
-
99
- i += 1
100
-
101
- if text:
102
- text = text.replace("—", " ")
57
+ def chunk_text(text: str, chunk_size: int = 250) -> List[str]:
58
+ """
59
+ Splits the input text into chunks based on sentence boundaries
60
+ defined by SENTENCE_END_REGEX and the maximum chunk size.
61
+ Only splits at valid sentence boundaries to avoid breaking words.
62
+ """
63
+ chunks = []
64
+ while text:
65
+ if len(text) <= chunk_size:
103
66
  chunks.append(text.strip())
67
+ break
68
+
69
+ chunk_text = text[:chunk_size]
70
+ last_break_index = -1
104
71
 
105
- return chunks
72
+ # Find last sentence boundary using regex
73
+ for i in range(len(chunk_text) - 1, -1, -1):
74
+ if SENTENCE_END_REGEX.match(chunk_text[:i + 1]):
75
+ last_break_index = i
76
+ break
106
77
 
78
+ if last_break_index == -1:
79
+ # Fallback to space if no sentence boundary found
80
+ last_space = chunk_text.rfind(' ')
81
+ if last_space != -1:
82
+ last_break_index = last_space
83
+ else:
84
+ last_break_index = chunk_size - 1
107
85
 
108
- def get_smallest_languages() -> List[str]:
109
- return list(TTSLanguages.__args__)
86
+ chunks.append(text[:last_break_index + 1].strip())
87
+ text = text[last_break_index + 1:].strip()
110
88
 
111
- def get_smallest_voices() -> List[str]:
112
- return list(TTSVoices.__args__)
89
+ return chunks
90
+
91
+
92
+ def get_smallest_languages() -> List[str]:
93
+ return TTSLanguages
113
94
 
114
95
  def get_smallest_models() -> List[str]:
115
- return ["lightning"]
96
+ return TTSModels
@@ -1,8 +1,8 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: smallestai
3
- Version: 1.3.3
3
+ Version: 2.0.0
4
4
  Summary: Official Python client for the Smallest AI API
5
- Author-email: Smallest <info@smallest.ai>
5
+ Author-email: Smallest <support@smallest.ai>
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/smallest-inc/smallest-python-sdk
8
8
  Keywords: smallest,smallest.ai,tts,text-to-speech
@@ -53,10 +53,14 @@ Currently, the library supports direct synthesis and the ability to synthesize s
53
53
 
54
54
  - [Installation](#installation)
55
55
  - [Get the API Key](#get-the-api-key)
56
+ - [Best Practices for Input Text](#best-practices-for-input-text)
56
57
  - [Examples](#examples)
57
- - [Sync](#sync)
58
- - [Async](#async)
58
+ - [Synchronous](#Synchronous)
59
+ - [Aynchronous](#Synchronous)
59
60
  - [LLM to Speech](#llm-to-speech)
61
+ - [Add your Voice](#add-your-voice)
62
+ - [Synchronously](#synchronously)
63
+ - [Asynchronously](#asynchronously)
60
64
  - [Available Methods](#available-methods)
61
65
  - [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
62
66
 
@@ -76,19 +80,30 @@ When using an SDK in your application, make sure to pin to at least the major ve
76
80
  3. Create a new API Key and copy it.
77
81
  4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
78
82
 
83
+ ## Best Practices for Input Text
84
+ While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
85
+
86
+ For optimal voice generation results:
87
+
88
+ 1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
89
+ 2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
90
+ 3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
91
+
79
92
  ## Examples
80
93
 
81
- ### Sync
94
+ ### Synchronous
82
95
  A synchronous text-to-speech synthesis client.
83
96
 
84
97
  **Basic Usage:**
85
98
  ```python
86
- import os
87
99
  from smallest import Smallest
88
100
 
89
101
  def main():
90
- client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
91
- client.synthesize("Hello, this is a test for sync synthesis function.", save_as="sync_synthesize.wav")
102
+ client = Smallest(api_key="SMALLEST_API_KEY")
103
+ client.synthesize(
104
+ text="Hello, this is a test for sync synthesis function.",
105
+ save_as="sync_synthesize.wav"
106
+ )
92
107
 
93
108
  if __name__ == "__main__":
94
109
  main()
@@ -98,7 +113,7 @@ if __name__ == "__main__":
98
113
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
99
114
  - `model`: TTS model to use (default: "lightning")
100
115
  - `sample_rate`: Audio sample rate (default: 24000)
101
- - `voice`: Voice ID (default: "emily")
116
+ - `voice_id`: Voice ID (default: "emily")
102
117
  - `speed`: Speech speed multiplier (default: 1.0)
103
118
  - `add_wav_header`: Include WAV header in output (default: True)
104
119
  - `transliterate`: Enable text transliteration (default: False)
@@ -117,17 +132,16 @@ client.synthesize(
117
132
  ```
118
133
 
119
134
 
120
- ### Async
135
+ ### Asynchronous
121
136
  Asynchronous text-to-speech synthesis client.
122
137
 
123
138
  **Basic Usage:**
124
139
  ```python
125
- import os
126
140
  import asyncio
127
141
  import aiofiles
128
142
  from smallest import AsyncSmallest
129
143
 
130
- client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
144
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
131
145
 
132
146
  async def main():
133
147
  async with client as tts:
@@ -143,7 +157,7 @@ if __name__ == "__main__":
143
157
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
144
158
  - `model`: TTS model to use (default: "lightning")
145
159
  - `sample_rate`: Audio sample rate (default: 24000)
146
- - `voice`: Voice ID (default: "emily")
160
+ - `voice_id`: Voice ID (default: "emily")
147
161
  - `speed`: Speech speed multiplier (default: 1.0)
148
162
  - `add_wav_header`: Include WAV header in output (default: True)
149
163
  - `transliterate`: Enable text transliteration (default: False)
@@ -162,18 +176,16 @@ audio_bytes = await tts.synthesize(
162
176
 
163
177
  ### LLM to Speech
164
178
 
165
- The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output with minimal latency. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance.
179
+ The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instances.
166
180
 
167
181
  ```python
168
- import os
169
182
  import wave
170
183
  import asyncio
171
184
  from groq import Groq
172
- from smallest import Smallest
173
- from smallest import TextToAudioStream
185
+ from smallest import Smallest, TextToAudioStream
174
186
 
175
- llm = Groq(api_key=os.environ.get("GROQ_API_KEY"))
176
- tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
187
+ llm = Groq(api_key="GROQ_API_KEY")
188
+ tts = Smallest(api_key="SMALLEST_API_KEY")
177
189
 
178
190
  async def generate_text(prompt):
179
191
  """Async generator for streaming text from Groq. You can use any LLM"""
@@ -230,16 +242,46 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
230
242
  - Streamed over a network
231
243
  - Further processed as needed
232
244
 
245
+ ## Add your Voice
246
+ The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
247
+
248
+ ### Synchronously
249
+ ```python
250
+ from smallest import Smallest
251
+
252
+ def main():
253
+ client = Smallest(api_key="YOUR_API_KEY")
254
+ res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
255
+ print(res)
256
+
257
+ if __name__ == "__main__":
258
+ main()
259
+ ```
260
+
261
+ ### Asynchronously
262
+ ```python
263
+ import asyncio
264
+ from smallest import AsyncSmallest
265
+
266
+ async def main():
267
+ client = AsyncSmallest(api_key="YOUR_API_KEY")
268
+ res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
269
+ print(res)
270
+
271
+ if __name__ == "__main__":
272
+ asyncio.run(main())
273
+ ```
233
274
 
234
275
  ## Available Methods
235
276
 
236
277
  ```python
237
- from smallest.tts import Smallest
278
+ from smallest import Smallest
238
279
 
239
- client = Smallest()
280
+ client = Smallest(api_key="SMALLEST_API_KEY")
240
281
 
241
- print(f"Avalaible Languages: {client.get_languages()}")
242
- print(f"Available Voices: {client.get_voices()}")
282
+ print(f"Available Languages: {client.get_languages()}")
283
+ print(f"Available Voices: {client.get_voices(model='lightning')}")
284
+ print(f"Available Voices: {client.get_cloned_voices()}")
243
285
  print(f"Available Models: {client.get_models()}")
244
286
  ```
245
287
 
@@ -254,7 +296,7 @@ When implementing audio streaming with chunks of synthesized speech, WAV headers
254
296
  - Playing chunks that each carry a WAV header causes audio artifacts (pop sounds) when the chunks are concatenated or played back-to-back.
255
297
  - Audio players would try to reinitialize audio settings for each chunk.
256
298
 
257
- ### Best Practices
299
+ ### Best Practices for Audio Streaming
258
300
  1. Stream raw PCM audio data without headers
259
301
  2. Add a single WAV header only when:
260
302
  - Saving the complete stream to a file
@@ -0,0 +1,12 @@
1
+ smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
+ smallest/async_tts.py,sha256=5qW7owlMeSWFx0rpn9dYfbO76mmNY0DXcytNjLfbbz8,9727
3
+ smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
+ smallest/models.py,sha256=g2e_4nU5P48vyXZandKLWqZC1TkoEGeLvYKqJIqurSI,83
5
+ smallest/stream_tts.py,sha256=SeP9A9zXJWiV62Eezv0L1J5sRIR304Llc_mwVtOOSUI,6348
6
+ smallest/tts.py,sha256=xBBEk_byRPGT6SYkE6qvhfEupgHl6XBdAqtxmzw2rF8,8311
7
+ smallest/utils.py,sha256=FCZkvbbHJBoN0jpBSqmt1hJjvks56t8i82we4XnqjYk,3016
8
+ smallestai-2.0.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
+ smallestai-2.0.0.dist-info/METADATA,sha256=EIyZZqzAvHgQ7jfEs5x5LUx3HjzoCUhzJoXfkb3CuoI,11538
10
+ smallestai-2.0.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
11
+ smallestai-2.0.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
+ smallestai-2.0.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
- smallest/async_tts.py,sha256=ReYTePzR0e3UcWxwnetGnwO4q_F7g2LRZPxIVfmgB4Y,6120
3
- smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
- smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
- smallest/stream_tts.py,sha256=0OypcUzgP7CN3VGcGJDnQ2FDw2JOzPaSQ1cXK69k5dY,6198
6
- smallest/tts.py,sha256=l8VHaOE8-Feg3Ey8C3osOrLs3ffYz0q_J1ACiEtZ8y0,5999
7
- smallest/utils.py,sha256=hrta82o-rJRaOHTVKqHqC86_T56jAuvqJHIizAEqFok,3796
8
- smallestai-1.3.3.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
- smallestai-1.3.3.dist-info/METADATA,sha256=ji7TsSt2C9_GGNjRuh8DAyBXipVRuFeouLoGi76JFAU,9856
10
- smallestai-1.3.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
- smallestai-1.3.3.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
- smallestai-1.3.3.dist-info/RECORD,,