smallestai 1.3.3__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of smallestai might be problematic. Click here for more details.
- smallest/async_tts.py +164 -60
- smallest/models.py +5 -7
- smallest/stream_tts.py +23 -26
- smallest/tts.py +87 -23
- smallest/utils.py +47 -66
- {smallestai-1.3.3.dist-info → smallestai-2.0.0.dist-info}/METADATA +67 -25
- smallestai-2.0.0.dist-info/RECORD +12 -0
- {smallestai-1.3.3.dist-info → smallestai-2.0.0.dist-info}/WHEEL +1 -1
- smallestai-1.3.3.dist-info/RECORD +0 -12
- {smallestai-1.3.3.dist-info → smallestai-2.0.0.dist-info}/LICENSE +0 -0
- {smallestai-1.3.3.dist-info → smallestai-2.0.0.dist-info}/top_level.txt +0 -0
smallest/async_tts.py
CHANGED
|
@@ -1,22 +1,23 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import copy
|
|
3
|
+
import json
|
|
3
4
|
import aiohttp
|
|
4
5
|
import aiofiles
|
|
6
|
+
import requests
|
|
5
7
|
from typing import Optional, Union, List
|
|
6
8
|
|
|
7
|
-
from .
|
|
8
|
-
from .
|
|
9
|
-
|
|
10
|
-
get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
|
|
9
|
+
from smallest.exceptions import TTSError, APIError
|
|
10
|
+
from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
|
|
11
|
+
get_smallest_languages, get_smallest_models, API_BASE_URL)
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class AsyncSmallest:
|
|
14
15
|
def __init__(
|
|
15
16
|
self,
|
|
16
|
-
api_key:
|
|
17
|
-
model:
|
|
18
|
-
sample_rate: int = 24000,
|
|
19
|
-
|
|
17
|
+
api_key: str = None,
|
|
18
|
+
model: Optional[str] = "lightning",
|
|
19
|
+
sample_rate: Optional[int] = 24000,
|
|
20
|
+
voice_id: Optional[str] = "emily",
|
|
20
21
|
speed: Optional[float] = 1.0,
|
|
21
22
|
add_wav_header: Optional[bool] = True,
|
|
22
23
|
transliterate: Optional[bool] = False,
|
|
@@ -25,15 +26,15 @@ class AsyncSmallest:
|
|
|
25
26
|
"""
|
|
26
27
|
AsyncSmallest Instance for asynchronous text-to-speech synthesis.
|
|
27
28
|
|
|
28
|
-
This class provides an asynchronous implementation of the text-to-speech functionality.
|
|
29
|
-
It allows for non-blocking synthesis of speech from text, making it suitable for applications
|
|
29
|
+
This class provides an asynchronous implementation of the text-to-speech functionality.
|
|
30
|
+
It allows for non-blocking synthesis of speech from text, making it suitable for applications
|
|
30
31
|
that require async processing.
|
|
31
32
|
|
|
32
33
|
Args:
|
|
33
34
|
- api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
|
|
34
35
|
- model (TTSModels): The model to be used for synthesis.
|
|
35
36
|
- sample_rate (int): The sample rate for the audio output.
|
|
36
|
-
-
|
|
37
|
+
- voice_id (TTSVoices): The voice to be used for synthesis.
|
|
37
38
|
- speed (float): The speed of the speech synthesis.
|
|
38
39
|
- add_wav_header (bool): Whether to add a WAV header to the output audio.
|
|
39
40
|
- transliterate (bool): Whether to transliterate the text.
|
|
@@ -49,11 +50,11 @@ class AsyncSmallest:
|
|
|
49
50
|
if not self.api_key:
|
|
50
51
|
raise TTSError()
|
|
51
52
|
self.chunk_size = 250
|
|
52
|
-
|
|
53
|
+
|
|
53
54
|
self.opts = TTSOptions(
|
|
54
55
|
model=model,
|
|
55
56
|
sample_rate=sample_rate,
|
|
56
|
-
|
|
57
|
+
voice_id=voice_id,
|
|
57
58
|
api_key=self.api_key,
|
|
58
59
|
add_wav_header=add_wav_header,
|
|
59
60
|
speed=speed,
|
|
@@ -61,29 +62,65 @@ class AsyncSmallest:
|
|
|
61
62
|
remove_extra_silence=remove_extra_silence,
|
|
62
63
|
)
|
|
63
64
|
self.session = None
|
|
64
|
-
|
|
65
|
+
|
|
66
|
+
|
|
65
67
|
async def __aenter__(self):
|
|
66
68
|
if self.session is None:
|
|
67
69
|
self.session = aiohttp.ClientSession()
|
|
68
70
|
return self
|
|
69
71
|
|
|
72
|
+
|
|
70
73
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
71
74
|
if self.session:
|
|
72
75
|
await self.session.close()
|
|
73
76
|
|
|
74
77
|
|
|
78
|
+
async def _ensure_session(self):
|
|
79
|
+
"""Ensure session exists for direct calls"""
|
|
80
|
+
if not self.session:
|
|
81
|
+
self.session = aiohttp.ClientSession()
|
|
82
|
+
return True
|
|
83
|
+
return False
|
|
84
|
+
|
|
85
|
+
|
|
75
86
|
def get_languages(self) -> List[str]:
|
|
76
87
|
"""Returns a list of available languages."""
|
|
77
88
|
return get_smallest_languages()
|
|
89
|
+
|
|
90
|
+
def get_cloned_voices(self) -> str:
|
|
91
|
+
"""Returns a list of your cloned voices."""
|
|
92
|
+
headers = {
|
|
93
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
|
|
97
|
+
if res.status_code != 200:
|
|
98
|
+
raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")
|
|
99
|
+
|
|
100
|
+
return json.dumps(res.json(), indent=4, ensure_ascii=False)
|
|
78
101
|
|
|
79
|
-
|
|
102
|
+
|
|
103
|
+
def get_voices(
|
|
104
|
+
self,
|
|
105
|
+
model: Optional[str] = "lightning"
|
|
106
|
+
) -> str:
|
|
80
107
|
"""Returns a list of available voices."""
|
|
81
|
-
|
|
108
|
+
headers = {
|
|
109
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers)
|
|
113
|
+
if res.status_code != 200:
|
|
114
|
+
raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")
|
|
115
|
+
|
|
116
|
+
return json.dumps(res.json(), indent=4, ensure_ascii=False)
|
|
117
|
+
|
|
82
118
|
|
|
83
119
|
def get_models(self) -> List[str]:
|
|
84
120
|
"""Returns a list of available models."""
|
|
85
121
|
return get_smallest_models()
|
|
86
|
-
|
|
122
|
+
|
|
123
|
+
|
|
87
124
|
async def synthesize(
|
|
88
125
|
self,
|
|
89
126
|
text: str,
|
|
@@ -95,62 +132,129 @@ class AsyncSmallest:
|
|
|
95
132
|
|
|
96
133
|
Args:
|
|
97
134
|
- text (str): The text to be converted to speech.
|
|
98
|
-
- save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
|
|
135
|
+
- save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
|
|
99
136
|
The file must have a .wav extension.
|
|
100
137
|
- kwargs: Additional optional parameters to override `__init__` options for this call.
|
|
101
138
|
|
|
102
139
|
Returns:
|
|
103
|
-
- Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
|
|
140
|
+
- Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
|
|
104
141
|
otherwise, returns None after saving the audio to the specified file.
|
|
105
142
|
|
|
106
143
|
Raises:
|
|
107
144
|
- TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
|
|
108
145
|
- APIError: If the API request fails or returns an error.
|
|
109
146
|
"""
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
147
|
+
should_cleanup = await self._ensure_session()
|
|
148
|
+
|
|
149
|
+
try:
|
|
150
|
+
opts = copy.deepcopy(self.opts)
|
|
151
|
+
for key, value in kwargs.items():
|
|
152
|
+
setattr(opts, key, value)
|
|
153
|
+
|
|
154
|
+
validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)
|
|
155
|
+
|
|
156
|
+
self.chunk_size = 250
|
|
157
|
+
if opts.model == 'ligtning-large':
|
|
158
|
+
self.chunk_size = 140
|
|
159
|
+
|
|
160
|
+
chunks = chunk_text(text, self.chunk_size)
|
|
161
|
+
audio_content = b""
|
|
162
|
+
|
|
163
|
+
for chunk in chunks:
|
|
164
|
+
payload = {
|
|
165
|
+
"text": preprocess_text(chunk),
|
|
166
|
+
"sample_rate": opts.sample_rate,
|
|
167
|
+
"voice_id": opts.voice_id,
|
|
168
|
+
"add_wav_header": False,
|
|
169
|
+
"speed": opts.speed,
|
|
170
|
+
"model": opts.model,
|
|
171
|
+
"transliterate": opts.transliterate,
|
|
172
|
+
"remove_extra_silence": opts.remove_extra_silence
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
headers = {
|
|
176
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
177
|
+
"Content-Type": "application/json",
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
if not self.session:
|
|
181
|
+
self.session = aiohttp.ClientSession()
|
|
182
|
+
|
|
183
|
+
async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
|
|
184
|
+
if res.status != 200:
|
|
185
|
+
raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
|
|
186
|
+
|
|
187
|
+
audio_content += await res.read()
|
|
188
|
+
|
|
189
|
+
if save_as:
|
|
190
|
+
if not save_as.endswith(".wav"):
|
|
191
|
+
raise TTSError("Invalid file name. Extension must be .wav")
|
|
192
|
+
|
|
193
|
+
async with aiofiles.open(save_as, mode='wb') as f:
|
|
194
|
+
await f.write(add_wav_header(audio_content, opts.sample_rate))
|
|
195
|
+
|
|
196
|
+
return None
|
|
197
|
+
|
|
198
|
+
if opts.add_wav_header:
|
|
199
|
+
return add_wav_header(audio_content, opts.sample_rate)
|
|
200
|
+
|
|
201
|
+
return audio_content
|
|
137
202
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
203
|
+
finally:
|
|
204
|
+
if should_cleanup and self.session:
|
|
205
|
+
await self.session.close()
|
|
206
|
+
self.session = None
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def add_voice(self, display_name: str, file_path: str) -> str:
|
|
210
|
+
"""
|
|
211
|
+
Instantly clone your voice asynchronously.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
- display_name (str): The display name for the new voice.
|
|
215
|
+
- file_path (str): The path to the reference audio file to be cloned.
|
|
143
216
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
217
|
+
Returns:
|
|
218
|
+
- str: The response from the API as a formatted JSON string.
|
|
219
|
+
|
|
220
|
+
Raises:
|
|
221
|
+
- TTSError: If the file does not exist or is not a valid audio file.
|
|
222
|
+
- APIError: If the API request fails or returns an error.
|
|
223
|
+
"""
|
|
224
|
+
url = f"{API_BASE_URL}/lightning-large/add_voice"
|
|
225
|
+
|
|
226
|
+
if not os.path.exists(file_path):
|
|
227
|
+
raise TTSError("Invalid file path. File does not exist.")
|
|
228
|
+
|
|
229
|
+
ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
|
|
230
|
+
file_extension = os.path.splitext(file_path)[1].lower()
|
|
231
|
+
if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
|
|
232
|
+
raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
|
|
233
|
+
|
|
234
|
+
headers = {
|
|
235
|
+
'Authorization': f"Bearer {self.api_key}",
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
should_cleanup = await self._ensure_session()
|
|
239
|
+
|
|
240
|
+
try:
|
|
241
|
+
async with aiofiles.open(file_path, 'rb') as f:
|
|
242
|
+
file_data = await f.read()
|
|
243
|
+
|
|
244
|
+
data = aiohttp.FormData()
|
|
245
|
+
content_type = file_extension[1:]
|
|
147
246
|
|
|
148
|
-
|
|
149
|
-
|
|
247
|
+
data.add_field('displayName', display_name)
|
|
248
|
+
data.add_field('file', file_data, filename=file_path, content_type=f"audio/{content_type}")
|
|
150
249
|
|
|
151
|
-
|
|
250
|
+
async with self.session.post(url, headers=headers, data=data) as res:
|
|
251
|
+
if res.status != 200:
|
|
252
|
+
raise APIError(f"Failed to add voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
|
|
152
253
|
|
|
153
|
-
|
|
154
|
-
return add_wav_header(audio_content, self.opts.sample_rate)
|
|
254
|
+
return json.dumps(await res.json(), indent=4, ensure_ascii=False)
|
|
155
255
|
|
|
156
|
-
|
|
256
|
+
finally:
|
|
257
|
+
if should_cleanup and self.session:
|
|
258
|
+
await self.session.close()
|
|
259
|
+
self.session = None
|
|
260
|
+
|
smallest/models.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
"arjun", "sanya", "saina", "pooja", "saurabh", "nisha", "mansi", "radhika", "kajal",
|
|
7
|
-
"raghav", "deepika", "niharika", "monika", "raman", "diya", "ananya", "william"]
|
|
1
|
+
TTSLanguages = ["en", "hi"]
|
|
2
|
+
TTSModels = [
|
|
3
|
+
"lightning",
|
|
4
|
+
"lightning-large"
|
|
5
|
+
]
|
smallest/stream_tts.py
CHANGED
|
@@ -3,17 +3,17 @@ from threading import Thread
|
|
|
3
3
|
from queue import Queue, Empty
|
|
4
4
|
from typing import AsyncGenerator, Optional, Union
|
|
5
5
|
|
|
6
|
-
from .tts import Smallest
|
|
7
|
-
from .exceptions import APIError
|
|
8
|
-
from .async_tts import AsyncSmallest
|
|
9
|
-
from .utils import SENTENCE_END_REGEX
|
|
6
|
+
from smallest.tts import Smallest
|
|
7
|
+
from smallest.exceptions import APIError
|
|
8
|
+
from smallest.async_tts import AsyncSmallest
|
|
9
|
+
from smallest.utils import SENTENCE_END_REGEX
|
|
10
10
|
|
|
11
11
|
class TextToAudioStream:
|
|
12
12
|
def __init__(
|
|
13
13
|
self,
|
|
14
14
|
tts_instance: Union[Smallest, AsyncSmallest],
|
|
15
|
-
queue_timeout: float = 5.0,
|
|
16
|
-
max_retries: int = 3
|
|
15
|
+
queue_timeout: Optional[float] = 5.0,
|
|
16
|
+
max_retries: Optional[int] = 3
|
|
17
17
|
):
|
|
18
18
|
"""
|
|
19
19
|
A real-time text-to-speech processor that converts streaming text into audio output.
|
|
@@ -35,7 +35,6 @@ class TextToAudioStream:
|
|
|
35
35
|
"""
|
|
36
36
|
self.tts_instance = tts_instance
|
|
37
37
|
self.tts_instance.opts.add_wav_header = False
|
|
38
|
-
|
|
39
38
|
self.sentence_end_regex = SENTENCE_END_REGEX
|
|
40
39
|
self.queue_timeout = queue_timeout
|
|
41
40
|
self.max_retries = max_retries
|
|
@@ -43,6 +42,9 @@ class TextToAudioStream:
|
|
|
43
42
|
self.buffer_size = 250
|
|
44
43
|
self.stop_flag = False
|
|
45
44
|
|
|
45
|
+
if self.tts_instance.opts.model == 'lightning-large':
|
|
46
|
+
self.buffer_size = 140
|
|
47
|
+
|
|
46
48
|
|
|
47
49
|
async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
|
|
48
50
|
"""
|
|
@@ -58,51 +60,46 @@ class TextToAudioStream:
|
|
|
58
60
|
async for chunk in llm_output:
|
|
59
61
|
buffer += chunk
|
|
60
62
|
i = 0
|
|
61
|
-
|
|
62
63
|
while i < len(buffer):
|
|
63
64
|
current_chunk = buffer[:i + 1]
|
|
64
65
|
if self.sentence_end_regex.match(current_chunk):
|
|
65
66
|
last_break_index = i
|
|
66
|
-
|
|
67
67
|
if len(current_chunk) >= self.buffer_size:
|
|
68
68
|
if last_break_index > 0:
|
|
69
|
-
self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
|
|
69
|
+
self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ')
|
|
70
70
|
buffer = buffer[last_break_index + 1:]
|
|
71
71
|
else:
|
|
72
72
|
# No sentence boundary, split at max chunk size
|
|
73
|
-
self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
|
|
73
|
+
self.queue.put(f'{buffer[:self.buffer_size].replace("—", " ").strip()} ')
|
|
74
74
|
buffer = buffer[self.buffer_size:]
|
|
75
|
-
|
|
76
75
|
last_break_index = 0
|
|
77
76
|
i = -1
|
|
78
|
-
|
|
79
77
|
i += 1
|
|
80
|
-
|
|
78
|
+
|
|
81
79
|
if buffer:
|
|
82
|
-
self.queue.put(buffer.replace("—", " ").strip())
|
|
83
|
-
|
|
84
|
-
self.stop_flag = True # completion flag when LLM output ends
|
|
80
|
+
self.queue.put(f'{buffer.replace("—", " ").strip()} ')
|
|
81
|
+
self.stop_flag = True
|
|
85
82
|
|
|
86
83
|
|
|
87
|
-
|
|
88
|
-
"""
|
|
84
|
+
def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
|
|
85
|
+
"""Synchronously synthesizes a given sentence."""
|
|
89
86
|
try:
|
|
90
|
-
return
|
|
87
|
+
return self.tts_instance.synthesize(sentence)
|
|
91
88
|
except APIError as e:
|
|
92
89
|
if retries < self.max_retries:
|
|
93
|
-
return
|
|
90
|
+
return self._synthesize_sync(sentence, retries + 1)
|
|
94
91
|
else:
|
|
95
92
|
print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
|
|
96
93
|
return None
|
|
94
|
+
|
|
97
95
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
"""Synchronously synthesizes a given sentence."""
|
|
96
|
+
async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
|
|
97
|
+
"""Asynchronously synthesizes a given sentence."""
|
|
101
98
|
try:
|
|
102
|
-
return self.tts_instance.synthesize(sentence)
|
|
99
|
+
return await self.tts_instance.synthesize(sentence)
|
|
103
100
|
except APIError as e:
|
|
104
101
|
if retries < self.max_retries:
|
|
105
|
-
return self.
|
|
102
|
+
return await self._synthesize_async(sentence, retries + 1)
|
|
106
103
|
else:
|
|
107
104
|
print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
|
|
108
105
|
return None
|
smallest/tts.py
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import json
|
|
2
3
|
import wave
|
|
3
4
|
import copy
|
|
4
5
|
import requests
|
|
5
6
|
from typing import Optional, Union, List
|
|
6
7
|
|
|
7
|
-
from .
|
|
8
|
-
from .
|
|
9
|
-
|
|
10
|
-
get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
|
|
8
|
+
from smallest.exceptions import TTSError, APIError
|
|
9
|
+
from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
|
|
10
|
+
get_smallest_languages, get_smallest_models, API_BASE_URL)
|
|
11
11
|
|
|
12
12
|
class Smallest:
|
|
13
13
|
def __init__(
|
|
14
14
|
self,
|
|
15
|
-
api_key:
|
|
16
|
-
model:
|
|
17
|
-
sample_rate: int = 24000,
|
|
18
|
-
|
|
15
|
+
api_key: str = None,
|
|
16
|
+
model: Optional[str] = "lightning",
|
|
17
|
+
sample_rate: Optional[int] = 24000,
|
|
18
|
+
voice_id: Optional[str] = "emily",
|
|
19
19
|
speed: Optional[float] = 1.0,
|
|
20
20
|
add_wav_header: Optional[bool] = True,
|
|
21
21
|
transliterate: Optional[bool] = False,
|
|
@@ -31,7 +31,7 @@ class Smallest:
|
|
|
31
31
|
- api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
|
|
32
32
|
- model (TTSModels): The model to be used for synthesis.
|
|
33
33
|
- sample_rate (int): The sample rate for the audio output.
|
|
34
|
-
-
|
|
34
|
+
- voice_id (TTSVoices): The voice to be used for synthesis.
|
|
35
35
|
- speed (float): The speed of the speech synthesis.
|
|
36
36
|
- add_wav_header (bool): Whether to add a WAV header to the output audio.
|
|
37
37
|
- transliterate (bool): Whether to transliterate the text.
|
|
@@ -52,7 +52,7 @@ class Smallest:
|
|
|
52
52
|
self.opts = TTSOptions(
|
|
53
53
|
model=model,
|
|
54
54
|
sample_rate=sample_rate,
|
|
55
|
-
|
|
55
|
+
voice_id=voice_id,
|
|
56
56
|
api_key=self.api_key,
|
|
57
57
|
add_wav_header=add_wav_header,
|
|
58
58
|
speed=speed,
|
|
@@ -65,14 +65,40 @@ class Smallest:
|
|
|
65
65
|
"""Returns a list of available languages."""
|
|
66
66
|
return get_smallest_languages()
|
|
67
67
|
|
|
68
|
-
def
|
|
68
|
+
def get_cloned_voices(self) -> str:
|
|
69
|
+
"""Returns a list of your cloned voices."""
|
|
70
|
+
headers = {
|
|
71
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
|
|
75
|
+
if res.status_code != 200:
|
|
76
|
+
raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")
|
|
77
|
+
|
|
78
|
+
return json.dumps(res.json(), indent=4, ensure_ascii=False)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_voices(
|
|
82
|
+
self,
|
|
83
|
+
model: Optional[str] = "lightning"
|
|
84
|
+
) -> str:
|
|
69
85
|
"""Returns a list of available voices."""
|
|
70
|
-
|
|
86
|
+
headers = {
|
|
87
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers)
|
|
91
|
+
if res.status_code != 200:
|
|
92
|
+
raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")
|
|
93
|
+
|
|
94
|
+
return json.dumps(res.json(), indent=4, ensure_ascii=False)
|
|
95
|
+
|
|
71
96
|
|
|
72
97
|
def get_models(self) -> List[str]:
|
|
73
98
|
"""Returns a list of available models."""
|
|
74
99
|
return get_smallest_models()
|
|
75
100
|
|
|
101
|
+
|
|
76
102
|
def synthesize(
|
|
77
103
|
self,
|
|
78
104
|
text: str,
|
|
@@ -100,16 +126,20 @@ class Smallest:
|
|
|
100
126
|
for key, value in kwargs.items():
|
|
101
127
|
setattr(opts, key, value)
|
|
102
128
|
|
|
103
|
-
validate_input(text, opts.
|
|
129
|
+
validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)
|
|
130
|
+
|
|
131
|
+
self.chunk_size = 250
|
|
132
|
+
if opts.model == "lightning-large":
|
|
133
|
+
self.chunk_size = 140
|
|
104
134
|
|
|
105
|
-
chunks =
|
|
135
|
+
chunks = chunk_text(text, self.chunk_size)
|
|
106
136
|
audio_content = b""
|
|
107
137
|
|
|
108
138
|
for chunk in chunks:
|
|
109
139
|
payload = {
|
|
110
140
|
"text": preprocess_text(chunk),
|
|
111
141
|
"sample_rate": opts.sample_rate,
|
|
112
|
-
"voice_id": opts.
|
|
142
|
+
"voice_id": opts.voice_id,
|
|
113
143
|
"add_wav_header": False,
|
|
114
144
|
"speed": opts.speed,
|
|
115
145
|
"model": opts.model,
|
|
@@ -128,11 +158,6 @@ class Smallest:
|
|
|
128
158
|
|
|
129
159
|
audio_content += res.content
|
|
130
160
|
|
|
131
|
-
|
|
132
|
-
res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
|
|
133
|
-
if res.status_code != 200:
|
|
134
|
-
raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
|
|
135
|
-
|
|
136
161
|
if save_as:
|
|
137
162
|
if not save_as.endswith(".wav"):
|
|
138
163
|
raise TTSError("Invalid file name. Extension must be .wav")
|
|
@@ -140,11 +165,50 @@ class Smallest:
|
|
|
140
165
|
with wave.open(save_as, "wb") as wf:
|
|
141
166
|
wf.setnchannels(1)
|
|
142
167
|
wf.setsampwidth(2)
|
|
143
|
-
wf.setframerate(
|
|
168
|
+
wf.setframerate(opts.sample_rate)
|
|
144
169
|
wf.writeframes(audio_content)
|
|
145
170
|
return None
|
|
146
171
|
|
|
147
|
-
if
|
|
148
|
-
return add_wav_header(audio_content,
|
|
172
|
+
if opts.add_wav_header:
|
|
173
|
+
return add_wav_header(audio_content, opts.sample_rate)
|
|
149
174
|
|
|
150
175
|
return audio_content
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def add_voice(self, display_name: str, file_path: str) -> str:
|
|
179
|
+
"""
|
|
180
|
+
Instantly clone your voice synchronously.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
- display_name (str): The display name for the new voice.
|
|
184
|
+
- file_path (str): The path to the reference audio file to be cloned.
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
- str: The response from the API as a formatted JSON string.
|
|
188
|
+
|
|
189
|
+
Raises:
|
|
190
|
+
- TTSError: If the file does not exist or is not a valid audio file.
|
|
191
|
+
- APIError: If the API request fails or returns an error.
|
|
192
|
+
"""
|
|
193
|
+
if not os.path.isfile(file_path):
|
|
194
|
+
raise TTSError("Invalid file path. File does not exist.")
|
|
195
|
+
|
|
196
|
+
ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
|
|
197
|
+
file_extension = os.path.splitext(file_path)[1].lower()
|
|
198
|
+
if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
|
|
199
|
+
raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
|
|
200
|
+
|
|
201
|
+
url = f"{API_BASE_URL}/lightning-large/add_voice"
|
|
202
|
+
payload = {'displayName': display_name}
|
|
203
|
+
|
|
204
|
+
files = [('file', (os.path.basename(file_path), open(file_path, 'rb'), 'audio/wav'))]
|
|
205
|
+
|
|
206
|
+
headers = {
|
|
207
|
+
'Authorization': f"Bearer {self.api_key}",
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
response = requests.post(url, headers=headers, data=payload, files=files)
|
|
211
|
+
if response.status_code != 200:
|
|
212
|
+
raise APIError(f"Failed to add voice: {response.text}. For more information, visit https://waves.smallest.ai/")
|
|
213
|
+
|
|
214
|
+
return json.dumps(response.json(), indent=4, ensure_ascii=False)
|
smallest/utils.py
CHANGED
|
@@ -1,27 +1,26 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import io
|
|
3
|
-
import unicodedata
|
|
4
3
|
from typing import List
|
|
5
4
|
from pydub import AudioSegment
|
|
6
5
|
from dataclasses import dataclass
|
|
7
6
|
from sacremoses import MosesPunctNormalizer
|
|
8
7
|
|
|
9
|
-
from .exceptions import ValidationError
|
|
10
|
-
from .models import TTSModels, TTSLanguages
|
|
8
|
+
from smallest.exceptions import ValidationError
|
|
9
|
+
from smallest.models import TTSModels, TTSLanguages
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
|
|
14
|
-
SENTENCE_END_REGEX = re.compile(r'.*[
|
|
15
|
-
|
|
13
|
+
SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$')
|
|
14
|
+
mpn = MosesPunctNormalizer()
|
|
16
15
|
SAMPLE_WIDTH = 2
|
|
17
16
|
CHANNELS = 1
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
@dataclass
|
|
21
20
|
class TTSOptions:
|
|
22
|
-
model:
|
|
21
|
+
model: str
|
|
23
22
|
sample_rate: int
|
|
24
|
-
|
|
23
|
+
voice_id: str
|
|
25
24
|
api_key: str
|
|
26
25
|
add_wav_header: bool
|
|
27
26
|
speed: float
|
|
@@ -29,15 +28,13 @@ class TTSOptions:
|
|
|
29
28
|
remove_extra_silence: bool
|
|
30
29
|
|
|
31
30
|
|
|
32
|
-
def validate_input(text: str,
|
|
31
|
+
def validate_input(text: str, model: str, sample_rate: int, speed: float):
|
|
33
32
|
if not text:
|
|
34
|
-
raise ValidationError("Text cannot be empty")
|
|
35
|
-
if
|
|
36
|
-
raise ValidationError(f"Invalid
|
|
37
|
-
if
|
|
38
|
-
raise ValidationError(f"Invalid
|
|
39
|
-
if not 8000 <= sample_rate <= 48000:
|
|
40
|
-
raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 48000")
|
|
33
|
+
raise ValidationError("Text cannot be empty.")
|
|
34
|
+
if model not in TTSModels:
|
|
35
|
+
raise ValidationError(f"Invalid model: {model}. Must be one of {TTSModels}")
|
|
36
|
+
if not 8000 <= sample_rate <= 24000:
|
|
37
|
+
raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 24000")
|
|
41
38
|
if not 0.5 <= speed <= 2.0:
|
|
42
39
|
raise ValidationError(f"Invalid speed: {speed}. Must be between 0.5 and 2.0")
|
|
43
40
|
|
|
@@ -51,65 +48,49 @@ def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: i
|
|
|
51
48
|
|
|
52
49
|
|
|
53
50
|
def preprocess_text(text: str) -> str:
|
|
54
|
-
|
|
55
|
-
text =
|
|
56
|
-
text = text.lower()
|
|
57
|
-
text = text.replace("—", " ")
|
|
58
|
-
# Normalize punctuation using Moses punct normalizer
|
|
59
|
-
mpn = MosesPunctNormalizer()
|
|
51
|
+
text = text.replace("\n", " ").replace("\t", " ").replace("—", " ").replace("-", " ").replace("–", " ")
|
|
52
|
+
text = re.sub(r'\s+', ' ', text)
|
|
60
53
|
text = mpn.normalize(text)
|
|
61
54
|
return text.strip()
|
|
62
55
|
|
|
63
|
-
def split_into_chunks(text: str) -> List[str]:
|
|
64
|
-
"""
|
|
65
|
-
Splits the input text into chunks based on sentence boundaries
|
|
66
|
-
defined by SENTENCE_END_REGEX and the maximum chunk size.
|
|
67
|
-
"""
|
|
68
|
-
chunks = []
|
|
69
|
-
current_chunk = ""
|
|
70
|
-
last_break_index = 0
|
|
71
|
-
|
|
72
|
-
i = 0
|
|
73
|
-
while i < len(text):
|
|
74
|
-
current_chunk += text[i]
|
|
75
|
-
|
|
76
|
-
# Check for sentence boundary using regex
|
|
77
|
-
if SENTENCE_END_REGEX.match(current_chunk):
|
|
78
|
-
last_break_index = i
|
|
79
56
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
current_chunk = ""
|
|
90
|
-
last_break_index = 0
|
|
91
|
-
else:
|
|
92
|
-
# No sentence boundary found, split at max length
|
|
93
|
-
current_chunk = current_chunk.replace("—", " ")
|
|
94
|
-
chunks.append(current_chunk.strip())
|
|
95
|
-
text = text[CHUNK_SIZE:]
|
|
96
|
-
i = -1 # Reset index to process the remaining text
|
|
97
|
-
current_chunk = ""
|
|
98
|
-
|
|
99
|
-
i += 1
|
|
100
|
-
|
|
101
|
-
if text:
|
|
102
|
-
text = text.replace("—", " ")
|
|
57
|
+
def chunk_text(text: str, chunk_size: int = 250) -> List[str]:
|
|
58
|
+
"""
|
|
59
|
+
Splits the input text into chunks based on sentence boundaries
|
|
60
|
+
defined by SENTENCE_END_REGEX and the maximum chunk size.
|
|
61
|
+
Only splits at valid sentence boundaries to avoid breaking words.
|
|
62
|
+
"""
|
|
63
|
+
chunks = []
|
|
64
|
+
while text:
|
|
65
|
+
if len(text) <= chunk_size:
|
|
103
66
|
chunks.append(text.strip())
|
|
67
|
+
break
|
|
68
|
+
|
|
69
|
+
chunk_text = text[:chunk_size]
|
|
70
|
+
last_break_index = -1
|
|
104
71
|
|
|
105
|
-
|
|
72
|
+
# Find last sentence boundary using regex
|
|
73
|
+
for i in range(len(chunk_text) - 1, -1, -1):
|
|
74
|
+
if SENTENCE_END_REGEX.match(chunk_text[:i + 1]):
|
|
75
|
+
last_break_index = i
|
|
76
|
+
break
|
|
106
77
|
|
|
78
|
+
if last_break_index == -1:
|
|
79
|
+
# Fallback to space if no sentence boundary found
|
|
80
|
+
last_space = chunk_text.rfind(' ')
|
|
81
|
+
if last_space != -1:
|
|
82
|
+
last_break_index = last_space
|
|
83
|
+
else:
|
|
84
|
+
last_break_index = chunk_size - 1
|
|
107
85
|
|
|
108
|
-
|
|
109
|
-
|
|
86
|
+
chunks.append(text[:last_break_index + 1].strip())
|
|
87
|
+
text = text[last_break_index + 1:].strip()
|
|
110
88
|
|
|
111
|
-
|
|
112
|
-
|
|
89
|
+
return chunks
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_smallest_languages() -> List[str]:
|
|
93
|
+
return TTSLanguages
|
|
113
94
|
|
|
114
95
|
def get_smallest_models() -> List[str]:
|
|
115
|
-
return
|
|
96
|
+
return TTSModels
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: smallestai
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: Official Python client for the Smallest AI API
|
|
5
|
-
Author-email: Smallest <
|
|
5
|
+
Author-email: Smallest <support@smallest.ai>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/smallest-inc/smallest-python-sdk
|
|
8
8
|
Keywords: smallest,smallest.ai,tts,text-to-speech
|
|
@@ -53,10 +53,14 @@ Currently, the library supports direct synthesis and the ability to synthesize s
|
|
|
53
53
|
|
|
54
54
|
- [Installation](#installation)
|
|
55
55
|
- [Get the API Key](#get-the-api-key)
|
|
56
|
+
- [Best Practices for Input Text](#best-practices-for-input-text)
|
|
56
57
|
- [Examples](#examples)
|
|
57
|
-
- [
|
|
58
|
-
- [
|
|
58
|
+
- [Synchronous](#Synchronous)
|
|
59
|
+
- [Aynchronous](#Synchronous)
|
|
59
60
|
- [LLM to Speech](#llm-to-speech)
|
|
61
|
+
- [Add your Voice](#add-your-voice)
|
|
62
|
+
- [Synchronously](#synchronously)
|
|
63
|
+
- [Asynchronously](#asynchronously)
|
|
60
64
|
- [Available Methods](#available-methods)
|
|
61
65
|
- [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
|
|
62
66
|
|
|
@@ -76,19 +80,30 @@ When using an SDK in your application, make sure to pin to at least the major ve
|
|
|
76
80
|
3. Create a new API Key and copy it.
|
|
77
81
|
4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
|
|
78
82
|
|
|
83
|
+
## Best Practices for Input Text
|
|
84
|
+
While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
|
|
85
|
+
|
|
86
|
+
For optimal voice generation results:
|
|
87
|
+
|
|
88
|
+
1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
|
|
89
|
+
2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
|
|
90
|
+
3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
|
|
91
|
+
|
|
79
92
|
## Examples
|
|
80
93
|
|
|
81
|
-
###
|
|
94
|
+
### Synchronous
|
|
82
95
|
A synchronous text-to-speech synthesis client.
|
|
83
96
|
|
|
84
97
|
**Basic Usage:**
|
|
85
98
|
```python
|
|
86
|
-
import os
|
|
87
99
|
from smallest import Smallest
|
|
88
100
|
|
|
89
101
|
def main():
|
|
90
|
-
client = Smallest(api_key=
|
|
91
|
-
client.synthesize(
|
|
102
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
103
|
+
client.synthesize(
|
|
104
|
+
text="Hello, this is a test for sync synthesis function.",
|
|
105
|
+
save_as="sync_synthesize.wav"
|
|
106
|
+
)
|
|
92
107
|
|
|
93
108
|
if __name__ == "__main__":
|
|
94
109
|
main()
|
|
@@ -98,7 +113,7 @@ if __name__ == "__main__":
|
|
|
98
113
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
99
114
|
- `model`: TTS model to use (default: "lightning")
|
|
100
115
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
101
|
-
- `
|
|
116
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
102
117
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
103
118
|
- `add_wav_header`: Include WAV header in output (default: True)
|
|
104
119
|
- `transliterate`: Enable text transliteration (default: False)
|
|
@@ -117,17 +132,16 @@ client.synthesize(
|
|
|
117
132
|
```
|
|
118
133
|
|
|
119
134
|
|
|
120
|
-
###
|
|
135
|
+
### Asynchronous
|
|
121
136
|
Asynchronous text-to-speech synthesis client.
|
|
122
137
|
|
|
123
138
|
**Basic Usage:**
|
|
124
139
|
```python
|
|
125
|
-
import os
|
|
126
140
|
import asyncio
|
|
127
141
|
import aiofiles
|
|
128
142
|
from smallest import AsyncSmallest
|
|
129
143
|
|
|
130
|
-
client = AsyncSmallest(api_key=
|
|
144
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
131
145
|
|
|
132
146
|
async def main():
|
|
133
147
|
async with client as tts:
|
|
@@ -143,7 +157,7 @@ if __name__ == "__main__":
|
|
|
143
157
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
144
158
|
- `model`: TTS model to use (default: "lightning")
|
|
145
159
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
146
|
-
- `
|
|
160
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
147
161
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
148
162
|
- `add_wav_header`: Include WAV header in output (default: True)
|
|
149
163
|
- `transliterate`: Enable text transliteration (default: False)
|
|
@@ -162,18 +176,16 @@ audio_bytes = await tts.synthesize(
|
|
|
162
176
|
|
|
163
177
|
### LLM to Speech
|
|
164
178
|
|
|
165
|
-
The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output
|
|
179
|
+
The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance.
|
|
166
180
|
|
|
167
181
|
```python
|
|
168
|
-
import os
|
|
169
182
|
import wave
|
|
170
183
|
import asyncio
|
|
171
184
|
from groq import Groq
|
|
172
|
-
from smallest import Smallest
|
|
173
|
-
from smallest import TextToAudioStream
|
|
185
|
+
from smallest import Smallest, TextToAudioStream
|
|
174
186
|
|
|
175
|
-
llm = Groq(api_key=
|
|
176
|
-
tts = Smallest(api_key=
|
|
187
|
+
llm = Groq(api_key="GROQ_API_KEY")
|
|
188
|
+
tts = Smallest(api_key="SMALLEST_API_KEY")
|
|
177
189
|
|
|
178
190
|
async def generate_text(prompt):
|
|
179
191
|
"""Async generator for streaming text from Groq. You can use any LLM"""
|
|
@@ -230,16 +242,46 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
|
|
|
230
242
|
- Streamed over a network
|
|
231
243
|
- Further processed as needed
|
|
232
244
|
|
|
245
|
+
## Add your Voice
|
|
246
|
+
The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
|
|
247
|
+
|
|
248
|
+
### Synchronously
|
|
249
|
+
```python
|
|
250
|
+
from smallest import Smallest
|
|
251
|
+
|
|
252
|
+
def main():
|
|
253
|
+
client = Smallest(api_key="YOUR_API_KEY")
|
|
254
|
+
res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
255
|
+
print(res)
|
|
256
|
+
|
|
257
|
+
if __name__ == "__main__":
|
|
258
|
+
main()
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Asynchronously
|
|
262
|
+
```python
|
|
263
|
+
import asyncio
|
|
264
|
+
from smallest import AsyncSmallest
|
|
265
|
+
|
|
266
|
+
async def main():
|
|
267
|
+
client = AsyncSmallest(api_key="YOUR_API_KEY")
|
|
268
|
+
res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
269
|
+
print(res)
|
|
270
|
+
|
|
271
|
+
if __name__ == "__main__":
|
|
272
|
+
asyncio.run(main())
|
|
273
|
+
```
|
|
233
274
|
|
|
234
275
|
## Available Methods
|
|
235
276
|
|
|
236
277
|
```python
|
|
237
|
-
from smallest
|
|
278
|
+
from smallest import Smallest
|
|
238
279
|
|
|
239
|
-
client = Smallest()
|
|
280
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
240
281
|
|
|
241
|
-
print(f"
|
|
242
|
-
print(f"Available Voices: {client.get_voices()}")
|
|
282
|
+
print(f"Available Languages: {client.get_languages()}")
|
|
283
|
+
print(f"Available Voices: {client.get_voices(model='lightning')}")
|
|
284
|
+
print(f"Cloned Voices: {client.get_cloned_voices()}")
|
|
243
285
|
print(f"Available Models: {client.get_models()}")
|
|
244
286
|
```
|
|
245
287
|
|
|
@@ -254,7 +296,7 @@ When implementing audio streaming with chunks of synthesized speech, WAV headers
|
|
|
254
296
|
- Sequential playback of chunks with headers causes audio artifacts (pop sounds) when concatenating or playing audio sequentially.
|
|
255
297
|
- Audio players would try to reinitialize audio settings for each chunk.
|
|
256
298
|
|
|
257
|
-
### Best Practices
|
|
299
|
+
### Best Practices for Audio Streaming
|
|
258
300
|
1. Stream raw PCM audio data without headers
|
|
259
301
|
2. Add a single WAV header only when:
|
|
260
302
|
- Saving the complete stream to a file
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
|
|
2
|
+
smallest/async_tts.py,sha256=5qW7owlMeSWFx0rpn9dYfbO76mmNY0DXcytNjLfbbz8,9727
|
|
3
|
+
smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
|
|
4
|
+
smallest/models.py,sha256=g2e_4nU5P48vyXZandKLWqZC1TkoEGeLvYKqJIqurSI,83
|
|
5
|
+
smallest/stream_tts.py,sha256=SeP9A9zXJWiV62Eezv0L1J5sRIR304Llc_mwVtOOSUI,6348
|
|
6
|
+
smallest/tts.py,sha256=xBBEk_byRPGT6SYkE6qvhfEupgHl6XBdAqtxmzw2rF8,8311
|
|
7
|
+
smallest/utils.py,sha256=FCZkvbbHJBoN0jpBSqmt1hJjvks56t8i82we4XnqjYk,3016
|
|
8
|
+
smallestai-2.0.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
|
|
9
|
+
smallestai-2.0.0.dist-info/METADATA,sha256=EIyZZqzAvHgQ7jfEs5x5LUx3HjzoCUhzJoXfkb3CuoI,11538
|
|
10
|
+
smallestai-2.0.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
11
|
+
smallestai-2.0.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
|
|
12
|
+
smallestai-2.0.0.dist-info/RECORD,,
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
|
|
2
|
-
smallest/async_tts.py,sha256=ReYTePzR0e3UcWxwnetGnwO4q_F7g2LRZPxIVfmgB4Y,6120
|
|
3
|
-
smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
|
|
4
|
-
smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
|
|
5
|
-
smallest/stream_tts.py,sha256=0OypcUzgP7CN3VGcGJDnQ2FDw2JOzPaSQ1cXK69k5dY,6198
|
|
6
|
-
smallest/tts.py,sha256=l8VHaOE8-Feg3Ey8C3osOrLs3ffYz0q_J1ACiEtZ8y0,5999
|
|
7
|
-
smallest/utils.py,sha256=hrta82o-rJRaOHTVKqHqC86_T56jAuvqJHIizAEqFok,3796
|
|
8
|
-
smallestai-1.3.3.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
|
|
9
|
-
smallestai-1.3.3.dist-info/METADATA,sha256=ji7TsSt2C9_GGNjRuh8DAyBXipVRuFeouLoGi76JFAU,9856
|
|
10
|
-
smallestai-1.3.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
11
|
-
smallestai-1.3.3.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
|
|
12
|
-
smallestai-1.3.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|