smallestai 1.3.4__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of smallestai might be problematic. Click here for more details.
- {smallestai-1.3.4/smallestai.egg-info → smallestai-2.0.0}/PKG-INFO +54 -22
- {smallestai-1.3.4 → smallestai-2.0.0}/README.md +52 -20
- {smallestai-1.3.4 → smallestai-2.0.0}/pyproject.toml +1 -1
- smallestai-2.0.0/smallest/async_tts.py +260 -0
- smallestai-2.0.0/smallest/models.py +5 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/smallest/stream_tts.py +19 -22
- {smallestai-1.3.4 → smallestai-2.0.0}/smallest/tts.py +85 -21
- {smallestai-1.3.4 → smallestai-2.0.0}/smallest/utils.py +19 -32
- {smallestai-1.3.4 → smallestai-2.0.0/smallestai.egg-info}/PKG-INFO +54 -22
- smallestai-2.0.0/tests/test_utils.py +47 -0
- smallestai-1.3.4/smallest/async_tts.py +0 -157
- smallestai-1.3.4/smallest/models.py +0 -23
- smallestai-1.3.4/tests/test_utils.py +0 -43
- {smallestai-1.3.4 → smallestai-2.0.0}/LICENSE +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/setup.cfg +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/smallest/__init__.py +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/smallest/exceptions.py +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/smallestai.egg-info/SOURCES.txt +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/smallestai.egg-info/dependency_links.txt +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/smallestai.egg-info/requires.txt +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/smallestai.egg-info/top_level.txt +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/tests/test_async.py +0 -0
- {smallestai-1.3.4 → smallestai-2.0.0}/tests/test_sync.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: smallestai
|
|
3
|
-
Version: 1.3.4
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: Official Python client for the Smallest AI API
|
|
5
5
|
Author-email: Smallest <support@smallest.ai>
|
|
6
6
|
License: MIT
|
|
@@ -55,9 +55,12 @@ Currently, the library supports direct synthesis and the ability to synthesize s
|
|
|
55
55
|
- [Get the API Key](#get-the-api-key)
|
|
56
56
|
- [Best Practices for Input Text](#best-practices-for-input-text)
|
|
57
57
|
- [Examples](#examples)
|
|
58
|
-
- [
|
|
59
|
-
- [
|
|
58
|
+
- [Synchronous](#synchronous)
|
|
59
|
+
- [Asynchronous](#asynchronous)
|
|
60
60
|
- [LLM to Speech](#llm-to-speech)
|
|
61
|
+
- [Add your Voice](#add-your-voice)
|
|
62
|
+
- [Synchronously](#synchronously)
|
|
63
|
+
- [Asynchronously](#asynchronously)
|
|
61
64
|
- [Available Methods](#available-methods)
|
|
62
65
|
- [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
|
|
63
66
|
|
|
@@ -88,17 +91,19 @@ For optimal voice generation results:
|
|
|
88
91
|
|
|
89
92
|
## Examples
|
|
90
93
|
|
|
91
|
-
###
|
|
94
|
+
### Synchronous
|
|
92
95
|
A synchronous text-to-speech synthesis client.
|
|
93
96
|
|
|
94
97
|
**Basic Usage:**
|
|
95
98
|
```python
|
|
96
|
-
import os
|
|
97
99
|
from smallest import Smallest
|
|
98
100
|
|
|
99
101
|
def main():
|
|
100
|
-
client = Smallest(api_key=
|
|
101
|
-
client.synthesize(
|
|
102
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
103
|
+
client.synthesize(
|
|
104
|
+
text="Hello, this is a test for sync synthesis function.",
|
|
105
|
+
save_as="sync_synthesize.wav"
|
|
106
|
+
)
|
|
102
107
|
|
|
103
108
|
if __name__ == "__main__":
|
|
104
109
|
main()
|
|
@@ -108,7 +113,7 @@ if __name__ == "__main__":
|
|
|
108
113
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
109
114
|
- `model`: TTS model to use (default: "lightning")
|
|
110
115
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
111
|
-
- `
|
|
116
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
112
117
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
113
118
|
- `add_wav_header`: Include WAV header in output (default: True)
|
|
114
119
|
- `transliterate`: Enable text transliteration (default: False)
|
|
@@ -127,17 +132,16 @@ client.synthesize(
|
|
|
127
132
|
```
|
|
128
133
|
|
|
129
134
|
|
|
130
|
-
###
|
|
135
|
+
### Asynchronous
|
|
131
136
|
Asynchronous text-to-speech synthesis client.
|
|
132
137
|
|
|
133
138
|
**Basic Usage:**
|
|
134
139
|
```python
|
|
135
|
-
import os
|
|
136
140
|
import asyncio
|
|
137
141
|
import aiofiles
|
|
138
142
|
from smallest import AsyncSmallest
|
|
139
143
|
|
|
140
|
-
client = AsyncSmallest(api_key=
|
|
144
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
141
145
|
|
|
142
146
|
async def main():
|
|
143
147
|
async with client as tts:
|
|
@@ -153,7 +157,7 @@ if __name__ == "__main__":
|
|
|
153
157
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
154
158
|
- `model`: TTS model to use (default: "lightning")
|
|
155
159
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
156
|
-
- `
|
|
160
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
157
161
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
158
162
|
- `add_wav_header`: Include WAV header in output (default: True)
|
|
159
163
|
- `transliterate`: Enable text transliteration (default: False)
|
|
@@ -175,15 +179,13 @@ audio_bytes = await tts.synthesize(
|
|
|
175
179
|
The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
|
|
176
180
|
|
|
177
181
|
```python
|
|
178
|
-
import os
|
|
179
182
|
import wave
|
|
180
183
|
import asyncio
|
|
181
184
|
from groq import Groq
|
|
182
|
-
from smallest import Smallest
|
|
183
|
-
from smallest import TextToAudioStream
|
|
185
|
+
from smallest import Smallest, TextToAudioStream
|
|
184
186
|
|
|
185
|
-
llm = Groq(api_key=
|
|
186
|
-
tts = Smallest(api_key=
|
|
187
|
+
llm = Groq(api_key="GROQ_API_KEY")
|
|
188
|
+
tts = Smallest(api_key="SMALLEST_API_KEY")
|
|
187
189
|
|
|
188
190
|
async def generate_text(prompt):
|
|
189
191
|
"""Async generator for streaming text from Groq. You can use any LLM"""
|
|
@@ -240,16 +242,46 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
|
|
|
240
242
|
- Streamed over a network
|
|
241
243
|
- Further processed as needed
|
|
242
244
|
|
|
245
|
+
## Add your Voice
|
|
246
|
+
The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
|
|
247
|
+
|
|
248
|
+
### Synchronously
|
|
249
|
+
```python
|
|
250
|
+
from smallest import Smallest
|
|
251
|
+
|
|
252
|
+
def main():
|
|
253
|
+
client = Smallest(api_key="YOUR_API_KEY")
|
|
254
|
+
res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
255
|
+
print(res)
|
|
256
|
+
|
|
257
|
+
if __name__ == "__main__":
|
|
258
|
+
main()
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Asynchronously
|
|
262
|
+
```python
|
|
263
|
+
import asyncio
|
|
264
|
+
from smallest import AsyncSmallest
|
|
265
|
+
|
|
266
|
+
async def main():
|
|
267
|
+
client = AsyncSmallest(api_key="YOUR_API_KEY")
|
|
268
|
+
res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
269
|
+
print(res)
|
|
270
|
+
|
|
271
|
+
if __name__ == "__main__":
|
|
272
|
+
asyncio.run(main())
|
|
273
|
+
```
|
|
243
274
|
|
|
244
275
|
## Available Methods
|
|
245
276
|
|
|
246
277
|
```python
|
|
247
|
-
from smallest
|
|
278
|
+
from smallest import Smallest
|
|
248
279
|
|
|
249
|
-
client = Smallest(api_key=
|
|
280
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
250
281
|
|
|
251
|
-
print(f"
|
|
252
|
-
print(f"Available Voices: {client.get_voices()}")
|
|
282
|
+
print(f"Available Languages: {client.get_languages()}")
|
|
283
|
+
print(f"Available Voices: {client.get_voices(model='lightning')}")
|
|
284
|
+
print(f"Available Voices: {client.get_cloned_voices()}")
|
|
253
285
|
print(f"Available Models: {client.get_models()}")
|
|
254
286
|
```
|
|
255
287
|
|
|
@@ -28,9 +28,12 @@ Currently, the library supports direct synthesis and the ability to synthesize s
|
|
|
28
28
|
- [Get the API Key](#get-the-api-key)
|
|
29
29
|
- [Best Practices for Input Text](#best-practices-for-input-text)
|
|
30
30
|
- [Examples](#examples)
|
|
31
|
-
- [
|
|
32
|
-
- [
|
|
31
|
+
- [Synchronous](#synchronous)
|
|
32
|
+
- [Asynchronous](#asynchronous)
|
|
33
33
|
- [LLM to Speech](#llm-to-speech)
|
|
34
|
+
- [Add your Voice](#add-your-voice)
|
|
35
|
+
- [Synchronously](#synchronously)
|
|
36
|
+
- [Asynchronously](#asynchronously)
|
|
34
37
|
- [Available Methods](#available-methods)
|
|
35
38
|
- [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
|
|
36
39
|
|
|
@@ -61,17 +64,19 @@ For optimal voice generation results:
|
|
|
61
64
|
|
|
62
65
|
## Examples
|
|
63
66
|
|
|
64
|
-
###
|
|
67
|
+
### Synchronous
|
|
65
68
|
A synchronous text-to-speech synthesis client.
|
|
66
69
|
|
|
67
70
|
**Basic Usage:**
|
|
68
71
|
```python
|
|
69
|
-
import os
|
|
70
72
|
from smallest import Smallest
|
|
71
73
|
|
|
72
74
|
def main():
|
|
73
|
-
client = Smallest(api_key=
|
|
74
|
-
client.synthesize(
|
|
75
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
76
|
+
client.synthesize(
|
|
77
|
+
text="Hello, this is a test for sync synthesis function.",
|
|
78
|
+
save_as="sync_synthesize.wav"
|
|
79
|
+
)
|
|
75
80
|
|
|
76
81
|
if __name__ == "__main__":
|
|
77
82
|
main()
|
|
@@ -81,7 +86,7 @@ if __name__ == "__main__":
|
|
|
81
86
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
82
87
|
- `model`: TTS model to use (default: "lightning")
|
|
83
88
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
84
|
-
- `
|
|
89
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
85
90
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
86
91
|
- `add_wav_header`: Include WAV header in output (default: True)
|
|
87
92
|
- `transliterate`: Enable text transliteration (default: False)
|
|
@@ -100,17 +105,16 @@ client.synthesize(
|
|
|
100
105
|
```
|
|
101
106
|
|
|
102
107
|
|
|
103
|
-
###
|
|
108
|
+
### Asynchronous
|
|
104
109
|
Asynchronous text-to-speech synthesis client.
|
|
105
110
|
|
|
106
111
|
**Basic Usage:**
|
|
107
112
|
```python
|
|
108
|
-
import os
|
|
109
113
|
import asyncio
|
|
110
114
|
import aiofiles
|
|
111
115
|
from smallest import AsyncSmallest
|
|
112
116
|
|
|
113
|
-
client = AsyncSmallest(api_key=
|
|
117
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
114
118
|
|
|
115
119
|
async def main():
|
|
116
120
|
async with client as tts:
|
|
@@ -126,7 +130,7 @@ if __name__ == "__main__":
|
|
|
126
130
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
127
131
|
- `model`: TTS model to use (default: "lightning")
|
|
128
132
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
129
|
-
- `
|
|
133
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
130
134
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
131
135
|
- `add_wav_header`: Include WAV header in output (default: True)
|
|
132
136
|
- `transliterate`: Enable text transliteration (default: False)
|
|
@@ -148,15 +152,13 @@ audio_bytes = await tts.synthesize(
|
|
|
148
152
|
The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
|
|
149
153
|
|
|
150
154
|
```python
|
|
151
|
-
import os
|
|
152
155
|
import wave
|
|
153
156
|
import asyncio
|
|
154
157
|
from groq import Groq
|
|
155
|
-
from smallest import Smallest
|
|
156
|
-
from smallest import TextToAudioStream
|
|
158
|
+
from smallest import Smallest, TextToAudioStream
|
|
157
159
|
|
|
158
|
-
llm = Groq(api_key=
|
|
159
|
-
tts = Smallest(api_key=
|
|
160
|
+
llm = Groq(api_key="GROQ_API_KEY")
|
|
161
|
+
tts = Smallest(api_key="SMALLEST_API_KEY")
|
|
160
162
|
|
|
161
163
|
async def generate_text(prompt):
|
|
162
164
|
"""Async generator for streaming text from Groq. You can use any LLM"""
|
|
@@ -213,16 +215,46 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
|
|
|
213
215
|
- Streamed over a network
|
|
214
216
|
- Further processed as needed
|
|
215
217
|
|
|
218
|
+
## Add your Voice
|
|
219
|
+
The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
|
|
220
|
+
|
|
221
|
+
### Synchronously
|
|
222
|
+
```python
|
|
223
|
+
from smallest import Smallest
|
|
224
|
+
|
|
225
|
+
def main():
|
|
226
|
+
client = Smallest(api_key="YOUR_API_KEY")
|
|
227
|
+
res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
228
|
+
print(res)
|
|
229
|
+
|
|
230
|
+
if __name__ == "__main__":
|
|
231
|
+
main()
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### Asynchronously
|
|
235
|
+
```python
|
|
236
|
+
import asyncio
|
|
237
|
+
from smallest import AsyncSmallest
|
|
238
|
+
|
|
239
|
+
async def main():
|
|
240
|
+
client = AsyncSmallest(api_key="YOUR_API_KEY")
|
|
241
|
+
res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
242
|
+
print(res)
|
|
243
|
+
|
|
244
|
+
if __name__ == "__main__":
|
|
245
|
+
asyncio.run(main())
|
|
246
|
+
```
|
|
216
247
|
|
|
217
248
|
## Available Methods
|
|
218
249
|
|
|
219
250
|
```python
|
|
220
|
-
from smallest
|
|
251
|
+
from smallest import Smallest
|
|
221
252
|
|
|
222
|
-
client = Smallest(api_key=
|
|
253
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
223
254
|
|
|
224
|
-
print(f"
|
|
225
|
-
print(f"Available Voices: {client.get_voices()}")
|
|
255
|
+
print(f"Available Languages: {client.get_languages()}")
|
|
256
|
+
print(f"Available Voices: {client.get_voices(model='lightning')}")
|
|
257
|
+
print(f"Available Voices: {client.get_cloned_voices()}")
|
|
226
258
|
print(f"Available Models: {client.get_models()}")
|
|
227
259
|
```
|
|
228
260
|
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import copy
|
|
3
|
+
import json
|
|
4
|
+
import aiohttp
|
|
5
|
+
import aiofiles
|
|
6
|
+
import requests
|
|
7
|
+
from typing import Optional, Union, List
|
|
8
|
+
|
|
9
|
+
from smallest.exceptions import TTSError, APIError
|
|
10
|
+
from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
|
|
11
|
+
get_smallest_languages, get_smallest_models, API_BASE_URL)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AsyncSmallest:
|
|
15
|
+
def __init__(
|
|
16
|
+
self,
|
|
17
|
+
api_key: str = None,
|
|
18
|
+
model: Optional[str] = "lightning",
|
|
19
|
+
sample_rate: Optional[int] = 24000,
|
|
20
|
+
voice_id: Optional[str] = "emily",
|
|
21
|
+
speed: Optional[float] = 1.0,
|
|
22
|
+
add_wav_header: Optional[bool] = True,
|
|
23
|
+
transliterate: Optional[bool] = False,
|
|
24
|
+
remove_extra_silence: Optional[bool] = False
|
|
25
|
+
) -> None:
|
|
26
|
+
"""
|
|
27
|
+
AsyncSmallest Instance for asynchronous text-to-speech synthesis.
|
|
28
|
+
|
|
29
|
+
This class provides an asynchronous implementation of the text-to-speech functionality.
|
|
30
|
+
It allows for non-blocking synthesis of speech from text, making it suitable for applications
|
|
31
|
+
that require async processing.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
- api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
|
|
35
|
+
- model (TTSModels): The model to be used for synthesis.
|
|
36
|
+
- sample_rate (int): The sample rate for the audio output.
|
|
37
|
+
- voice_id (TTSVoices): The voice to be used for synthesis.
|
|
38
|
+
- speed (float): The speed of the speech synthesis.
|
|
39
|
+
- add_wav_header (bool): Whether to add a WAV header to the output audio.
|
|
40
|
+
- transliterate (bool): Whether to transliterate the text.
|
|
41
|
+
- remove_extra_silence (bool): Whether to remove extra silence from the synthesized audio.
|
|
42
|
+
|
|
43
|
+
Methods:
|
|
44
|
+
- get_languages: Returns a list of available languages for synthesis.
|
|
45
|
+
- get_voices: Returns a list of available voices for synthesis.
|
|
46
|
+
- get_models: Returns a list of available models for synthesis.
|
|
47
|
+
- synthesize: Asynchronously converts the provided text into speech and returns the audio content.
|
|
48
|
+
"""
|
|
49
|
+
self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
|
|
50
|
+
if not self.api_key:
|
|
51
|
+
raise TTSError()
|
|
52
|
+
self.chunk_size = 250
|
|
53
|
+
|
|
54
|
+
self.opts = TTSOptions(
|
|
55
|
+
model=model,
|
|
56
|
+
sample_rate=sample_rate,
|
|
57
|
+
voice_id=voice_id,
|
|
58
|
+
api_key=self.api_key,
|
|
59
|
+
add_wav_header=add_wav_header,
|
|
60
|
+
speed=speed,
|
|
61
|
+
transliterate=transliterate,
|
|
62
|
+
remove_extra_silence=remove_extra_silence,
|
|
63
|
+
)
|
|
64
|
+
self.session = None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
async def __aenter__(self):
|
|
68
|
+
if self.session is None:
|
|
69
|
+
self.session = aiohttp.ClientSession()
|
|
70
|
+
return self
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
74
|
+
if self.session:
|
|
75
|
+
await self.session.close()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
async def _ensure_session(self):
|
|
79
|
+
"""Ensure session exists for direct calls"""
|
|
80
|
+
if not self.session:
|
|
81
|
+
self.session = aiohttp.ClientSession()
|
|
82
|
+
return True
|
|
83
|
+
return False
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_languages(self) -> List[str]:
|
|
87
|
+
"""Returns a list of available languages."""
|
|
88
|
+
return get_smallest_languages()
|
|
89
|
+
|
|
90
|
+
def get_cloned_voices(self) -> str:
|
|
91
|
+
"""Returns a list of your cloned voices."""
|
|
92
|
+
headers = {
|
|
93
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
res = requests.request("GET", f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
|
|
97
|
+
if res.status_code != 200:
|
|
98
|
+
raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")
|
|
99
|
+
|
|
100
|
+
return json.dumps(res.json(), indent=4, ensure_ascii=False)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def get_voices(
|
|
104
|
+
self,
|
|
105
|
+
model: Optional[str] = "lightning"
|
|
106
|
+
) -> str:
|
|
107
|
+
"""Returns a list of available voices."""
|
|
108
|
+
headers = {
|
|
109
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
res = requests.request("GET", f"{API_BASE_URL}/{model}/get_voices", headers=headers)
|
|
113
|
+
if res.status_code != 200:
|
|
114
|
+
raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")
|
|
115
|
+
|
|
116
|
+
return json.dumps(res.json(), indent=4, ensure_ascii=False)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def get_models(self) -> List[str]:
|
|
120
|
+
"""Returns a list of available models."""
|
|
121
|
+
return get_smallest_models()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
async def synthesize(
|
|
125
|
+
self,
|
|
126
|
+
text: str,
|
|
127
|
+
save_as: Optional[str] = None,
|
|
128
|
+
**kwargs
|
|
129
|
+
) -> Union[bytes, None]:
|
|
130
|
+
"""
|
|
131
|
+
Asynchronously synthesize speech from the provided text.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
- text (str): The text to be converted to speech.
|
|
135
|
+
- save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
|
|
136
|
+
The file must have a .wav extension.
|
|
137
|
+
- kwargs: Additional optional parameters to override `__init__` options for this call.
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
- Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
|
|
141
|
+
otherwise, returns None after saving the audio to the specified file.
|
|
142
|
+
|
|
143
|
+
Raises:
|
|
144
|
+
- TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
|
|
145
|
+
- APIError: If the API request fails or returns an error.
|
|
146
|
+
"""
|
|
147
|
+
should_cleanup = await self._ensure_session()
|
|
148
|
+
|
|
149
|
+
try:
|
|
150
|
+
opts = copy.deepcopy(self.opts)
|
|
151
|
+
for key, value in kwargs.items():
|
|
152
|
+
setattr(opts, key, value)
|
|
153
|
+
|
|
154
|
+
validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)
|
|
155
|
+
|
|
156
|
+
self.chunk_size = 250
|
|
157
|
+
if opts.model == 'ligtning-large':
|
|
158
|
+
self.chunk_size = 140
|
|
159
|
+
|
|
160
|
+
chunks = chunk_text(text, self.chunk_size)
|
|
161
|
+
audio_content = b""
|
|
162
|
+
|
|
163
|
+
for chunk in chunks:
|
|
164
|
+
payload = {
|
|
165
|
+
"text": preprocess_text(chunk),
|
|
166
|
+
"sample_rate": opts.sample_rate,
|
|
167
|
+
"voice_id": opts.voice_id,
|
|
168
|
+
"add_wav_header": False,
|
|
169
|
+
"speed": opts.speed,
|
|
170
|
+
"model": opts.model,
|
|
171
|
+
"transliterate": opts.transliterate,
|
|
172
|
+
"remove_extra_silence": opts.remove_extra_silence
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
headers = {
|
|
176
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
177
|
+
"Content-Type": "application/json",
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
if not self.session:
|
|
181
|
+
self.session = aiohttp.ClientSession()
|
|
182
|
+
|
|
183
|
+
async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
|
|
184
|
+
if res.status != 200:
|
|
185
|
+
raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
|
|
186
|
+
|
|
187
|
+
audio_content += await res.read()
|
|
188
|
+
|
|
189
|
+
if save_as:
|
|
190
|
+
if not save_as.endswith(".wav"):
|
|
191
|
+
raise TTSError("Invalid file name. Extension must be .wav")
|
|
192
|
+
|
|
193
|
+
async with aiofiles.open(save_as, mode='wb') as f:
|
|
194
|
+
await f.write(add_wav_header(audio_content, opts.sample_rate))
|
|
195
|
+
|
|
196
|
+
return None
|
|
197
|
+
|
|
198
|
+
if opts.add_wav_header:
|
|
199
|
+
return add_wav_header(audio_content, opts.sample_rate)
|
|
200
|
+
|
|
201
|
+
return audio_content
|
|
202
|
+
|
|
203
|
+
finally:
|
|
204
|
+
if should_cleanup and self.session:
|
|
205
|
+
await self.session.close()
|
|
206
|
+
self.session = None
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def add_voice(self, display_name: str, file_path: str) -> str:
|
|
210
|
+
"""
|
|
211
|
+
Instantly clone your voice asynchronously.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
- display_name (str): The display name for the new voice.
|
|
215
|
+
- file_path (str): The path to the reference audio file to be cloned.
|
|
216
|
+
|
|
217
|
+
Returns:
|
|
218
|
+
- str: The response from the API as a formatted JSON string.
|
|
219
|
+
|
|
220
|
+
Raises:
|
|
221
|
+
- TTSError: If the file does not exist or is not a valid audio file.
|
|
222
|
+
- APIError: If the API request fails or returns an error.
|
|
223
|
+
"""
|
|
224
|
+
url = f"{API_BASE_URL}/lightning-large/add_voice"
|
|
225
|
+
|
|
226
|
+
if not os.path.exists(file_path):
|
|
227
|
+
raise TTSError("Invalid file path. File does not exist.")
|
|
228
|
+
|
|
229
|
+
ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
|
|
230
|
+
file_extension = os.path.splitext(file_path)[1].lower()
|
|
231
|
+
if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
|
|
232
|
+
raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")
|
|
233
|
+
|
|
234
|
+
headers = {
|
|
235
|
+
'Authorization': f"Bearer {self.api_key}",
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
should_cleanup = await self._ensure_session()
|
|
239
|
+
|
|
240
|
+
try:
|
|
241
|
+
async with aiofiles.open(file_path, 'rb') as f:
|
|
242
|
+
file_data = await f.read()
|
|
243
|
+
|
|
244
|
+
data = aiohttp.FormData()
|
|
245
|
+
content_type = file_extension[1:]
|
|
246
|
+
|
|
247
|
+
data.add_field('displayName', display_name)
|
|
248
|
+
data.add_field('file', file_data, filename=file_path, content_type=f"audio/{content_type}")
|
|
249
|
+
|
|
250
|
+
async with self.session.post(url, headers=headers, data=data) as res:
|
|
251
|
+
if res.status != 200:
|
|
252
|
+
raise APIError(f"Failed to add voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
|
|
253
|
+
|
|
254
|
+
return json.dumps(await res.json(), indent=4, ensure_ascii=False)
|
|
255
|
+
|
|
256
|
+
finally:
|
|
257
|
+
if should_cleanup and self.session:
|
|
258
|
+
await self.session.close()
|
|
259
|
+
self.session = None
|
|
260
|
+
|
|
@@ -12,8 +12,8 @@ class TextToAudioStream:
|
|
|
12
12
|
def __init__(
|
|
13
13
|
self,
|
|
14
14
|
tts_instance: Union[Smallest, AsyncSmallest],
|
|
15
|
-
queue_timeout: float = 5.0,
|
|
16
|
-
max_retries: int = 3
|
|
15
|
+
queue_timeout: Optional[float] = 5.0,
|
|
16
|
+
max_retries: Optional[int] = 3
|
|
17
17
|
):
|
|
18
18
|
"""
|
|
19
19
|
A real-time text-to-speech processor that converts streaming text into audio output.
|
|
@@ -35,7 +35,6 @@ class TextToAudioStream:
|
|
|
35
35
|
"""
|
|
36
36
|
self.tts_instance = tts_instance
|
|
37
37
|
self.tts_instance.opts.add_wav_header = False
|
|
38
|
-
|
|
39
38
|
self.sentence_end_regex = SENTENCE_END_REGEX
|
|
40
39
|
self.queue_timeout = queue_timeout
|
|
41
40
|
self.max_retries = max_retries
|
|
@@ -43,6 +42,9 @@ class TextToAudioStream:
|
|
|
43
42
|
self.buffer_size = 250
|
|
44
43
|
self.stop_flag = False
|
|
45
44
|
|
|
45
|
+
if self.tts_instance.opts.model == 'lightning-large':
|
|
46
|
+
self.buffer_size = 140
|
|
47
|
+
|
|
46
48
|
|
|
47
49
|
async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
|
|
48
50
|
"""
|
|
@@ -58,51 +60,46 @@ class TextToAudioStream:
|
|
|
58
60
|
async for chunk in llm_output:
|
|
59
61
|
buffer += chunk
|
|
60
62
|
i = 0
|
|
61
|
-
|
|
62
63
|
while i < len(buffer):
|
|
63
64
|
current_chunk = buffer[:i + 1]
|
|
64
65
|
if self.sentence_end_regex.match(current_chunk):
|
|
65
66
|
last_break_index = i
|
|
66
|
-
|
|
67
67
|
if len(current_chunk) >= self.buffer_size:
|
|
68
68
|
if last_break_index > 0:
|
|
69
|
-
self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
|
|
69
|
+
self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ')
|
|
70
70
|
buffer = buffer[last_break_index + 1:]
|
|
71
71
|
else:
|
|
72
72
|
# No sentence boundary, split at max chunk size
|
|
73
|
-
self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
|
|
73
|
+
self.queue.put(f'{buffer[:self.buffer_size].replace("—", " ").strip()} ')
|
|
74
74
|
buffer = buffer[self.buffer_size:]
|
|
75
|
-
|
|
76
75
|
last_break_index = 0
|
|
77
76
|
i = -1
|
|
78
|
-
|
|
79
77
|
i += 1
|
|
80
|
-
|
|
78
|
+
|
|
81
79
|
if buffer:
|
|
82
|
-
self.queue.put(buffer.replace("—", " ").strip())
|
|
83
|
-
|
|
84
|
-
self.stop_flag = True # completion flag when LLM output ends
|
|
80
|
+
self.queue.put(f'{buffer.replace("—", " ").strip()} ')
|
|
81
|
+
self.stop_flag = True
|
|
85
82
|
|
|
86
83
|
|
|
87
|
-
|
|
88
|
-
"""
|
|
84
|
+
def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
|
|
85
|
+
"""Synchronously synthesizes a given sentence."""
|
|
89
86
|
try:
|
|
90
|
-
return
|
|
87
|
+
return self.tts_instance.synthesize(sentence)
|
|
91
88
|
except APIError as e:
|
|
92
89
|
if retries < self.max_retries:
|
|
93
|
-
return
|
|
90
|
+
return self._synthesize_sync(sentence, retries + 1)
|
|
94
91
|
else:
|
|
95
92
|
print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
|
|
96
93
|
return None
|
|
94
|
+
|
|
97
95
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
"""Synchronously synthesizes a given sentence."""
|
|
96
|
+
async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
|
|
97
|
+
"""Asynchronously synthesizes a given sentence."""
|
|
101
98
|
try:
|
|
102
|
-
return self.tts_instance.synthesize(sentence)
|
|
99
|
+
return await self.tts_instance.synthesize(sentence)
|
|
103
100
|
except APIError as e:
|
|
104
101
|
if retries < self.max_retries:
|
|
105
|
-
return self.
|
|
102
|
+
return await self._synthesize_async(sentence, retries + 1)
|
|
106
103
|
else:
|
|
107
104
|
print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
|
|
108
105
|
return None
|