smallestai 1.3.4__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of smallestai might be problematic. Click here for more details.
- {smallestai-1.3.4/smallestai.egg-info → smallestai-2.1.0}/PKG-INFO +164 -37
- {smallestai-1.3.4 → smallestai-2.1.0}/README.md +162 -35
- {smallestai-1.3.4 → smallestai-2.1.0}/pyproject.toml +1 -1
- smallestai-2.1.0/smallest/async_tts.py +311 -0
- smallestai-2.1.0/smallest/models.py +5 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/smallest/stream_tts.py +49 -44
- smallestai-2.1.0/smallest/tts.py +253 -0
- smallestai-2.1.0/smallest/utils.py +97 -0
- {smallestai-1.3.4 → smallestai-2.1.0/smallestai.egg-info}/PKG-INFO +164 -37
- {smallestai-1.3.4 → smallestai-2.1.0}/tests/test_async.py +1 -1
- {smallestai-1.3.4 → smallestai-2.1.0}/tests/test_sync.py +1 -1
- smallestai-2.1.0/tests/test_utils.py +47 -0
- smallestai-1.3.4/smallest/async_tts.py +0 -157
- smallestai-1.3.4/smallest/models.py +0 -23
- smallestai-1.3.4/smallest/tts.py +0 -150
- smallestai-1.3.4/smallest/utils.py +0 -109
- smallestai-1.3.4/tests/test_utils.py +0 -43
- {smallestai-1.3.4 → smallestai-2.1.0}/LICENSE +0 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/setup.cfg +0 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/smallest/__init__.py +0 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/smallest/exceptions.py +0 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/smallestai.egg-info/SOURCES.txt +0 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/smallestai.egg-info/dependency_links.txt +0 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/smallestai.egg-info/requires.txt +0 -0
- {smallestai-1.3.4 → smallestai-2.1.0}/smallestai.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: smallestai
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 2.1.0
|
|
4
4
|
Summary: Official Python client for the Smallest AI API
|
|
5
5
|
Author-email: Smallest <support@smallest.ai>
|
|
6
6
|
License: MIT
|
|
@@ -55,9 +55,15 @@ Currently, the library supports direct synthesis and the ability to synthesize s
|
|
|
55
55
|
- [Get the API Key](#get-the-api-key)
|
|
56
56
|
- [Best Practices for Input Text](#best-practices-for-input-text)
|
|
57
57
|
- [Examples](#examples)
|
|
58
|
-
- [
|
|
59
|
-
- [
|
|
58
|
+
- [Synchronous](#synchronous)
|
|
59
|
+
- [Asynchronous](#asynchronous)
|
|
60
60
|
- [LLM to Speech](#llm-to-speech)
|
|
61
|
+
- [Add your Voice](#add-your-voice)
|
|
62
|
+
- [Synchronously](#add-synchronously)
|
|
63
|
+
- [Asynchronously](#add-asynchronously)
|
|
64
|
+
- [Delete your Voice](#delete-your-voice)
|
|
65
|
+
- [Synchronously](#delete-synchronously)
|
|
66
|
+
- [Asynchronously](#delete-asynchronously)
|
|
61
67
|
- [Available Methods](#available-methods)
|
|
62
68
|
- [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
|
|
63
69
|
|
|
@@ -77,28 +83,22 @@ When using an SDK in your application, make sure to pin to at least the major ve
|
|
|
77
83
|
3. Create a new API Key and copy it.
|
|
78
84
|
4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
|
|
79
85
|
|
|
80
|
-
## Best Practices for Input Text
|
|
81
|
-
While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
|
|
82
|
-
|
|
83
|
-
For optimal voice generation results:
|
|
84
|
-
|
|
85
|
-
1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
|
|
86
|
-
2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
|
|
87
|
-
3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
|
|
88
86
|
|
|
89
87
|
## Examples
|
|
90
88
|
|
|
91
|
-
###
|
|
89
|
+
### Synchronous
|
|
92
90
|
A synchronous text-to-speech synthesis client.
|
|
93
91
|
|
|
94
92
|
**Basic Usage:**
|
|
95
93
|
```python
|
|
96
|
-
import os
|
|
97
94
|
from smallest import Smallest
|
|
98
95
|
|
|
99
96
|
def main():
|
|
100
|
-
client = Smallest(api_key=
|
|
101
|
-
client.synthesize(
|
|
97
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
98
|
+
client.synthesize(
|
|
99
|
+
text="Hello, this is a test for sync synthesis function.",
|
|
100
|
+
save_as="sync_synthesize.wav"
|
|
101
|
+
)
|
|
102
102
|
|
|
103
103
|
if __name__ == "__main__":
|
|
104
104
|
main()
|
|
@@ -108,11 +108,12 @@ if __name__ == "__main__":
|
|
|
108
108
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
109
109
|
- `model`: TTS model to use (default: "lightning")
|
|
110
110
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
111
|
-
- `
|
|
111
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
112
112
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
113
|
-
- `
|
|
114
|
-
- `
|
|
115
|
-
- `
|
|
113
|
+
- `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. (default: 0.5)
|
|
114
|
+
- `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. (default: 0)
|
|
115
|
+
- `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. (default: False)
|
|
116
|
+
- `add_wav_header`: Whether to add a WAV header to the output audio.
|
|
116
117
|
|
|
117
118
|
These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
|
|
118
119
|
|
|
@@ -127,19 +128,17 @@ client.synthesize(
|
|
|
127
128
|
```
|
|
128
129
|
|
|
129
130
|
|
|
130
|
-
###
|
|
131
|
+
### Asynchronous
|
|
131
132
|
Asynchronous text-to-speech synthesis client.
|
|
132
133
|
|
|
133
134
|
**Basic Usage:**
|
|
134
135
|
```python
|
|
135
|
-
import os
|
|
136
136
|
import asyncio
|
|
137
137
|
import aiofiles
|
|
138
138
|
from smallest import AsyncSmallest
|
|
139
139
|
|
|
140
|
-
client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
|
|
141
|
-
|
|
142
140
|
async def main():
|
|
141
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
143
142
|
async with client as tts:
|
|
144
143
|
audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
|
|
145
144
|
async with aiofiles.open("async_synthesize.wav", "wb") as f:
|
|
@@ -149,15 +148,33 @@ if __name__ == "__main__":
|
|
|
149
148
|
asyncio.run(main())
|
|
150
149
|
```
|
|
151
150
|
|
|
151
|
+
**Running Asynchronously in a Jupyter Notebook**
|
|
152
|
+
If you are using a Jupyter Notebook, use the following approach to execute the asynchronous function within an existing event loop:
|
|
153
|
+
```python
|
|
154
|
+
import asyncio
|
|
155
|
+
import aiofiles
|
|
156
|
+
from smallest import AsyncSmallest
|
|
157
|
+
|
|
158
|
+
async def main():
|
|
159
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
160
|
+
async with client as tts:
|
|
161
|
+
audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
|
|
162
|
+
async with aiofiles.open("async_synthesize.wav", "wb") as f:
|
|
163
|
+
await f.write(audio_bytes) # alternatively you can use the `save_as` parameter.
|
|
164
|
+
|
|
165
|
+
await main()
|
|
166
|
+
```
|
|
167
|
+
|
|
152
168
|
**Parameters:**
|
|
153
169
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
154
170
|
- `model`: TTS model to use (default: "lightning")
|
|
155
171
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
156
|
-
- `
|
|
172
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
157
173
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
158
|
-
- `
|
|
159
|
-
- `
|
|
160
|
-
- `
|
|
174
|
+
- `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
|
|
175
|
+
- `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
|
|
176
|
+
- `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
|
|
177
|
+
- `add_wav_header`: Whether to add a WAV header to the output audio.
|
|
161
178
|
|
|
162
179
|
These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.
|
|
163
180
|
|
|
@@ -174,16 +191,66 @@ audio_bytes = await tts.synthesize(
|
|
|
174
191
|
|
|
175
192
|
The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
|
|
176
193
|
|
|
194
|
+
#### Stream through a WebSocket
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
import asyncio
|
|
198
|
+
import websockets
|
|
199
|
+
from groq import Groq
|
|
200
|
+
from smallest import Smallest, TextToAudioStream
|
|
201
|
+
|
|
202
|
+
# Initialize Groq (LLM) and Smallest (TTS) instances
|
|
203
|
+
llm = Groq(api_key="GROQ_API_KEY")
|
|
204
|
+
tts = Smallest(api_key="SMALLEST_API_KEY")
|
|
205
|
+
WEBSOCKET_URL = "wss://echo.websocket.events" # Mock WebSocket server
|
|
206
|
+
|
|
207
|
+
# Async function to stream text generation from LLM
|
|
208
|
+
async def generate_text(prompt):
|
|
209
|
+
completion = llm.chat.completions.create(
|
|
210
|
+
messages=[{"role": "user", "content": prompt}],
|
|
211
|
+
model="llama3-8b-8192",
|
|
212
|
+
stream=True,
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
# Yield text as it is generated
|
|
216
|
+
for chunk in completion:
|
|
217
|
+
text = chunk.choices[0].delta.content
|
|
218
|
+
if text:
|
|
219
|
+
yield text
|
|
220
|
+
|
|
221
|
+
# Main function to run the process
|
|
222
|
+
async def main():
|
|
223
|
+
# Initialize the TTS processor
|
|
224
|
+
processor = TextToAudioStream(tts_instance=tts)
|
|
225
|
+
|
|
226
|
+
# Generate text from LLM
|
|
227
|
+
llm_output = generate_text("Explain text to speech like I am five in 5 sentences.")
|
|
228
|
+
|
|
229
|
+
# Stream the generated speech through a WebSocket
|
|
230
|
+
async with websockets.connect(WEBSOCKET_URL) as ws:
|
|
231
|
+
print("Connected to WebSocket server.")
|
|
232
|
+
|
|
233
|
+
# Stream the generated speech
|
|
234
|
+
async for audio_chunk in processor.process(llm_output):
|
|
235
|
+
await ws.send(audio_chunk) # Send audio chunk
|
|
236
|
+
echoed_data = await ws.recv() # Receive the echoed message
|
|
237
|
+
print("Received from server:", echoed_data[:20], "...") # Print first 20 bytes
|
|
238
|
+
|
|
239
|
+
print("WebSocket connection closed.")
|
|
240
|
+
|
|
241
|
+
if __name__ == "__main__":
|
|
242
|
+
asyncio.run(main())
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
#### Save to a File
|
|
177
246
|
```python
|
|
178
|
-
import os
|
|
179
247
|
import wave
|
|
180
248
|
import asyncio
|
|
181
249
|
from groq import Groq
|
|
182
|
-
from smallest import Smallest
|
|
183
|
-
from smallest import TextToAudioStream
|
|
250
|
+
from smallest import Smallest, TextToAudioStream
|
|
184
251
|
|
|
185
|
-
llm = Groq(api_key=
|
|
186
|
-
tts = Smallest(api_key=
|
|
252
|
+
llm = Groq(api_key="GROQ_API_KEY")
|
|
253
|
+
tts = Smallest(api_key="SMALLEST_API_KEY")
|
|
187
254
|
|
|
188
255
|
async def generate_text(prompt):
|
|
189
256
|
"""Async generator for streaming text from Groq. You can use any LLM"""
|
|
@@ -240,16 +307,76 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
|
|
|
240
307
|
- Streamed over a network
|
|
241
308
|
- Further processed as needed
|
|
242
309
|
|
|
310
|
+
## Add your Voice
|
|
311
|
+
The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
|
|
312
|
+
|
|
313
|
+
### Add Synchronously
|
|
314
|
+
```python
|
|
315
|
+
from smallest import Smallest
|
|
316
|
+
|
|
317
|
+
def main():
|
|
318
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
319
|
+
res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
320
|
+
print(res)
|
|
321
|
+
|
|
322
|
+
if __name__ == "__main__":
|
|
323
|
+
main()
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Add Asynchronously
|
|
327
|
+
```python
|
|
328
|
+
import asyncio
|
|
329
|
+
from smallest import AsyncSmallest
|
|
330
|
+
|
|
331
|
+
async def main():
|
|
332
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
333
|
+
res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
334
|
+
print(res)
|
|
335
|
+
|
|
336
|
+
if __name__ == "__main__":
|
|
337
|
+
asyncio.run(main())
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
## Delete your Voice
|
|
341
|
+
The Smallest AI SDK allows you to delete your cloned voice. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
|
|
342
|
+
|
|
343
|
+
### Delete Synchronously
|
|
344
|
+
```python
|
|
345
|
+
from smallest import Smallest
|
|
346
|
+
|
|
347
|
+
def main():
|
|
348
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
349
|
+
res = client.delete_voice(voice_id="voice_id")
|
|
350
|
+
print(res)
|
|
351
|
+
|
|
352
|
+
if __name__ == "__main__":
|
|
353
|
+
main()
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
### Delete Asynchronously
|
|
357
|
+
```python
|
|
358
|
+
import asyncio
|
|
359
|
+
from smallest import AsyncSmallest
|
|
360
|
+
|
|
361
|
+
async def main():
|
|
362
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
363
|
+
res = await client.delete_voice(voice_id="voice_id")
|
|
364
|
+
print(res)
|
|
365
|
+
|
|
366
|
+
if __name__ == "__main__":
|
|
367
|
+
asyncio.run(main())
|
|
368
|
+
```
|
|
243
369
|
|
|
244
370
|
## Available Methods
|
|
245
371
|
|
|
246
372
|
```python
|
|
247
|
-
from smallest
|
|
373
|
+
from smallest import Smallest
|
|
248
374
|
|
|
249
|
-
client = Smallest(api_key=
|
|
375
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
250
376
|
|
|
251
|
-
print(f"
|
|
252
|
-
print(f"Available Voices: {client.get_voices()}")
|
|
377
|
+
print(f"Available Languages: {client.get_languages()}")
|
|
378
|
+
print(f"Available Voices: {client.get_voices(model='lightning')}")
|
|
379
|
+
print(f"Available Cloned Voices: {client.get_cloned_voices()}")
|
|
253
380
|
print(f"Available Models: {client.get_models()}")
|
|
254
381
|
```
|
|
255
382
|
|
|
@@ -28,9 +28,15 @@ Currently, the library supports direct synthesis and the ability to synthesize s
|
|
|
28
28
|
- [Get the API Key](#get-the-api-key)
|
|
29
29
|
- [Best Practices for Input Text](#best-practices-for-input-text)
|
|
30
30
|
- [Examples](#examples)
|
|
31
|
-
- [
|
|
32
|
-
- [
|
|
31
|
+
- [Synchronous](#synchronous)
|
|
32
|
+
- [Asynchronous](#asynchronous)
|
|
33
33
|
- [LLM to Speech](#llm-to-speech)
|
|
34
|
+
- [Add your Voice](#add-your-voice)
|
|
35
|
+
- [Synchronously](#add-synchronously)
|
|
36
|
+
- [Asynchronously](#add-asynchronously)
|
|
37
|
+
- [Delete your Voice](#delete-your-voice)
|
|
38
|
+
- [Synchronously](#delete-synchronously)
|
|
39
|
+
- [Asynchronously](#delete-asynchronously)
|
|
34
40
|
- [Available Methods](#available-methods)
|
|
35
41
|
- [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
|
|
36
42
|
|
|
@@ -50,28 +56,22 @@ When using an SDK in your application, make sure to pin to at least the major ve
|
|
|
50
56
|
3. Create a new API Key and copy it.
|
|
51
57
|
4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
|
|
52
58
|
|
|
53
|
-
## Best Practices for Input Text
|
|
54
|
-
While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
|
|
55
|
-
|
|
56
|
-
For optimal voice generation results:
|
|
57
|
-
|
|
58
|
-
1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
|
|
59
|
-
2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
|
|
60
|
-
3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
|
|
61
59
|
|
|
62
60
|
## Examples
|
|
63
61
|
|
|
64
|
-
###
|
|
62
|
+
### Synchronous
|
|
65
63
|
A synchronous text-to-speech synthesis client.
|
|
66
64
|
|
|
67
65
|
**Basic Usage:**
|
|
68
66
|
```python
|
|
69
|
-
import os
|
|
70
67
|
from smallest import Smallest
|
|
71
68
|
|
|
72
69
|
def main():
|
|
73
|
-
client = Smallest(api_key=
|
|
74
|
-
client.synthesize(
|
|
70
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
71
|
+
client.synthesize(
|
|
72
|
+
text="Hello, this is a test for sync synthesis function.",
|
|
73
|
+
save_as="sync_synthesize.wav"
|
|
74
|
+
)
|
|
75
75
|
|
|
76
76
|
if __name__ == "__main__":
|
|
77
77
|
main()
|
|
@@ -81,11 +81,12 @@ if __name__ == "__main__":
|
|
|
81
81
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
82
82
|
- `model`: TTS model to use (default: "lightning")
|
|
83
83
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
84
|
-
- `
|
|
84
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
85
85
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
86
|
-
- `
|
|
87
|
-
- `
|
|
88
|
-
- `
|
|
86
|
+
- `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. (default: 0.5)
|
|
87
|
+
- `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. (default: 0)
|
|
88
|
+
- `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. (default: False)
|
|
89
|
+
- `add_wav_header`: Whether to add a WAV header to the output audio.
|
|
89
90
|
|
|
90
91
|
These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
|
|
91
92
|
|
|
@@ -100,19 +101,17 @@ client.synthesize(
|
|
|
100
101
|
```
|
|
101
102
|
|
|
102
103
|
|
|
103
|
-
###
|
|
104
|
+
### Asynchronous
|
|
104
105
|
Asynchronous text-to-speech synthesis client.
|
|
105
106
|
|
|
106
107
|
**Basic Usage:**
|
|
107
108
|
```python
|
|
108
|
-
import os
|
|
109
109
|
import asyncio
|
|
110
110
|
import aiofiles
|
|
111
111
|
from smallest import AsyncSmallest
|
|
112
112
|
|
|
113
|
-
client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
|
|
114
|
-
|
|
115
113
|
async def main():
|
|
114
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
116
115
|
async with client as tts:
|
|
117
116
|
audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
|
|
118
117
|
async with aiofiles.open("async_synthesize.wav", "wb") as f:
|
|
@@ -122,15 +121,33 @@ if __name__ == "__main__":
|
|
|
122
121
|
asyncio.run(main())
|
|
123
122
|
```
|
|
124
123
|
|
|
124
|
+
**Running Asynchronously in a Jupyter Notebook**
|
|
125
|
+
If you are using a Jupyter Notebook, use the following approach to execute the asynchronous function within an existing event loop:
|
|
126
|
+
```python
|
|
127
|
+
import asyncio
|
|
128
|
+
import aiofiles
|
|
129
|
+
from smallest import AsyncSmallest
|
|
130
|
+
|
|
131
|
+
async def main():
|
|
132
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
133
|
+
async with client as tts:
|
|
134
|
+
audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
|
|
135
|
+
async with aiofiles.open("async_synthesize.wav", "wb") as f:
|
|
136
|
+
await f.write(audio_bytes) # alternatively you can use the `save_as` parameter.
|
|
137
|
+
|
|
138
|
+
await main()
|
|
139
|
+
```
|
|
140
|
+
|
|
125
141
|
**Parameters:**
|
|
126
142
|
- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
|
|
127
143
|
- `model`: TTS model to use (default: "lightning")
|
|
128
144
|
- `sample_rate`: Audio sample rate (default: 24000)
|
|
129
|
-
- `
|
|
145
|
+
- `voice_id`: Voice ID (default: "emily")
|
|
130
146
|
- `speed`: Speech speed multiplier (default: 1.0)
|
|
131
|
-
- `
|
|
132
|
-
- `
|
|
133
|
-
- `
|
|
147
|
+
- `consistency`: Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
|
|
148
|
+
- `similarity`: Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
|
|
149
|
+
- `enhancement`: Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
|
|
150
|
+
- `add_wav_header`: Whether to add a WAV header to the output audio.
|
|
134
151
|
|
|
135
152
|
These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.
|
|
136
153
|
|
|
@@ -147,16 +164,66 @@ audio_bytes = await tts.synthesize(
|
|
|
147
164
|
|
|
148
165
|
The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
|
|
149
166
|
|
|
167
|
+
#### Stream through a WebSocket
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
import asyncio
|
|
171
|
+
import websockets
|
|
172
|
+
from groq import Groq
|
|
173
|
+
from smallest import Smallest, TextToAudioStream
|
|
174
|
+
|
|
175
|
+
# Initialize Groq (LLM) and Smallest (TTS) instances
|
|
176
|
+
llm = Groq(api_key="GROQ_API_KEY")
|
|
177
|
+
tts = Smallest(api_key="SMALLEST_API_KEY")
|
|
178
|
+
WEBSOCKET_URL = "wss://echo.websocket.events" # Mock WebSocket server
|
|
179
|
+
|
|
180
|
+
# Async function to stream text generation from LLM
|
|
181
|
+
async def generate_text(prompt):
|
|
182
|
+
completion = llm.chat.completions.create(
|
|
183
|
+
messages=[{"role": "user", "content": prompt}],
|
|
184
|
+
model="llama3-8b-8192",
|
|
185
|
+
stream=True,
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
# Yield text as it is generated
|
|
189
|
+
for chunk in completion:
|
|
190
|
+
text = chunk.choices[0].delta.content
|
|
191
|
+
if text:
|
|
192
|
+
yield text
|
|
193
|
+
|
|
194
|
+
# Main function to run the process
|
|
195
|
+
async def main():
|
|
196
|
+
# Initialize the TTS processor
|
|
197
|
+
processor = TextToAudioStream(tts_instance=tts)
|
|
198
|
+
|
|
199
|
+
# Generate text from LLM
|
|
200
|
+
llm_output = generate_text("Explain text to speech like I am five in 5 sentences.")
|
|
201
|
+
|
|
202
|
+
# Stream the generated speech through a WebSocket
|
|
203
|
+
async with websockets.connect(WEBSOCKET_URL) as ws:
|
|
204
|
+
print("Connected to WebSocket server.")
|
|
205
|
+
|
|
206
|
+
# Stream the generated speech
|
|
207
|
+
async for audio_chunk in processor.process(llm_output):
|
|
208
|
+
await ws.send(audio_chunk) # Send audio chunk
|
|
209
|
+
echoed_data = await ws.recv() # Receive the echoed message
|
|
210
|
+
print("Received from server:", echoed_data[:20], "...") # Print first 20 bytes
|
|
211
|
+
|
|
212
|
+
print("WebSocket connection closed.")
|
|
213
|
+
|
|
214
|
+
if __name__ == "__main__":
|
|
215
|
+
asyncio.run(main())
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
#### Save to a File
|
|
150
219
|
```python
|
|
151
|
-
import os
|
|
152
220
|
import wave
|
|
153
221
|
import asyncio
|
|
154
222
|
from groq import Groq
|
|
155
|
-
from smallest import Smallest
|
|
156
|
-
from smallest import TextToAudioStream
|
|
223
|
+
from smallest import Smallest, TextToAudioStream
|
|
157
224
|
|
|
158
|
-
llm = Groq(api_key=
|
|
159
|
-
tts = Smallest(api_key=
|
|
225
|
+
llm = Groq(api_key="GROQ_API_KEY")
|
|
226
|
+
tts = Smallest(api_key="SMALLEST_API_KEY")
|
|
160
227
|
|
|
161
228
|
async def generate_text(prompt):
|
|
162
229
|
"""Async generator for streaming text from Groq. You can use any LLM"""
|
|
@@ -213,16 +280,76 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
|
|
|
213
280
|
- Streamed over a network
|
|
214
281
|
- Further processed as needed
|
|
215
282
|
|
|
283
|
+
## Add your Voice
|
|
284
|
+
The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
|
|
285
|
+
|
|
286
|
+
### Add Synchronously
|
|
287
|
+
```python
|
|
288
|
+
from smallest import Smallest
|
|
289
|
+
|
|
290
|
+
def main():
|
|
291
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
292
|
+
res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
293
|
+
print(res)
|
|
294
|
+
|
|
295
|
+
if __name__ == "__main__":
|
|
296
|
+
main()
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### Add Asynchronously
|
|
300
|
+
```python
|
|
301
|
+
import asyncio
|
|
302
|
+
from smallest import AsyncSmallest
|
|
303
|
+
|
|
304
|
+
async def main():
|
|
305
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
306
|
+
res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
|
|
307
|
+
print(res)
|
|
308
|
+
|
|
309
|
+
if __name__ == "__main__":
|
|
310
|
+
asyncio.run(main())
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
## Delete your Voice
|
|
314
|
+
The Smallest AI SDK allows you to delete your cloned voice. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
|
|
315
|
+
|
|
316
|
+
### Delete Synchronously
|
|
317
|
+
```python
|
|
318
|
+
from smallest import Smallest
|
|
319
|
+
|
|
320
|
+
def main():
|
|
321
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
322
|
+
res = client.delete_voice(voice_id="voice_id")
|
|
323
|
+
print(res)
|
|
324
|
+
|
|
325
|
+
if __name__ == "__main__":
|
|
326
|
+
main()
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
### Delete Asynchronously
|
|
330
|
+
```python
|
|
331
|
+
import asyncio
|
|
332
|
+
from smallest import AsyncSmallest
|
|
333
|
+
|
|
334
|
+
async def main():
|
|
335
|
+
client = AsyncSmallest(api_key="SMALLEST_API_KEY")
|
|
336
|
+
res = await client.delete_voice(voice_id="voice_id")
|
|
337
|
+
print(res)
|
|
338
|
+
|
|
339
|
+
if __name__ == "__main__":
|
|
340
|
+
asyncio.run(main())
|
|
341
|
+
```
|
|
216
342
|
|
|
217
343
|
## Available Methods
|
|
218
344
|
|
|
219
345
|
```python
|
|
220
|
-
from smallest
|
|
346
|
+
from smallest import Smallest
|
|
221
347
|
|
|
222
|
-
client = Smallest(api_key=
|
|
348
|
+
client = Smallest(api_key="SMALLEST_API_KEY")
|
|
223
349
|
|
|
224
|
-
print(f"
|
|
225
|
-
print(f"Available Voices: {client.get_voices()}")
|
|
350
|
+
print(f"Available Languages: {client.get_languages()}")
|
|
351
|
+
print(f"Available Voices: {client.get_voices(model='lightning')}")
|
|
352
|
+
print(f"Available Cloned Voices: {client.get_cloned_voices()}")
|
|
226
353
|
print(f"Available Models: {client.get_models()}")
|
|
227
354
|
```
|
|
228
355
|
|