smallestai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 smallest.ai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,246 @@
1
+ Metadata-Version: 2.1
2
+ Name: smallestai
3
+ Version: 0.1.0
4
+ Summary: Official Python client for the Smallest AI API
5
+ Author-email: Smallest <info@smallest.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/smallest-inc/smallest-python-sdk
8
+ Keywords: smallest,smallest.ai,tts,text-to-speech
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: aiohttp
16
+ Requires-Dist: aiofiles
17
+ Requires-Dist: requests
18
+ Requires-Dist: sacremoses
19
+ Requires-Dist: pydub
20
+ Provides-Extra: test
21
+ Requires-Dist: jiwer; extra == "test"
22
+ Requires-Dist: httpx; extra == "test"
23
+ Requires-Dist: pytest; extra == "test"
24
+ Requires-Dist: pytest-asyncio; extra == "test"
25
+ Requires-Dist: deepgram-sdk; extra == "test"
26
+ Requires-Dist: python-dotenv; extra == "test"
27
+
28
+ ![image](https://i.imgur.com/TJ2tT4g.png)
29
+
30
+
31
+ <div align="center">
32
+ <a href="https://twitter.com/smallest_AI">
33
+ <img src="https://img.shields.io/twitter/url/https/twitter.com/smallest_AI.svg?style=social&label=Follow%20smallest_AI" alt="Twitter">
34
+ </a>
35
+ <a href="https://discord.gg/ywShEyXHBW">
36
+ <img src="https://dcbadge.vercel.app/api/server/ywShEyXHBW?style=flat" alt="Discord">
37
+ </a>
38
+ <a href="https://www.linkedin.com/company/smallest">
39
+ <img src="https://img.shields.io/badge/LinkedIn-Connect-blue" alt="Linkedin">
40
+ </a>
41
+ <a href="https://www.youtube.com/@smallest_ai">
42
+ <img src="https://img.shields.io/static/v1?message=smallest_ai&logo=youtube&label=&color=FF0000&logoColor=white&labelColor=&style=for-the-badge" height=20 alt="Youtube">
43
+ </a>
44
+ </div>
45
+
46
+ ## Official Python Client for Smallest AI API
47
+
48
+ Smallest AI builds high-speed multilingual voice models tailored for real-time applications, generating ultra-realistic audio in as little as ~100 milliseconds for 10 seconds of audio. With this SDK, you can easily convert text into high-quality audio with humanlike expressiveness.
49
+
50
+ Currently, the library supports direct synthesis as well as synthesis of streamed LLM output, both synchronously and asynchronously.
51
+
52
+ ## Table of Contents
53
+
54
+ - [Installation](#installation)
55
+ - [Get the API Key](#get-the-api-key)
56
+ - [Examples](#examples)
57
+ - [Sync](#sync)
58
+ - [Async](#async)
59
+ - [LLM to Speech](#llm-to-speech)
60
+ - [Available Methods](#available-methods)
61
+ - [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
62
+
63
+ ## Installation
64
+
65
+ To install the package, follow these steps:
66
+
67
+ 1. Clone the repository:
68
+ ```bash
69
+ git clone https://github.com/smallest-inc/smallest-python-sdk.git
70
+ ```
71
+
72
+ 2. Navigate to the cloned directory and install the package:
73
+ ```bash
74
+ cd smallest-python-sdk
75
+ pip install .
76
+ ```
77
+
78
+ ## Get the API Key
79
+
80
+ 1. Visit [waves.smallest.ai](https://waves.smallest.ai/) and sign up for an account or log in if you already have an account.
81
+ 2. Navigate to the `API Key` tab in your account dashboard.
82
+ 3. Create a new API Key and copy it.
83
+ 4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
84
+
85
+ ## Examples
86
+
87
+ ### Sync
88
+ A synchronous text-to-speech synthesis client.
89
+
90
+ **Basic Usage:**
91
+ ```python
92
+ import os
93
+ from smallest import Smallest
94
+
95
+ def main():
96
+     client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
97
+     audio_data = client.synthesize("Hello, this is a test for sync synthesis function.")
98
+     with open("sync_synthesize.wav", "wb") as f:
99
+         f.write(audio_data)
100
+
101
+ if __name__ == "__main__":
102
+     main()
103
+ ```
104
+
105
+ **Parameters:**
106
+ - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
107
+ - `model`: TTS model to use (default: "lightning")
108
+ - `sample_rate`: Audio sample rate (default: 24000)
109
+ - `voice`: Voice ID (default: "emily")
110
+ - `speed`: Speech speed multiplier (default: 1.0)
111
+ - `add_wav_header`: Include WAV header in output (default: True)
112
+ - `transliterate`: Enable text transliteration (default: False)
113
+ - `remove_extra_silence`: Remove additional silence (default: True)
114
+
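Putting the parameters above together: a minimal sketch of overriding the defaults at construction time, assuming `Smallest` accepts them as keyword arguments exactly as listed (the sample rate and speed values are illustrative, not recommendations):

```python
import os
from smallest import Smallest

client = Smallest(
    api_key=os.environ.get("SMALLEST_API_KEY"),
    model="lightning",
    sample_rate=16000,   # illustrative; 24000 is the documented default
    voice="emily",
    speed=1.2,           # illustrative speed multiplier
    add_wav_header=True,
)

audio_data = client.synthesize("Testing non-default synthesis options.")
with open("custom_options.wav", "wb") as f:
    f.write(audio_data)
```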
115
+ ### Async
116
+ An asynchronous text-to-speech synthesis client.
117
+
118
+ **Basic Usage:**
119
+ ```python
120
+ import os
121
+ import asyncio
122
+ import aiofiles
123
+ from smallest import AsyncSmallest
124
+
125
+ client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
126
+
127
+ async def main():
128
+     async with client as tts:
129
+         audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
130
+         async with aiofiles.open("async_synthesize.wav", "wb") as f:
131
+             await f.write(audio_bytes)
132
+
133
+ if __name__ == "__main__":
134
+     asyncio.run(main())
135
+ ```
136
+
137
+ **Parameters:**
138
+ - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
139
+ - `model`: TTS model to use (default: "lightning")
140
+ - `sample_rate`: Audio sample rate (default: 24000)
141
+ - `voice`: Voice ID (default: "emily")
142
+ - `speed`: Speech speed multiplier (default: 1.0)
143
+ - `add_wav_header`: Include WAV header in output (default: True)
144
+ - `transliterate`: Enable text transliteration (default: False)
145
+ - `remove_extra_silence`: Remove additional silence (default: False)
146
+
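Beyond the constructor options above, the `AsyncSmallest.synthesize` method shown in `smallest/async_tts.py` later in this package also accepts a `save_as` path (which must end in `.wav`) and keyword overrides that apply to a single call. A minimal sketch, with an illustrative `speed` override:

```python
import os
import asyncio
from smallest import AsyncSmallest

async def main():
    async with AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY")) as tts:
        # Keyword arguments override the constructor options for this call only;
        # save_as writes the result to disk instead of returning bytes.
        await tts.synthesize(
            "Per-call overrides, saved straight to disk.",
            save_as="override_example.wav",
            speed=1.2,  # illustrative value
        )

if __name__ == "__main__":
    asyncio.run(main())
```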
147
+ ### LLM to Speech
148
+
149
+ The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output with minimal latency. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
150
+
151
+ ```python
152
+ import os
153
+ import wave
154
+ import asyncio
155
+ from groq import Groq
156
+ from smallest import Smallest
157
+ from smallest import TextToAudioStream
158
+
159
+ llm = Groq(api_key=os.environ.get("GROQ_API_KEY"))
160
+ tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
161
+
162
+ async def generate_text(prompt):
163
+ """Async generator for streaming text from Groq. You can use any LLM"""
164
+ completion = llm.chat.completions.create(
165
+ messages=[
166
+ {
167
+ "role": "user",
168
+ "content": prompt,
169
+ }
170
+ ],
171
+ model="llama3-8b-8192",
172
+ stream=True,
173
+ )
174
+
175
+ for chunk in completion:
176
+ text = chunk.choices[0].delta.content
177
+ if text is not None:
178
+ yield text
179
+
180
+ async def save_audio_to_wav(file_path, processor, llm_output):
181
+     with wave.open(file_path, "wb") as wav_file:
182
+         wav_file.setnchannels(1)
183
+         wav_file.setsampwidth(2)
184
+         wav_file.setframerate(24000)
185
+
186
+         async for audio_chunk in processor.process(llm_output):
187
+             wav_file.writeframes(audio_chunk)
188
+
189
+ async def main():
190
+     # Initialize the TTS processor with the TTS instance
191
+     processor = TextToAudioStream(tts_instance=tts)
192
+
193
+     # Generate text asynchronously and process it
194
+     llm_output = generate_text("Explain text to speech like I am five in 5 sentences.")
195
+
196
+     # As an example, save the generated audio to a WAV file.
197
+     await save_audio_to_wav("llm_to_speech.wav", processor, llm_output)
198
+
199
+ if __name__ == "__main__":
200
+     asyncio.run(main())
201
+ ```
202
+
203
+ **Parameters:**
204
+
205
+ - `tts_instance`: Text-to-speech engine (Smallest or AsyncSmallest)
206
+ - `queue_timeout`: Wait time for new text (seconds, default: 5.0)
207
+ - `max_retries`: Number of retry attempts for failed synthesis (default: 3)
208
+
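A short sketch of tuning the options above when constructing the processor (both values are illustrative):

```python
import os
from smallest import Smallest, TextToAudioStream

tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))

# Wait up to 3 seconds for new text and retry failed synthesis up to 5 times.
processor = TextToAudioStream(tts_instance=tts, queue_timeout=3.0, max_retries=5)
```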
209
+ **Output Format:**
210
+ The processor yields raw audio data chunks without WAV headers for streaming efficiency. These chunks can be:
211
+
212
+ - Played directly through an audio device
213
+ - Saved to a file
214
+ - Streamed over a network
215
+ - Further processed as needed
216
+
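As one example of the first option above, the chunks can be written to an output stream as they arrive. This sketch uses the third-party `pyaudio` package (not a dependency of this SDK) and assumes 16-bit mono PCM at the 24000 Hz default sample rate:

```python
import os
import asyncio

import pyaudio  # third-party; install separately
from smallest import Smallest, TextToAudioStream

async def speak(llm_output):
    tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
    processor = TextToAudioStream(tts_instance=tts)

    pa = pyaudio.PyAudio()
    # Assumed output format: 16-bit mono PCM at the default 24000 Hz sample rate.
    stream = pa.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
    try:
        async for chunk in processor.process(llm_output):
            stream.write(chunk)
    finally:
        stream.stop_stream()
        stream.close()
        pa.terminate()

async def sample_text():
    # Stand-in for a streaming LLM; any async generator of text works here.
    yield "Hello from the streaming "
    yield "text-to-speech playback sketch."

if __name__ == "__main__":
    asyncio.run(speak(sample_text()))
```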
217
+
218
+ ## Available Methods
219
+
220
+ ```python
221
+ from smallest.tts import Smallest
222
+
223
+ client = Smallest()
224
+
225
+ print(f"Avalaible Languages: {client.get_languages()}")
226
+ print(f"Available Voices: {client.get_voices()}")
227
+ print(f"Available Models: {client.get_models()}")
228
+ ```
229
+
230
+ ## Technical Note: WAV Headers in Streaming Audio
231
+
232
+ When implementing audio streaming with chunks of synthesized speech, WAV headers are omitted from individual chunks because:
233
+
234
+ ### Technical Issues
235
+ - Each WAV header contains metadata about the entire audio file.
236
+ - Multiple headers would make chunks appear as separate audio files and add redundancy.
237
+ - Headers contain file-specific data (like total size) that's invalid for chunks.
238
+ - Chunks that each carry a header cause audio artifacts (pops) when concatenated or played back sequentially.
239
+ - Audio players would try to reinitialize audio settings for each chunk.
240
+
241
+ ### Best Practices
242
+ 1. Stream raw PCM audio data without headers
243
+ 2. Add a single WAV header only when:
244
+ - Saving the complete stream to a file
245
+ - Initializing the audio playback system
246
+ - Converting the stream to a standard audio format
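For instance, when saving a completed stream of raw chunks, a single header can be written with the standard-library `wave` module; 16-bit mono PCM at 24000 Hz is assumed here, matching the defaults above. (The SDK's own `add_wav_header` helper, imported in `smallest/async_tts.py`, serves the same purpose.)

```python
import wave

def save_pcm_as_wav(pcm_bytes: bytes, path: str, sample_rate: int = 24000) -> None:
    """Wrap already-collected raw PCM audio in a single WAV header."""
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)   # mono (assumed)
        wav_file.setsampwidth(2)   # 16-bit samples (assumed)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_bytes)

# Example: wrap chunks collected from a stream.
# save_pcm_as_wav(b"".join(collected_chunks), "stream.wav")
```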
@@ -0,0 +1,44 @@
1
+ [project]
2
+ name = "smallestai"
3
+ version = "0.1.0"
4
+ description = "Official Python client for the Smallest AI API"
5
+ authors = [
6
+ {name = "Smallest", email = "info@smallest.ai"},
7
+ ]
8
+ readme = "README.md"
9
+ license = {text = "MIT"}
10
+ requires-python = ">=3.9"
11
+ classifiers = [
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3",
15
+ ]
16
+ keywords = ["smallest", "smallest.ai", "tts", "text-to-speech"]
17
+ dependencies = [
18
+ "aiohttp",
19
+ "aiofiles",
20
+ "requests",
21
+ "sacremoses",
22
+ "pydub"
23
+ ]
24
+
25
+ [project.optional-dependencies]
26
+ test = [
27
+ "jiwer",
28
+ "httpx",
29
+ "pytest",
30
+ "pytest-asyncio",
31
+ "deepgram-sdk",
32
+ "python-dotenv"
33
+ ]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/smallest-inc/smallest-python-sdk"
37
+
38
+ [build-system]
39
+ requires = ["setuptools>=61.0"]
40
+ build-backend = "setuptools.build_meta"
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["."]
44
+ include = ["smallest*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ from smallest.tts import Smallest
2
+ from smallest.async_tts import AsyncSmallest
3
+ from smallest.stream_tts import TextToAudioStream
4
+
5
+ __all__ = ["Smallest", "AsyncSmallest", "TextToAudioStream"]
@@ -0,0 +1,151 @@
1
+ import os
2
+ import copy
3
+ import aiohttp
4
+ import aiofiles
5
+ from typing import Optional, Union, List
6
+
7
+ from .models import TTSModels, TTSVoices
8
+ from .exceptions import TTSError, APIError
9
+ from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
10
+ get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
11
+
12
+
13
+ class AsyncSmallest:
14
+ def __init__(
15
+ self,
16
+ api_key: Optional[str] = None,
17
+ model: TTSModels = "lightning",
18
+ sample_rate: int = 24000,
19
+ voice: TTSVoices = "emily",
20
+ speed: Optional[float] = 1.0,
21
+ add_wav_header: Optional[bool] = True,
22
+ transliterate: Optional[bool] = False,
23
+ remove_extra_silence: Optional[bool] = False
24
+ ) -> None:
25
+ """
26
+ AsyncSmallest Instance for asynchronous text-to-speech synthesis.
27
+
28
+ This class provides an asynchronous implementation of the text-to-speech functionality.
29
+ It allows for non-blocking synthesis of speech from text, making it suitable for applications
30
+ that require async processing.
31
+
32
+ Args:
33
+ - api_key (str): The API key for authentication, export it as 'SMALLEST_API_KEY' in your environment variables.
34
+ - model (TTSModels): The model to be used for synthesis.
35
+ - sample_rate (int): The sample rate for the audio output.
36
+ - voice (TTSVoices): The voice to be used for synthesis.
37
+ - speed (float): The speed of the speech synthesis.
38
+ - add_wav_header (bool): Whether to add a WAV header to the output audio.
39
+ - transliterate (bool): Whether to transliterate the text.
40
+ - remove_extra_silence (bool): Whether to remove extra silence from the synthesized audio.
41
+
42
+ Methods:
43
+ - get_languages: Returns a list of available languages for synthesis.
44
+ - get_voices: Returns a list of available voices for synthesis.
45
+ - get_models: Returns a list of available models for synthesis.
46
+ - synthesize: Asynchronously converts the provided text into speech and returns the audio content.
47
+ """
48
+ self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
49
+ if not self.api_key:
50
+ raise TTSError("API key is required")
51
+
52
+ self.opts = TTSOptions(
53
+ model=model,
54
+ sample_rate=sample_rate,
55
+ voice=voice,
56
+ api_key=self.api_key,
57
+ add_wav_header=add_wav_header,
58
+ speed=speed,
59
+ transliterate=transliterate,
60
+ remove_extra_silence=remove_extra_silence,
61
+ )
62
+ self.session = None
63
+
64
+ async def __aenter__(self):
65
+ if self.session is None:
66
+ self.session = aiohttp.ClientSession()
67
+ return self
68
+
69
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
70
+ if self.session:
71
+ await self.session.close()
72
+
73
+ def get_languages(self) -> List[str]:
74
+ """Returns a list of available languages."""
75
+ return get_smallest_languages()
76
+
77
+ def get_voices(self) -> List[str]:
78
+ """Returns a list of available voices."""
79
+ return get_smallest_voices()
80
+
81
+ def get_models(self) -> List[str]:
82
+ """Returns a list of available models."""
83
+ return get_smallest_models()
84
+
85
+ async def synthesize(
86
+ self,
87
+ text: str,
88
+ save_as: Optional[str] = None,
89
+ **kwargs
90
+ ) -> Union[bytes, None]:
91
+ """
92
+ Asynchronously synthesize speech from the provided text.
93
+
94
+ Args:
95
+ - text (str): The text to be converted to speech.
96
+ - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
97
+ The file must have a .wav extension.
98
+ - kwargs: Additional optional parameters to override `__init__` options for this call.
99
+
100
+ Returns:
101
+ - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
102
+ otherwise, returns None after saving the audio to the specified file.
103
+
104
+ Raises:
105
+ - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
106
+ - APIError: If the API request fails or returns an error.
107
+ """
108
+ opts = copy.deepcopy(self.opts)
109
+ for key, value in kwargs.items():
110
+ setattr(opts, key, value)
111
+
112
+ validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
113
+
114
+ payload = {
115
+ "text": preprocess_text(text),
116
+ "sample_rate": opts.sample_rate,
117
+ "voice_id": opts.voice,
118
+ "add_wav_header": opts.add_wav_header,
119
+ "speed": opts.speed,
120
+ "model": opts.model,
121
+ "transliterate": opts.transliterate,
122
+ "remove_extra_silence": opts.remove_extra_silence
123
+ }
124
+
125
+ headers = {
126
+ "Authorization": f"Bearer {self.api_key}",
127
+ "Content-Type": "application/json",
128
+ }
129
+
130
+ if not self.session:
131
+ self.session = aiohttp.ClientSession()
132
+
133
+ async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
134
+ if res.status != 200:
135
+ raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
136
+
137
+ audio_content = await res.read()
138
+
139
+ if save_as:
140
+ if not save_as.endswith(".wav"):
141
+ raise TTSError("Invalid file name. Extension must be .wav")
142
+
143
+ if opts.add_wav_header:
144
+ async with aiofiles.open(save_as, mode='wb') as f:
145
+ await f.write(audio_content)
146
+ else:
147
+ async with aiofiles.open(save_as, mode='wb') as f:
148
+ await f.write(add_wav_header(audio_content, opts.sample_rate))
149
+ return None
150
+
151
+ return audio_content