smallestai 1.3.3__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. Click here for more details.

@@ -1,8 +1,8 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: smallestai
3
- Version: 1.3.3
3
+ Version: 2.0.0
4
4
  Summary: Official Python client for the Smallest AI API
5
- Author-email: Smallest <info@smallest.ai>
5
+ Author-email: Smallest <support@smallest.ai>
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/smallest-inc/smallest-python-sdk
8
8
  Keywords: smallest,smallest.ai,tts,text-to-speech
@@ -53,10 +53,14 @@ Currently, the library supports direct synthesis and the ability to synthesize s
53
53
 
54
54
  - [Installation](#installation)
55
55
  - [Get the API Key](#get-the-api-key)
56
+ - [Best Practices for Input Text](#best-practices-for-input-text)
56
57
  - [Examples](#examples)
57
- - [Sync](#sync)
58
- - [Async](#async)
58
+ - [Synchronous](#synchronous)
59
+ - [Asynchronous](#asynchronous)
59
60
  - [LLM to Speech](#llm-to-speech)
61
+ - [Add your Voice](#add-your-voice)
62
+ - [Synchronously](#synchronously)
63
+ - [Asynchronously](#asynchronously)
60
64
  - [Available Methods](#available-methods)
61
65
  - [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
62
66
 
@@ -76,19 +80,30 @@ When using an SDK in your application, make sure to pin to at least the major ve
76
80
  3. Create a new API Key and copy it.
77
81
  4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
78
82
 
83
+ ## Best Practices for Input Text
84
+ While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
85
+
86
+ For optimal voice generation results:
87
+
88
+ 1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
89
+ 2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
90
+ 3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
91
+
79
92
  ## Examples
80
93
 
81
- ### Sync
94
+ ### Synchronous
82
95
  A synchronous text-to-speech synthesis client.
83
96
 
84
97
  **Basic Usage:**
85
98
  ```python
86
- import os
87
99
  from smallest import Smallest
88
100
 
89
101
  def main():
90
- client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
91
- client.synthesize("Hello, this is a test for sync synthesis function.", save_as="sync_synthesize.wav")
102
+ client = Smallest(api_key="SMALLEST_API_KEY")
103
+ client.synthesize(
104
+ text="Hello, this is a test for sync synthesis function.",
105
+ save_as="sync_synthesize.wav"
106
+ )
92
107
 
93
108
  if __name__ == "__main__":
94
109
  main()
@@ -98,7 +113,7 @@ if __name__ == "__main__":
98
113
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
99
114
  - `model`: TTS model to use (default: "lightning")
100
115
  - `sample_rate`: Audio sample rate (default: 24000)
101
- - `voice`: Voice ID (default: "emily")
116
+ - `voice_id`: Voice ID (default: "emily")
102
117
  - `speed`: Speech speed multiplier (default: 1.0)
103
118
  - `add_wav_header`: Include WAV header in output (default: True)
104
119
  - `transliterate`: Enable text transliteration (default: False)
@@ -117,17 +132,16 @@ client.synthesize(
117
132
  ```
118
133
 
119
134
 
120
- ### Async
135
+ ### Asynchronous
121
136
  Asynchronous text-to-speech synthesis client.
122
137
 
123
138
  **Basic Usage:**
124
139
  ```python
125
- import os
126
140
  import asyncio
127
141
  import aiofiles
128
142
  from smallest import AsyncSmallest
129
143
 
130
- client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
144
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
131
145
 
132
146
  async def main():
133
147
  async with client as tts:
@@ -143,7 +157,7 @@ if __name__ == "__main__":
143
157
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
144
158
  - `model`: TTS model to use (default: "lightning")
145
159
  - `sample_rate`: Audio sample rate (default: 24000)
146
- - `voice`: Voice ID (default: "emily")
160
+ - `voice_id`: Voice ID (default: "emily")
147
161
  - `speed`: Speech speed multiplier (default: 1.0)
148
162
  - `add_wav_header`: Include WAV header in output (default: True)
149
163
  - `transliterate`: Enable text transliteration (default: False)
@@ -162,18 +176,16 @@ audio_bytes = await tts.synthesize(
162
176
 
163
177
  ### LLM to Speech
164
178
 
165
- The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output with minimal latency. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance.
179
+ The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance.
166
180
 
167
181
  ```python
168
- import os
169
182
  import wave
170
183
  import asyncio
171
184
  from groq import Groq
172
- from smallest import Smallest
173
- from smallest import TextToAudioStream
185
+ from smallest import Smallest, TextToAudioStream
174
186
 
175
- llm = Groq(api_key=os.environ.get("GROQ_API_KEY"))
176
- tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
187
+ llm = Groq(api_key="GROQ_API_KEY")
188
+ tts = Smallest(api_key="SMALLEST_API_KEY")
177
189
 
178
190
  async def generate_text(prompt):
179
191
  """Async generator for streaming text from Groq. You can use any LLM"""
@@ -230,16 +242,46 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
230
242
  - Streamed over a network
231
243
  - Further processed as needed
232
244
 
245
+ ## Add your Voice
246
+ The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
247
+
248
+ ### Synchronously
249
+ ```python
250
+ from smallest import Smallest
251
+
252
+ def main():
253
+ client = Smallest(api_key="YOUR_API_KEY")
254
+ res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
255
+ print(res)
256
+
257
+ if __name__ == "__main__":
258
+ main()
259
+ ```
260
+
261
+ ### Asynchronously
262
+ ```python
263
+ import asyncio
264
+ from smallest import AsyncSmallest
265
+
266
+ async def main():
267
+ client = AsyncSmallest(api_key="YOUR_API_KEY")
268
+ res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
269
+ print(res)
270
+
271
+ if __name__ == "__main__":
272
+ asyncio.run(main())
273
+ ```
233
274
 
234
275
  ## Available Methods
235
276
 
236
277
  ```python
237
- from smallest.tts import Smallest
278
+ from smallest import Smallest
238
279
 
239
- client = Smallest()
280
+ client = Smallest(api_key="SMALLEST_API_KEY")
240
281
 
241
- print(f"Avalaible Languages: {client.get_languages()}")
242
- print(f"Available Voices: {client.get_voices()}")
282
+ print(f"Available Languages: {client.get_languages()}")
283
+ print(f"Available Voices: {client.get_voices(model='lightning')}")
284
+ print(f"Available Voices: {client.get_cloned_voices()}")
243
285
  print(f"Available Models: {client.get_models()}")
244
286
  ```
245
287
 
@@ -254,7 +296,7 @@ When implementing audio streaming with chunks of synthesized speech, WAV headers
254
296
  - Sequential playback of chunks with headers causes audio artifacts (pop sounds) when concatenating or playing audio sequentially.
255
297
  - Audio players would try to reinitialize audio settings for each chunk.
256
298
 
257
- ### Best Practices
299
+ ### Best Practices for Audio Streaming
258
300
  1. Stream raw PCM audio data without headers
259
301
  2. Add a single WAV header only when:
260
302
  - Saving the complete stream to a file
@@ -26,10 +26,14 @@ Currently, the library supports direct synthesis and the ability to synthesize s
26
26
 
27
27
  - [Installation](#installation)
28
28
  - [Get the API Key](#get-the-api-key)
29
+ - [Best Practices for Input Text](#best-practices-for-input-text)
29
30
  - [Examples](#examples)
30
- - [Sync](#sync)
31
- - [Async](#async)
31
+ - [Synchronous](#synchronous)
32
+ - [Asynchronous](#asynchronous)
32
33
  - [LLM to Speech](#llm-to-speech)
34
+ - [Add your Voice](#add-your-voice)
35
+ - [Synchronously](#synchronously)
36
+ - [Asynchronously](#asynchronously)
33
37
  - [Available Methods](#available-methods)
34
38
  - [Technical Note: WAV Headers in Streaming Audio](#technical-note-wav-headers-in-streaming-audio)
35
39
 
@@ -49,19 +53,30 @@ When using an SDK in your application, make sure to pin to at least the major ve
49
53
  3. Create a new API Key and copy it.
50
54
  4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
51
55
 
56
+ ## Best Practices for Input Text
57
+ While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
58
+
59
+ For optimal voice generation results:
60
+
61
+ 1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
62
+ 2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
63
+ 3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
64
+
52
65
  ## Examples
53
66
 
54
- ### Sync
67
+ ### Synchronous
55
68
  A synchronous text-to-speech synthesis client.
56
69
 
57
70
  **Basic Usage:**
58
71
  ```python
59
- import os
60
72
  from smallest import Smallest
61
73
 
62
74
  def main():
63
- client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
64
- client.synthesize("Hello, this is a test for sync synthesis function.", save_as="sync_synthesize.wav")
75
+ client = Smallest(api_key="SMALLEST_API_KEY")
76
+ client.synthesize(
77
+ text="Hello, this is a test for sync synthesis function.",
78
+ save_as="sync_synthesize.wav"
79
+ )
65
80
 
66
81
  if __name__ == "__main__":
67
82
  main()
@@ -71,7 +86,7 @@ if __name__ == "__main__":
71
86
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
72
87
  - `model`: TTS model to use (default: "lightning")
73
88
  - `sample_rate`: Audio sample rate (default: 24000)
74
- - `voice`: Voice ID (default: "emily")
89
+ - `voice_id`: Voice ID (default: "emily")
75
90
  - `speed`: Speech speed multiplier (default: 1.0)
76
91
  - `add_wav_header`: Include WAV header in output (default: True)
77
92
  - `transliterate`: Enable text transliteration (default: False)
@@ -90,17 +105,16 @@ client.synthesize(
90
105
  ```
91
106
 
92
107
 
93
- ### Async
108
+ ### Asynchronous
94
109
  Asynchronous text-to-speech synthesis client.
95
110
 
96
111
  **Basic Usage:**
97
112
  ```python
98
- import os
99
113
  import asyncio
100
114
  import aiofiles
101
115
  from smallest import AsyncSmallest
102
116
 
103
- client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
117
+ client = AsyncSmallest(api_key="SMALLEST_API_KEY")
104
118
 
105
119
  async def main():
106
120
  async with client as tts:
@@ -116,7 +130,7 @@ if __name__ == "__main__":
116
130
  - `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable)
117
131
  - `model`: TTS model to use (default: "lightning")
118
132
  - `sample_rate`: Audio sample rate (default: 24000)
119
- - `voice`: Voice ID (default: "emily")
133
+ - `voice_id`: Voice ID (default: "emily")
120
134
  - `speed`: Speech speed multiplier (default: 1.0)
121
135
  - `add_wav_header`: Include WAV header in output (default: True)
122
136
  - `transliterate`: Enable text transliteration (default: False)
@@ -135,18 +149,16 @@ audio_bytes = await tts.synthesize(
135
149
 
136
150
  ### LLM to Speech
137
151
 
138
- The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output with minimal latency. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance.
152
+ The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance.
139
153
 
140
154
  ```python
141
- import os
142
155
  import wave
143
156
  import asyncio
144
157
  from groq import Groq
145
- from smallest import Smallest
146
- from smallest import TextToAudioStream
158
+ from smallest import Smallest, TextToAudioStream
147
159
 
148
- llm = Groq(api_key=os.environ.get("GROQ_API_KEY"))
149
- tts = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
160
+ llm = Groq(api_key="GROQ_API_KEY")
161
+ tts = Smallest(api_key="SMALLEST_API_KEY")
150
162
 
151
163
  async def generate_text(prompt):
152
164
  """Async generator for streaming text from Groq. You can use any LLM"""
@@ -203,16 +215,46 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
203
215
  - Streamed over a network
204
216
  - Further processed as needed
205
217
 
218
+ ## Add your Voice
219
+ The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality.
220
+
221
+ ### Synchronously
222
+ ```python
223
+ from smallest import Smallest
224
+
225
+ def main():
226
+ client = Smallest(api_key="YOUR_API_KEY")
227
+ res = client.add_voice(display_name="My Voice", file_path="my_voice.wav")
228
+ print(res)
229
+
230
+ if __name__ == "__main__":
231
+ main()
232
+ ```
233
+
234
+ ### Asynchronously
235
+ ```python
236
+ import asyncio
237
+ from smallest import AsyncSmallest
238
+
239
+ async def main():
240
+ client = AsyncSmallest(api_key="YOUR_API_KEY")
241
+ res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav")
242
+ print(res)
243
+
244
+ if __name__ == "__main__":
245
+ asyncio.run(main())
246
+ ```
206
247
 
207
248
  ## Available Methods
208
249
 
209
250
  ```python
210
- from smallest.tts import Smallest
251
+ from smallest import Smallest
211
252
 
212
- client = Smallest()
253
+ client = Smallest(api_key="SMALLEST_API_KEY")
213
254
 
214
- print(f"Avalaible Languages: {client.get_languages()}")
215
- print(f"Available Voices: {client.get_voices()}")
255
+ print(f"Available Languages: {client.get_languages()}")
256
+ print(f"Available Voices: {client.get_voices(model='lightning')}")
257
+ print(f"Available Voices: {client.get_cloned_voices()}")
216
258
  print(f"Available Models: {client.get_models()}")
217
259
  ```
218
260
 
@@ -227,7 +269,7 @@ When implementing audio streaming with chunks of synthesized speech, WAV headers
227
269
  - Sequential playback of chunks with headers causes audio artifacts (pop sounds) when concatenating or playing audio sequentially.
228
270
  - Audio players would try to reinitialize audio settings for each chunk.
229
271
 
230
- ### Best Practices
272
+ ### Best Practices for Audio Streaming
231
273
  1. Stream raw PCM audio data without headers
232
274
  2. Add a single WAV header only when:
233
275
  - Saving the complete stream to a file
@@ -1,9 +1,9 @@
1
1
  [project]
2
2
  name = "smallestai"
3
- version = "1.3.3"
3
+ version = "2.0.0"
4
4
  description = "Official Python client for the Smallest AI API"
5
5
  authors = [
6
- {name = "Smallest", email = "info@smallest.ai"},
6
+ {name = "Smallest", email = "support@smallest.ai"},
7
7
  ]
8
8
  readme = "README.md"
9
9
  license = {text = "MIT"}
@@ -0,0 +1,260 @@
1
+ import os
2
+ import copy
3
+ import json
4
+ import aiohttp
5
+ import aiofiles
6
+ import requests
7
+ from typing import Optional, Union, List
8
+
9
+ from smallest.exceptions import TTSError, APIError
10
+ from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
11
+ get_smallest_languages, get_smallest_models, API_BASE_URL)
12
+
13
+
14
class AsyncSmallest:
    """Asynchronous client for the Smallest AI text-to-speech API.

    Provides non-blocking speech synthesis over HTTP via aiohttp. Can be used
    as an async context manager (one shared session reused across calls) or
    standalone (a session is created and closed per call).
    """

    def __init__(
        self,
        api_key: str = None,
        model: Optional[str] = "lightning",
        sample_rate: Optional[int] = 24000,
        voice_id: Optional[str] = "emily",
        speed: Optional[float] = 1.0,
        add_wav_header: Optional[bool] = True,
        transliterate: Optional[bool] = False,
        remove_extra_silence: Optional[bool] = False
    ) -> None:
        """
        AsyncSmallest Instance for asynchronous text-to-speech synthesis.

        This class provides an asynchronous implementation of the text-to-speech
        functionality. It allows for non-blocking synthesis of speech from text,
        making it suitable for applications that require async processing.

        Args:
        - api_key (str): The API key for authentication; falls back to the
          'SMALLEST_API_KEY' environment variable when not given.
        - model (str): The model to be used for synthesis.
        - sample_rate (int): The sample rate for the audio output, in Hz.
        - voice_id (str): The voice to be used for synthesis.
        - speed (float): The speed multiplier for the speech synthesis.
        - add_wav_header (bool): Whether to add a WAV header to the output audio.
        - transliterate (bool): Whether to transliterate the text.
        - remove_extra_silence (bool): Whether to remove extra silence from the
          synthesized audio.

        Raises:
        - TTSError: If no API key is provided and none is found in the environment.

        Methods:
        - get_languages: Returns a list of available languages for synthesis.
        - get_voices: Returns a list of available voices for synthesis.
        - get_cloned_voices: Returns a list of your cloned voices.
        - get_models: Returns a list of available models for synthesis.
        - synthesize: Asynchronously converts the provided text into speech.
        - add_voice: Asynchronously clones a voice from a reference audio file.
        """
        self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
        if not self.api_key:
            raise TTSError()
        # Maximum characters sent per synthesis request; longer text is chunked.
        self.chunk_size = 250

        self.opts = TTSOptions(
            model=model,
            sample_rate=sample_rate,
            voice_id=voice_id,
            api_key=self.api_key,
            add_wav_header=add_wav_header,
            speed=speed,
            transliterate=transliterate,
            remove_extra_silence=remove_extra_silence,
        )
        # Lazily created aiohttp session; shared when used as a context manager.
        self.session = None


    async def __aenter__(self):
        if self.session is None:
            self.session = aiohttp.ClientSession()
        return self


    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()


    async def _ensure_session(self):
        """Ensure a session exists for direct (non-context-manager) calls.

        Returns True when the session was created here, signalling the caller
        that it is responsible for closing it afterwards.
        """
        if not self.session:
            self.session = aiohttp.ClientSession()
            return True
        return False


    def get_languages(self) -> List[str]:
        """Returns a list of available languages."""
        return get_smallest_languages()

    def get_cloned_voices(self) -> str:
        """Returns a list of your cloned voices as a formatted JSON string.

        Raises:
        - APIError: If the API request fails or returns an error.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }

        res = requests.get(f"{API_BASE_URL}/lightning-large/get_cloned_voices", headers=headers)
        if res.status_code != 200:
            raise APIError(f"Failed to get cloned voices: {res.text}. For more information, visit https://waves.smallest.ai/")

        return json.dumps(res.json(), indent=4, ensure_ascii=False)


    def get_voices(
        self,
        model: Optional[str] = "lightning"
    ) -> str:
        """Returns a list of available voices for *model* as a formatted JSON string.

        Raises:
        - APIError: If the API request fails or returns an error.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }

        res = requests.get(f"{API_BASE_URL}/{model}/get_voices", headers=headers)
        if res.status_code != 200:
            raise APIError(f"Failed to get voices: {res.text}. For more information, visit https://waves.smallest.ai/")

        return json.dumps(res.json(), indent=4, ensure_ascii=False)


    def get_models(self) -> List[str]:
        """Returns a list of available models."""
        return get_smallest_models()


    async def synthesize(
        self,
        text: str,
        save_as: Optional[str] = None,
        **kwargs
    ) -> Union[bytes, None]:
        """
        Asynchronously synthesize speech from the provided text.

        Args:
        - text (str): The text to be converted to speech.
        - save_as (Optional[str]): If provided, the synthesized audio will be saved
          to this file path. The file must have a .wav extension.
        - kwargs: Additional optional parameters to override `__init__` options
          for this call only.

        Returns:
        - Union[bytes, None]: The synthesized audio content in bytes if `save_as`
          is not specified; otherwise, returns None after saving the audio to the
          specified file.

        Raises:
        - TTSError: If the provided file name does not have a .wav extension when
          `save_as` is specified.
        - APIError: If the API request fails or returns an error.
        """
        should_cleanup = await self._ensure_session()

        try:
            # Work on a copy so per-call overrides never leak into self.opts.
            opts = copy.deepcopy(self.opts)
            for key, value in kwargs.items():
                setattr(opts, key, value)

            validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed)

            # The large model accepts shorter requests than the base model.
            # BUG FIX: was comparing against the misspelled 'ligtning-large',
            # so the 140-character limit was never applied.
            self.chunk_size = 250
            if opts.model == 'lightning-large':
                self.chunk_size = 140

            chunks = chunk_text(text, self.chunk_size)
            audio_content = b""

            for chunk in chunks:
                payload = {
                    "text": preprocess_text(chunk),
                    "sample_rate": opts.sample_rate,
                    "voice_id": opts.voice_id,
                    # Headers are added once at the end, never per chunk, to
                    # avoid artifacts when concatenating raw PCM.
                    "add_wav_header": False,
                    "speed": opts.speed,
                    "model": opts.model,
                    "transliterate": opts.transliterate,
                    "remove_extra_silence": opts.remove_extra_silence
                }

                headers = {
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                }

                async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
                    if res.status != 200:
                        raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")

                    audio_content += await res.read()

            if save_as:
                if not save_as.endswith(".wav"):
                    raise TTSError("Invalid file name. Extension must be .wav")

                async with aiofiles.open(save_as, mode='wb') as f:
                    await f.write(add_wav_header(audio_content, opts.sample_rate))

                return None

            if opts.add_wav_header:
                return add_wav_header(audio_content, opts.sample_rate)

            return audio_content

        finally:
            # Only close the session if this call created it.
            if should_cleanup and self.session:
                await self.session.close()
                self.session = None


    async def add_voice(self, display_name: str, file_path: str) -> str:
        """
        Instantly clone your voice asynchronously.

        Args:
        - display_name (str): The display name for the new voice.
        - file_path (str): The path to the reference audio file to be cloned.

        Returns:
        - str: The response from the API as a formatted JSON string.

        Raises:
        - TTSError: If the file does not exist or is not a valid audio file.
        - APIError: If the API request fails or returns an error.
        """
        url = f"{API_BASE_URL}/lightning-large/add_voice"

        if not os.path.exists(file_path):
            raise TTSError("Invalid file path. File does not exist.")

        ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
        file_extension = os.path.splitext(file_path)[1].lower()
        if file_extension not in ALLOWED_AUDIO_EXTENSIONS:
            raise TTSError(f"Invalid file type. Supported formats are: {ALLOWED_AUDIO_EXTENSIONS}")

        headers = {
            'Authorization': f"Bearer {self.api_key}",
        }

        should_cleanup = await self._ensure_session()

        try:
            async with aiofiles.open(file_path, 'rb') as f:
                file_data = await f.read()

            data = aiohttp.FormData()
            # Derive the MIME subtype from the extension (".wav" -> "wav").
            content_type = file_extension[1:]

            data.add_field('displayName', display_name)
            data.add_field('file', file_data, filename=file_path, content_type=f"audio/{content_type}")

            async with self.session.post(url, headers=headers, data=data) as res:
                if res.status != 200:
                    raise APIError(f"Failed to add voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")

                return json.dumps(await res.json(), indent=4, ensure_ascii=False)

        finally:
            # Only close the session if this call created it.
            if should_cleanup and self.session:
                await self.session.close()
                self.session = None
+
@@ -0,0 +1,5 @@
1
# Languages supported by the Smallest TTS API (ISO 639-1 codes).
TTSLanguages = ["en", "hi"]

# Model identifiers accepted by the synthesis endpoints.
TTSModels = ["lightning", "lightning-large"]