smallestai 3.1.0__tar.gz → 4.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of smallestai might be problematic. Click here for more details.
- {smallestai-3.1.0/smallestai.egg-info → smallestai-4.0.0}/PKG-INFO +2 -1
- {smallestai-3.1.0 → smallestai-4.0.0}/pyproject.toml +2 -1
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/__init__.py +1 -1
- smallestai-4.0.0/smallestai/waves/__init__.py +5 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/waves/async_waves_client.py +42 -69
- smallestai-4.0.0/smallestai/waves/stream_tts.py +207 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/waves/utils.py +3 -49
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/waves/waves_client.py +41 -69
- {smallestai-3.1.0 → smallestai-4.0.0/smallestai.egg-info}/PKG-INFO +2 -1
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai.egg-info/requires.txt +1 -0
- smallestai-3.1.0/smallestai/waves/__init__.py +0 -5
- smallestai-3.1.0/smallestai/waves/stream_tts.py +0 -272
- {smallestai-3.1.0 → smallestai-4.0.0}/LICENSE +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/README.md +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/setup.cfg +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/__init__.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/__init__.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/agent_templates_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/agents_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/calls_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/campaigns_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/knowledge_base_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/logs_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/organization_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api/user_api.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api_client.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/api_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/atoms_client.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/configuration.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/exceptions.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/__init__.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/agent_dto.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/agent_dto_language.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/agent_dto_synthesizer.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/agent_dto_synthesizer_voice_config.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/api_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/bad_request_error_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_agent_from_template200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_agent_from_template_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_agent_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_agent_request_language.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_agent_request_language_synthesizer.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_agent_request_language_synthesizer_voice_config.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_campaign201_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_campaign201_response_data.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_campaign_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_knowledge_base201_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/create_knowledge_base_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/delete_agent200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_agent_by_id200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_agent_templates200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_agent_templates200_response_data_inner.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_agents200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_agents200_response_data.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_campaign_by_id200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_campaign_by_id200_response_data.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_campaigns200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_campaigns200_response_data_inner.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_campaigns200_response_data_inner_agent.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_campaigns200_response_data_inner_audience.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_campaigns_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_conversation_logs200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_conversation_logs200_response_data.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_current_user200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_current_user200_response_data.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_knowledge_base_by_id200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_knowledge_base_items200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_knowledge_bases200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_organization200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_organization200_response_data.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_organization200_response_data_members_inner.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/get_organization200_response_data_subscription.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/internal_server_error_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/knowledge_base_dto.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/knowledge_base_item_dto.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/start_outbound_call200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/start_outbound_call200_response_data.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/start_outbound_call_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/unauthorized_error_reponse.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/update_agent200_response.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/update_agent_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/update_agent_request_language.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/update_agent_request_synthesizer.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/update_agent_request_synthesizer_voice_config.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/update_agent_request_synthesizer_voice_config_one_of.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/update_agent_request_synthesizer_voice_config_one_of1.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/models/upload_text_to_knowledge_base_request.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/py.typed +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/atoms/rest.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/waves/exceptions.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai/waves/models.py +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai.egg-info/SOURCES.txt +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai.egg-info/dependency_links.txt +0 -0
- {smallestai-3.1.0 → smallestai-4.0.0}/smallestai.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: smallestai
|
|
3
|
-
Version: 3.1.0
|
|
3
|
+
Version: 4.0.0
|
|
4
4
|
Summary: Official Python client for the Smallest AI API
|
|
5
5
|
Author-email: Smallest <support@smallest.ai>
|
|
6
6
|
License: MIT
|
|
@@ -16,6 +16,7 @@ Requires-Dist: aiohttp
|
|
|
16
16
|
Requires-Dist: aiofiles
|
|
17
17
|
Requires-Dist: requests
|
|
18
18
|
Requires-Dist: pydub
|
|
19
|
+
Requires-Dist: websocket-client
|
|
19
20
|
Requires-Dist: urllib3<3.0.0,>=1.25.3
|
|
20
21
|
Requires-Dist: python-dateutil>=2.8.2
|
|
21
22
|
Requires-Dist: pydantic>=2
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "smallestai"
|
|
3
|
-
version = "3.1.0"
|
|
3
|
+
version = "4.0.0"
|
|
4
4
|
description = "Official Python client for the Smallest AI API"
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Smallest", email = "support@smallest.ai"},
|
|
@@ -19,6 +19,7 @@ dependencies = [
|
|
|
19
19
|
"aiofiles",
|
|
20
20
|
"requests",
|
|
21
21
|
"pydub",
|
|
22
|
+
"websocket-client",
|
|
22
23
|
"urllib3 >= 1.25.3, < 3.0.0",
|
|
23
24
|
"python-dateutil >= 2.8.2",
|
|
24
25
|
"pydantic >= 2",
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
from smallestai.waves.waves_client import WavesClient
|
|
2
|
+
from smallestai.waves.async_waves_client import AsyncWavesClient
|
|
3
|
+
from smallestai.waves.stream_tts import WavesStreamingTTS, TTSConfig
|
|
4
|
+
|
|
5
|
+
__all__ = ["WavesClient", "AsyncWavesClient", "WavesStreamingTTS", "TTSConfig"]
|
|
@@ -4,10 +4,10 @@ import json
|
|
|
4
4
|
import aiohttp
|
|
5
5
|
import aiofiles
|
|
6
6
|
import requests
|
|
7
|
-
from typing import Optional, Union, List
|
|
7
|
+
from typing import Optional, Union, List
|
|
8
8
|
|
|
9
9
|
from smallestai.waves.exceptions import TTSError, APIError
|
|
10
|
-
from smallestai.waves.utils import (TTSOptions, validate_input,
|
|
10
|
+
from smallestai.waves.utils import (TTSOptions, validate_input,
|
|
11
11
|
get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
|
|
12
12
|
|
|
13
13
|
|
|
@@ -22,7 +22,8 @@ class AsyncWavesClient:
|
|
|
22
22
|
consistency: Optional[float] = 0.5,
|
|
23
23
|
similarity: Optional[float] = 0.0,
|
|
24
24
|
enhancement: Optional[int] = 1,
|
|
25
|
-
|
|
25
|
+
language: Optional[str] = "en",
|
|
26
|
+
output_format: Optional[str] = "wav"
|
|
26
27
|
) -> None:
|
|
27
28
|
"""
|
|
28
29
|
AsyncSmallest Instance for asynchronous text-to-speech synthesis.
|
|
@@ -40,7 +41,8 @@ class AsyncWavesClient:
|
|
|
40
41
|
- consistency (float): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. Range - [0, 1]
|
|
41
42
|
- similarity (float): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. Range - [0, 1]
|
|
42
43
|
- enhancement (int): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. Range - [0, 2].
|
|
43
|
-
-
|
|
44
|
+
- language (str): The language for synthesis. Default is "en".
|
|
45
|
+
- output_format (str): The output audio format. Options: "pcm", "mp3", "wav", "mulaw". Default is "pcm".
|
|
44
46
|
|
|
45
47
|
Methods:
|
|
46
48
|
- get_languages: Returns a list of available languages for synthesis.
|
|
@@ -61,11 +63,12 @@ class AsyncWavesClient:
|
|
|
61
63
|
sample_rate=sample_rate,
|
|
62
64
|
voice_id=voice_id,
|
|
63
65
|
api_key=self.api_key,
|
|
64
|
-
add_wav_header=add_wav_header,
|
|
65
66
|
speed=speed,
|
|
66
67
|
consistency=consistency,
|
|
67
68
|
similarity=similarity,
|
|
68
|
-
enhancement=enhancement
|
|
69
|
+
enhancement=enhancement,
|
|
70
|
+
language=language,
|
|
71
|
+
output_format=output_format
|
|
69
72
|
)
|
|
70
73
|
self.session = None
|
|
71
74
|
|
|
@@ -130,18 +133,14 @@ class AsyncWavesClient:
|
|
|
130
133
|
async def synthesize(
|
|
131
134
|
self,
|
|
132
135
|
text: str,
|
|
133
|
-
stream: Optional[bool] = False,
|
|
134
|
-
save_as: Optional[str] = None,
|
|
135
136
|
**kwargs
|
|
136
|
-
) -> Union[bytes
|
|
137
|
+
) -> Union[bytes]:
|
|
137
138
|
"""
|
|
138
139
|
Asynchronously synthesize speech from the provided text.
|
|
139
140
|
|
|
140
141
|
Args:
|
|
141
142
|
- text (str): The text to be converted to speech.
|
|
142
143
|
- stream (Optional[bool]): If True, returns an iterator yielding audio chunks instead of a full byte array.
|
|
143
|
-
- save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
|
|
144
|
-
The file must have a .wav extension.
|
|
145
144
|
- kwargs: Additional optional parameters to override `__init__` options for this call.
|
|
146
145
|
|
|
147
146
|
Returns:
|
|
@@ -151,7 +150,7 @@ class AsyncWavesClient:
|
|
|
151
150
|
- Otherwise, returns the synthesized audio content as bytes.
|
|
152
151
|
|
|
153
152
|
Raises:
|
|
154
|
-
- TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
|
|
153
|
+
- TTSError: If the provided file name does not have a .wav or .mp3 extension when `save_as` is specified.
|
|
155
154
|
- APIError: If the API request fails or returns an error.
|
|
156
155
|
- ValueError: If an unexpected parameter is passed in `kwargs`.
|
|
157
156
|
"""
|
|
@@ -172,65 +171,40 @@ class AsyncWavesClient:
|
|
|
172
171
|
for key, value in kwargs.items():
|
|
173
172
|
setattr(opts, key, value)
|
|
174
173
|
|
|
175
|
-
text = preprocess_text(text)
|
|
176
174
|
validate_input(text, opts.model, opts.sample_rate, opts.speed, opts.consistency, opts.similarity, opts.enhancement)
|
|
177
175
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
"voice_id": opts.voice_id,
|
|
190
|
-
"add_wav_header": False,
|
|
191
|
-
"speed": opts.speed,
|
|
192
|
-
"model": opts.model
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
if opts.model == "lightning-large" or opts.model == "lightning-v2":
|
|
196
|
-
if opts.consistency is not None:
|
|
197
|
-
payload["consistency"] = opts.consistency
|
|
198
|
-
if opts.similarity is not None:
|
|
199
|
-
payload["similarity"] = opts.similarity
|
|
200
|
-
if opts.enhancement is not None:
|
|
201
|
-
payload["enhancement"] = opts.enhancement
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
headers = {
|
|
205
|
-
"Authorization": f"Bearer {self.api_key}",
|
|
206
|
-
"Content-Type": "application/json",
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
|
|
210
|
-
if res.status != 200:
|
|
211
|
-
raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
|
|
212
|
-
|
|
213
|
-
yield await res.read()
|
|
176
|
+
payload = {
|
|
177
|
+
"text": text,
|
|
178
|
+
"voice_id": opts.voice_id,
|
|
179
|
+
"sample_rate": opts.sample_rate,
|
|
180
|
+
"speed": opts.speed,
|
|
181
|
+
"consistency": opts.consistency,
|
|
182
|
+
"similarity": opts.similarity,
|
|
183
|
+
"enhancement": opts.enhancement,
|
|
184
|
+
"language": opts.language,
|
|
185
|
+
"output_format": opts.output_format
|
|
186
|
+
}
|
|
214
187
|
|
|
215
|
-
if
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
return None
|
|
228
|
-
|
|
229
|
-
if opts.add_wav_header:
|
|
230
|
-
return add_wav_header(audio_content, opts.sample_rate)
|
|
188
|
+
if opts.model == "lightning-large" or opts.model == "lightning-v2":
|
|
189
|
+
if opts.consistency is not None:
|
|
190
|
+
payload["consistency"] = opts.consistency
|
|
191
|
+
if opts.similarity is not None:
|
|
192
|
+
payload["similarity"] = opts.similarity
|
|
193
|
+
if opts.enhancement is not None:
|
|
194
|
+
payload["enhancement"] = opts.enhancement
|
|
195
|
+
|
|
196
|
+
headers = {
|
|
197
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
198
|
+
"Content-Type": "application/json",
|
|
199
|
+
}
|
|
231
200
|
|
|
232
|
-
|
|
201
|
+
async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
|
|
202
|
+
if res.status != 200:
|
|
203
|
+
raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
|
|
204
|
+
|
|
205
|
+
audio_bytes = await res.content.read()
|
|
233
206
|
|
|
207
|
+
return audio_bytes
|
|
234
208
|
finally:
|
|
235
209
|
if should_cleanup and self.session:
|
|
236
210
|
await self.session.close()
|
|
@@ -316,9 +290,8 @@ class AsyncWavesClient:
|
|
|
316
290
|
if res.status != 200:
|
|
317
291
|
raise APIError(f"Failed to delete voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
|
|
318
292
|
|
|
319
|
-
return await res.
|
|
320
|
-
|
|
293
|
+
return json.dumps(await res.json(), indent=4, ensure_ascii=False)
|
|
321
294
|
finally:
|
|
322
295
|
if should_cleanup and self.session:
|
|
323
296
|
await self.session.close()
|
|
324
|
-
self.session = None
|
|
297
|
+
self.session = None
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import base64
|
|
3
|
+
import time
|
|
4
|
+
import threading
|
|
5
|
+
import queue
|
|
6
|
+
from typing import Generator
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from websocket import WebSocketApp
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class TTSConfig:
|
|
12
|
+
voice_id: str
|
|
13
|
+
api_key: str
|
|
14
|
+
language: str = "en"
|
|
15
|
+
sample_rate: int = 24000
|
|
16
|
+
speed: float = 1.0
|
|
17
|
+
consistency: float = 0.5
|
|
18
|
+
enhancement: int = 1
|
|
19
|
+
similarity: float = 0
|
|
20
|
+
max_buffer_flush_ms: int = 0
|
|
21
|
+
|
|
22
|
+
class WavesStreamingTTS:
|
|
23
|
+
def __init__(self, config: TTSConfig):
|
|
24
|
+
self.config = config
|
|
25
|
+
self.ws_url = "wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream"
|
|
26
|
+
self.ws = None
|
|
27
|
+
self.audio_queue = queue.Queue()
|
|
28
|
+
self.error_queue = queue.Queue()
|
|
29
|
+
self.is_complete = False
|
|
30
|
+
self.is_connected = False
|
|
31
|
+
self.request_id = None
|
|
32
|
+
|
|
33
|
+
def _get_headers(self):
|
|
34
|
+
return [f"Authorization: Bearer {self.config.api_key}"]
|
|
35
|
+
|
|
36
|
+
def _create_payload(self, text: str, continue_stream: bool = False, flush: bool = False):
|
|
37
|
+
return {
|
|
38
|
+
"voice_id": self.config.voice_id,
|
|
39
|
+
"text": text,
|
|
40
|
+
"language": self.config.language,
|
|
41
|
+
"sample_rate": self.config.sample_rate,
|
|
42
|
+
"speed": self.config.speed,
|
|
43
|
+
"consistency": self.config.consistency,
|
|
44
|
+
"similarity": self.config.similarity,
|
|
45
|
+
"enhancement": self.config.enhancement,
|
|
46
|
+
"max_buffer_flush_ms": self.config.max_buffer_flush_ms,
|
|
47
|
+
"continue": continue_stream,
|
|
48
|
+
"flush": flush
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
def _on_open(self, ws):
|
|
52
|
+
self.is_connected = True
|
|
53
|
+
|
|
54
|
+
def _on_message(self, ws, message):
|
|
55
|
+
try:
|
|
56
|
+
data = json.loads(message)
|
|
57
|
+
status = data.get("status", "")
|
|
58
|
+
|
|
59
|
+
if status == "error":
|
|
60
|
+
self.error_queue.put(Exception(data.get("message", "Unknown error")))
|
|
61
|
+
return
|
|
62
|
+
|
|
63
|
+
if not self.request_id:
|
|
64
|
+
self.request_id = data.get("request_id")
|
|
65
|
+
|
|
66
|
+
audio_b64 = data.get("data", {}).get("audio")
|
|
67
|
+
if audio_b64:
|
|
68
|
+
self.audio_queue.put(base64.b64decode(audio_b64))
|
|
69
|
+
|
|
70
|
+
if status == "complete":
|
|
71
|
+
self.is_complete = True
|
|
72
|
+
self.audio_queue.put(None)
|
|
73
|
+
|
|
74
|
+
except Exception as e:
|
|
75
|
+
self.error_queue.put(e)
|
|
76
|
+
|
|
77
|
+
def _on_error(self, ws, error):
|
|
78
|
+
self.error_queue.put(error)
|
|
79
|
+
|
|
80
|
+
def _on_close(self, ws, *args):
|
|
81
|
+
self.is_connected = False
|
|
82
|
+
if not self.is_complete:
|
|
83
|
+
self.audio_queue.put(None)
|
|
84
|
+
|
|
85
|
+
def _connect(self):
|
|
86
|
+
if self.ws:
|
|
87
|
+
self.ws.close()
|
|
88
|
+
|
|
89
|
+
self.ws = WebSocketApp(
|
|
90
|
+
self.ws_url,
|
|
91
|
+
header=self._get_headers(),
|
|
92
|
+
on_open=self._on_open,
|
|
93
|
+
on_message=self._on_message,
|
|
94
|
+
on_error=self._on_error,
|
|
95
|
+
on_close=self._on_close
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
ws_thread = threading.Thread(target=self.ws.run_forever)
|
|
99
|
+
ws_thread.daemon = True
|
|
100
|
+
ws_thread.start()
|
|
101
|
+
|
|
102
|
+
timeout = 5.0
|
|
103
|
+
start_time = time.time()
|
|
104
|
+
while not self.is_connected and time.time() - start_time < timeout:
|
|
105
|
+
time.sleep(0.1)
|
|
106
|
+
|
|
107
|
+
if not self.is_connected:
|
|
108
|
+
raise Exception("Failed to connect to WebSocket")
|
|
109
|
+
|
|
110
|
+
def synthesize(self, text: str) -> Generator[bytes, None, None]:
|
|
111
|
+
self._reset_state()
|
|
112
|
+
self._connect()
|
|
113
|
+
|
|
114
|
+
payload = self._create_payload(text)
|
|
115
|
+
self.ws.send(json.dumps(payload))
|
|
116
|
+
|
|
117
|
+
while True:
|
|
118
|
+
if not self.error_queue.empty():
|
|
119
|
+
raise self.error_queue.get()
|
|
120
|
+
|
|
121
|
+
try:
|
|
122
|
+
chunk = self.audio_queue.get(timeout=1.0)
|
|
123
|
+
if chunk is None:
|
|
124
|
+
break
|
|
125
|
+
yield chunk
|
|
126
|
+
except queue.Empty:
|
|
127
|
+
if self.is_complete:
|
|
128
|
+
break
|
|
129
|
+
continue
|
|
130
|
+
|
|
131
|
+
self.ws.close()
|
|
132
|
+
|
|
133
|
+
def synthesize_streaming(self, text_stream: Generator[str, None, None],
|
|
134
|
+
continue_stream: bool = True,
|
|
135
|
+
auto_flush: bool = True) -> Generator[bytes, None, None]:
|
|
136
|
+
self._reset_state()
|
|
137
|
+
self._connect()
|
|
138
|
+
|
|
139
|
+
def send_text():
|
|
140
|
+
try:
|
|
141
|
+
for text_chunk in text_stream:
|
|
142
|
+
if text_chunk.strip():
|
|
143
|
+
payload = self._create_payload(text_chunk, continue_stream=continue_stream)
|
|
144
|
+
self.ws.send(json.dumps(payload))
|
|
145
|
+
|
|
146
|
+
if auto_flush:
|
|
147
|
+
flush_payload = self._create_payload("", flush=True)
|
|
148
|
+
self.ws.send(json.dumps(flush_payload))
|
|
149
|
+
except Exception as e:
|
|
150
|
+
self.error_queue.put(e)
|
|
151
|
+
|
|
152
|
+
sender_thread = threading.Thread(target=send_text)
|
|
153
|
+
sender_thread.daemon = True
|
|
154
|
+
sender_thread.start()
|
|
155
|
+
|
|
156
|
+
while True:
|
|
157
|
+
if not self.error_queue.empty():
|
|
158
|
+
raise self.error_queue.get()
|
|
159
|
+
|
|
160
|
+
try:
|
|
161
|
+
chunk = self.audio_queue.get(timeout=1.0)
|
|
162
|
+
if chunk is None:
|
|
163
|
+
break
|
|
164
|
+
yield chunk
|
|
165
|
+
except queue.Empty:
|
|
166
|
+
if self.is_complete:
|
|
167
|
+
break
|
|
168
|
+
continue
|
|
169
|
+
|
|
170
|
+
self.ws.close()
|
|
171
|
+
|
|
172
|
+
def send_text_chunk(self, text: str, continue_stream: bool = True, flush: bool = False):
|
|
173
|
+
if not self.is_connected:
|
|
174
|
+
raise Exception("WebSocket not connected")
|
|
175
|
+
payload = self._create_payload(text, continue_stream=continue_stream, flush=flush)
|
|
176
|
+
self.ws.send(json.dumps(payload))
|
|
177
|
+
|
|
178
|
+
def flush_buffer(self):
|
|
179
|
+
if not self.is_connected:
|
|
180
|
+
raise Exception("WebSocket not connected")
|
|
181
|
+
payload = self._create_payload("", flush=True)
|
|
182
|
+
self.ws.send(json.dumps(payload))
|
|
183
|
+
|
|
184
|
+
def start_streaming_session(self) -> Generator[bytes, None, None]:
|
|
185
|
+
self._reset_state()
|
|
186
|
+
self._connect()
|
|
187
|
+
|
|
188
|
+
while True:
|
|
189
|
+
if not self.error_queue.empty():
|
|
190
|
+
raise self.error_queue.get()
|
|
191
|
+
|
|
192
|
+
try:
|
|
193
|
+
chunk = self.audio_queue.get(timeout=0.1)
|
|
194
|
+
if chunk is None:
|
|
195
|
+
break
|
|
196
|
+
yield chunk
|
|
197
|
+
except queue.Empty:
|
|
198
|
+
if self.is_complete:
|
|
199
|
+
break
|
|
200
|
+
continue
|
|
201
|
+
|
|
202
|
+
def _reset_state(self):
|
|
203
|
+
self.audio_queue = queue.Queue()
|
|
204
|
+
self.error_queue = queue.Queue()
|
|
205
|
+
self.is_complete = False
|
|
206
|
+
self.is_connected = False
|
|
207
|
+
self.request_id = None
|
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
import re
|
|
2
|
-
import io
|
|
3
1
|
from typing import List
|
|
4
2
|
from typing import Optional
|
|
5
|
-
from pydub import AudioSegment
|
|
6
3
|
from dataclasses import dataclass
|
|
7
4
|
|
|
8
5
|
from smallestai.waves.exceptions import ValidationError
|
|
@@ -10,7 +7,7 @@ from smallestai.waves.models import TTSModels, TTSLanguages_lightning, TTSLangua
|
|
|
10
7
|
|
|
11
8
|
|
|
12
9
|
API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
|
|
13
|
-
|
|
10
|
+
WEBSOCKET_URL = "wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream"
|
|
14
11
|
SAMPLE_WIDTH = 2
|
|
15
12
|
CHANNELS = 1
|
|
16
13
|
ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
|
|
@@ -22,11 +19,12 @@ class TTSOptions:
|
|
|
22
19
|
sample_rate: int
|
|
23
20
|
voice_id: str
|
|
24
21
|
api_key: str
|
|
25
|
-
add_wav_header: bool
|
|
26
22
|
speed: float
|
|
27
23
|
consistency: float
|
|
28
24
|
similarity: float
|
|
29
25
|
enhancement: int
|
|
26
|
+
language: str
|
|
27
|
+
output_format: str
|
|
30
28
|
|
|
31
29
|
|
|
32
30
|
def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[int] = None):
|
|
@@ -46,50 +44,6 @@ def validate_input(text: str, model: str, sample_rate: int, speed: float, consis
|
|
|
46
44
|
raise ValidationError(f"Invalid enhancement: {enhancement}. Must be between 0 and 2.")
|
|
47
45
|
|
|
48
46
|
|
|
49
|
-
def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: int = 2, channels: int = 1) -> bytes:
|
|
50
|
-
audio = AudioSegment(data=frame_input, sample_width=sample_width, frame_rate=sample_rate, channels=channels)
|
|
51
|
-
wav_buf = io.BytesIO()
|
|
52
|
-
audio.export(wav_buf, format="wav")
|
|
53
|
-
wav_buf.seek(0)
|
|
54
|
-
return wav_buf.read()
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def preprocess_text(text: str) -> str:
|
|
58
|
-
text = text.replace("\n", " ").replace("\t", " ")
|
|
59
|
-
text = re.sub(r'\s+', ' ', text)
|
|
60
|
-
return text.strip()
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def chunk_text(text: str, chunk_size: int = 250) -> List[str]:
|
|
64
|
-
chunks = []
|
|
65
|
-
while text:
|
|
66
|
-
if len(text) <= chunk_size:
|
|
67
|
-
chunks.append(text.strip())
|
|
68
|
-
break
|
|
69
|
-
|
|
70
|
-
chunk_text = text[:chunk_size]
|
|
71
|
-
last_break_index = -1
|
|
72
|
-
|
|
73
|
-
# Find last sentence boundary using regex
|
|
74
|
-
for i in range(len(chunk_text) - 1, -1, -1):
|
|
75
|
-
if SENTENCE_END_REGEX.match(chunk_text[:i + 1]):
|
|
76
|
-
last_break_index = i
|
|
77
|
-
break
|
|
78
|
-
|
|
79
|
-
if last_break_index == -1:
|
|
80
|
-
# Fallback to space if no sentence boundary found
|
|
81
|
-
last_space = chunk_text.rfind(' ')
|
|
82
|
-
if last_space != -1:
|
|
83
|
-
last_break_index = last_space
|
|
84
|
-
else:
|
|
85
|
-
last_break_index = chunk_size - 1
|
|
86
|
-
|
|
87
|
-
chunks.append(text[:last_break_index + 1].strip())
|
|
88
|
-
text = text[last_break_index + 1:].strip()
|
|
89
|
-
|
|
90
|
-
return chunks
|
|
91
|
-
|
|
92
|
-
|
|
93
47
|
def get_smallest_languages(model: str = 'lightning') -> List[str]:
|
|
94
48
|
if model == 'lightning':
|
|
95
49
|
return TTSLanguages_lightning
|