smallestai 2.1.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of smallestai might be problematic.

Files changed (96)
  1. smallestai/__init__.py +95 -0
  2. smallestai/atoms/__init__.py +182 -0
  3. smallestai/atoms/api/__init__.py +12 -0
  4. smallestai/atoms/api/agent_templates_api.py +573 -0
  5. smallestai/atoms/api/agents_api.py +1465 -0
  6. smallestai/atoms/api/calls_api.py +320 -0
  7. smallestai/atoms/api/campaigns_api.py +1689 -0
  8. smallestai/atoms/api/knowledge_base_api.py +2271 -0
  9. smallestai/atoms/api/logs_api.py +305 -0
  10. smallestai/atoms/api/organization_api.py +285 -0
  11. smallestai/atoms/api/user_api.py +285 -0
  12. smallestai/atoms/api_client.py +797 -0
  13. smallestai/atoms/api_response.py +21 -0
  14. smallestai/atoms/atoms_client.py +560 -0
  15. smallestai/atoms/configuration.py +582 -0
  16. smallestai/atoms/exceptions.py +216 -0
  17. smallestai/atoms/models/__init__.py +72 -0
  18. smallestai/atoms/models/agent_dto.py +130 -0
  19. smallestai/atoms/models/agent_dto_language.py +91 -0
  20. smallestai/atoms/models/agent_dto_synthesizer.py +99 -0
  21. smallestai/atoms/models/agent_dto_synthesizer_voice_config.py +111 -0
  22. smallestai/atoms/models/api_response.py +89 -0
  23. smallestai/atoms/models/bad_request_error_response.py +89 -0
  24. smallestai/atoms/models/create_agent_from_template200_response.py +89 -0
  25. smallestai/atoms/models/create_agent_from_template_request.py +91 -0
  26. smallestai/atoms/models/create_agent_request.py +113 -0
  27. smallestai/atoms/models/create_agent_request_language.py +124 -0
  28. smallestai/atoms/models/create_agent_request_language_synthesizer.py +110 -0
  29. smallestai/atoms/models/create_agent_request_language_synthesizer_voice_config.py +137 -0
  30. smallestai/atoms/models/create_campaign200_response.py +93 -0
  31. smallestai/atoms/models/create_campaign200_response_data.py +106 -0
  32. smallestai/atoms/models/create_campaign200_response_inner.py +106 -0
  33. smallestai/atoms/models/create_campaign201_response.py +93 -0
  34. smallestai/atoms/models/create_campaign201_response_data.py +104 -0
  35. smallestai/atoms/models/create_campaign_request.py +93 -0
  36. smallestai/atoms/models/create_knowledge_base201_response.py +89 -0
  37. smallestai/atoms/models/create_knowledge_base_request.py +89 -0
  38. smallestai/atoms/models/delete_agent200_response.py +87 -0
  39. smallestai/atoms/models/get_agent_by_id200_response.py +93 -0
  40. smallestai/atoms/models/get_agent_templates200_response.py +97 -0
  41. smallestai/atoms/models/get_agent_templates200_response_data_inner.py +97 -0
  42. smallestai/atoms/models/get_agents200_response.py +93 -0
  43. smallestai/atoms/models/get_agents200_response_data.py +101 -0
  44. smallestai/atoms/models/get_campaign_by_id200_response.py +93 -0
  45. smallestai/atoms/models/get_campaign_by_id200_response_data.py +114 -0
  46. smallestai/atoms/models/get_campaigns200_response.py +97 -0
  47. smallestai/atoms/models/get_campaigns200_response_data_inner.py +118 -0
  48. smallestai/atoms/models/get_campaigns200_response_data_inner_agent.py +89 -0
  49. smallestai/atoms/models/get_campaigns200_response_data_inner_audience.py +89 -0
  50. smallestai/atoms/models/get_campaigns_request.py +89 -0
  51. smallestai/atoms/models/get_conversation200_response.py +93 -0
  52. smallestai/atoms/models/get_conversation200_response_data.py +125 -0
  53. smallestai/atoms/models/get_conversation_logs200_response.py +93 -0
  54. smallestai/atoms/models/get_conversation_logs200_response_data.py +125 -0
  55. smallestai/atoms/models/get_current_user200_response.py +93 -0
  56. smallestai/atoms/models/get_current_user200_response_data.py +99 -0
  57. smallestai/atoms/models/get_knowledge_base_by_id200_response.py +93 -0
  58. smallestai/atoms/models/get_knowledge_base_items200_response.py +97 -0
  59. smallestai/atoms/models/get_knowledge_bases200_response.py +97 -0
  60. smallestai/atoms/models/get_organization200_response.py +93 -0
  61. smallestai/atoms/models/get_organization200_response_data.py +105 -0
  62. smallestai/atoms/models/get_organization200_response_data_members_inner.py +89 -0
  63. smallestai/atoms/models/get_organization200_response_data_subscription.py +87 -0
  64. smallestai/atoms/models/internal_server_error_response.py +89 -0
  65. smallestai/atoms/models/knowledge_base_dto.py +93 -0
  66. smallestai/atoms/models/knowledge_base_item_dto.py +124 -0
  67. smallestai/atoms/models/start_outbound_call200_response.py +93 -0
  68. smallestai/atoms/models/start_outbound_call200_response_data.py +87 -0
  69. smallestai/atoms/models/start_outbound_call_request.py +89 -0
  70. smallestai/atoms/models/unauthorized_error_reponse.py +89 -0
  71. smallestai/atoms/models/update_agent200_response.py +89 -0
  72. smallestai/atoms/models/update_agent_request.py +119 -0
  73. smallestai/atoms/models/update_agent_request_language.py +99 -0
  74. smallestai/atoms/models/update_agent_request_synthesizer.py +110 -0
  75. smallestai/atoms/models/update_agent_request_synthesizer_voice_config.py +137 -0
  76. smallestai/atoms/models/update_agent_request_synthesizer_voice_config_one_of.py +111 -0
  77. smallestai/atoms/models/update_agent_request_synthesizer_voice_config_one_of1.py +99 -0
  78. smallestai/atoms/models/upload_text_to_knowledge_base_request.py +89 -0
  79. smallestai/atoms/py.typed +0 -0
  80. smallestai/atoms/rest.py +258 -0
  81. smallestai/waves/__init__.py +5 -0
  82. smallest/async_tts.py → smallestai/waves/async_waves_client.py +60 -47
  83. smallestai/waves/stream_tts.py +272 -0
  84. {smallest → smallestai/waves}/utils.py +8 -8
  85. smallest/tts.py → smallestai/waves/waves_client.py +58 -46
  86. {smallestai-2.1.0.dist-info → smallestai-3.0.0.dist-info}/METADATA +194 -43
  87. smallestai-3.0.0.dist-info/RECORD +92 -0
  88. {smallestai-2.1.0.dist-info → smallestai-3.0.0.dist-info}/WHEEL +1 -1
  89. smallestai-3.0.0.dist-info/top_level.txt +1 -0
  90. smallest/__init__.py +0 -5
  91. smallest/stream_tts.py +0 -161
  92. smallestai-2.1.0.dist-info/RECORD +0 -12
  93. smallestai-2.1.0.dist-info/top_level.txt +0 -1
  94. {smallest → smallestai/waves}/exceptions.py +0 -0
  95. {smallest → smallestai/waves}/models.py +0 -0
  96. {smallestai-2.1.0.dist-info → smallestai-3.0.0.dist-info/licenses}/LICENSE +0 -0
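
The file listing shows the main restructuring in 3.0.0: the old top-level `smallest` package becomes `smallestai.waves`, the TTS clients are renamed (`Smallest` → `WavesClient`, `AsyncSmallest` → `AsyncWavesClient`), and a new `smallestai.atoms` sub-package (a generated client for the agents, campaigns, calls, and knowledge-base APIs) is added. A minimal import-migration sketch, assuming the module paths implied by the renamed files in the diffs below (the package may additionally re-export these names from `smallestai/__init__.py`):

    # 2.1.0 (old layout)
    # from smallest.tts import Smallest
    # from smallest.async_tts import AsyncSmallest

    # 3.0.0 (new layout)
    from smallestai.waves.waves_client import WavesClient
    from smallestai.waves.async_waves_client import AsyncWavesClient
    from smallestai.waves.stream_tts import TextToAudioStream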
@@ -0,0 +1,272 @@
+ import asyncio
+ import time
+ from threading import Thread
+ from queue import Queue, Empty
+ from typing import AsyncGenerator, Optional, Union, List, Dict, Any
+
+ from smallestai.waves.waves_client import WavesClient
+ from smallestai.waves.exceptions import APIError
+ from smallestai.waves.async_waves_client import AsyncWavesClient
+ from smallestai.waves.utils import SENTENCE_END_REGEX
+
+ class TextToAudioStream:
+     def __init__(
+         self,
+         tts_instance: Union[WavesClient, AsyncWavesClient],
+         queue_timeout: Optional[float] = 5.0,
+         max_retries: Optional[int] = 3,
+         verbose: bool = False
+     ):
+         """
+         A real-time text-to-speech processor that converts streaming text into audio output.
+         Useful for applications requiring immediate audio feedback from text generation,
+         such as voice assistants, live captioning, or interactive chatbots.
+
+         ⚠️ `add_wav_header` is disabled by default for streaming efficiency. Refer to the README for more information.
+
+         Features:
+         - Streams audio chunks as soon as text is available.
+         - Handles both sync and async text-to-speech engines.
+         - Automatically retries failed synthesis attempts.
+         - Low latency between text generation and speech output.
+
+         Args:
+             tts_instance: The text-to-speech engine to use (WavesClient or AsyncWavesClient)
+             queue_timeout: How long to wait for new text (seconds, default: 5.0)
+             max_retries: Number of retry attempts for failed synthesis (default: 3)
+             verbose: Whether to log detailed metrics about TTS requests (default: False)
+         """
+         self.tts_instance = tts_instance
+         self.tts_instance.opts.add_wav_header = False
+         self.sentence_end_regex = SENTENCE_END_REGEX
+         self.queue_timeout = queue_timeout
+         self.max_retries = max_retries
+         self.queue = Queue()
+         self.buffer_size = 250
+         self.stop_flag = False
+         self.verbose = verbose
+
+         # Metrics tracking
+         self.request_count = 0
+         self.request_logs: List[Dict[str, Any]] = []
+         self.start_time = 0
+         self.first_api_response_time = None
+         self.end_time = 0
+
+         if self.tts_instance.opts.model == 'lightning-large':
+             self.buffer_size = 140
+
+
+     async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
+         """
+         Streams the LLM output, splitting it into chunks based on sentence boundaries
+         or space characters if no sentence boundary is found before reaching buffer_size.
+
+         Parameters:
+         - llm_output (AsyncGenerator[str, None]): An async generator yielding LLM output.
+         """
+         buffer = ""
+
+         async for chunk in llm_output:
+             buffer += chunk
+
+             while len(buffer) > self.buffer_size:
+                 chunk_text = buffer[:self.buffer_size]
+                 last_break_index = -1
+
+                 # Find last sentence boundary using regex
+                 for i in range(len(chunk_text) - 1, -1, -1):
+                     if self.sentence_end_regex.match(chunk_text[:i + 1]):
+                         last_break_index = i
+                         break
+
+                 if last_break_index == -1:
+                     # Fall back to a space if no sentence boundary is found
+                     last_space = chunk_text.rfind(' ')
+                     if last_space != -1:
+                         last_break_index = last_space
+                     else:
+                         last_break_index = self.buffer_size - 1
+
+                 # Add chunk to queue and update buffer
+                 self.queue.put(f'{buffer[:last_break_index + 1].replace("—", " ").strip()} ')
+                 buffer = buffer[last_break_index + 1:].strip()
+
+         # Don't forget the remaining text
+         if buffer:
+             self.queue.put(f'{buffer.replace("—", " ").strip()} ')
+
+         self.stop_flag = True
+
+
+     def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
+         """Synchronously synthesizes a given sentence."""
+         request_start_time = time.time()
+         request_id = self.request_count + 1
+
+         try:
+             audio_content = self.tts_instance.synthesize(sentence)
+             self.request_count += 1
+             request_end_time = time.time()
+
+             if self.verbose:
+                 request_duration = request_end_time - request_start_time
+                 if self.first_api_response_time is None:
+                     self.first_api_response_time = time.time() - self.start_time
+
+                 self.request_logs.append({
+                     "id": request_id,
+                     "text": sentence,
+                     "start_time": request_start_time - self.start_time,
+                     "end_time": request_end_time - self.start_time,
+                     "duration": request_duration,
+                     "char_count": len(sentence),
+                     "retries": retries
+                 })
+
+             return audio_content
+         except APIError as e:
+             if retries < self.max_retries:
+                 if self.verbose:
+                     print(f"Retry {retries + 1}/{self.max_retries} for request: '{sentence[:30]}...'")
+                 return self._synthesize_sync(sentence, retries + 1)
+             else:
+                 if self.verbose:
+                     print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries exhausted. For more information, visit https://waves.smallest.ai/")
+                 return None
+
+
+     async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
+         """Asynchronously synthesizes a given sentence."""
+         request_start_time = time.time()
+         request_id = self.request_count + 1
+
+         try:
+             audio_content = await self.tts_instance.synthesize(sentence)
+             self.request_count += 1
+             request_end_time = time.time()
+
+             if self.verbose:
+                 request_duration = request_end_time - request_start_time
+                 if self.first_api_response_time is None:
+                     self.first_api_response_time = time.time() - self.start_time
+
+                 self.request_logs.append({
+                     "id": request_id,
+                     "text": sentence,
+                     "start_time": request_start_time - self.start_time,
+                     "end_time": request_end_time - self.start_time,
+                     "duration": request_duration,
+                     "char_count": len(sentence),
+                     "retries": retries
+                 })
+
+             return audio_content
+         except APIError as e:
+             if retries < self.max_retries:
+                 if self.verbose:
+                     print(f"Retry {retries + 1}/{self.max_retries} for request: '{sentence[:30]}...'")
+                 return await self._synthesize_async(sentence, retries + 1)
+             else:
+                 if self.verbose:
+                     print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries exhausted. For more information, visit https://waves.smallest.ai/")
+                 return None
+
+
+     async def _run_synthesis(self) -> AsyncGenerator[bytes, None]:
+         """
+         Continuously synthesizes sentences from the queue, yielding audio content.
+         If no sentences are in the queue, it waits until new data is available or streaming is complete.
+         """
+         while not self.stop_flag or not self.queue.empty():
+             try:
+                 sentence = self.queue.get_nowait()
+
+                 if isinstance(self.tts_instance, AsyncWavesClient):
+                     audio_content = await self._synthesize_async(sentence)
+                 else:
+                     loop = asyncio.get_running_loop()
+                     audio_content = await loop.run_in_executor(None, self._synthesize_sync, sentence)
+
+                 if audio_content:
+                     yield audio_content
+
+             except Empty:
+                 # Quick check if we should exit
+                 if self.stop_flag and self.queue.empty():
+                     break
+
+                 # Short sleep to avoid busy-waiting
+                 await asyncio.sleep(0.01)  # 10 ms
+
+
+     def _print_verbose_summary(self) -> None:
+         """Print a summary of all metrics if verbose mode is enabled."""
+         if not self.verbose:
+             return
+
+         total_duration = self.end_time - self.start_time
+
+         print("\n" + "="*100)
+         print("TEXT-TO-AUDIO STREAM METRICS")
+         print("="*100)
+
+         print("\nOVERALL STATISTICS:")
+         print(f"  Total requests made: {self.request_count}")
+         print(f"  Time to first API response: {self.first_api_response_time:.3f}s")
+         print(f"  Total processing time: {total_duration:.3f}s")
+
+         # Print table header
+         print("\nREQUEST DETAILS:")
+         header = f"{'#':4} {'Start (s)':10} {'End (s)':10} {'Duration (s)':12} {'Characters':15} {'Text'}"
+         print("\n" + header)
+         print("-" * 100)
+
+         # Print table rows
+         for log in self.request_logs:
+             row = (
+                 f"{log['id']:4} "
+                 f"{log['start_time']:10.3f} "
+                 f"{log['end_time']:10.3f} "
+                 f"{log['duration']:12.3f} "
+                 f"{log['char_count']:15} "
+                 f"{log['text'][:50]}{'...' if len(log['text']) > 50 else ''}"
+             )
+             print(row)
+
+             # Print retry information if any
+             if log['retries'] > 0:
+                 print(f"{'':4} {'':10} {'':10} {'':12} {'':15} Retries: {log['retries']}")
+
+         print("\n" + "="*100)
+
+
+     async def process(self, llm_output: AsyncGenerator[str, None]) -> AsyncGenerator[bytes, None]:
+         """
+         Convert streaming text into audio in real time.
+
+         Handles the entire pipeline from receiving text to producing audio,
+         yielding audio chunks as soon as they're ready.
+
+         Args:
+             llm_output: An async generator that yields text chunks.
+
+         Yields:
+             Raw audio data chunks (without WAV headers) that can be:
+             - Played directly through an audio device
+             - Saved to a file
+             - Streamed over a network
+             - Further processed as needed
+         """
+         self.start_time = time.time()
+
+         llm_thread = Thread(target=asyncio.run, args=(self._stream_llm_output(llm_output),))
+         llm_thread.start()
+
+         async for audio_content in self._run_synthesis():
+             yield audio_content
+
+         llm_thread.join()
+
+         self.end_time = time.time()
+         self._print_verbose_summary()
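
The new `TextToAudioStream` above pipes an async text stream into per-sentence synthesis requests and yields raw audio chunks (no WAV header). A minimal usage sketch, assuming `SMALLEST_API_KEY` is set in the environment; the generator and output file name are hypothetical:

    import asyncio

    from smallestai.waves.stream_tts import TextToAudioStream
    from smallestai.waves.waves_client import WavesClient

    async def fake_llm_output():
        # Stand-in for a streaming LLM response.
        for piece in ["Hello there. ", "This audio is synthesized ", "while the text is still streaming."]:
            yield piece

    async def main():
        tts = WavesClient()  # add_wav_header is forced off by TextToAudioStream
        processor = TextToAudioStream(tts_instance=tts, verbose=True)
        with open("stream_output.raw", "wb") as f:  # raw audio frames, no WAV header
            async for audio_chunk in processor.process(fake_llm_output()):
                f.write(audio_chunk)

    asyncio.run(main())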
@@ -4,15 +4,13 @@ from typing import List
  from typing import Optional
  from pydub import AudioSegment
  from dataclasses import dataclass
- from sacremoses import MosesPunctNormalizer
 
- from smallest.exceptions import ValidationError
- from smallest.models import TTSModels, TTSLanguages
+ from smallestai.waves.exceptions import ValidationError
+ from smallestai.waves.models import TTSModels, TTSLanguages
 
 
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
  SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$')
- mpn = MosesPunctNormalizer()
  SAMPLE_WIDTH = 2
  CHANNELS = 1
  ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
@@ -26,9 +24,12 @@ class TTSOptions:
      api_key: str
      add_wav_header: bool
      speed: float
+     consistency: float
+     similarity: float
+     enhancement: int
 
 
- def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[bool] = None):
+ def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[int] = None):
      if not text:
          raise ValidationError("Text cannot be empty.")
      if model not in TTSModels:
@@ -41,8 +42,8 @@ def validate_input(text: str, model: str, sample_rate: int, speed: float, consis
          raise ValidationError(f"Invalid consistency: {consistency}. Must be between 0.0 and 1.0")
      if similarity is not None and not 0.0 <= similarity <= 1.0:
          raise ValidationError(f"Invalid similarity: {similarity}. Must be between 0.0 and 1.0")
-     if enhancement is not None and not isinstance(enhancement, bool):
-         raise ValidationError(f"Invalid enhancement: {enhancement}. Must be a boolean value.")
+     if enhancement is not None and not 0 <= enhancement <= 2:
+         raise ValidationError(f"Invalid enhancement: {enhancement}. Must be between 0 and 2.")
 
 
  def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: int = 2, channels: int = 1) -> bytes:
@@ -56,7 +57,6 @@ def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: i
  def preprocess_text(text: str) -> str:
      text = text.replace("\n", " ").replace("\t", " ").replace("—", " ").replace("-", " ").replace("–", " ")
      text = re.sub(r'\s+', ' ', text)
-     text = mpn.normalize(text)
      return text.strip()
 
@@ -3,13 +3,13 @@ import json
  import wave
  import copy
  import requests
- from typing import Optional, Union, List
+ from typing import Optional, Union, List, Iterator
 
- from smallest.exceptions import TTSError, APIError
- from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
+ from smallestai.waves.exceptions import TTSError, APIError
+ from smallestai.waves.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
                              get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
 
- class Smallest:
+ class WavesClient:
      def __init__(
          self,
          api_key: str = None,
@@ -17,6 +17,9 @@ class Smallest:
          sample_rate: Optional[int] = 24000,
          voice_id: Optional[str] = "emily",
          speed: Optional[float] = 1.0,
+         consistency: Optional[float] = 0.5,
+         similarity: Optional[float] = 0.0,
+         enhancement: Optional[int] = 1,
          add_wav_header: Optional[bool] = True
      ) -> None:
          """
@@ -31,6 +34,9 @@ class Smallest:
          - sample_rate (int): The sample rate for the audio output.
          - voice_id (TTSVoices): The voice to be used for synthesis.
          - speed (float): The speed of the speech synthesis.
+         - consistency (float): Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported by the `lightning-large` model. Range: [0, 1].
+         - similarity (float): Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported by the `lightning-large` model. Range: [0, 1].
+         - enhancement (int): Enhances speech quality at the cost of increased latency. Only supported by the `lightning-large` model. Range: [0, 2].
          - add_wav_header (bool): Whether to add a WAV header to the output audio.
 
          Methods:
@@ -42,7 +48,7 @@ class Smallest:
          self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
          if not self.api_key:
              raise TTSError()
-         if model == "lightning-large":
+         if model == "lightning-large" and voice_id is None:
              voice_id = "lakshya"
 
          self.chunk_size = 250
@@ -53,7 +59,10 @@ class Smallest:
              voice_id=voice_id,
              api_key=self.api_key,
              add_wav_header=add_wav_header,
-             speed=speed
+             speed=speed,
+             consistency=consistency,
+             similarity=similarity,
+             enhancement=enhancement
          )
 
 
@@ -98,27 +107,24 @@ class Smallest:
      def synthesize(
          self,
          text: str,
-         consistency: Optional[float] = 0.5,
-         similarity: Optional[float] = 0,
-         enhancement: Optional[bool] = False,
+         stream: Optional[bool] = False,
          save_as: Optional[str] = None,
          **kwargs
-     ) -> Union[bytes, None]:
+     ) -> Union[bytes, None, Iterator[bytes]]:
          """
          Synthesize speech from the provided text.
 
-         Args:
          - text (str): The text to be converted to speech.
-         - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
+         - stream (Optional[bool]): If True, returns an iterator yielding audio chunks instead of a full byte array.
+         - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
              The file must have a .wav extension.
-         - consistency (Optional[float]): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
-         - similarity (Optional[float]): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
-         - enhancement (Optional[bool]): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
          - kwargs: Additional optional parameters to override `__init__` options for this call.
 
          Returns:
-         - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
-             otherwise, returns None after saving the audio to the specified file.
+         - Union[bytes, None, Iterator[bytes]]:
+             - If `stream=True`, returns an iterator yielding audio chunks.
+             - If `save_as` is provided, saves the file and returns None.
+             - Otherwise, returns the synthesized audio content as bytes.
 
          Raises:
          - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
@@ -134,42 +140,48 @@ class Smallest:
          for key, value in kwargs.items():
              setattr(opts, key, value)
 
-         validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed, consistency, similarity, enhancement)
+         text = preprocess_text(text)
+         validate_input(text, opts.model, opts.sample_rate, opts.speed, opts.consistency, opts.similarity, opts.enhancement)
 
          self.chunk_size = 250
          if opts.model == "lightning-large":
              self.chunk_size = 140
 
          chunks = chunk_text(text, self.chunk_size)
-         audio_content = b""
-
-         for chunk in chunks:
-             payload = {
-                 "text": preprocess_text(chunk),
-                 "sample_rate": opts.sample_rate,
-                 "voice_id": opts.voice_id,
-                 "add_wav_header": False,
-                 "speed": opts.speed,
-             }
-
-             if opts.model == "lightning-large":
-                 if consistency:
-                     payload["consistency"] = consistency
-                 if similarity:
-                     payload["similarity"] = similarity
-                 if enhancement:
-                     payload["enhancement"] = enhancement
-
-             headers = {
-                 "Authorization": f"Bearer {self.api_key}",
-                 "Content-Type": "application/json",
-             }
-
-             res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
-             if res.status_code != 200:
-                 raise APIError(f"Failed to synthesize speech: {res.text}. For more information, visit https://waves.smallest.ai/")
+
+         def audio_stream():
+             for chunk in chunks:
+                 payload = {
+                     "text": chunk,
+                     "sample_rate": opts.sample_rate,
+                     "voice_id": opts.voice_id,
+                     "add_wav_header": False,
+                     "speed": opts.speed,
+                 }
+
+                 if opts.model == "lightning-large":
+                     if opts.consistency is not None:
+                         payload["consistency"] = opts.consistency
+                     if opts.similarity is not None:
+                         payload["similarity"] = opts.similarity
+                     if opts.enhancement is not None:
+                         payload["enhancement"] = opts.enhancement
+
+                 headers = {
+                     "Authorization": f"Bearer {self.api_key}",
+                     "Content-Type": "application/json",
+                 }
+
+                 res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
+                 if res.status_code != 200:
+                     raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
+
+                 yield res.content
 
-         audio_content += res.content
+         if stream:
+             return audio_stream()
+
+         audio_content = b"".join(audio_stream())
 
          if save_as:
              if not save_as.endswith(".wav"):
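
For reference, a short sketch of the reworked synchronous client above: `consistency`, `similarity`, and `enhancement` move from `synthesize()` to the constructor (with `enhancement` now an integer in [0, 2] rather than a bool), and `synthesize()` gains a `stream` flag. File names here are hypothetical and `SMALLEST_API_KEY` is assumed to be set in the environment:

    from smallestai.waves.waves_client import WavesClient

    client = WavesClient(
        model="lightning-large",
        consistency=0.5,   # [0, 1]
        similarity=0.0,    # [0, 1]
        enhancement=1,     # [0, 2]; was a bool in 2.1.0
    )

    # One-shot synthesis: saved to disk, returns None
    client.synthesize("Hello from Waves.", save_as="hello.wav")

    # Streaming synthesis: iterator of raw audio chunks (no WAV header)
    with open("hello.raw", "wb") as f:
        for chunk in client.synthesize("Hello again, chunk by chunk.", stream=True):
            f.write(chunk)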