smallestai 2.1.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of smallestai might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: smallestai
-Version: 2.1.0
+Version: 2.2.0
 Summary: Official Python client for the Smallest AI API
 Author-email: Smallest <support@smallest.ai>
 License: MIT
@@ -15,7 +15,6 @@ License-File: LICENSE
 Requires-Dist: aiohttp
 Requires-Dist: aiofiles
 Requires-Dist: requests
-Requires-Dist: sacremoses
 Requires-Dist: pydub
 Provides-Extra: test
 Requires-Dist: jiwer; extra == "test"
@@ -1,6 +1,6 @@
 [project]
 name = "smallestai"
-version = "2.1.0"
+version = "2.2.0"
 description = "Official Python client for the Smallest AI API"
 authors = [
     {name = "Smallest", email = "support@smallest.ai"},
@@ -18,7 +18,6 @@ dependencies = [
     "aiohttp",
     "aiofiles",
     "requests",
-    "sacremoses",
     "pydub"
 ]
 
@@ -4,7 +4,7 @@ import json
 import aiohttp
 import aiofiles
 import requests
-from typing import Optional, Union, List
+from typing import Optional, Union, List, AsyncIterator
 
 from smallest.exceptions import TTSError, APIError
 from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
@@ -19,6 +19,9 @@ class AsyncSmallest:
         sample_rate: Optional[int] = 24000,
         voice_id: Optional[str] = "emily",
         speed: Optional[float] = 1.0,
+        consistency: Optional[float] = 0.5,
+        similarity: Optional[float] = 0.0,
+        enhancement: Optional[int] = 1,
         add_wav_header: Optional[bool] = True
     ) -> None:
         """
@@ -34,6 +37,9 @@ class AsyncSmallest:
         - sample_rate (int): The sample rate for the audio output.
         - voice_id (TTSVoices): The voice to be used for synthesis.
         - speed (float): The speed of the speech synthesis.
+        - consistency (float): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. Range - [0, 1]
+        - similarity (float): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. Range - [0, 1]
+        - enhancement (int): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. Range - [0, 2].
         - add_wav_header (bool): Whether to add a WAV header to the output audio.
 
         Methods:
@@ -45,7 +51,7 @@ class AsyncSmallest:
         self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
         if not self.api_key:
             raise TTSError()
-        if model == "lightning-large":
+        if model == "lightning-large" and voice_id is None:
             voice_id = "lakshya"
 
         self.chunk_size = 250
@@ -56,7 +62,10 @@ class AsyncSmallest:
             voice_id=voice_id,
             api_key=self.api_key,
             add_wav_header=add_wav_header,
-            speed=speed
+            speed=speed,
+            consistency=consistency,
+            similarity=similarity,
+            enhancement=enhancement
         )
         self.session = None
 
@@ -121,27 +130,25 @@ class AsyncSmallest:
     async def synthesize(
         self,
         text: str,
-        consistency: Optional[float] = 0.5,
-        similarity: Optional[float] = 0,
-        enhancement: Optional[bool] = False,
+        stream: Optional[bool] = False,
         save_as: Optional[str] = None,
         **kwargs
-    ) -> Union[bytes, None]:
+    ) -> Union[bytes, None, AsyncIterator[bytes]]:
         """
         Asynchronously synthesize speech from the provided text.
 
         Args:
         - text (str): The text to be converted to speech.
+        - stream (Optional[bool]): If True, returns an iterator yielding audio chunks instead of a full byte array.
         - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
             The file must have a .wav extension.
-        - consistency (Optional[float]): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
-        - similarity (Optional[float]): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
-        - enhancement (Optional[bool]): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
         - kwargs: Additional optional parameters to override `__init__` options for this call.
 
         Returns:
-        - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
-          otherwise, returns None after saving the audio to the specified file.
+        - Union[bytes, None, Iterator[bytes]]:
+            - If `stream=True`, returns an iterator yielding audio chunks.
+            - If `save_as` is provided, saves the file and returns None.
+            - Otherwise, returns the synthesized audio content as bytes.
 
         Raises:
         - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
@@ -165,44 +172,50 @@ class AsyncSmallest:
         for key, value in kwargs.items():
             setattr(opts, key, value)
 
-        validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed, consistency, similarity, enhancement)
+        text = preprocess_text(text)
+        validate_input(text, opts.model, opts.sample_rate, opts.speed, opts.consistency, opts.similarity, opts.enhancement)
 
         self.chunk_size = 250
         if opts.model == 'lightning-large':
             self.chunk_size = 140
 
         chunks = chunk_text(text, self.chunk_size)
-        audio_content = b""
-
-        for chunk in chunks:
-            payload = {
-                "text": preprocess_text(chunk),
-                "sample_rate": opts.sample_rate,
-                "voice_id": opts.voice_id,
-                "add_wav_header": False,
-                "speed": opts.speed,
-                "model": opts.model
-            }
-
-            if opts.model == "lightning-large":
-                if consistency:
-                    payload["consistency"] = consistency
-                if similarity:
-                    payload["similarity"] = similarity
-                if enhancement:
-                    payload["enhancement"] = enhancement
-
-
-            headers = {
-                "Authorization": f"Bearer {self.api_key}",
-                "Content-Type": "application/json",
-            }
-
-            async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
-                if res.status != 200:
-                    raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
-
-                audio_content += await res.read()
+
+        async def audio_stream():
+            for chunk in chunks:
+                payload = {
+                    "text": chunk,
+                    "sample_rate": opts.sample_rate,
+                    "voice_id": opts.voice_id,
+                    "add_wav_header": False,
+                    "speed": opts.speed,
+                    "model": opts.model
+                }
+
+                if opts.model == "lightning-large":
+                    if opts.consistency is not None:
+                        payload["consistency"] = opts.consistency
+                    if opts.similarity is not None:
+                        payload["similarity"] = opts.similarity
+                    if opts.enhancement is not None:
+                        payload["enhancement"] = opts.enhancement
+
+
+                headers = {
+                    "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json",
+                }
+
+                async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
+                    if res.status != 200:
+                        raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
+
+                    yield await res.read()
+
+        if stream:
+            return audio_stream()
+
+        audio_content = b"".join([chunk async for chunk in audio_stream()])
 
         if save_as:
             if not save_as.endswith(".wav"):
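
The hunks above move `consistency`, `similarity`, and `enhancement` from `synthesize()` into the `AsyncSmallest` constructor, add a `stream` flag, and rewrite the request loop as an inner `audio_stream()` async generator. A minimal usage sketch of the new surface follows; it assumes `AsyncSmallest` is importable from the top-level `smallest` package, that the client is entered as an async context manager to set up the aiohttp session behind `self.session` (only `self.session = None` is visible in the diff), and that `SMALLEST_API_KEY` is set in the environment.

```python
import asyncio

from smallest import AsyncSmallest  # top-level re-export assumed; the class itself appears in the diff above


async def main() -> None:
    # Generation controls now live on the client rather than on each synthesize() call.
    tts = AsyncSmallest(
        model="lightning-large",
        consistency=0.5,
        similarity=0.0,
        enhancement=1,
        add_wav_header=False,
    )

    async with tts:  # assumed: opens the aiohttp session used by self.session.post(...)
        # stream=True makes synthesize() return an async iterator of raw audio chunks.
        audio_chunks = await tts.synthesize("Hello from the async client.", stream=True)
        with open("hello.raw", "wb") as f:
            async for chunk in audio_chunks:
                f.write(chunk)  # raw audio, no WAV header


asyncio.run(main())
```

Because `synthesize` is itself a coroutine even when `stream=True`, the call is awaited first; the awaited result is the async iterator produced by the inner `audio_stream()` generator.
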
@@ -1,7 +1,8 @@
 import asyncio
+import time
 from threading import Thread
 from queue import Queue, Empty
-from typing import AsyncGenerator, Optional, Union
+from typing import AsyncGenerator, Optional, Union, List, Dict, Any
 
 from smallest.tts import Smallest
 from smallest.exceptions import APIError
@@ -13,7 +14,8 @@ class TextToAudioStream:
         self,
         tts_instance: Union[Smallest, AsyncSmallest],
         queue_timeout: Optional[float] = 5.0,
-        max_retries: Optional[int] = 3
+        max_retries: Optional[int] = 3,
+        verbose: bool = False
     ):
         """
         A real-time text-to-speech processor that converts streaming text into audio output.
@@ -30,8 +32,9 @@
 
         Args:
             tts_instance: The text-to-speech engine to use (Smallest or AsyncSmallest)
-            queue_timeout: How long to wait for new text (seconds, default: 5.0)
+            queue_timeout: How long to wait for new text (seconds, default: 1.0)
             max_retries: Number of retry attempts for failed synthesis (default: 3)
+            verbose: Whether to log detailed metrics about TTS requests (default: False)
         """
         self.tts_instance = tts_instance
         self.tts_instance.opts.add_wav_header = False
@@ -41,6 +44,14 @@ class TextToAudioStream:
         self.queue = Queue()
         self.buffer_size = 250
         self.stop_flag = False
+        self.verbose = verbose
+
+        # Metrics tracking
+        self.request_count = 0
+        self.request_logs: List[Dict[str, Any]] = []
+        self.start_time = 0
+        self.first_api_response_time = None
+        self.end_time = 0
 
         if self.tts_instance.opts.model == 'lightning-large':
             self.buffer_size = 140
@@ -90,24 +101,76 @@ class TextToAudioStream:
 
     def _synthesize_sync(self, sentence: str, retries: int = 0) -> Optional[bytes]:
         """Synchronously synthesizes a given sentence."""
+        request_start_time = time.time()
+        request_id = self.request_count + 1
+
         try:
-            return self.tts_instance.synthesize(sentence)
+            audio_content = self.tts_instance.synthesize(sentence)
+            self.request_count += 1
+            request_end_time = time.time()
+
+            if self.verbose:
+                request_duration = request_end_time - request_start_time
+                if self.first_api_response_time is None:
+                    self.first_api_response_time = time.time() - self.start_time
+
+                self.request_logs.append({
+                    "id": request_id,
+                    "text": sentence,
+                    "start_time": request_start_time - self.start_time,
+                    "end_time": request_end_time - self.start_time,
+                    "duration": request_duration,
+                    "char_count": len(sentence),
+                    "retries": retries
+                })
+
+            return audio_content
         except APIError as e:
             if retries < self.max_retries:
+                if self.verbose:
+                    print(f"Retry {retries + 1}/{self.max_retries} for request: '{sentence[:30]}...'")
                 return self._synthesize_sync(sentence, retries + 1)
             else:
-                raise APIError(f"Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
+                if self.verbose:
+                    print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
+                return None
 
 
     async def _synthesize_async(self, sentence: str, retries: int = 0) -> Optional[bytes]:
         """Asynchronously synthesizes a given sentence."""
+        request_start_time = time.time()
+        request_id = self.request_count + 1
+
         try:
-            return await self.tts_instance.synthesize(sentence)
+            audio_content = await self.tts_instance.synthesize(sentence)
+            self.request_count += 1
+            request_end_time = time.time()
+
+            if self.verbose:
+                request_duration = request_end_time - request_start_time
+                if self.first_api_response_time is None:
+                    self.first_api_response_time = time.time() - self.start_time
+
+                self.request_logs.append({
+                    "id": request_id,
+                    "text": sentence,
+                    "start_time": request_start_time - self.start_time,
+                    "end_time": request_end_time - self.start_time,
+                    "duration": request_duration,
+                    "char_count": len(sentence),
+                    "retries": retries
+                })
+
+            return audio_content
         except APIError as e:
             if retries < self.max_retries:
+                if self.verbose:
+                    print(f"Retry {retries + 1}/{self.max_retries} for request: '{sentence[:30]}...'")
                 return await self._synthesize_async(sentence, retries + 1)
            else:
-                raise APIError(f"Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
+                if self.verbose:
+                    print(f"Synthesis failed for sentence: {sentence} - Error: {e}. Retries Exhausted, for more information, visit https://waves.smallest.ai/")
+                return None
 
 
     async def _run_synthesis(self) -> AsyncGenerator[bytes, None]:
@@ -117,7 +180,8 @@ class TextToAudioStream:
         """
         while not self.stop_flag or not self.queue.empty():
             try:
-                sentence = self.queue.get(timeout=self.queue_timeout)
+                sentence = self.queue.get_nowait()
+
                 if isinstance(self.tts_instance, AsyncSmallest):
                     audio_content = await self._synthesize_async(sentence)
                 else:
@@ -126,10 +190,55 @@ class TextToAudioStream:
 
                 if audio_content:
                     yield audio_content
+
             except Empty:
-                if self.stop_flag:
+                # Quick check if we should exit
+                if self.stop_flag and self.queue.empty():
                     break
-                await asyncio.sleep(0.1) # avoid busy waiting if the queue is empty
+
+                # Short sleep to avoid busy-waiting
+                await asyncio.sleep(0.01) # Much shorter sleep time (10ms)
+
+
+    def _print_verbose_summary(self) -> None:
+        """Print a summary of all metrics if verbose mode is enabled."""
+        if not self.verbose:
+            return
+
+        total_duration = self.end_time - self.start_time
+
+        print("\n" + "="*100)
+        print(f"TEXT-TO-AUDIO STREAM METRICS")
+        print("="*100)
+
+        print(f"\nOVERALL STATISTICS:")
+        print(f" Total requests made: {self.request_count}")
+        print(f" Time to first API response: {self.first_api_response_time:.3f}s")
+        print(f" Total processing time: {total_duration:.3f}s")
+
+        # Print table header
+        print("\nREQUEST DETAILS:")
+        header = f"{'#':4} {'Start (s)':10} {'End (s)':10} {'Duration (s)':12} {'Characters':15} {'Text'}"
+        print("\n" + header)
+        print("-" * 100)
+
+        # Print table rows
+        for log in self.request_logs:
+            row = (
+                f"{log['id']:4} "
+                f"{log['start_time']:10.3f} "
+                f"{log['end_time']:10.3f} "
+                f"{log['duration']:12.3f} "
+                f"{log['char_count']:15} "
+                f"{log['text'][:50]}{'...' if len(log['text']) > 50 else ''}"
+            )
            print(row)
+
+            # Print retry information if any
+            if log['retries'] > 0:
+                print(f"{'':4} {'':10} {'':10} {'':12} {'':15} Retries: {log['retries']}")
+
+        print("\n" + "="*100)
 
 
     async def process(self, llm_output: AsyncGenerator[str, None]) -> AsyncGenerator[bytes, None]:
@@ -149,13 +258,15 @@ class TextToAudioStream:
             - Streamed over a network
             - Further processed as needed
         """
-        stream_task = asyncio.create_task(self._stream_llm_output(llm_output))
+        self.start_time = time.time()
+
+        llm_thread = Thread(target=asyncio.run, args=(self._stream_llm_output(llm_output),))
+        llm_thread.start()
 
-        try:
-            async for audio_content in self._run_synthesis():
-                yield audio_content
-        except Exception as e:
-            raise APIError(f"Error during synthesis processing: {e}")
+        async for audio_content in self._run_synthesis():
+            yield audio_content
 
-        finally:
-            await stream_task
+        llm_thread.join()
+
+        self.end_time = time.time()
+        self._print_verbose_summary()
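
Taken together, these hunks add a `verbose` mode to `TextToAudioStream` that records per-request metrics and prints a summary table, and they move the LLM-reading coroutine onto a background thread while `_run_synthesis` drains the queue. A hedged sketch of driving the processor is below; the top-level import path and the stand-in LLM generator are assumptions, while the constructor arguments and the `process()` signature come from the diff.

```python
import asyncio

from smallest import Smallest, TextToAudioStream  # top-level re-exports assumed


async def fake_llm_output():
    # Stand-in for a streaming LLM response; in practice this would wrap a model's token stream.
    for piece in ["Streaming text is chunked into sentences ", "and synthesized as it arrives."]:
        yield piece


async def main() -> None:
    tts = Smallest()  # falls back to the SMALLEST_API_KEY environment variable
    processor = TextToAudioStream(tts_instance=tts, verbose=True)  # verbose=True prints the metrics summary at the end

    with open("llm_speech.raw", "wb") as f:
        async for audio_chunk in processor.process(fake_llm_output()):
            f.write(audio_chunk)  # raw audio; the processor forces add_wav_header off


asyncio.run(main())
```

One behavioral change worth noting: after retries are exhausted, `_synthesize_sync` and `_synthesize_async` now return `None` (printing a message when `verbose=True`) instead of raising `APIError`, so a persistently failing sentence is skipped rather than aborting the stream.
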
@@ -3,7 +3,7 @@ import json
 import wave
 import copy
 import requests
-from typing import Optional, Union, List
+from typing import Optional, Union, List, Iterator
 
 from smallest.exceptions import TTSError, APIError
 from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
@@ -17,6 +17,9 @@ class Smallest:
         sample_rate: Optional[int] = 24000,
         voice_id: Optional[str] = "emily",
         speed: Optional[float] = 1.0,
+        consistency: Optional[float] = 0.5,
+        similarity: Optional[float] = 0.0,
+        enhancement: Optional[int] = 1,
         add_wav_header: Optional[bool] = True
     ) -> None:
         """
@@ -31,6 +34,9 @@ class Smallest:
         - sample_rate (int): The sample rate for the audio output.
         - voice_id (TTSVoices): The voice to be used for synthesis.
         - speed (float): The speed of the speech synthesis.
+        - consistency (float): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. Range - [0, 1]
+        - similarity (float): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. Range - [0, 1]
+        - enhancement (int): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. Range - [0, 2].
         - add_wav_header (bool): Whether to add a WAV header to the output audio.
 
         Methods:
@@ -42,7 +48,7 @@ class Smallest:
         self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
         if not self.api_key:
             raise TTSError()
-        if model == "lightning-large":
+        if model == "lightning-large" and voice_id is None:
             voice_id = "lakshya"
 
         self.chunk_size = 250
@@ -53,7 +59,10 @@ class Smallest:
             voice_id=voice_id,
             api_key=self.api_key,
             add_wav_header=add_wav_header,
-            speed=speed
+            speed=speed,
+            consistency=consistency,
+            similarity=similarity,
+            enhancement=enhancement
         )
 
 
@@ -98,27 +107,24 @@ class Smallest:
     def synthesize(
         self,
         text: str,
-        consistency: Optional[float] = 0.5,
-        similarity: Optional[float] = 0,
-        enhancement: Optional[bool] = False,
+        stream: Optional[bool] = False,
         save_as: Optional[str] = None,
         **kwargs
-    ) -> Union[bytes, None]:
+    ) -> Union[bytes, None, Iterator[bytes]]:
         """
         Synthesize speech from the provided text.
 
-        Args:
         - text (str): The text to be converted to speech.
-        - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
+        - stream (Optional[bool]): If True, returns an iterator yielding audio chunks instead of a full byte array.
+        - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
             The file must have a .wav extension.
-        - consistency (Optional[float]): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model.
-        - similarity (Optional[float]): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model.
-        - enhancement (Optional[bool]): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model.
         - kwargs: Additional optional parameters to override `__init__` options for this call.
 
         Returns:
-        - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
-          otherwise, returns None after saving the audio to the specified file.
+        - Union[bytes, None, Iterator[bytes]]:
+            - If `stream=True`, returns an iterator yielding audio chunks.
+            - If `save_as` is provided, saves the file and returns None.
+            - Otherwise, returns the synthesized audio content as bytes.
 
         Raises:
         - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
@@ -134,42 +140,48 @@ class Smallest:
         for key, value in kwargs.items():
             setattr(opts, key, value)
 
-        validate_input(preprocess_text(text), opts.model, opts.sample_rate, opts.speed, consistency, similarity, enhancement)
+        text = preprocess_text(text)
+        validate_input(text, opts.model, opts.sample_rate, opts.speed, opts.consistency, opts.similarity, opts.enhancement)
 
         self.chunk_size = 250
         if opts.model == "lightning-large":
             self.chunk_size = 140
 
         chunks = chunk_text(text, self.chunk_size)
-        audio_content = b""
-
-        for chunk in chunks:
-            payload = {
-                "text": preprocess_text(chunk),
-                "sample_rate": opts.sample_rate,
-                "voice_id": opts.voice_id,
-                "add_wav_header": False,
-                "speed": opts.speed,
-            }
-
-            if opts.model == "lightning-large":
-                if consistency:
-                    payload["consistency"] = consistency
-                if similarity:
-                    payload["similarity"] = similarity
-                if enhancement:
-                    payload["enhancement"] = enhancement
-
-            headers = {
-                "Authorization": f"Bearer {self.api_key}",
-                "Content-Type": "application/json",
-            }
-
-            res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
-            if res.status_code != 200:
-                raise APIError(f"Failed to synthesize speech: {res.text}. For more information, visit https://waves.smallest.ai/")
+
+        def audio_stream():
+            for chunk in chunks:
+                payload = {
+                    "text": chunk,
+                    "sample_rate": opts.sample_rate,
+                    "voice_id": opts.voice_id,
+                    "add_wav_header": False,
+                    "speed": opts.speed,
+                }
+
+                if opts.model == "lightning-large":
+                    if opts.consistency is not None:
+                        payload["consistency"] = opts.consistency
+                    if opts.similarity is not None:
+                        payload["similarity"] = opts.similarity
+                    if opts.enhancement is not None:
+                        payload["enhancement"] = opts.enhancement
+
+                headers = {
+                    "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json",
+                }
+
+                res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
+                if res.status_code != 200:
+                    raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
+
+                yield res.content
 
-            audio_content += res.content
+        if stream:
+            return audio_stream()
+
+        audio_content = b"".join(audio_stream())
 
         if save_as:
             if not save_as.endswith(".wav"):
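
The synchronous client gets the same treatment: the per-call `consistency`/`similarity`/`enhancement` arguments become constructor options carried on `TTSOptions`, and `synthesize()` gains a `stream` flag backed by an inner `audio_stream()` generator. A minimal sketch, assuming `SMALLEST_API_KEY` is set in the environment and using the `smallest.tts` import path that appears in the diff:

```python
from smallest.tts import Smallest  # import path taken from the diff above

# Options that were previously per-call arguments are now set once on the client.
client = Smallest(
    model="lightning-large",
    consistency=0.5,
    similarity=0.0,
    enhancement=1,
)

# One-shot synthesis written to disk; save_as must end with .wav.
client.synthesize("Short announcement for the waiting room.", save_as="announcement.wav")

# New in 2.2.0: stream=True returns a generator of raw audio chunks (no WAV header).
with open("announcement.raw", "wb") as f:
    for chunk in client.synthesize("Longer text is split into chunks and streamed piece by piece.", stream=True):
        f.write(chunk)
```
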
@@ -4,7 +4,6 @@ from typing import List
 from typing import Optional
 from pydub import AudioSegment
 from dataclasses import dataclass
-from sacremoses import MosesPunctNormalizer
 
 from smallest.exceptions import ValidationError
 from smallest.models import TTSModels, TTSLanguages
@@ -12,7 +11,6 @@ from smallest.models import TTSModels, TTSLanguages
 
 API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
 SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$')
-mpn = MosesPunctNormalizer()
 SAMPLE_WIDTH = 2
 CHANNELS = 1
 ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
@@ -26,9 +24,12 @@ class TTSOptions:
     api_key: str
     add_wav_header: bool
     speed: float
+    consistency: float
+    similarity: float
+    enhancement: int
 
 
-def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[bool] = None):
+def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[int] = None):
     if not text:
         raise ValidationError("Text cannot be empty.")
     if model not in TTSModels:
@@ -41,8 +42,8 @@ def validate_input(text: str, model: str, sample_rate: int, speed: float, consis
         raise ValidationError(f"Invalid consistency: {consistency}. Must be between 0.0 and 1.0")
     if similarity is not None and not 0.0 <= similarity <= 1.0:
         raise ValidationError(f"Invalid similarity: {similarity}. Must be between 0.0 and 1.0")
-    if enhancement is not None and not isinstance(enhancement, bool):
-        raise ValidationError(f"Invalid enhancement: {enhancement}. Must be a boolean value.")
+    if enhancement is not None and not 0 <= enhancement <= 2:
+        raise ValidationError(f"Invalid enhancement: {enhancement}. Must be between 0 and 2.")
 
 
 def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: int = 2, channels: int = 1) -> bytes:
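
With `enhancement` now an integer, the old boolean type check becomes a range check. A small sketch of the new behavior; it assumes "lightning-large" is a valid `TTSModels` entry and that the default sample rate and speed pass their unchanged checks:

```python
from smallest.utils import validate_input  # import path taken from the diff above
from smallest.exceptions import ValidationError

try:
    # enhancement must now be an int in [0, 2]; 3 should trip the new range check.
    validate_input("Hello there.", "lightning-large", 24000, 1.0,
                   consistency=0.5, similarity=0.0, enhancement=3)
except ValidationError as err:
    print(err)  # e.g. Invalid enhancement: 3. Must be between 0 and 2.
```
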
@@ -56,7 +57,6 @@ def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: i
 def preprocess_text(text: str) -> str:
     text = text.replace("\n", " ").replace("\t", " ").replace("—", " ").replace("-", " ").replace("–", " ")
     text = re.sub(r'\s+', ' ', text)
-    text = mpn.normalize(text)
     return text.strip()
 
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: smallestai
-Version: 2.1.0
+Version: 2.2.0
 Summary: Official Python client for the Smallest AI API
 Author-email: Smallest <support@smallest.ai>
 License: MIT
@@ -15,7 +15,6 @@ License-File: LICENSE
 Requires-Dist: aiohttp
 Requires-Dist: aiofiles
 Requires-Dist: requests
-Requires-Dist: sacremoses
 Requires-Dist: pydub
 Provides-Extra: test
 Requires-Dist: jiwer; extra == "test"
@@ -1,7 +1,6 @@
 aiohttp
 aiofiles
 requests
-sacremoses
 pydub
 
 [test]