smallestai-1.1.0-py3-none-any.whl → smallestai-1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. Click here for more details.

smallest/async_tts.py CHANGED
@@ -7,20 +7,20 @@ from typing import Optional, Union, List
7
7
  from .models import TTSModels, TTSVoices
8
8
  from .exceptions import TTSError, APIError
9
9
  from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
10
+ get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
 
13
13
  class AsyncSmallest:
14
14
  def __init__(
15
- self,
16
- api_key: Optional[str] = None,
17
- model: TTSModels = "lightning",
18
- sample_rate: int = 24000,
19
- voice: TTSVoices = "emily",
20
- speed: Optional[float] = 1.0,
21
- add_wav_header: Optional[bool] = True,
22
- transliterate: Optional[bool] = False,
23
- remove_extra_silence: Optional[bool] = False
15
+ self,
16
+ api_key: Optional[str] = None,
17
+ model: TTSModels = "lightning",
18
+ sample_rate: int = 24000,
19
+ voice: TTSVoices = "emily",
20
+ speed: Optional[float] = 1.0,
21
+ add_wav_header: Optional[bool] = True,
22
+ transliterate: Optional[bool] = False,
23
+ remove_extra_silence: Optional[bool] = False
24
24
  ) -> None:
25
25
  """
26
26
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -48,6 +48,7 @@ class AsyncSmallest:
48
48
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
49
49
  if not self.api_key:
50
50
  raise TTSError("API key is required")
51
+ self.chunk_size = 250
51
52
 
52
53
  self.opts = TTSOptions(
53
54
  model=model,
@@ -70,6 +71,48 @@ class AsyncSmallest:
70
71
  if self.session:
71
72
  await self.session.close()
72
73
 
74
+ def _split_into_chunks(self, text: str) -> List[str]:
75
+ """
76
+ Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
77
+ """
78
+ chunks = []
79
+ current_chunk = ""
80
+ last_break_index = 0
81
+
82
+ i = 0
83
+ while i < len(text):
84
+ current_chunk += text[i]
85
+
86
+ if text[i] in ".,":
87
+ last_break_index = i
88
+
89
+ if len(current_chunk) >= self.chunk_size:
90
+ if last_break_index > 0:
91
+ chunk = text[:last_break_index + 1].strip()
92
+ chunk = chunk.replace("—", " ")
93
+ chunks.append(chunk)
94
+
95
+ text = text[last_break_index + 1:]
96
+ i = -1
97
+ current_chunk = ""
98
+ last_break_index = 0
99
+ else:
100
+ # No break point found, split at max length
101
+ current_chunk = current_chunk.replace("—", " ")
102
+ chunks.append(current_chunk.strip())
103
+ text = text[self.chunk_size:]
104
+ i = -1
105
+ current_chunk = ""
106
+
107
+ i += 1
108
+
109
+ if text:
110
+ text = text.replace("—", " ")
111
+ chunks.append(text.strip())
112
+
113
+ return chunks
114
+
115
+
73
116
  def get_languages(self) -> List[str]:
74
117
  """Returns a list of available languages."""
75
118
  return get_smallest_languages()
@@ -110,42 +153,45 @@ class AsyncSmallest:
110
153
  setattr(opts, key, value)
111
154
 
112
155
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
113
-
114
- payload = {
115
- "text": preprocess_text(text),
116
- "sample_rate": opts.sample_rate,
117
- "voice_id": opts.voice,
118
- "add_wav_header": opts.add_wav_header,
119
- "speed": opts.speed,
120
- "model": opts.model,
121
- "transliterate": opts.transliterate,
122
- "remove_extra_silence": opts.remove_extra_silence
123
- }
124
-
125
- headers = {
126
- "Authorization": f"Bearer {self.api_key}",
127
- "Content-Type": "application/json",
128
- }
129
-
130
- if not self.session:
131
- self.session = aiohttp.ClientSession()
156
+ chunks = self._split_into_chunks(text)
157
+ audio_content = b""
158
+
159
+ for chunk in chunks:
160
+ payload = {
161
+ "text": preprocess_text(chunk),
162
+ "sample_rate": opts.sample_rate,
163
+ "voice_id": opts.voice,
164
+ "add_wav_header": False,
165
+ "speed": opts.speed,
166
+ "model": opts.model,
167
+ "transliterate": opts.transliterate,
168
+ "remove_extra_silence": opts.remove_extra_silence
169
+ }
170
+
171
+ headers = {
172
+ "Authorization": f"Bearer {self.api_key}",
173
+ "Content-Type": "application/json",
174
+ }
175
+
176
+ if not self.session:
177
+ self.session = aiohttp.ClientSession()
132
178
 
133
- async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
134
- if res.status != 200:
135
- raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
179
+ async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
180
+ if res.status != 200:
181
+ raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
136
182
 
137
- audio_content = await res.read()
183
+ audio_content += await res.read()
138
184
 
139
185
  if save_as:
140
186
  if not save_as.endswith(".wav"):
141
187
  raise TTSError("Invalid file name. Extension must be .wav")
142
188
 
143
- if self.opts.add_wav_header:
144
- async with aiofiles.open(save_as, mode='wb') as f:
145
- await f.write(audio_content)
146
- else:
147
- async with aiofiles.open(save_as, mode='wb') as f:
148
- await f.write(add_wav_header(audio_content, self.opts.sample_rate))
189
+ async with aiofiles.open(save_as, mode='wb') as f:
190
+ await f.write(add_wav_header(audio_content, self.opts.sample_rate))
191
+
149
192
  return None
150
193
 
194
+ if opts.add_wav_header:
195
+ return add_wav_header(audio_content, self.opts.sample_rate)
196
+
151
197
  return audio_content
smallest/stream_tts.py CHANGED
@@ -34,13 +34,14 @@ class TextToAudioStream:
34
34
  max_retries: Number of retry attempts for failed synthesis (default: 3)
35
35
  """
36
36
  self.tts_instance = tts_instance
37
+ self.tts_instance.opts.add_wav_header = False
38
+
37
39
  self.sentence_end_regex = SENTENCE_END_REGEX
38
40
  self.queue_timeout = queue_timeout
39
41
  self.max_retries = max_retries
40
42
  self.queue = Queue()
41
43
  self.buffer_size = 250
42
44
  self.stop_flag = False
43
- self.tts_instance.opts.add_wav_header = False
44
45
 
45
46
 
46
47
  async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
@@ -53,7 +54,7 @@ class TextToAudioStream:
53
54
  buffer = ""
54
55
  async for chunk in llm_output:
55
56
  buffer += chunk
56
- if self.sentence_end_regex.match(buffer) or self.buffer_size > 600:
57
+ if self.sentence_end_regex.match(buffer) or len(buffer) > self.buffer_size:
57
58
  self.queue.put(buffer)
58
59
  buffer = ""
59
60
 
smallest/tts.py CHANGED
@@ -1,24 +1,25 @@
1
1
  import os
2
+ import wave
2
3
  import copy
3
4
  import requests
4
5
  from typing import Optional, Union, List
5
6
 
6
7
  from .models import TTSModels, TTSVoices
7
8
  from .exceptions import TTSError, APIError
8
- from .utils import (TTSOptions, validate_input, preprocess_text,
9
- get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
9
+ from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
10
+ get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
10
11
 
11
12
  class Smallest:
12
13
  def __init__(
13
- self,
14
- api_key: Optional[str] = None,
15
- model: TTSModels = "lightning",
16
- sample_rate: int = 24000,
17
- voice: TTSVoices = "emily",
18
- speed: Optional[float] = 1.0,
19
- add_wav_header: Optional[bool] = True,
20
- transliterate: Optional[bool] = False,
21
- remove_extra_silence: Optional[bool] = True
14
+ self,
15
+ api_key: Optional[str] = None,
16
+ model: TTSModels = "lightning",
17
+ sample_rate: int = 24000,
18
+ voice: TTSVoices = "emily",
19
+ speed: Optional[float] = 1.0,
20
+ add_wav_header: Optional[bool] = True,
21
+ transliterate: Optional[bool] = False,
22
+ remove_extra_silence: Optional[bool] = True
22
23
  ) -> None:
23
24
  """
24
25
  Smallest Instance for text-to-speech synthesis.
@@ -46,6 +47,8 @@ class Smallest:
46
47
  if not self.api_key:
47
48
  raise TTSError("API key is required")
48
49
 
50
+ self.chunk_size = 250
51
+
49
52
  self.opts = TTSOptions(
50
53
  model=model,
51
54
  sample_rate=sample_rate,
@@ -56,6 +59,48 @@ class Smallest:
56
59
  transliterate=transliterate,
57
60
  remove_extra_silence=remove_extra_silence
58
61
  )
62
+
63
+ def _split_into_chunks(self, text: str) -> List[str]:
64
+ """
65
+ Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
66
+ """
67
+ chunks = []
68
+ current_chunk = ""
69
+ last_break_index = 0
70
+
71
+ i = 0
72
+ while i < len(text):
73
+ current_chunk += text[i]
74
+
75
+ if text[i] in ".,":
76
+ last_break_index = i
77
+
78
+ if len(current_chunk) >= self.chunk_size:
79
+ if last_break_index > 0:
80
+ chunk = text[:last_break_index + 1].strip()
81
+ chunk = chunk.replace("—", " ")
82
+ chunks.append(chunk)
83
+
84
+ text = text[last_break_index + 1:]
85
+ i = -1
86
+ current_chunk = ""
87
+ last_break_index = 0
88
+ else:
89
+ # No break point found, split at max length
90
+ current_chunk = current_chunk.replace("—", " ")
91
+ chunks.append(current_chunk.strip())
92
+ text = text[self.chunk_size:]
93
+ i = -1
94
+ current_chunk = ""
95
+
96
+ i += 1
97
+
98
+ if text:
99
+ text = text.replace("—", " ")
100
+ chunks.append(text.strip())
101
+
102
+ return chunks
103
+
59
104
 
60
105
  def get_languages(self) -> List[str]:
61
106
  """Returns a list of available languages."""
@@ -98,37 +143,49 @@ class Smallest:
98
143
 
99
144
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
100
145
 
101
- payload = {
102
- "text": preprocess_text(text),
103
- "sample_rate": opts.sample_rate,
104
- "voice_id": opts.voice,
105
- "add_wav_header": opts.add_wav_header,
106
- "speed": opts.speed,
107
- "model": opts.model,
108
- "transliterate": opts.transliterate,
109
- "remove_extra_silence": opts.remove_extra_silence,
110
- }
111
-
112
- headers = {
113
- "Authorization": f"Bearer {self.api_key}",
114
- "Content-Type": "application/json",
115
- }
146
+ chunks = self._split_into_chunks(text)
147
+ audio_content = b""
148
+
149
+ for chunk in chunks:
150
+ payload = {
151
+ "text": preprocess_text(chunk),
152
+ "sample_rate": opts.sample_rate,
153
+ "voice_id": opts.voice,
154
+ "add_wav_header": False,
155
+ "speed": opts.speed,
156
+ "model": opts.model,
157
+ "transliterate": opts.transliterate,
158
+ "remove_extra_silence": opts.remove_extra_silence,
159
+ }
160
+
161
+ headers = {
162
+ "Authorization": f"Bearer {self.api_key}",
163
+ "Content-Type": "application/json",
164
+ }
165
+
166
+ res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
167
+ if res.status_code != 200:
168
+ raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
169
+
170
+ audio_content += res.content
171
+
116
172
 
117
173
  res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
118
174
  if res.status_code != 200:
119
175
  raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
120
-
121
- audio_content = res.content
122
176
 
123
177
  if save_as:
124
178
  if not save_as.endswith(".wav"):
125
179
  raise TTSError("Invalid file name. Extension must be .wav")
126
180
 
127
- if self.opts.add_wav_header:
128
- with open(save_as, "wb") as wf:
129
- wf.write(audio_content)
130
- else:
131
- raise TTSError("WAV header is required for saving audio. Set 'add_wav_header=True' to add a WAV header.")
181
+ with wave.open(save_as, "wb") as wf:
182
+ wf.setnchannels(1)
183
+ wf.setsampwidth(2)
184
+ wf.setframerate(self.opts.sample_rate)
185
+ wf.writeframes(audio_content)
132
186
  return None
133
-
187
+
188
+ if self.opts.add_wav_header:
189
+ return add_wav_header(audio_content, self.opts.sample_rate)
190
+
134
191
  return audio_content
smallest/utils.py CHANGED
@@ -11,7 +11,7 @@ from .models import TTSModels, TTSLanguages, TTSVoices
11
11
 
12
12
 
13
13
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
14
- SENTENCE_END_REGEX = re.compile(r'.*[-.!?;:…\n]$')
14
+ SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
15
15
  SAMPLE_WIDTH = 2
16
16
  CHANNELS = 1
17
17
 
@@ -53,6 +53,7 @@ def preprocess_text(text: str) -> str:
53
53
  # Replace special characters with their normal form
54
54
  text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII')
55
55
  text = text.lower()
56
+ text = text.replace("—", " ")
56
57
  # Normalize punctuation using Moses punct normalizer
57
58
  mpn = MosesPunctNormalizer()
58
59
  text = mpn.normalize(text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: smallestai
3
- Version: 1.1.0
3
+ Version: 1.3.0
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <info@smallest.ai>
6
6
  License: MIT
@@ -18,12 +18,12 @@ Requires-Dist: requests
18
18
  Requires-Dist: sacremoses
19
19
  Requires-Dist: pydub
20
20
  Provides-Extra: test
21
- Requires-Dist: jiwer ; extra == 'test'
22
- Requires-Dist: httpx ; extra == 'test'
23
- Requires-Dist: pytest ; extra == 'test'
24
- Requires-Dist: pytest-asyncio ; extra == 'test'
25
- Requires-Dist: deepgram-sdk ; extra == 'test'
26
- Requires-Dist: python-dotenv ; extra == 'test'
21
+ Requires-Dist: jiwer; extra == "test"
22
+ Requires-Dist: httpx; extra == "test"
23
+ Requires-Dist: pytest; extra == "test"
24
+ Requires-Dist: pytest-asyncio; extra == "test"
25
+ Requires-Dist: deepgram-sdk; extra == "test"
26
+ Requires-Dist: python-dotenv; extra == "test"
27
27
 
28
28
  ![image](https://i.imgur.com/TJ2tT4g.png)
29
29
 
@@ -88,9 +88,7 @@ from smallest import Smallest
88
88
 
89
89
  def main():
90
90
  client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
91
- audio_data = client.synthesize("Hello, this is a test for sync synthesis function.")
92
- with open("sync_synthesize.wav", "wb") as f:
93
- f.write(audio_data)
91
+ client.synthesize("Hello, this is a test for sync synthesis function.", save_as="sync_synthesize.wav")
94
92
 
95
93
  if __name__ == "__main__":
96
94
  main()
@@ -104,10 +102,23 @@ if __name__ == "__main__":
104
102
  - `speed`: Speech speed multiplier (default: 1.0)
105
103
  - `add_wav_header`: Include WAV header in output (default: True)
106
104
  - `transliterate`: Enable text transliteration (default: False)
107
- - `remove_extra_silence`: Remove additional silence (default: True)
105
+ - `remove_extra_silence`: Remove additional silence (default: True)
106
+
107
+ These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
108
+
109
+ For example, you can modify the speech speed and sample rate just for a particular synthesis call:
110
+ ```py
111
+ client.synthesize(
112
+ "Hello, this is a test for sync synthesis function.",
113
+ save_as="sync_synthesize.wav",
114
+ speed=1.5, # Overrides default speed
115
+ sample_rate=16000 # Overrides default sample rate
116
+ )
117
+ ```
118
+
108
119
 
109
120
  ### Async
110
- A synchronous text-to-speech synthesis client.
121
+ Asynchronous text-to-speech synthesis client.
111
122
 
112
123
  **Basic Usage:**
113
124
  ```python
@@ -120,9 +131,9 @@ client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
120
131
 
121
132
  async def main():
122
133
  async with client as tts:
123
- audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
134
+ audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
124
135
  async with aiofiles.open("async_synthesize.wav", "wb") as f:
125
- await f.write(audio_bytes)
136
+ await f.write(audio_bytes) # alternatively you can use the `save_as` parameter.
126
137
 
127
138
  if __name__ == "__main__":
128
139
  asyncio.run(main())
@@ -136,7 +147,18 @@ if __name__ == "__main__":
136
147
  - `speed`: Speech speed multiplier (default: 1.0)
137
148
  - `add_wav_header`: Include WAV header in output (default: True)
138
149
  - `transliterate`: Enable text transliteration (default: False)
139
- - `remove_extra_silence`: Remove additional silence (default: True)
150
+ - `remove_extra_silence`: Remove additional silence (default: True)
151
+
152
+ These parameters are part of the AsyncSmallest instance. They can be set when creating the instance (as shown above). However, the synthesize function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
153
+
154
+ For example, you can modify the speech speed and sample rate just for a particular synthesis request:
155
+ ```py
156
+ audio_bytes = await tts.synthesize(
157
+ "Hello, this is a test of the async synthesis function.",
158
+ speed=1.5, # Overrides default speed
159
+ sample_rate=16000 # Overrides default sample rate
160
+ )
161
+ ```
140
162
 
141
163
  ### LLM to Speech
142
164
 
@@ -0,0 +1,12 @@
1
+ smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
+ smallest/async_tts.py,sha256=zqZGuQUWaV2_if9WVdYutxb9G2UoUAxbyAbNlF1tv3U,7445
3
+ smallest/exceptions.py,sha256=41GLVvNTfRQMQsPLGk0lHuhK2mak8_dVtiFLEtT23Dc,333
4
+ smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
+ smallest/stream_tts.py,sha256=4h_AktweZ386qgVIe8UeqO-ZxZO_x6Zj0uJQH09V1CE,5425
6
+ smallest/tts.py,sha256=CHtZwcA2S4zfYfqhv5qikBKOME8XBjS_0R4HXpzXeAU,7325
7
+ smallest/utils.py,sha256=WL71OByTxH8Y1gouP2K5YDDMwqhUdqMJ_bhqNryI3KQ,2222
8
+ smallestai-1.3.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
+ smallestai-1.3.0.dist-info/METADATA,sha256=0lqX-j9c0CkSeA6OeG5RUIdnaeWXWnvWPznCkO7vJCA,9845
10
+ smallestai-1.3.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
+ smallestai-1.3.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
+ smallestai-1.3.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
- smallest/async_tts.py,sha256=w_SY1Oetn5Zorq-8JXA7lGeRHR3kTtBzqotc_hF0hOQ,6010
3
- smallest/exceptions.py,sha256=41GLVvNTfRQMQsPLGk0lHuhK2mak8_dVtiFLEtT23Dc,333
4
- smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
- smallest/stream_tts.py,sha256=1j4JpAwrAmwprC98mKQwuhXf0HFxFTlMcZ3_JAdcAK0,5416
6
- smallest/tts.py,sha256=Gr13I-O0qH7EclnR_g29qcpiqITWjgfjCFxFwNxyZrA,5410
7
- smallest/utils.py,sha256=hAgyEfZEnvayzu8qS4LXhpZR8qK7z4gatLWGVOkS3Yg,2183
8
- smallestai-1.1.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
- smallestai-1.1.0.dist-info/METADATA,sha256=e1ivgFjFyvXKPKGXoa8jSH7pqUsmiqoqNk0Q_Mjq3yM,8723
10
- smallestai-1.1.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
11
- smallestai-1.1.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
- smallestai-1.1.0.dist-info/RECORD,,