smallestai 1.2.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. Click here for more details.

smallest/async_tts.py CHANGED
@@ -6,21 +6,21 @@ from typing import Optional, Union, List
6
6
 
7
7
  from .models import TTSModels, TTSVoices
8
8
  from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
9
+ from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
+ get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
 
13
13
  class AsyncSmallest:
14
14
  def __init__(
15
- self,
16
- api_key: Optional[str] = None,
17
- model: TTSModels = "lightning",
18
- sample_rate: int = 24000,
19
- voice: TTSVoices = "emily",
20
- speed: Optional[float] = 1.0,
21
- add_wav_header: Optional[bool] = True,
22
- transliterate: Optional[bool] = False,
23
- remove_extra_silence: Optional[bool] = False
15
+ self,
16
+ api_key: Optional[str] = None,
17
+ model: TTSModels = "lightning",
18
+ sample_rate: int = 24000,
19
+ voice: TTSVoices = "emily",
20
+ speed: Optional[float] = 1.0,
21
+ add_wav_header: Optional[bool] = True,
22
+ transliterate: Optional[bool] = False,
23
+ remove_extra_silence: Optional[bool] = False
24
24
  ) -> None:
25
25
  """
26
26
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -47,7 +47,8 @@ class AsyncSmallest:
47
47
  """
48
48
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
49
49
  if not self.api_key:
50
- raise TTSError("API key is required")
50
+ raise TTSError()
51
+ self.chunk_size = 250
51
52
 
52
53
  self.opts = TTSOptions(
53
54
  model=model,
@@ -70,6 +71,7 @@ class AsyncSmallest:
70
71
  if self.session:
71
72
  await self.session.close()
72
73
 
74
+
73
75
  def get_languages(self) -> List[str]:
74
76
  """Returns a list of available languages."""
75
77
  return get_smallest_languages()
@@ -110,42 +112,45 @@ class AsyncSmallest:
110
112
  setattr(opts, key, value)
111
113
 
112
114
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
113
-
114
- payload = {
115
- "text": preprocess_text(text),
116
- "sample_rate": opts.sample_rate,
117
- "voice_id": opts.voice,
118
- "add_wav_header": opts.add_wav_header,
119
- "speed": opts.speed,
120
- "model": opts.model,
121
- "transliterate": opts.transliterate,
122
- "remove_extra_silence": opts.remove_extra_silence
123
- }
124
-
125
- headers = {
126
- "Authorization": f"Bearer {self.api_key}",
127
- "Content-Type": "application/json",
128
- }
129
-
130
- if not self.session:
131
- self.session = aiohttp.ClientSession()
115
+ chunks = split_into_chunks(text)
116
+ audio_content = b""
117
+
118
+ for chunk in chunks:
119
+ payload = {
120
+ "text": preprocess_text(chunk),
121
+ "sample_rate": opts.sample_rate,
122
+ "voice_id": opts.voice,
123
+ "add_wav_header": False,
124
+ "speed": opts.speed,
125
+ "model": opts.model,
126
+ "transliterate": opts.transliterate,
127
+ "remove_extra_silence": opts.remove_extra_silence
128
+ }
129
+
130
+ headers = {
131
+ "Authorization": f"Bearer {self.api_key}",
132
+ "Content-Type": "application/json",
133
+ }
134
+
135
+ if not self.session:
136
+ self.session = aiohttp.ClientSession()
132
137
 
133
- async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
134
- if res.status != 200:
135
- raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
138
+ async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
139
+ if res.status != 200:
140
+ raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
136
141
 
137
- audio_content = await res.read()
142
+ audio_content += await res.read()
138
143
 
139
144
  if save_as:
140
145
  if not save_as.endswith(".wav"):
141
146
  raise TTSError("Invalid file name. Extension must be .wav")
142
147
 
143
- if self.opts.add_wav_header:
144
- async with aiofiles.open(save_as, mode='wb') as f:
145
- await f.write(audio_content)
146
- else:
147
- async with aiofiles.open(save_as, mode='wb') as f:
148
- await f.write(add_wav_header(audio_content, self.opts.sample_rate))
148
+ async with aiofiles.open(save_as, mode='wb') as f:
149
+ await f.write(add_wav_header(audio_content, self.opts.sample_rate))
150
+
149
151
  return None
150
152
 
153
+ if opts.add_wav_header:
154
+ return add_wav_header(audio_content, self.opts.sample_rate)
155
+
151
156
  return audio_content
smallest/exceptions.py CHANGED
@@ -1,6 +1,9 @@
1
1
  class TTSError(Exception):
2
2
  """Base exception for TTS SDK"""
3
- pass
3
+ default_message = "API key is required. Please set the `SMALLEST_API_KEY` environment variable or visit https://waves.smallest.ai/ to obtain your API key."
4
+
5
+ def __init__(self, message=None):
6
+ super().__init__(message or self.default_message)
4
7
 
5
8
  class APIError(TTSError):
6
9
  """Raised when the API returns an error"""
smallest/stream_tts.py CHANGED
@@ -34,31 +34,52 @@ class TextToAudioStream:
34
34
  max_retries: Number of retry attempts for failed synthesis (default: 3)
35
35
  """
36
36
  self.tts_instance = tts_instance
37
+ self.tts_instance.opts.add_wav_header = False
38
+
37
39
  self.sentence_end_regex = SENTENCE_END_REGEX
38
40
  self.queue_timeout = queue_timeout
39
41
  self.max_retries = max_retries
40
42
  self.queue = Queue()
41
43
  self.buffer_size = 250
42
44
  self.stop_flag = False
43
- self.tts_instance.opts.add_wav_header = False
44
45
 
45
46
 
46
47
  async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
47
48
  """
48
- Streams the LLM output, splitting it into sentences and adding each to the queue.
49
+ Streams the LLM output, splitting it into sentences based on the regex
50
+ and chunk size, and adding each chunk to the queue.
49
51
 
50
52
  Parameters:
51
53
  - llm_output (AsyncGenerator[str, None]): An async generator yielding LLM output.
52
54
  """
53
55
  buffer = ""
56
+ last_break_index = 0
57
+
54
58
  async for chunk in llm_output:
55
59
  buffer += chunk
56
- if self.sentence_end_regex.match(buffer) or self.buffer_size > 600:
57
- self.queue.put(buffer)
58
- buffer = ""
60
+ i = 0
61
+
62
+ while i < len(buffer):
63
+ current_chunk = buffer[:i + 1]
64
+ if self.sentence_end_regex.match(current_chunk):
65
+ last_break_index = i
66
+
67
+ if len(current_chunk) >= self.buffer_size:
68
+ if last_break_index > 0:
69
+ self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
70
+ buffer = buffer[last_break_index + 1:]
71
+ else:
72
+ # No sentence boundary, split at max chunk size
73
+ self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
74
+ buffer = buffer[self.buffer_size:]
75
+
76
+ last_break_index = 0
77
+ i = -1
78
+
79
+ i += 1
59
80
 
60
81
  if buffer:
61
- self.queue.put(buffer)
82
+ self.queue.put(buffer.replace("—", " ").strip())
62
83
 
63
84
  self.stop_flag = True # completion flag when LLM output ends
64
85
 
smallest/tts.py CHANGED
@@ -6,20 +6,20 @@ from typing import Optional, Union, List
6
6
 
7
7
  from .models import TTSModels, TTSVoices
8
8
  from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
9
+ from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
+ get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
  class Smallest:
13
13
  def __init__(
14
- self,
15
- api_key: Optional[str] = None,
16
- model: TTSModels = "lightning",
17
- sample_rate: int = 24000,
18
- voice: TTSVoices = "emily",
19
- speed: Optional[float] = 1.0,
20
- add_wav_header: Optional[bool] = True,
21
- transliterate: Optional[bool] = False,
22
- remove_extra_silence: Optional[bool] = True
14
+ self,
15
+ api_key: Optional[str] = None,
16
+ model: TTSModels = "lightning",
17
+ sample_rate: int = 24000,
18
+ voice: TTSVoices = "emily",
19
+ speed: Optional[float] = 1.0,
20
+ add_wav_header: Optional[bool] = True,
21
+ transliterate: Optional[bool] = False,
22
+ remove_extra_silence: Optional[bool] = True
23
23
  ) -> None:
24
24
  """
25
25
  Smallest Instance for text-to-speech synthesis.
@@ -45,7 +45,9 @@ class Smallest:
45
45
  """
46
46
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
47
47
  if not self.api_key:
48
- raise TTSError("API key is required")
48
+ raise TTSError()
49
+
50
+ self.chunk_size = 250
49
51
 
50
52
  self.opts = TTSOptions(
51
53
  model=model,
@@ -57,6 +59,7 @@ class Smallest:
57
59
  transliterate=transliterate,
58
60
  remove_extra_silence=remove_extra_silence
59
61
  )
62
+
60
63
 
61
64
  def get_languages(self) -> List[str]:
62
65
  """Returns a list of available languages."""
@@ -99,41 +102,49 @@ class Smallest:
99
102
 
100
103
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
101
104
 
102
- payload = {
103
- "text": preprocess_text(text),
104
- "sample_rate": opts.sample_rate,
105
- "voice_id": opts.voice,
106
- "add_wav_header": opts.add_wav_header,
107
- "speed": opts.speed,
108
- "model": opts.model,
109
- "transliterate": opts.transliterate,
110
- "remove_extra_silence": opts.remove_extra_silence,
111
- }
112
-
113
- headers = {
114
- "Authorization": f"Bearer {self.api_key}",
115
- "Content-Type": "application/json",
116
- }
105
+ chunks = split_into_chunks(text)
106
+ audio_content = b""
107
+
108
+ for chunk in chunks:
109
+ payload = {
110
+ "text": preprocess_text(chunk),
111
+ "sample_rate": opts.sample_rate,
112
+ "voice_id": opts.voice,
113
+ "add_wav_header": False,
114
+ "speed": opts.speed,
115
+ "model": opts.model,
116
+ "transliterate": opts.transliterate,
117
+ "remove_extra_silence": opts.remove_extra_silence,
118
+ }
119
+
120
+ headers = {
121
+ "Authorization": f"Bearer {self.api_key}",
122
+ "Content-Type": "application/json",
123
+ }
124
+
125
+ res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
126
+ if res.status_code != 200:
127
+ raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
128
+
129
+ audio_content += res.content
130
+
117
131
 
118
132
  res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
119
133
  if res.status_code != 200:
120
134
  raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
121
-
122
- audio_content = res.content
123
135
 
124
136
  if save_as:
125
137
  if not save_as.endswith(".wav"):
126
138
  raise TTSError("Invalid file name. Extension must be .wav")
127
139
 
128
- if self.opts.add_wav_header:
129
- with open(save_as, "wb") as wf:
130
- wf.write(audio_content)
131
- else:
132
- with wave.open(save_as, "wb") as wf:
133
- wf.setnchannels(1)
134
- wf.setsampwidth(2)
135
- wf.setframerate(self.opts.sample_rate)
136
- wf.writeframes(audio_content)
140
+ with wave.open(save_as, "wb") as wf:
141
+ wf.setnchannels(1)
142
+ wf.setsampwidth(2)
143
+ wf.setframerate(self.opts.sample_rate)
144
+ wf.writeframes(audio_content)
137
145
  return None
138
-
146
+
147
+ if self.opts.add_wav_header:
148
+ return add_wav_header(audio_content, self.opts.sample_rate)
149
+
139
150
  return audio_content
smallest/utils.py CHANGED
@@ -11,7 +11,7 @@ from .models import TTSModels, TTSLanguages, TTSVoices
11
11
 
12
12
 
13
13
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
14
- SENTENCE_END_REGEX = re.compile(r'.*[-.!?;:…\n]$')
14
+ SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
15
15
  SAMPLE_WIDTH = 2
16
16
  CHANNELS = 1
17
17
 
@@ -53,11 +53,56 @@ def preprocess_text(text: str) -> str:
53
53
  # Replace special characters with their normal form
54
54
  text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII')
55
55
  text = text.lower()
56
+ text = text.replace("—", " ")
56
57
  # Normalize punctuation using Moses punct normalizer
57
58
  mpn = MosesPunctNormalizer()
58
59
  text = mpn.normalize(text)
59
60
  return text.strip()
60
61
 
62
+ def split_into_chunks(self, text: str) -> List[str]:
63
+ """
64
+ Splits the input text into chunks based on sentence boundaries
65
+ defined by SENTENCE_END_REGEX and the maximum chunk size.
66
+ """
67
+ chunks = []
68
+ current_chunk = ""
69
+ last_break_index = 0
70
+
71
+ i = 0
72
+ while i < len(text):
73
+ current_chunk += text[i]
74
+
75
+ # Check for sentence boundary using regex
76
+ if SENTENCE_END_REGEX.match(current_chunk):
77
+ last_break_index = i
78
+
79
+ if len(current_chunk) >= self.chunk_size:
80
+ if last_break_index > 0:
81
+ # Split at the last valid sentence boundary
82
+ chunk = text[:last_break_index + 1].strip()
83
+ chunk = chunk.replace("—", " ")
84
+ chunks.append(chunk)
85
+
86
+ text = text[last_break_index + 1:]
87
+ i = -1 # Reset index to process the remaining text
88
+ current_chunk = ""
89
+ last_break_index = 0
90
+ else:
91
+ # No sentence boundary found, split at max length
92
+ current_chunk = current_chunk.replace("—", " ")
93
+ chunks.append(current_chunk.strip())
94
+ text = text[self.chunk_size:]
95
+ i = -1 # Reset index to process the remaining text
96
+ current_chunk = ""
97
+
98
+ i += 1
99
+
100
+ if text:
101
+ text = text.replace("—", " ")
102
+ chunks.append(text.strip())
103
+
104
+ return chunks
105
+
61
106
 
62
107
  def get_smallest_languages() -> List[str]:
63
108
  return list(TTSLanguages.__args__)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: smallestai
3
- Version: 1.2.0
3
+ Version: 1.3.1
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <info@smallest.ai>
6
6
  License: MIT
@@ -18,12 +18,12 @@ Requires-Dist: requests
18
18
  Requires-Dist: sacremoses
19
19
  Requires-Dist: pydub
20
20
  Provides-Extra: test
21
- Requires-Dist: jiwer ; extra == 'test'
22
- Requires-Dist: httpx ; extra == 'test'
23
- Requires-Dist: pytest ; extra == 'test'
24
- Requires-Dist: pytest-asyncio ; extra == 'test'
25
- Requires-Dist: deepgram-sdk ; extra == 'test'
26
- Requires-Dist: python-dotenv ; extra == 'test'
21
+ Requires-Dist: jiwer; extra == "test"
22
+ Requires-Dist: httpx; extra == "test"
23
+ Requires-Dist: pytest; extra == "test"
24
+ Requires-Dist: pytest-asyncio; extra == "test"
25
+ Requires-Dist: deepgram-sdk; extra == "test"
26
+ Requires-Dist: python-dotenv; extra == "test"
27
27
 
28
28
  ![image](https://i.imgur.com/TJ2tT4g.png)
29
29
 
@@ -102,7 +102,20 @@ if __name__ == "__main__":
102
102
  - `speed`: Speech speed multiplier (default: 1.0)
103
103
  - `add_wav_header`: Include WAV header in output (default: True)
104
104
  - `transliterate`: Enable text transliteration (default: False)
105
- - `remove_extra_silence`: Remove additional silence (default: True)
105
+ - `remove_extra_silence`: Remove additional silence (default: True)
106
+
107
+ These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
108
+
109
+ For example, you can modify the speech speed and sample rate just for a particular synthesis call:
110
+ ```py
111
+ client.synthesize(
112
+ "Hello, this is a test for sync synthesis function.",
113
+ save_as="sync_synthesize.wav",
114
+ speed=1.5, # Overrides default speed
115
+ sample_rate=16000 # Overrides default sample rate
116
+ )
117
+ ```
118
+
106
119
 
107
120
  ### Async
108
121
  Asynchronous text-to-speech synthesis client.
@@ -134,7 +147,18 @@ if __name__ == "__main__":
134
147
  - `speed`: Speech speed multiplier (default: 1.0)
135
148
  - `add_wav_header`: Include WAV header in output (default: True)
136
149
  - `transliterate`: Enable text transliteration (default: False)
137
- - `remove_extra_silence`: Remove additional silence (default: True)
150
+ - `remove_extra_silence`: Remove additional silence (default: True)
151
+
152
+ These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.
153
+
154
+ For example, you can modify the speech speed and sample rate just for a particular synthesis request:
155
+ ```py
156
+ audio_bytes = await tts.synthesize(
157
+ "Hello, this is a test of the async synthesis function.",
158
+ speed=1.5, # Overrides default speed
159
+ sample_rate=16000 # Overrides default sample rate
160
+ )
161
+ ```
138
162
 
139
163
  ### LLM to Speech
140
164
 
@@ -0,0 +1,12 @@
1
+ smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
+ smallest/async_tts.py,sha256=ReYTePzR0e3UcWxwnetGnwO4q_F7g2LRZPxIVfmgB4Y,6120
3
+ smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
+ smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
+ smallest/stream_tts.py,sha256=0OypcUzgP7CN3VGcGJDnQ2FDw2JOzPaSQ1cXK69k5dY,6198
6
+ smallest/tts.py,sha256=l8VHaOE8-Feg3Ey8C3osOrLs3ffYz0q_J1ACiEtZ8y0,5999
7
+ smallest/utils.py,sha256=Xg4sYqhTgEgmT6H0qMNmlVlF1ilqY2BUCBTlk-_yONg,3795
8
+ smallestai-1.3.1.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
+ smallestai-1.3.1.dist-info/METADATA,sha256=anPPrjMmnWa_b1S65Wg23efwLUaoww7pbyU2qLhZTAk,9853
10
+ smallestai-1.3.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
+ smallestai-1.3.1.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
+ smallestai-1.3.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
- smallest/async_tts.py,sha256=w_SY1Oetn5Zorq-8JXA7lGeRHR3kTtBzqotc_hF0hOQ,6010
3
- smallest/exceptions.py,sha256=41GLVvNTfRQMQsPLGk0lHuhK2mak8_dVtiFLEtT23Dc,333
4
- smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
- smallest/stream_tts.py,sha256=1j4JpAwrAmwprC98mKQwuhXf0HFxFTlMcZ3_JAdcAK0,5416
6
- smallest/tts.py,sha256=2v6kBPC_hocWdvt2Uua2aQgzg3BKQGIFXpq2DZMz9aA,5540
7
- smallest/utils.py,sha256=hAgyEfZEnvayzu8qS4LXhpZR8qK7z4gatLWGVOkS3Yg,2183
8
- smallestai-1.2.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
- smallestai-1.2.0.dist-info/METADATA,sha256=qsNxKNN-KNTshjeBBh9AhS_yeYzlHEqPlt_9zuV2fGY,8717
10
- smallestai-1.2.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
11
- smallestai-1.2.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
- smallestai-1.2.0.dist-info/RECORD,,