smallestai 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. (The original page linked to more details here; the link target was not preserved in this text extraction.)

smallest/async_tts.py CHANGED
@@ -6,7 +6,7 @@ from typing import Optional, Union, List
6
6
 
7
7
  from .models import TTSModels, TTSVoices
8
8
  from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
9
+ from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
10
  get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
 
@@ -47,7 +47,7 @@ class AsyncSmallest:
47
47
  """
48
48
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
49
49
  if not self.api_key:
50
- raise TTSError("API key is required")
50
+ raise TTSError()
51
51
  self.chunk_size = 250
52
52
 
53
53
  self.opts = TTSOptions(
@@ -71,47 +71,6 @@ class AsyncSmallest:
71
71
  if self.session:
72
72
  await self.session.close()
73
73
 
74
- def _split_into_chunks(self, text: str) -> List[str]:
75
- """
76
- Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
77
- """
78
- chunks = []
79
- current_chunk = ""
80
- last_break_index = 0
81
-
82
- i = 0
83
- while i < len(text):
84
- current_chunk += text[i]
85
-
86
- if text[i] in ".,":
87
- last_break_index = i
88
-
89
- if len(current_chunk) >= self.chunk_size:
90
- if last_break_index > 0:
91
- chunk = text[:last_break_index + 1].strip()
92
- chunk = chunk.replace("—", " ")
93
- chunks.append(chunk)
94
-
95
- text = text[last_break_index + 1:]
96
- i = -1
97
- current_chunk = ""
98
- last_break_index = 0
99
- else:
100
- # No break point found, split at max length
101
- current_chunk = current_chunk.replace("—", " ")
102
- chunks.append(current_chunk.strip())
103
- text = text[self.chunk_size:]
104
- i = -1
105
- current_chunk = ""
106
-
107
- i += 1
108
-
109
- if text:
110
- text = text.replace("—", " ")
111
- chunks.append(text.strip())
112
-
113
- return chunks
114
-
115
74
 
116
75
  def get_languages(self) -> List[str]:
117
76
  """Returns a list of available languages."""
@@ -153,7 +112,7 @@ class AsyncSmallest:
153
112
  setattr(opts, key, value)
154
113
 
155
114
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
156
- chunks = self._split_into_chunks(text)
115
+ chunks = split_into_chunks(text)
157
116
  audio_content = b""
158
117
 
159
118
  for chunk in chunks:
smallest/exceptions.py CHANGED
@@ -1,6 +1,9 @@
1
1
  class TTSError(Exception):
2
2
  """Base exception for TTS SDK"""
3
- pass
3
+ default_message = "API key is required. Please set the `SMALLEST_API_KEY` environment variable or visit https://waves.smallest.ai/ to obtain your API key."
4
+
5
+ def __init__(self, message=None):
6
+ super().__init__(message or self.default_message)
4
7
 
5
8
  class APIError(TTSError):
6
9
  """Raised when the API returns an error"""
smallest/stream_tts.py CHANGED
@@ -46,20 +46,40 @@ class TextToAudioStream:
46
46
 
47
47
  async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
48
48
  """
49
- Streams the LLM output, splitting it into sentences and adding each to the queue.
49
+ Streams the LLM output, splitting it into sentences based on the regex
50
+ and chunk size, and adding each chunk to the queue.
50
51
 
51
52
  Parameters:
52
53
  - llm_output (AsyncGenerator[str, None]): An async generator yielding LLM output.
53
54
  """
54
55
  buffer = ""
56
+ last_break_index = 0
57
+
55
58
  async for chunk in llm_output:
56
59
  buffer += chunk
57
- if self.sentence_end_regex.match(buffer) or len(buffer) > self.buffer_size:
58
- self.queue.put(buffer)
59
- buffer = ""
60
+ i = 0
61
+
62
+ while i < len(buffer):
63
+ current_chunk = buffer[:i + 1]
64
+ if self.sentence_end_regex.match(current_chunk):
65
+ last_break_index = i
66
+
67
+ if len(current_chunk) >= self.buffer_size:
68
+ if last_break_index > 0:
69
+ self.queue.put(buffer[:last_break_index + 1].replace("—", " ").strip())
70
+ buffer = buffer[last_break_index + 1:]
71
+ else:
72
+ # No sentence boundary, split at max chunk size
73
+ self.queue.put(buffer[:self.buffer_size].replace("—", " ").strip())
74
+ buffer = buffer[self.buffer_size:]
75
+
76
+ last_break_index = 0
77
+ i = -1
78
+
79
+ i += 1
60
80
 
61
81
  if buffer:
62
- self.queue.put(buffer)
82
+ self.queue.put(buffer.replace("—", " ").strip())
63
83
 
64
84
  self.stop_flag = True # completion flag when LLM output ends
65
85
 
smallest/tts.py CHANGED
@@ -6,7 +6,7 @@ from typing import Optional, Union, List
6
6
 
7
7
  from .models import TTSModels, TTSVoices
8
8
  from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
9
+ from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
10
  get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
  class Smallest:
@@ -45,7 +45,7 @@ class Smallest:
45
45
  """
46
46
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
47
47
  if not self.api_key:
48
- raise TTSError("API key is required")
48
+ raise TTSError()
49
49
 
50
50
  self.chunk_size = 250
51
51
 
@@ -59,47 +59,6 @@ class Smallest:
59
59
  transliterate=transliterate,
60
60
  remove_extra_silence=remove_extra_silence
61
61
  )
62
-
63
- def _split_into_chunks(self, text: str) -> List[str]:
64
- """
65
- Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
66
- """
67
- chunks = []
68
- current_chunk = ""
69
- last_break_index = 0
70
-
71
- i = 0
72
- while i < len(text):
73
- current_chunk += text[i]
74
-
75
- if text[i] in ".,":
76
- last_break_index = i
77
-
78
- if len(current_chunk) >= self.chunk_size:
79
- if last_break_index > 0:
80
- chunk = text[:last_break_index + 1].strip()
81
- chunk = chunk.replace("—", " ")
82
- chunks.append(chunk)
83
-
84
- text = text[last_break_index + 1:]
85
- i = -1
86
- current_chunk = ""
87
- last_break_index = 0
88
- else:
89
- # No break point found, split at max length
90
- current_chunk = current_chunk.replace("—", " ")
91
- chunks.append(current_chunk.strip())
92
- text = text[self.chunk_size:]
93
- i = -1
94
- current_chunk = ""
95
-
96
- i += 1
97
-
98
- if text:
99
- text = text.replace("—", " ")
100
- chunks.append(text.strip())
101
-
102
- return chunks
103
62
 
104
63
 
105
64
  def get_languages(self) -> List[str]:
@@ -143,7 +102,7 @@ class Smallest:
143
102
 
144
103
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
145
104
 
146
- chunks = self._split_into_chunks(text)
105
+ chunks = split_into_chunks(text)
147
106
  audio_content = b""
148
107
 
149
108
  for chunk in chunks:
smallest/utils.py CHANGED
@@ -59,6 +59,50 @@ def preprocess_text(text: str) -> str:
59
59
  text = mpn.normalize(text)
60
60
  return text.strip()
61
61
 
62
+ def split_into_chunks(self, text: str) -> List[str]:
63
+ """
64
+ Splits the input text into chunks based on sentence boundaries
65
+ defined by SENTENCE_END_REGEX and the maximum chunk size.
66
+ """
67
+ chunks = []
68
+ current_chunk = ""
69
+ last_break_index = 0
70
+
71
+ i = 0
72
+ while i < len(text):
73
+ current_chunk += text[i]
74
+
75
+ # Check for sentence boundary using regex
76
+ if SENTENCE_END_REGEX.match(current_chunk):
77
+ last_break_index = i
78
+
79
+ if len(current_chunk) >= self.chunk_size:
80
+ if last_break_index > 0:
81
+ # Split at the last valid sentence boundary
82
+ chunk = text[:last_break_index + 1].strip()
83
+ chunk = chunk.replace("—", " ")
84
+ chunks.append(chunk)
85
+
86
+ text = text[last_break_index + 1:]
87
+ i = -1 # Reset index to process the remaining text
88
+ current_chunk = ""
89
+ last_break_index = 0
90
+ else:
91
+ # No sentence boundary found, split at max length
92
+ current_chunk = current_chunk.replace("—", " ")
93
+ chunks.append(current_chunk.strip())
94
+ text = text[self.chunk_size:]
95
+ i = -1 # Reset index to process the remaining text
96
+ current_chunk = ""
97
+
98
+ i += 1
99
+
100
+ if text:
101
+ text = text.replace("—", " ")
102
+ chunks.append(text.strip())
103
+
104
+ return chunks
105
+
62
106
 
63
107
  def get_smallest_languages() -> List[str]:
64
108
  return list(TTSLanguages.__args__)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: smallestai
3
- Version: 1.3.0
3
+ Version: 1.3.1
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <info@smallest.ai>
6
6
  License: MIT
@@ -104,7 +104,7 @@ if __name__ == "__main__":
104
104
  - `transliterate`: Enable text transliteration (default: False)
105
105
  - `remove_extra_silence`: Remove additional silence (default: True)
106
106
 
107
- These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
107
+ These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override these parameters for a specific synthesis request.
108
108
 
109
109
  For example, you can modify the speech speed and sample rate just for a particular synthesis call:
110
110
  ```py
@@ -149,7 +149,7 @@ if __name__ == "__main__":
149
149
  - `transliterate`: Enable text transliteration (default: False)
150
150
  - `remove_extra_silence`: Remove additional silence (default: True)
151
151
 
152
- These parameters are part of the AsyncSmallest instance. They can be set when creating the instance (as shown above). However, the synthesize function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
152
+ These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis.
153
153
 
154
154
  For example, you can modify the speech speed and sample rate just for a particular synthesis request:
155
155
  ```py
@@ -0,0 +1,12 @@
1
+ smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
+ smallest/async_tts.py,sha256=ReYTePzR0e3UcWxwnetGnwO4q_F7g2LRZPxIVfmgB4Y,6120
3
+ smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
+ smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
+ smallest/stream_tts.py,sha256=0OypcUzgP7CN3VGcGJDnQ2FDw2JOzPaSQ1cXK69k5dY,6198
6
+ smallest/tts.py,sha256=l8VHaOE8-Feg3Ey8C3osOrLs3ffYz0q_J1ACiEtZ8y0,5999
7
+ smallest/utils.py,sha256=Xg4sYqhTgEgmT6H0qMNmlVlF1ilqY2BUCBTlk-_yONg,3795
8
+ smallestai-1.3.1.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
+ smallestai-1.3.1.dist-info/METADATA,sha256=anPPrjMmnWa_b1S65Wg23efwLUaoww7pbyU2qLhZTAk,9853
10
+ smallestai-1.3.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
+ smallestai-1.3.1.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
+ smallestai-1.3.1.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
- smallest/async_tts.py,sha256=zqZGuQUWaV2_if9WVdYutxb9G2UoUAxbyAbNlF1tv3U,7445
3
- smallest/exceptions.py,sha256=41GLVvNTfRQMQsPLGk0lHuhK2mak8_dVtiFLEtT23Dc,333
4
- smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
- smallest/stream_tts.py,sha256=4h_AktweZ386qgVIe8UeqO-ZxZO_x6Zj0uJQH09V1CE,5425
6
- smallest/tts.py,sha256=CHtZwcA2S4zfYfqhv5qikBKOME8XBjS_0R4HXpzXeAU,7325
7
- smallest/utils.py,sha256=WL71OByTxH8Y1gouP2K5YDDMwqhUdqMJ_bhqNryI3KQ,2222
8
- smallestai-1.3.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
- smallestai-1.3.0.dist-info/METADATA,sha256=0lqX-j9c0CkSeA6OeG5RUIdnaeWXWnvWPznCkO7vJCA,9845
10
- smallestai-1.3.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
- smallestai-1.3.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
- smallestai-1.3.0.dist-info/RECORD,,