smallestai 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: smallestai
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <info@smallest.ai>
6
6
  License: MIT
@@ -102,7 +102,20 @@ if __name__ == "__main__":
102
102
  - `speed`: Speech speed multiplier (default: 1.0)
103
103
  - `add_wav_header`: Include WAV header in output (default: True)
104
104
  - `transliterate`: Enable text transliteration (default: False)
105
- - `remove_extra_silence`: Remove additional silence (default: True)
105
+ - `remove_extra_silence`: Remove additional silence (default: True)
106
+
107
+ These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
108
+
109
+ For example, you can modify the speech speed and sample rate just for a particular synthesis call:
110
+ ```py
111
+ client.synthesize(
112
+ "Hello, this is a test for sync synthesis function.",
113
+ save_as="sync_synthesize.wav",
114
+ speed=1.5, # Overrides default speed
115
+ sample_rate=16000 # Overrides default sample rate
116
+ )
117
+ ```
118
+
106
119
 
107
120
  ### Async
108
121
  Asynchronous text-to-speech synthesis client.
@@ -134,7 +147,18 @@ if __name__ == "__main__":
134
147
  - `speed`: Speech speed multiplier (default: 1.0)
135
148
  - `add_wav_header`: Include WAV header in output (default: True)
136
149
  - `transliterate`: Enable text transliteration (default: False)
137
- - `remove_extra_silence`: Remove additional silence (default: True)
150
+ - `remove_extra_silence`: Remove additional silence (default: True)
151
+
152
+ These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
153
+
154
+ For example, you can modify the speech speed and sample rate just for a particular synthesis request:
155
+ ```py
156
+ audio_bytes = await tts.synthesize(
157
+ "Hello, this is a test of the async synthesis function.",
158
+ speed=1.5, # Overrides default speed
159
+ sample_rate=16000 # Overrides default sample rate
160
+ )
161
+ ```
138
162
 
139
163
  ### LLM to Speech
140
164
 
@@ -75,7 +75,20 @@ if __name__ == "__main__":
75
75
  - `speed`: Speech speed multiplier (default: 1.0)
76
76
  - `add_wav_header`: Include WAV header in output (default: True)
77
77
  - `transliterate`: Enable text transliteration (default: False)
78
- - `remove_extra_silence`: Remove additional silence (default: True)
78
+ - `remove_extra_silence`: Remove additional silence (default: True)
79
+
80
+ These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
81
+
82
+ For example, you can modify the speech speed and sample rate just for a particular synthesis call:
83
+ ```py
84
+ client.synthesize(
85
+ "Hello, this is a test for sync synthesis function.",
86
+ save_as="sync_synthesize.wav",
87
+ speed=1.5, # Overrides default speed
88
+ sample_rate=16000 # Overrides default sample rate
89
+ )
90
+ ```
91
+
79
92
 
80
93
  ### Async
81
94
  Asynchronous text-to-speech synthesis client.
@@ -107,7 +120,18 @@ if __name__ == "__main__":
107
120
  - `speed`: Speech speed multiplier (default: 1.0)
108
121
  - `add_wav_header`: Include WAV header in output (default: True)
109
122
  - `transliterate`: Enable text transliteration (default: False)
110
- - `remove_extra_silence`: Remove additional silence (default: True)
123
+ - `remove_extra_silence`: Remove additional silence (default: True)
124
+
125
+ These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
126
+
127
+ For example, you can modify the speech speed and sample rate just for a particular synthesis request:
128
+ ```py
129
+ audio_bytes = await tts.synthesize(
130
+ "Hello, this is a test of the async synthesis function.",
131
+ speed=1.5, # Overrides default speed
132
+ sample_rate=16000 # Overrides default sample rate
133
+ )
134
+ ```
111
135
 
112
136
  ### LLM to Speech
113
137
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "smallestai"
3
- version = "1.2.0"
3
+ version = "1.3.0"
4
4
  description = "Official Python client for the Smallest AI API"
5
5
  authors = [
6
6
  {name = "Smallest", email = "info@smallest.ai"},
@@ -7,20 +7,20 @@ from typing import Optional, Union, List
7
7
  from .models import TTSModels, TTSVoices
8
8
  from .exceptions import TTSError, APIError
9
9
  from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
10
+ get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
 
13
13
  class AsyncSmallest:
14
14
  def __init__(
15
- self,
16
- api_key: Optional[str] = None,
17
- model: TTSModels = "lightning",
18
- sample_rate: int = 24000,
19
- voice: TTSVoices = "emily",
20
- speed: Optional[float] = 1.0,
21
- add_wav_header: Optional[bool] = True,
22
- transliterate: Optional[bool] = False,
23
- remove_extra_silence: Optional[bool] = False
15
+ self,
16
+ api_key: Optional[str] = None,
17
+ model: TTSModels = "lightning",
18
+ sample_rate: int = 24000,
19
+ voice: TTSVoices = "emily",
20
+ speed: Optional[float] = 1.0,
21
+ add_wav_header: Optional[bool] = True,
22
+ transliterate: Optional[bool] = False,
23
+ remove_extra_silence: Optional[bool] = False
24
24
  ) -> None:
25
25
  """
26
26
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -48,6 +48,7 @@ class AsyncSmallest:
48
48
  self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
49
49
  if not self.api_key:
50
50
  raise TTSError("API key is required")
51
+ self.chunk_size = 250
51
52
 
52
53
  self.opts = TTSOptions(
53
54
  model=model,
@@ -70,6 +71,48 @@ class AsyncSmallest:
70
71
  if self.session:
71
72
  await self.session.close()
72
73
 
74
+ def _split_into_chunks(self, text: str) -> List[str]:
75
+ """
76
+ Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
77
+ """
78
+ chunks = []
79
+ current_chunk = ""
80
+ last_break_index = 0
81
+
82
+ i = 0
83
+ while i < len(text):
84
+ current_chunk += text[i]
85
+
86
+ if text[i] in ".,":
87
+ last_break_index = i
88
+
89
+ if len(current_chunk) >= self.chunk_size:
90
+ if last_break_index > 0:
91
+ chunk = text[:last_break_index + 1].strip()
92
+ chunk = chunk.replace("—", " ")
93
+ chunks.append(chunk)
94
+
95
+ text = text[last_break_index + 1:]
96
+ i = -1
97
+ current_chunk = ""
98
+ last_break_index = 0
99
+ else:
100
+ # No break point found, split at max length
101
+ current_chunk = current_chunk.replace("—", " ")
102
+ chunks.append(current_chunk.strip())
103
+ text = text[self.chunk_size:]
104
+ i = -1
105
+ current_chunk = ""
106
+
107
+ i += 1
108
+
109
+ if text:
110
+ text = text.replace("—", " ")
111
+ chunks.append(text.strip())
112
+
113
+ return chunks
114
+
115
+
73
116
  def get_languages(self) -> List[str]:
74
117
  """Returns a list of available languages."""
75
118
  return get_smallest_languages()
@@ -110,42 +153,45 @@ class AsyncSmallest:
110
153
  setattr(opts, key, value)
111
154
 
112
155
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
113
-
114
- payload = {
115
- "text": preprocess_text(text),
116
- "sample_rate": opts.sample_rate,
117
- "voice_id": opts.voice,
118
- "add_wav_header": opts.add_wav_header,
119
- "speed": opts.speed,
120
- "model": opts.model,
121
- "transliterate": opts.transliterate,
122
- "remove_extra_silence": opts.remove_extra_silence
123
- }
124
-
125
- headers = {
126
- "Authorization": f"Bearer {self.api_key}",
127
- "Content-Type": "application/json",
128
- }
129
-
130
- if not self.session:
131
- self.session = aiohttp.ClientSession()
156
+ chunks = self._split_into_chunks(text)
157
+ audio_content = b""
158
+
159
+ for chunk in chunks:
160
+ payload = {
161
+ "text": preprocess_text(chunk),
162
+ "sample_rate": opts.sample_rate,
163
+ "voice_id": opts.voice,
164
+ "add_wav_header": False,
165
+ "speed": opts.speed,
166
+ "model": opts.model,
167
+ "transliterate": opts.transliterate,
168
+ "remove_extra_silence": opts.remove_extra_silence
169
+ }
170
+
171
+ headers = {
172
+ "Authorization": f"Bearer {self.api_key}",
173
+ "Content-Type": "application/json",
174
+ }
175
+
176
+ if not self.session:
177
+ self.session = aiohttp.ClientSession()
132
178
 
133
- async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
134
- if res.status != 200:
135
- raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
179
+ async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
180
+ if res.status != 200:
181
+ raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
136
182
 
137
- audio_content = await res.read()
183
+ audio_content += await res.read()
138
184
 
139
185
  if save_as:
140
186
  if not save_as.endswith(".wav"):
141
187
  raise TTSError("Invalid file name. Extension must be .wav")
142
188
 
143
- if self.opts.add_wav_header:
144
- async with aiofiles.open(save_as, mode='wb') as f:
145
- await f.write(audio_content)
146
- else:
147
- async with aiofiles.open(save_as, mode='wb') as f:
148
- await f.write(add_wav_header(audio_content, self.opts.sample_rate))
189
+ async with aiofiles.open(save_as, mode='wb') as f:
190
+ await f.write(add_wav_header(audio_content, self.opts.sample_rate))
191
+
149
192
  return None
150
193
 
194
+ if opts.add_wav_header:
195
+ return add_wav_header(audio_content, self.opts.sample_rate)
196
+
151
197
  return audio_content
@@ -34,13 +34,14 @@ class TextToAudioStream:
34
34
  max_retries: Number of retry attempts for failed synthesis (default: 3)
35
35
  """
36
36
  self.tts_instance = tts_instance
37
+ self.tts_instance.opts.add_wav_header = False
38
+
37
39
  self.sentence_end_regex = SENTENCE_END_REGEX
38
40
  self.queue_timeout = queue_timeout
39
41
  self.max_retries = max_retries
40
42
  self.queue = Queue()
41
43
  self.buffer_size = 250
42
44
  self.stop_flag = False
43
- self.tts_instance.opts.add_wav_header = False
44
45
 
45
46
 
46
47
  async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
@@ -53,7 +54,7 @@ class TextToAudioStream:
53
54
  buffer = ""
54
55
  async for chunk in llm_output:
55
56
  buffer += chunk
56
- if self.sentence_end_regex.match(buffer) or self.buffer_size > 600:
57
+ if self.sentence_end_regex.match(buffer) or len(buffer) > self.buffer_size:
57
58
  self.queue.put(buffer)
58
59
  buffer = ""
59
60
 
@@ -6,20 +6,20 @@ from typing import Optional, Union, List
6
6
 
7
7
  from .models import TTSModels, TTSVoices
8
8
  from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text,
10
- get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
9
+ from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
10
+ get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
  class Smallest:
13
13
  def __init__(
14
- self,
15
- api_key: Optional[str] = None,
16
- model: TTSModels = "lightning",
17
- sample_rate: int = 24000,
18
- voice: TTSVoices = "emily",
19
- speed: Optional[float] = 1.0,
20
- add_wav_header: Optional[bool] = True,
21
- transliterate: Optional[bool] = False,
22
- remove_extra_silence: Optional[bool] = True
14
+ self,
15
+ api_key: Optional[str] = None,
16
+ model: TTSModels = "lightning",
17
+ sample_rate: int = 24000,
18
+ voice: TTSVoices = "emily",
19
+ speed: Optional[float] = 1.0,
20
+ add_wav_header: Optional[bool] = True,
21
+ transliterate: Optional[bool] = False,
22
+ remove_extra_silence: Optional[bool] = True
23
23
  ) -> None:
24
24
  """
25
25
  Smallest Instance for text-to-speech synthesis.
@@ -47,6 +47,8 @@ class Smallest:
47
47
  if not self.api_key:
48
48
  raise TTSError("API key is required")
49
49
 
50
+ self.chunk_size = 250
51
+
50
52
  self.opts = TTSOptions(
51
53
  model=model,
52
54
  sample_rate=sample_rate,
@@ -57,6 +59,48 @@ class Smallest:
57
59
  transliterate=transliterate,
58
60
  remove_extra_silence=remove_extra_silence
59
61
  )
62
+
63
+ def _split_into_chunks(self, text: str) -> List[str]:
64
+ """
65
+ Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
66
+ """
67
+ chunks = []
68
+ current_chunk = ""
69
+ last_break_index = 0
70
+
71
+ i = 0
72
+ while i < len(text):
73
+ current_chunk += text[i]
74
+
75
+ if text[i] in ".,":
76
+ last_break_index = i
77
+
78
+ if len(current_chunk) >= self.chunk_size:
79
+ if last_break_index > 0:
80
+ chunk = text[:last_break_index + 1].strip()
81
+ chunk = chunk.replace("—", " ")
82
+ chunks.append(chunk)
83
+
84
+ text = text[last_break_index + 1:]
85
+ i = -1
86
+ current_chunk = ""
87
+ last_break_index = 0
88
+ else:
89
+ # No break point found, split at max length
90
+ current_chunk = current_chunk.replace("—", " ")
91
+ chunks.append(current_chunk.strip())
92
+ text = text[self.chunk_size:]
93
+ i = -1
94
+ current_chunk = ""
95
+
96
+ i += 1
97
+
98
+ if text:
99
+ text = text.replace("—", " ")
100
+ chunks.append(text.strip())
101
+
102
+ return chunks
103
+
60
104
 
61
105
  def get_languages(self) -> List[str]:
62
106
  """Returns a list of available languages."""
@@ -99,41 +143,49 @@ class Smallest:
99
143
 
100
144
  validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
101
145
 
102
- payload = {
103
- "text": preprocess_text(text),
104
- "sample_rate": opts.sample_rate,
105
- "voice_id": opts.voice,
106
- "add_wav_header": opts.add_wav_header,
107
- "speed": opts.speed,
108
- "model": opts.model,
109
- "transliterate": opts.transliterate,
110
- "remove_extra_silence": opts.remove_extra_silence,
111
- }
112
-
113
- headers = {
114
- "Authorization": f"Bearer {self.api_key}",
115
- "Content-Type": "application/json",
116
- }
146
+ chunks = self._split_into_chunks(text)
147
+ audio_content = b""
148
+
149
+ for chunk in chunks:
150
+ payload = {
151
+ "text": preprocess_text(chunk),
152
+ "sample_rate": opts.sample_rate,
153
+ "voice_id": opts.voice,
154
+ "add_wav_header": False,
155
+ "speed": opts.speed,
156
+ "model": opts.model,
157
+ "transliterate": opts.transliterate,
158
+ "remove_extra_silence": opts.remove_extra_silence,
159
+ }
160
+
161
+ headers = {
162
+ "Authorization": f"Bearer {self.api_key}",
163
+ "Content-Type": "application/json",
164
+ }
165
+
166
+ res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
167
+ if res.status_code != 200:
168
+ raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
169
+
170
+ audio_content += res.content
171
+
117
172
 
118
173
  res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
119
174
  if res.status_code != 200:
120
175
  raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
121
-
122
- audio_content = res.content
123
176
 
124
177
  if save_as:
125
178
  if not save_as.endswith(".wav"):
126
179
  raise TTSError("Invalid file name. Extension must be .wav")
127
180
 
128
- if self.opts.add_wav_header:
129
- with open(save_as, "wb") as wf:
130
- wf.write(audio_content)
131
- else:
132
- with wave.open(save_as, "wb") as wf:
133
- wf.setnchannels(1)
134
- wf.setsampwidth(2)
135
- wf.setframerate(self.opts.sample_rate)
136
- wf.writeframes(audio_content)
181
+ with wave.open(save_as, "wb") as wf:
182
+ wf.setnchannels(1)
183
+ wf.setsampwidth(2)
184
+ wf.setframerate(self.opts.sample_rate)
185
+ wf.writeframes(audio_content)
137
186
  return None
138
-
187
+
188
+ if self.opts.add_wav_header:
189
+ return add_wav_header(audio_content, self.opts.sample_rate)
190
+
139
191
  return audio_content
@@ -11,7 +11,7 @@ from .models import TTSModels, TTSLanguages, TTSVoices
11
11
 
12
12
 
13
13
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
14
- SENTENCE_END_REGEX = re.compile(r'.*[-.!?;:…\n]$')
14
+ SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
15
15
  SAMPLE_WIDTH = 2
16
16
  CHANNELS = 1
17
17
 
@@ -53,6 +53,7 @@ def preprocess_text(text: str) -> str:
53
53
  # Replace special characters with their normal form
54
54
  text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII')
55
55
  text = text.lower()
56
+ text = text.replace("—", " ")
56
57
  # Normalize punctuation using Moses punct normalizer
57
58
  mpn = MosesPunctNormalizer()
58
59
  text = mpn.normalize(text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: smallestai
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <info@smallest.ai>
6
6
  License: MIT
@@ -102,7 +102,20 @@ if __name__ == "__main__":
102
102
  - `speed`: Speech speed multiplier (default: 1.0)
103
103
  - `add_wav_header`: Include WAV header in output (default: True)
104
104
  - `transliterate`: Enable text transliteration (default: False)
105
- - `remove_extra_silence`: Remove additional silence (default: True)
105
+ - `remove_extra_silence`: Remove additional silence (default: True)
106
+
107
+ These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
108
+
109
+ For example, you can modify the speech speed and sample rate just for a particular synthesis call:
110
+ ```py
111
+ client.synthesize(
112
+ "Hello, this is a test for sync synthesis function.",
113
+ save_as="sync_synthesize.wav",
114
+ speed=1.5, # Overrides default speed
115
+ sample_rate=16000 # Overrides default sample rate
116
+ )
117
+ ```
118
+
106
119
 
107
120
  ### Async
108
121
  Asynchronous text-to-speech synthesis client.
@@ -134,7 +147,18 @@ if __name__ == "__main__":
134
147
  - `speed`: Speech speed multiplier (default: 1.0)
135
148
  - `add_wav_header`: Include WAV header in output (default: True)
136
149
  - `transliterate`: Enable text transliteration (default: False)
137
- - `remove_extra_silence`: Remove additional silence (default: True)
150
+ - `remove_extra_silence`: Remove additional silence (default: True)
151
+
152
+ These parameters are part of the `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
153
+
154
+ For example, you can modify the speech speed and sample rate just for a particular synthesis request:
155
+ ```py
156
+ audio_bytes = await tts.synthesize(
157
+ "Hello, this is a test of the async synthesis function.",
158
+ speed=1.5, # Overrides default speed
159
+ sample_rate=16000 # Overrides default sample rate
160
+ )
161
+ ```
138
162
 
139
163
  ### LLM to Speech
140
164
 
File without changes
File without changes