smallestai 1.3.3__py3-none-any.whl → 1.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai might be problematic. Click here for more details.

smallest/async_tts.py CHANGED
@@ -4,16 +4,16 @@ import aiohttp
4
4
  import aiofiles
5
5
  from typing import Optional, Union, List
6
6
 
7
- from .models import TTSModels, TTSVoices
8
- from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
7
+ from smallest.models import TTSModels, TTSVoices
8
+ from smallest.exceptions import TTSError, APIError
9
+ from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
10
  get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
 
13
13
  class AsyncSmallest:
14
14
  def __init__(
15
15
  self,
16
- api_key: Optional[str] = None,
16
+ api_key: str = None,
17
17
  model: TTSModels = "lightning",
18
18
  sample_rate: int = 24000,
19
19
  voice: TTSVoices = "emily",
@@ -25,8 +25,8 @@ class AsyncSmallest:
25
25
  """
26
26
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
27
27
 
28
- This class provides an asynchronous implementation of the text-to-speech functionality.
29
- It allows for non-blocking synthesis of speech from text, making it suitable for applications
28
+ This class provides an asynchronous implementation of the text-to-speech functionality.
29
+ It allows for non-blocking synthesis of speech from text, making it suitable for applications
30
30
  that require async processing.
31
31
 
32
32
  Args:
@@ -49,7 +49,7 @@ class AsyncSmallest:
49
49
  if not self.api_key:
50
50
  raise TTSError()
51
51
  self.chunk_size = 250
52
-
52
+
53
53
  self.opts = TTSOptions(
54
54
  model=model,
55
55
  sample_rate=sample_rate,
@@ -61,7 +61,7 @@ class AsyncSmallest:
61
61
  remove_extra_silence=remove_extra_silence,
62
62
  )
63
63
  self.session = None
64
-
64
+
65
65
  async def __aenter__(self):
66
66
  if self.session is None:
67
67
  self.session = aiohttp.ClientSession()
@@ -75,7 +75,7 @@ class AsyncSmallest:
75
75
  def get_languages(self) -> List[str]:
76
76
  """Returns a list of available languages."""
77
77
  return get_smallest_languages()
78
-
78
+
79
79
  def get_voices(self) -> List[str]:
80
80
  """Returns a list of available voices."""
81
81
  return get_smallest_voices()
@@ -83,7 +83,7 @@ class AsyncSmallest:
83
83
  def get_models(self) -> List[str]:
84
84
  """Returns a list of available models."""
85
85
  return get_smallest_models()
86
-
86
+
87
87
  async def synthesize(
88
88
  self,
89
89
  text: str,
@@ -95,12 +95,12 @@ class AsyncSmallest:
95
95
 
96
96
  Args:
97
97
  - text (str): The text to be converted to speech.
98
- - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
98
+ - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
99
99
  The file must have a .wav extension.
100
100
  - kwargs: Additional optional parameters to override `__init__` options for this call.
101
101
 
102
102
  Returns:
103
- - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
103
+ - Union[bytes, None]: The synthesized audio content in bytes if `save_as` is not specified;
104
104
  otherwise, returns None after saving the audio to the specified file.
105
105
 
106
106
  Raises:
@@ -111,7 +111,8 @@ class AsyncSmallest:
111
111
  for key, value in kwargs.items():
112
112
  setattr(opts, key, value)
113
113
 
114
- validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
114
+ validate_input(preprocess_text(text), opts.voice, opts.model, opts.sample_rate, opts.speed)
115
+
115
116
  chunks = split_into_chunks(text)
116
117
  audio_content = b""
117
118
 
@@ -134,17 +135,17 @@ class AsyncSmallest:
134
135
 
135
136
  if not self.session:
136
137
  self.session = aiohttp.ClientSession()
137
-
138
+
138
139
  async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
139
140
  if res.status != 200:
140
141
  raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
141
-
142
+
142
143
  audio_content += await res.read()
143
144
 
144
145
  if save_as:
145
146
  if not save_as.endswith(".wav"):
146
147
  raise TTSError("Invalid file name. Extension must be .wav")
147
-
148
+
148
149
  async with aiofiles.open(save_as, mode='wb') as f:
149
150
  await f.write(add_wav_header(audio_content, self.opts.sample_rate))
150
151
 
@@ -152,5 +153,5 @@ class AsyncSmallest:
152
153
 
153
154
  if opts.add_wav_header:
154
155
  return add_wav_header(audio_content, self.opts.sample_rate)
155
-
156
+
156
157
  return audio_content
smallest/models.py CHANGED
@@ -1,7 +1,23 @@
1
- from typing import Literal
1
+ from typing import Literal, List, Tuple, cast
2
+ import aiohttp
3
+ import asyncio
2
4
 
3
- TTSModels = Literal["lightning"]
4
- TTSLanguages = Literal["en", "hi"]
5
- TTSVoices = Literal["emily", "jasmine", "arman", "james", "mithali", "aravind", "raj",
6
- "arjun", "sanya", "saina", "pooja", "saurabh", "nisha", "mansi", "radhika", "kajal",
7
- "raghav", "deepika", "niharika", "monika", "raman", "diya", "ananya", "william"]
5
+ API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
6
+
7
+ async def _fetch_voice_and_model() -> Tuple[List[str], List[str]]:
8
+ async with aiohttp.ClientSession() as session:
9
+ async with session.get(f"{API_BASE_URL}/voice/get-all-models") as response:
10
+ api_response = await response.json()
11
+
12
+ voices = []
13
+ for model in api_response:
14
+ for voice in model['voiceIds']:
15
+ voices.append(voice['voiceId'])
16
+ models = [model['modelName'] for model in api_response]
17
+ return models, voices
18
+
19
+ models, voices = asyncio.run(_fetch_voice_and_model())
20
+
21
+ TTSLanguages = ["en", "hi"]
22
+ TTSModels = models
23
+ TTSVoices = voices
smallest/stream_tts.py CHANGED
@@ -3,10 +3,10 @@ from threading import Thread
3
3
  from queue import Queue, Empty
4
4
  from typing import AsyncGenerator, Optional, Union
5
5
 
6
- from .tts import Smallest
7
- from .exceptions import APIError
8
- from .async_tts import AsyncSmallest
9
- from .utils import SENTENCE_END_REGEX
6
+ from smallest.tts import Smallest
7
+ from smallest.exceptions import APIError
8
+ from smallest.async_tts import AsyncSmallest
9
+ from smallest.utils import SENTENCE_END_REGEX
10
10
 
11
11
  class TextToAudioStream:
12
12
  def __init__(
smallest/tts.py CHANGED
@@ -4,15 +4,15 @@ import copy
4
4
  import requests
5
5
  from typing import Optional, Union, List
6
6
 
7
- from .models import TTSModels, TTSVoices
8
- from .exceptions import TTSError, APIError
9
- from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
7
+ from smallest.models import TTSModels, TTSVoices
8
+ from smallest.exceptions import TTSError, APIError
9
+ from smallest.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, split_into_chunks,
10
10
  get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
11
11
 
12
12
  class Smallest:
13
13
  def __init__(
14
14
  self,
15
- api_key: Optional[str] = None,
15
+ api_key: str = None,
16
16
  model: TTSModels = "lightning",
17
17
  sample_rate: int = 24000,
18
18
  voice: TTSVoices = "emily",
@@ -100,7 +100,7 @@ class Smallest:
100
100
  for key, value in kwargs.items():
101
101
  setattr(opts, key, value)
102
102
 
103
- validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
103
+ validate_input(preprocess_text(text), opts.voice, opts.model, opts.sample_rate, opts.speed)
104
104
 
105
105
  chunks = split_into_chunks(text)
106
106
  audio_content = b""
smallest/utils.py CHANGED
@@ -6,8 +6,8 @@ from pydub import AudioSegment
6
6
  from dataclasses import dataclass
7
7
  from sacremoses import MosesPunctNormalizer
8
8
 
9
- from .exceptions import ValidationError
10
- from .models import TTSModels, TTSLanguages, TTSVoices
9
+ from smallest.exceptions import ValidationError
10
+ from smallest.models import TTSModels, TTSLanguages, TTSVoices
11
11
 
12
12
 
13
13
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
@@ -32,12 +32,12 @@ class TTSOptions:
32
32
  def validate_input(text: str, voice: TTSVoices, model: TTSModels, sample_rate: int, speed: float):
33
33
  if not text:
34
34
  raise ValidationError("Text cannot be empty")
35
- if voice not in TTSVoices.__args__:
35
+ if voice not in TTSVoices:
36
36
  raise ValidationError(f"Invalid voice: {voice}")
37
- if model not in ['lightning']:
37
+ if model not in TTSModels:
38
38
  raise ValidationError(f"Invalid model: {model}")
39
- if not 8000 <= sample_rate <= 48000:
40
- raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 48000")
39
+ if not 8000 <= sample_rate <= 24000:
40
+ raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 24000")
41
41
  if not 0.5 <= speed <= 2.0:
42
42
  raise ValidationError(f"Invalid speed: {speed}. Must be between 0.5 and 2.0")
43
43
 
@@ -51,65 +51,59 @@ def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: i
51
51
 
52
52
 
53
53
  def preprocess_text(text: str) -> str:
54
- # Replace special characters with their normal form
55
- text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII')
56
- text = text.lower()
57
- text = text.replace("—", " ")
58
- # Normalize punctuation using Moses punct normalizer
54
+ text = text.replace("\n", " ").replace("\t", " ").replace("—", " ")
55
+ text = re.sub(r'\s+', ' ', text)
59
56
  mpn = MosesPunctNormalizer()
60
57
  text = mpn.normalize(text)
61
58
  return text.strip()
62
59
 
60
+
63
61
  def split_into_chunks(text: str) -> List[str]:
64
- """
65
- Splits the input text into chunks based on sentence boundaries
66
- defined by SENTENCE_END_REGEX and the maximum chunk size.
67
- """
68
- chunks = []
69
- current_chunk = ""
70
- last_break_index = 0
71
-
72
- i = 0
73
- while i < len(text):
74
- current_chunk += text[i]
75
-
76
- # Check for sentence boundary using regex
77
- if SENTENCE_END_REGEX.match(current_chunk):
62
+ """
63
+ Splits the input text into chunks based on sentence boundaries
64
+ defined by SENTENCE_END_REGEX and the maximum chunk size.
65
+ Only splits at valid sentence boundaries to avoid breaking words.
66
+ """
67
+ chunks = []
68
+ while text:
69
+ # If the remaining text is shorter than chunk size, add it as final chunk
70
+ if len(text) <= CHUNK_SIZE:
71
+ chunks.append(text.strip())
72
+ break
73
+
74
+ # Find the last sentence boundary within CHUNK_SIZE
75
+ chunk_text = text[:CHUNK_SIZE]
76
+ last_break_index = -1
77
+
78
+ # Check each character in reverse order to find last punctuation
79
+ for i in range(len(chunk_text) - 1, -1, -1):
80
+ if chunk_text[i] in '-.—!?;:…\n':
78
81
  last_break_index = i
82
+ break
79
83
 
80
- if len(current_chunk) >= CHUNK_SIZE:
81
- if last_break_index > 0:
82
- # Split at the last valid sentence boundary
83
- chunk = text[:last_break_index + 1].strip()
84
- chunk = chunk.replace("—", " ")
85
- chunks.append(chunk)
86
-
87
- text = text[last_break_index + 1:]
88
- i = -1 # Reset index to process the remaining text
89
- current_chunk = ""
90
- last_break_index = 0
91
- else:
92
- # No sentence boundary found, split at max length
93
- current_chunk = current_chunk.replace("—", " ")
94
- chunks.append(current_chunk.strip())
95
- text = text[CHUNK_SIZE:]
96
- i = -1 # Reset index to process the remaining text
97
- current_chunk = ""
98
-
99
- i += 1
100
-
101
- if text:
102
- text = text.replace("—", " ")
103
- chunks.append(text.strip())
84
+ if last_break_index == -1:
85
+ # If no punctuation found in chunk, look for the last space
86
+ # to avoid breaking words
87
+ last_space = chunk_text.rfind(' ')
88
+ if last_space != -1:
89
+ last_break_index = last_space
90
+ else:
91
+ # If no space found, use the full chunk size
92
+ last_break_index = CHUNK_SIZE - 1
93
+
94
+ # Add the chunk up to the break point
95
+ chunks.append(text[:last_break_index + 1].strip())
96
+ # Continue with remaining text
97
+ text = text[last_break_index + 1:].strip()
104
98
 
105
- return chunks
99
+ return chunks
106
100
 
107
101
 
108
102
  def get_smallest_languages() -> List[str]:
109
- return list(TTSLanguages.__args__)
103
+ return list(TTSLanguages)
110
104
 
111
105
  def get_smallest_voices() -> List[str]:
112
- return list(TTSVoices.__args__)
106
+ return list(TTSVoices)
113
107
 
114
108
  def get_smallest_models() -> List[str]:
115
109
  return ["lightning"]
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: smallestai
3
- Version: 1.3.3
3
+ Version: 1.3.4
4
4
  Summary: Official Python client for the Smallest AI API
5
- Author-email: Smallest <info@smallest.ai>
5
+ Author-email: Smallest <support@smallest.ai>
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/smallest-inc/smallest-python-sdk
8
8
  Keywords: smallest,smallest.ai,tts,text-to-speech
@@ -53,6 +53,7 @@ Currently, the library supports direct synthesis and the ability to synthesize s
53
53
 
54
54
  - [Installation](#installation)
55
55
  - [Get the API Key](#get-the-api-key)
56
+ - [Best Practices for Input Text](#best-practices-for-input-text)
56
57
  - [Examples](#examples)
57
58
  - [Sync](#sync)
58
59
  - [Async](#async)
@@ -76,6 +77,15 @@ When using an SDK in your application, make sure to pin to at least the major ve
76
77
  3. Create a new API Key and copy it.
77
78
  4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication.
78
79
 
80
+ ## Best Practices for Input Text
81
+ While the `transliterate` parameter is provided, please note that it is not fully supported and may not perform consistently across all cases. It is recommended to use the model without relying on this parameter.
82
+
83
+ For optimal voice generation results:
84
+
85
+ 1. For English, provide the input in Latin script (e.g., "Hello, how are you?").
86
+ 2. For Hindi, provide the input in Devanagari script (e.g., "नमस्ते, आप कैसे हैं?").
87
+ 3. For code-mixed input, use Latin script for English and Devanagari script for Hindi (e.g., "Hello, आप कैसे हैं?").
88
+
79
89
  ## Examples
80
90
 
81
91
  ### Sync
@@ -162,7 +172,7 @@ audio_bytes = await tts.synthesize(
162
172
 
163
173
  ### LLM to Speech
164
174
 
165
- The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output with minimal latency. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. Supports both synchronous and asynchronous TTS instance.
175
+ The `TextToAudioStream` class provides real-time text-to-speech processing, converting streaming text into audio output. It's particularly useful for applications like voice assistants, live captioning, or interactive chatbots that require immediate audio feedback from text generation. It supports both synchronous and asynchronous TTS instances.
166
176
 
167
177
  ```python
168
178
  import os
@@ -236,7 +246,7 @@ The processor yields raw audio data chunks without WAV headers for streaming eff
236
246
  ```python
237
247
  from smallest.tts import Smallest
238
248
 
239
- client = Smallest()
249
+ client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
240
250
 
241
251
  print(f"Available Languages: {client.get_languages()}")
242
252
  print(f"Available Voices: {client.get_voices()}")
@@ -254,7 +264,7 @@ When implementing audio streaming with chunks of synthesized speech, WAV headers
254
264
  - Sequential playback of chunks with headers causes audio artifacts (pop sounds) when concatenating or playing audio sequentially.
255
265
  - Audio players would try to reinitialize audio settings for each chunk.
256
266
 
257
- ### Best Practices
267
+ ### Best Practices for Audio Streaming
258
268
  1. Stream raw PCM audio data without headers
259
269
  2. Add a single WAV header only when:
260
270
  - Saving the complete stream to a file
@@ -0,0 +1,12 @@
1
+ smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
+ smallest/async_tts.py,sha256=2BrNMxq0PDtF7CCZqYPnrp9D0qxZndCgT31EbdrnV-E,6084
3
+ smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
+ smallest/models.py,sha256=Ndmek9f5VWDjxaNPfSmNk-xP55Y6uXzkzI5V54FnuvU,771
5
+ smallest/stream_tts.py,sha256=9sSGR9F_BiSSB1IsiUJP-How0t4-3qdYyTJ-H7ESkMk,6230
6
+ smallest/tts.py,sha256=Km3-rFf4D_-XXLi8CAVsiYrw5D-OQRLDHl-LTUh83ec,6030
7
+ smallest/utils.py,sha256=kIlS3wQaICT3R4B8R3HpywmXMABJUkCgbvFziStfno8,3527
8
+ smallestai-1.3.4.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
+ smallestai-1.3.4.dist-info/METADATA,sha256=yfREoK1kPNxKBTWcE_aRp8ByEF-m86nkdyiiBpF2Q4k,10584
10
+ smallestai-1.3.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
+ smallestai-1.3.4.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
+ smallestai-1.3.4.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
2
- smallest/async_tts.py,sha256=ReYTePzR0e3UcWxwnetGnwO4q_F7g2LRZPxIVfmgB4Y,6120
3
- smallest/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
4
- smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
5
- smallest/stream_tts.py,sha256=0OypcUzgP7CN3VGcGJDnQ2FDw2JOzPaSQ1cXK69k5dY,6198
6
- smallest/tts.py,sha256=l8VHaOE8-Feg3Ey8C3osOrLs3ffYz0q_J1ACiEtZ8y0,5999
7
- smallest/utils.py,sha256=hrta82o-rJRaOHTVKqHqC86_T56jAuvqJHIizAEqFok,3796
8
- smallestai-1.3.3.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
9
- smallestai-1.3.3.dist-info/METADATA,sha256=ji7TsSt2C9_GGNjRuh8DAyBXipVRuFeouLoGi76JFAU,9856
10
- smallestai-1.3.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
11
- smallestai-1.3.3.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
12
- smallestai-1.3.3.dist-info/RECORD,,