PyPI - smallestai - Versions diffs - 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

smallestai 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of smallestai might be problematic. Click here for more details.

Files changed (10) hide show

smallest/async_tts.py +85 -39
smallest/stream_tts.py +3 -2
smallest/tts.py +91 -34
smallest/utils.py +2 -1
{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/METADATA +37 -15
smallestai-1.3.0.dist-info/RECORD +12 -0
{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/WHEEL +1 -1
smallestai-1.1.0.dist-info/RECORD +0 -12
{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/LICENSE +0 -0
{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/top_level.txt +0 -0

smallest/async_tts.py CHANGED Viewed

@@ -7,20 +7,20 @@ from typing import Optional, Union, List
 from .models import TTSModels, TTSVoices
 from .exceptions import TTSError, APIError
 from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
-                     get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
+                     get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
 class AsyncSmallest:
     def __init__(
-            self,
-            api_key: Optional[str] = None,
-            model: TTSModels = "lightning",
-            sample_rate: int = 24000,
-            voice: TTSVoices = "emily",
-            speed: Optional[float] = 1.0,
-            add_wav_header: Optional[bool] = True,
-            transliterate: Optional[bool] = False,
-            remove_extra_silence: Optional[bool] = False
+        self,
+        api_key: Optional[str] = None,
+        model: TTSModels = "lightning",
+        sample_rate: int = 24000,
+        voice: TTSVoices = "emily",
+        speed: Optional[float] = 1.0,
+        add_wav_header: Optional[bool] = True,
+        transliterate: Optional[bool] = False,
+        remove_extra_silence: Optional[bool] = False
     ) -> None:
         """
         AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -48,6 +48,7 @@ class AsyncSmallest:
         self.api_key = api_key or os.environ.get("SMALLEST_API_KEY")
         if not self.api_key:
             raise TTSError("API key is required")
+        self.chunk_size = 250
         self.opts = TTSOptions(
             model=model,
@@ -70,6 +71,48 @@ class AsyncSmallest:
         if self.session:
             await self.session.close()
+    def _split_into_chunks(self, text: str) -> List[str]:
+        """
+        Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
+        """
+        chunks = []
+        current_chunk = ""
+        last_break_index = 0
+        i = 0
+        while i < len(text):
+            current_chunk += text[i]
+            if text[i] in ".,":
+                last_break_index = i
+            if len(current_chunk) >= self.chunk_size:
+                if last_break_index > 0:
+                    chunk = text[:last_break_index + 1].strip()
+                    chunk = chunk.replace("—", " ")
+                    chunks.append(chunk)
+                    text = text[last_break_index + 1:]
+                    i = -1
+                    current_chunk = ""
+                    last_break_index = 0
+                else:
+                    # No break point found, split at max length
+                    current_chunk = current_chunk.replace("—", " ")
+                    chunks.append(current_chunk.strip())
+                    text = text[self.chunk_size:]
+                    i = -1
+                    current_chunk = ""
+            i += 1
+        if text:
+            text = text.replace("—", " ")
+            chunks.append(text.strip())
+        return chunks
     def get_languages(self) -> List[str]:
         """Returns a list of available languages."""
         return get_smallest_languages()
@@ -110,42 +153,45 @@ class AsyncSmallest:
             setattr(opts, key, value)
         validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
-        payload = {
-            "text": preprocess_text(text),
-            "sample_rate": opts.sample_rate,
-            "voice_id": opts.voice,
-            "add_wav_header": opts.add_wav_header,
-            "speed": opts.speed,
-            "model": opts.model,
-            "transliterate": opts.transliterate,
-            "remove_extra_silence": opts.remove_extra_silence
-        }
-        headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json",
-        }
-        if not self.session:
-            self.session = aiohttp.ClientSession()
+        chunks = self._split_into_chunks(text)
+        audio_content = b""
+        for chunk in chunks:
+            payload = {
+                "text": preprocess_text(chunk),
+                "sample_rate": opts.sample_rate,
+                "voice_id": opts.voice,
+                "add_wav_header": False,
+                "speed": opts.speed,
+                "model": opts.model,
+                "transliterate": opts.transliterate,
+                "remove_extra_silence": opts.remove_extra_silence
+            }
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json",
+            }
+            if not self.session:
+                self.session = aiohttp.ClientSession()
-        async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
-            if res.status != 200:
-                raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
+            async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
+                if res.status != 200:
+                    raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
-            audio_content = await res.read()
+                audio_content += await res.read()
         if save_as:
             if not save_as.endswith(".wav"):
                 raise TTSError("Invalid file name. Extension must be .wav")
-            if self.opts.add_wav_header:
-                async with aiofiles.open(save_as, mode='wb') as f:
-                    await f.write(audio_content)
-            else:
-                async with aiofiles.open(save_as, mode='wb') as f:
-                    await f.write(add_wav_header(audio_content, self.opts.sample_rate))
+            async with aiofiles.open(save_as, mode='wb') as f:
+                await f.write(add_wav_header(audio_content, self.opts.sample_rate))
             return None
+        if opts.add_wav_header:
+            return add_wav_header(audio_content, self.opts.sample_rate)
         return audio_content

smallest/stream_tts.py CHANGED Viewed

@@ -34,13 +34,14 @@ class TextToAudioStream:
             max_retries: Number of retry attempts for failed synthesis (default: 3)
         """
         self.tts_instance = tts_instance
+        self.tts_instance.opts.add_wav_header = False
         self.sentence_end_regex = SENTENCE_END_REGEX
         self.queue_timeout = queue_timeout
         self.max_retries = max_retries
         self.queue = Queue()
         self.buffer_size = 250
         self.stop_flag = False
-        self.tts_instance.opts.add_wav_header = False
     async def _stream_llm_output(self, llm_output: AsyncGenerator[str, None]) -> None:
@@ -53,7 +54,7 @@ class TextToAudioStream:
         buffer = ""
         async for chunk in llm_output:
             buffer += chunk
-            if self.sentence_end_regex.match(buffer) or self.buffer_size > 600:
+            if self.sentence_end_regex.match(buffer) or len(buffer) > self.buffer_size:
                 self.queue.put(buffer)
                 buffer = ""

smallest/tts.py CHANGED Viewed

@@ -1,24 +1,25 @@
 import os
+import wave
 import copy
 import requests
 from typing import Optional, Union, List
 from .models import TTSModels, TTSVoices
 from .exceptions import TTSError, APIError
-from .utils import (TTSOptions, validate_input, preprocess_text,
-get_smallest_languages, get_smallest_voices, get_smallest_models, API_BASE_URL)
+from .utils import (TTSOptions, validate_input, preprocess_text, add_wav_header,
+get_smallest_languages, get_smallest_voices, get_smallest_models, SENTENCE_END_REGEX, API_BASE_URL)
 class Smallest:
     def __init__(
-            self,
-            api_key: Optional[str] = None,
-            model: TTSModels = "lightning",
-            sample_rate: int = 24000,
-            voice: TTSVoices = "emily",
-            speed: Optional[float] = 1.0,
-            add_wav_header: Optional[bool] = True,
-            transliterate: Optional[bool] = False,
-            remove_extra_silence: Optional[bool] = True
+        self,
+        api_key: Optional[str] = None,
+        model: TTSModels = "lightning",
+        sample_rate: int = 24000,
+        voice: TTSVoices = "emily",
+        speed: Optional[float] = 1.0,
+        add_wav_header: Optional[bool] = True,
+        transliterate: Optional[bool] = False,
+        remove_extra_silence: Optional[bool] = True
     ) -> None:
         """
         Smallest Instance for text-to-speech synthesis.
@@ -46,6 +47,8 @@ class Smallest:
         if not self.api_key:
             raise TTSError("API key is required")
+        self.chunk_size = 250
         self.opts = TTSOptions(
             model=model,
             sample_rate=sample_rate,
@@ -56,6 +59,48 @@ class Smallest:
             transliterate=transliterate,
             remove_extra_silence=remove_extra_silence
         )
+    def _split_into_chunks(self, text: str) -> List[str]:
+        """
+        Splits the input text into chunks based on sentence boundaries and the maximum chunk size.
+        """
+        chunks = []
+        current_chunk = ""
+        last_break_index = 0
+        i = 0
+        while i < len(text):
+            current_chunk += text[i]
+            if text[i] in ".,":
+                last_break_index = i
+            if len(current_chunk) >= self.chunk_size:
+                if last_break_index > 0:
+                    chunk = text[:last_break_index + 1].strip()
+                    chunk = chunk.replace("—", " ")
+                    chunks.append(chunk)
+                    text = text[last_break_index + 1:]
+                    i = -1
+                    current_chunk = ""
+                    last_break_index = 0
+                else:
+                    # No break point found, split at max length
+                    current_chunk = current_chunk.replace("—", " ")
+                    chunks.append(current_chunk.strip())
+                    text = text[self.chunk_size:]
+                    i = -1
+                    current_chunk = ""
+            i += 1
+        if text:
+            text = text.replace("—", " ")
+            chunks.append(text.strip())
+        return chunks
     def get_languages(self) -> List[str]:
         """Returns a list of available languages."""
@@ -98,37 +143,49 @@ class Smallest:
         validate_input(text, opts.voice, opts.model, opts.sample_rate, opts.speed)
-        payload = {
-            "text": preprocess_text(text),
-            "sample_rate": opts.sample_rate,
-            "voice_id": opts.voice,
-            "add_wav_header": opts.add_wav_header,
-            "speed": opts.speed,
-            "model": opts.model,
-            "transliterate": opts.transliterate,
-            "remove_extra_silence": opts.remove_extra_silence,
-        }
-        headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json",
-        }
+        chunks = self._split_into_chunks(text)
+        audio_content = b""
+        for chunk in chunks:
+            payload = {
+                "text": preprocess_text(chunk),
+                "sample_rate": opts.sample_rate,
+                "voice_id": opts.voice,
+                "add_wav_header": False,
+                "speed": opts.speed,
+                "model": opts.model,
+                "transliterate": opts.transliterate,
+                "remove_extra_silence": opts.remove_extra_silence,
+            }
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json",
+            }
+            res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
+            if res.status_code != 200:
+                raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
+            audio_content += res.content
         res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
         if res.status_code != 200:
             raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
-        audio_content = res.content
         if save_as:
             if not save_as.endswith(".wav"):
                 raise TTSError("Invalid file name. Extension must be .wav")
-            if self.opts.add_wav_header:
-                with open(save_as, "wb") as wf:
-                    wf.write(audio_content)
-            else:
-                raise TTSError("WAV header is required for saving audio. Set 'add_wav_header=True' to add a WAV header.")
+            with wave.open(save_as, "wb") as wf:
+                wf.setnchannels(1)
+                wf.setsampwidth(2)
+                wf.setframerate(self.opts.sample_rate)
+                wf.writeframes(audio_content)
             return None
+        if self.opts.add_wav_header:
+            return add_wav_header(audio_content, self.opts.sample_rate)
         return audio_content

smallest/utils.py CHANGED Viewed

@@ -11,7 +11,7 @@ from .models import TTSModels, TTSLanguages, TTSVoices
 API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
-SENTENCE_END_REGEX = re.compile(r'.*[-.!?;:…\n]$')
+SENTENCE_END_REGEX = re.compile(r'.*[-.—!?;:…\n]$')
 SAMPLE_WIDTH = 2
 CHANNELS = 1
@@ -53,6 +53,7 @@ def preprocess_text(text: str) -> str:
     # Replace special characters with their normal form
     text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('ASCII')
     text = text.lower()
+    text = text.replace("—", " ")
     # Normalize punctuation using Moses punct normalizer
     mpn = MosesPunctNormalizer()
     text = mpn.normalize(text)

{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: smallestai
-Version: 1.1.0
+Version: 1.3.0
 Summary: Official Python client for the Smallest AI API
 Author-email: Smallest <info@smallest.ai>
 License: MIT
@@ -18,12 +18,12 @@ Requires-Dist: requests
 Requires-Dist: sacremoses
 Requires-Dist: pydub
 Provides-Extra: test
-Requires-Dist: jiwer ; extra == 'test'
-Requires-Dist: httpx ; extra == 'test'
-Requires-Dist: pytest ; extra == 'test'
-Requires-Dist: pytest-asyncio ; extra == 'test'
-Requires-Dist: deepgram-sdk ; extra == 'test'
-Requires-Dist: python-dotenv ; extra == 'test'
+Requires-Dist: jiwer; extra == "test"
+Requires-Dist: httpx; extra == "test"
+Requires-Dist: pytest; extra == "test"
+Requires-Dist: pytest-asyncio; extra == "test"
+Requires-Dist: deepgram-sdk; extra == "test"
+Requires-Dist: python-dotenv; extra == "test"
 ![image](https://i.imgur.com/TJ2tT4g.png)
@@ -88,9 +88,7 @@ from smallest import Smallest
 def main():
     client = Smallest(api_key=os.environ.get("SMALLEST_API_KEY"))
-    audio_data = client.synthesize("Hello, this is a test for sync synthesis function.")
-    with open("sync_synthesize.wav", "wb") as f:
-        f.write(audio_data)
+    client.synthesize("Hello, this is a test for sync synthesis function.", save_as="sync_synthesize.wav")
 if __name__ == "__main__":
     main()
@@ -104,10 +102,23 @@ if __name__ == "__main__":
 - `speed`: Speech speed multiplier (default: 1.0)
 - `add_wav_header`: Include WAV header in output (default: True)
 - `transliterate`: Enable text transliteration (default: False)
-- `remove_extra_silence`: Remove additional silence (default: True)
+- `remove_extra_silence`: Remove additional silence (default: True)
+These parameters are part of the `Smallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts kwargs, allowing you to override these parameters for a specific synthesis request.
+For example, you can modify the speech speed and sample rate just for a particular synthesis call:
+```py
+client.synthesize(
+    "Hello, this is a test for sync synthesis function.",
+    save_as="sync_synthesize.wav",
+    speed=1.5,  # Overrides default speed
+    sample_rate=16000  # Overrides default sample rate
+)
+```
 ### Async
-A synchronous text-to-speech synthesis client.
+Asynchronous text-to-speech synthesis client.
 **Basic Usage:**
 ```python
@@ -120,9 +131,9 @@ client = AsyncSmallest(api_key=os.environ.get("SMALLEST_API_KEY"))
 async def main():
     async with client as tts:
-        audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
+        audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.")
         async with aiofiles.open("async_synthesize.wav", "wb") as f:
-            await f.write(audio_bytes)
+            await f.write(audio_bytes) # alternatively you can use the `save_as` parameter.
 if __name__ == "__main__":
     asyncio.run(main())
@@ -136,7 +147,18 @@ if __name__ == "__main__":
 - `speed`: Speech speed multiplier (default: 1.0)
 - `add_wav_header`: Include WAV header in output (default: True)
 - `transliterate`: Enable text transliteration (default: False)
-- `remove_extra_silence`: Remove additional silence (default: True)
+- `remove_extra_silence`: Remove additional silence (default: True)
+These parameters are part of the AsyncSmallest instance. They can be set when creating the instance (as shown above). However, the synthesize function also accepts kwargs, allowing you to override any of these parameters on a per-request basis.
+For example, you can modify the speech speed and sample rate just for a particular synthesis request:
+```py
+audio_bytes = await tts.synthesize(
+    "Hello, this is a test of the async synthesis function.",
+    speed=1.5,  # Overrides default speed
+    sample_rate=16000  # Overrides default sample rate
+)
+```
 ### LLM to Speech

smallestai-1.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
+smallest/async_tts.py,sha256=zqZGuQUWaV2_if9WVdYutxb9G2UoUAxbyAbNlF1tv3U,7445
+smallest/exceptions.py,sha256=41GLVvNTfRQMQsPLGk0lHuhK2mak8_dVtiFLEtT23Dc,333
+smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
+smallest/stream_tts.py,sha256=4h_AktweZ386qgVIe8UeqO-ZxZO_x6Zj0uJQH09V1CE,5425
+smallest/tts.py,sha256=CHtZwcA2S4zfYfqhv5qikBKOME8XBjS_0R4HXpzXeAU,7325
+smallest/utils.py,sha256=WL71OByTxH8Y1gouP2K5YDDMwqhUdqMJ_bhqNryI3KQ,2222
+smallestai-1.3.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
+smallestai-1.3.0.dist-info/METADATA,sha256=0lqX-j9c0CkSeA6OeG5RUIdnaeWXWnvWPznCkO7vJCA,9845
+smallestai-1.3.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+smallestai-1.3.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
+smallestai-1.3.0.dist-info/RECORD,,

{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.5.0)
+Generator: setuptools (75.6.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

smallestai-1.1.0.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-smallest/__init__.py,sha256=vaoIBml_IobavpVvFazB86iikg2iEy4h3ddxqv_0Fy4,190
-smallest/async_tts.py,sha256=w_SY1Oetn5Zorq-8JXA7lGeRHR3kTtBzqotc_hF0hOQ,6010
-smallest/exceptions.py,sha256=41GLVvNTfRQMQsPLGk0lHuhK2mak8_dVtiFLEtT23Dc,333
-smallest/models.py,sha256=R5UZZA9SibrJ2DsWPi_mkKI13WfyC-MLd-7kptfjns4,390
-smallest/stream_tts.py,sha256=1j4JpAwrAmwprC98mKQwuhXf0HFxFTlMcZ3_JAdcAK0,5416
-smallest/tts.py,sha256=Gr13I-O0qH7EclnR_g29qcpiqITWjgfjCFxFwNxyZrA,5410
-smallest/utils.py,sha256=hAgyEfZEnvayzu8qS4LXhpZR8qK7z4gatLWGVOkS3Yg,2183
-smallestai-1.1.0.dist-info/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
-smallestai-1.1.0.dist-info/METADATA,sha256=e1ivgFjFyvXKPKGXoa8jSH7pqUsmiqoqNk0Q_Mjq3yM,8723
-smallestai-1.1.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
-smallestai-1.1.0.dist-info/top_level.txt,sha256=i5ktbWkG-2aS28vrYTeuhKtA-tY2ZG7SHgLHi87QTLw,9
-smallestai-1.1.0.dist-info/RECORD,,

{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{smallestai-1.1.0.dist-info → smallestai-1.3.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

smallestai 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

Potentially problematic release.

smallestai 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl