PyPI - smallestai - Versions diffs - 3.0.3__tar.gz → 4.0.0__tar.gz - Mend

smallestai 3.0.3__tar.gz → 4.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of smallestai might be problematic. Click here for more details.

Files changed (96)

{smallestai-3.0.3/smallestai.egg-info → smallestai-4.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: smallestai
-Version: 3.0.3
+Version: 4.0.0
 Summary: Official Python client for the Smallest AI API
 Author-email: Smallest <support@smallest.ai>
 License: MIT
@@ -16,6 +16,7 @@ Requires-Dist: aiohttp
 Requires-Dist: aiofiles
 Requires-Dist: requests
 Requires-Dist: pydub
+Requires-Dist: websocket-client
 Requires-Dist: urllib3<3.0.0,>=1.25.3
 Requires-Dist: python-dateutil>=2.8.2
 Requires-Dist: pydantic>=2

{smallestai-3.0.3 → smallestai-4.0.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "smallestai"
-version = "3.0.3"
+version = "4.0.0"
 description = "Official Python client for the Smallest AI API"
 authors = [
     {name = "Smallest", email = "support@smallest.ai"},
@@ -19,6 +19,7 @@ dependencies = [
     "aiofiles",
     "requests",
     "pydub",
+    "websocket-client",
     "urllib3 >= 1.25.3, < 3.0.0",
     "python-dateutil >= 2.8.2",
     "pydantic >= 2",

{smallestai-3.0.3 → smallestai-4.0.0}/smallestai/__init__.py RENAMED Viewed

@@ -84,7 +84,7 @@ from smallestai.atoms import (
 from smallestai.waves import (
     WavesClient,
     AsyncWavesClient,
-    TextToAudioStream
+    WavesStreamingTTS
 )
 from smallestai.atoms import __all__ as atoms_all

smallestai-4.0.0/smallestai/waves/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from smallestai.waves.waves_client import WavesClient
+from smallestai.waves.async_waves_client import AsyncWavesClient
+from smallestai.waves.stream_tts import WavesStreamingTTS, TTSConfig
+__all__ = ["WavesClient", "AsyncWavesClient", "WavesStreamingTTS", "TTSConfig"]

{smallestai-3.0.3 → smallestai-4.0.0}/smallestai/waves/async_waves_client.py RENAMED Viewed

@@ -4,10 +4,10 @@ import json
 import aiohttp
 import aiofiles
 import requests
-from typing import Optional, Union, List, AsyncIterator
+from typing import Optional, Union, List
 from smallestai.waves.exceptions import TTSError, APIError
-from smallestai.waves.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
+from smallestai.waves.utils import (TTSOptions, validate_input,
                      get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
@@ -22,7 +22,8 @@ class AsyncWavesClient:
         consistency: Optional[float] = 0.5,
         similarity: Optional[float] = 0.0,
         enhancement: Optional[int] = 1,
-        add_wav_header: Optional[bool] = True
+        language: Optional[str] = "en",
+        output_format: Optional[str] = "wav"
     ) -> None:
         """
         AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -40,7 +41,8 @@ class AsyncWavesClient:
         - consistency (float): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. Range - [0, 1]
         - similarity (float): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. Range - [0, 1]
         - enhancement (int): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. Range - [0, 2].
-        - add_wav_header (bool): Whether to add a WAV header to the output audio.
+        - language (str): The language for synthesis. Default is "en".
+        - output_format (str): The output audio format. Options: "pcm", "mp3", "wav", "mulaw". Default is "wav".
         Methods:
         - get_languages: Returns a list of available languages for synthesis.
@@ -61,11 +63,12 @@ class AsyncWavesClient:
             sample_rate=sample_rate,
             voice_id=voice_id,
             api_key=self.api_key,
-            add_wav_header=add_wav_header,
             speed=speed,
             consistency=consistency,
             similarity=similarity,
-            enhancement=enhancement
+            enhancement=enhancement,
+            language=language,
+            output_format=output_format
         )
         self.session = None
@@ -89,9 +92,9 @@ class AsyncWavesClient:
         return False
-    def get_languages(self) -> List[str]:
+    def get_languages(self, model="lightning") -> List[str]:
         """Returns a list of available languages."""
-        return get_smallest_languages()
+        return get_smallest_languages(model)
     def get_cloned_voices(self) -> str:
         """Returns a list of your cloned voices."""
@@ -130,18 +133,14 @@ class AsyncWavesClient:
     async def synthesize(
             self,
             text: str,
-            stream: Optional[bool] = False,
-            save_as: Optional[str] = None,
             **kwargs
-        ) -> Union[bytes, None, AsyncIterator[bytes]]:
+        ) -> Union[bytes]:
         """
         Asynchronously synthesize speech from the provided text.
         Args:
         - text (str): The text to be converted to speech.
         - stream (Optional[bool]): If True, returns an iterator yielding audio chunks instead of a full byte array.
-        - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
-                                   The file must have a .wav extension.
         - kwargs: Additional optional parameters to override `__init__` options for this call.
         Returns:
@@ -151,7 +150,7 @@ class AsyncWavesClient:
             - Otherwise, returns the synthesized audio content as bytes.
         Raises:
-        - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
+        - TTSError: If the provided file name does not have a .wav or .mp3 extension when `save_as` is specified.
         - APIError: If the API request fails or returns an error.
         - ValueError: If an unexpected parameter is passed in `kwargs`.
         """
@@ -172,65 +171,40 @@ class AsyncWavesClient:
             for key, value in kwargs.items():
                 setattr(opts, key, value)
-            text = preprocess_text(text)
             validate_input(text, opts.model, opts.sample_rate, opts.speed, opts.consistency, opts.similarity, opts.enhancement)
-            self.chunk_size = 250
-            if opts.model == 'lightning-large':
-                self.chunk_size = 140
-            chunks = chunk_text(text, self.chunk_size)
-            async def audio_stream():
-                for chunk in chunks:
-                    payload = {
-                        "text": chunk,
-                        "sample_rate": opts.sample_rate,
-                        "voice_id": opts.voice_id,
-                        "add_wav_header": False,
-                        "speed": opts.speed,
-                        "model": opts.model
-                    }
-                    if opts.model == "lightning-large":
-                        if opts.consistency is not None:
-                            payload["consistency"] = opts.consistency
-                        if opts.similarity is not None:
-                            payload["similarity"] = opts.similarity
-                        if opts.enhancement is not None:
-                            payload["enhancement"] = opts.enhancement
-                    headers = {
-                        "Authorization": f"Bearer {self.api_key}",
-                        "Content-Type": "application/json",
-                    }
-                    async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
-                        if res.status != 200:
-                            raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
-                        yield await res.read()
+            payload = {
+                "text": text,
+                "voice_id": opts.voice_id,
+                "sample_rate": opts.sample_rate,
+                "speed": opts.speed,
+                "consistency": opts.consistency,
+                "similarity": opts.similarity,
+                "enhancement": opts.enhancement,
+                "language": opts.language,
+                "output_format": opts.output_format
+            }
-            if stream:
-                return audio_stream()
-            audio_content = b"".join([chunk async for chunk in audio_stream()])
-            if save_as:
-                if not save_as.endswith(".wav"):
-                    raise TTSError("Invalid file name. Extension must be .wav")
-                async with aiofiles.open(save_as, mode='wb') as f:
-                    await f.write(add_wav_header(audio_content, opts.sample_rate))
-                return None
-            if opts.add_wav_header:
-                return add_wav_header(audio_content, opts.sample_rate)
+            if opts.model == "lightning-large" or opts.model == "lightning-v2":
+                if opts.consistency is not None:
+                    payload["consistency"] = opts.consistency
+                if opts.similarity is not None:
+                    payload["similarity"] = opts.similarity
+                if opts.enhancement is not None:
+                    payload["enhancement"] = opts.enhancement
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json",
+            }
-            return audio_content
+            async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
+                if res.status != 200:
+                    raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
+                audio_bytes = await res.content.read()
+            return audio_bytes
         finally:
             if should_cleanup and self.session:
                 await self.session.close()
@@ -316,9 +290,8 @@ class AsyncWavesClient:
                 if res.status != 200:
                     raise APIError(f"Failed to delete voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
-                return await res.text()
+                return json.dumps(await res.json(), indent=4, ensure_ascii=False)
         finally:
             if should_cleanup and self.session:
                 await self.session.close()
-                self.session = None
+                self.session = None

smallestai-4.0.0/smallestai/waves/models.py ADDED Viewed

@@ -0,0 +1,8 @@
+# Language codes accepted by each TTS model. utils.get_smallest_languages
+# returns one of these lists depending on the model name it is given.
+TTSLanguages_lightning = ["en", "hi"]
+TTSLanguages_lightning_large = ["en", "hi"]
+TTSLanguages_lightning_v2 = ["en", "hi", "mr", "kn", "ta", "bn", "gu", "de", "fr", "es", "it", "pl", "nl", "ru", "ar", "he"]
+# Model identifiers. utils.validate_input rejects any model not listed here,
+# and utils.get_smallest_models returns this list.
+TTSModels = [
+    "lightning",
+    "lightning-large",
+    "lightning-v2"
+]

smallestai-4.0.0/smallestai/waves/stream_tts.py ADDED Viewed

@@ -0,0 +1,207 @@
+import json
+import base64
+import time
+import threading
+import queue
+from typing import Generator
+from dataclasses import dataclass
+from websocket import WebSocketApp
+@dataclass
+class TTSConfig:
+    """Settings for a WavesStreamingTTS client.
+    ``api_key`` is sent as a Bearer token in the WebSocket Authorization header;
+    every other field is copied unchanged into each request payload by
+    WavesStreamingTTS._create_payload.
+    """
+    # Required: voice to synthesize with and the account API key.
+    voice_id: str
+    api_key: str
+    # Optional tuning values, forwarded to the server as-is (no local validation).
+    language: str = "en"
+    sample_rate: int = 24000
+    speed: float = 1.0
+    consistency: float = 0.5
+    enhancement: int = 1
+    similarity: float = 0
+    # NOTE(review): presumably the server-side buffer flush interval in
+    # milliseconds — semantics are not visible here; confirm against the API docs.
+    max_buffer_flush_ms: int = 0
+class WavesStreamingTTS:
+    """Streaming text-to-speech client for the lightning-v2 WebSocket endpoint.
+    A WebSocketApp runs on a daemon thread. Its callbacks push decoded audio
+    chunks onto ``audio_queue`` (``None`` marks end of stream) and failures onto
+    ``error_queue``. The public generator methods drain those queues and always
+    close their socket when they finish, fail, or are abandoned by the caller.
+    """
+    def __init__(self, config: TTSConfig):
+        """Store ``config`` and initialise per-session state; no I/O happens here."""
+        self.config = config
+        self.ws_url = "wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream"
+        self.ws = None
+        self.audio_queue = queue.Queue()
+        self.error_queue = queue.Queue()
+        self.is_complete = False
+        self.is_connected = False
+        self.request_id = None
+    def _get_headers(self):
+        """Return the handshake headers carrying the Bearer API key."""
+        return [f"Authorization: Bearer {self.config.api_key}"]
+    def _create_payload(self, text: str, continue_stream: bool = False, flush: bool = False):
+        """Build the JSON-serialisable request body for one piece of text."""
+        return {
+            "voice_id": self.config.voice_id,
+            "text": text,
+            "language": self.config.language,
+            "sample_rate": self.config.sample_rate,
+            "speed": self.config.speed,
+            "consistency": self.config.consistency,
+            "similarity": self.config.similarity,
+            "enhancement": self.config.enhancement,
+            "max_buffer_flush_ms": self.config.max_buffer_flush_ms,
+            "continue": continue_stream,
+            "flush": flush
+        }
+    def _is_stale(self, ws):
+        """Return True when a callback comes from a socket that has been replaced.
+        Closing a socket is asynchronous, so its callbacks can still fire after a
+        new session has started; those events must not touch the new session.
+        """
+        return ws is not None and self.ws is not None and ws is not self.ws
+    def _on_open(self, ws):
+        """Mark the current session as connected."""
+        if self._is_stale(ws):
+            return
+        self.is_connected = True
+    def _on_message(self, ws, message):
+        """Decode one server message and route it to the audio or error queue."""
+        if self._is_stale(ws):
+            return
+        try:
+            data = json.loads(message)
+            status = data.get("status", "")
+            if status == "error":
+                self.error_queue.put(Exception(data.get("message", "Unknown error")))
+                return
+            if not self.request_id:
+                self.request_id = data.get("request_id")
+            # "data" may be absent or null on status-only messages.
+            audio_b64 = (data.get("data") or {}).get("audio")
+            if audio_b64:
+                self.audio_queue.put(base64.b64decode(audio_b64))
+            if status == "complete":
+                self.is_complete = True
+                self.audio_queue.put(None)
+        except Exception as e:
+            # Runs on the socket thread: hand the failure to the consumer
+            # instead of letting it vanish inside the callback.
+            self.error_queue.put(e)
+    def _on_error(self, ws, error):
+        """Forward a transport error to the consumer."""
+        if self._is_stale(ws):
+            return
+        self.error_queue.put(error)
+    def _on_close(self, ws, *args):
+        """Mark the session disconnected and unblock the consumer if needed."""
+        if self._is_stale(ws):
+            return
+        self.is_connected = False
+        if not self.is_complete:
+            self.audio_queue.put(None)
+    def _connect(self):
+        """Open a fresh socket on a daemon thread and wait up to 5 s for it.
+        Raises:
+            Exception: "Failed to connect to WebSocket" if the connection is not
+                established in time; the transport error, when one was reported,
+                is attached as ``__cause__``.
+        """
+        previous = self.ws
+        # Install the new app before closing the old one so that the old
+        # socket's late callbacks are recognised as stale.
+        self.ws = WebSocketApp(
+            self.ws_url,
+            header=self._get_headers(),
+            on_open=self._on_open,
+            on_message=self._on_message,
+            on_error=self._on_error,
+            on_close=self._on_close
+        )
+        if previous is not None:
+            previous.close()
+        ws_thread = threading.Thread(target=self.ws.run_forever, daemon=True)
+        ws_thread.start()
+        # Monotonic clock: the timeout must not be affected by wall-clock jumps.
+        deadline = time.monotonic() + 5.0
+        while not self.is_connected and time.monotonic() < deadline:
+            if not self.error_queue.empty():
+                break  # handshake already failed; no point waiting out the timeout
+            time.sleep(0.1)
+        if not self.is_connected:
+            self.ws.close()
+            try:
+                cause = self.error_queue.get_nowait()
+            except queue.Empty:
+                cause = None
+            if not isinstance(cause, BaseException):
+                cause = None
+            raise Exception("Failed to connect to WebSocket") from cause
+    def _receive(self, poll_timeout: float) -> Generator[bytes, None, None]:
+        """Yield queued audio chunks until end of stream; raise queued errors."""
+        while True:
+            if not self.error_queue.empty():
+                raise self.error_queue.get()
+            try:
+                chunk = self.audio_queue.get(timeout=poll_timeout)
+            except queue.Empty:
+                if self.is_complete:
+                    break
+                continue
+            if chunk is None:
+                break
+            yield chunk
+    def synthesize(self, text: str) -> Generator[bytes, None, None]:
+        """Synthesize ``text`` in one request and yield audio chunks as they arrive."""
+        self._reset_state()
+        self._connect()
+        ws = self.ws
+        try:
+            ws.send(json.dumps(self._create_payload(text)))
+            yield from self._receive(poll_timeout=1.0)
+        finally:
+            # Also runs on error and when the caller stops iterating early.
+            ws.close()
+    def synthesize_streaming(self, text_stream: Generator[str, None, None],
+                           continue_stream: bool = True,
+                           auto_flush: bool = True) -> Generator[bytes, None, None]:
+        """Send text chunks from ``text_stream`` while yielding the resulting audio.
+        Whitespace-only chunks are skipped. When ``auto_flush`` is true, an empty
+        flush request is sent after the last chunk.
+        """
+        self._reset_state()
+        self._connect()
+        ws = self.ws
+        def send_text():
+            try:
+                for text_chunk in text_stream:
+                    if text_chunk.strip():
+                        payload = self._create_payload(text_chunk, continue_stream=continue_stream)
+                        ws.send(json.dumps(payload))
+                if auto_flush:
+                    flush_payload = self._create_payload("", flush=True)
+                    ws.send(json.dumps(flush_payload))
+            except Exception as e:
+                self.error_queue.put(e)
+        sender_thread = threading.Thread(target=send_text, daemon=True)
+        sender_thread.start()
+        try:
+            yield from self._receive(poll_timeout=1.0)
+        finally:
+            ws.close()
+    def send_text_chunk(self, text: str, continue_stream: bool = True, flush: bool = False):
+        """Send one text chunk on the open session.
+        Raises:
+            Exception: "WebSocket not connected" if no session is open.
+        """
+        if not self.is_connected:
+            raise Exception("WebSocket not connected")
+        payload = self._create_payload(text, continue_stream=continue_stream, flush=flush)
+        self.ws.send(json.dumps(payload))
+    def flush_buffer(self):
+        """Send an empty flush request on the open session.
+        Raises:
+            Exception: "WebSocket not connected" if no session is open.
+        """
+        self.send_text_chunk("", continue_stream=False, flush=True)
+    def start_streaming_session(self) -> Generator[bytes, None, None]:
+        """Open a session and yield audio for text sent via ``send_text_chunk``.
+        The connection is made on the first iteration, not when this method is
+        called. The socket is closed once the stream ends or iteration stops.
+        """
+        self._reset_state()
+        self._connect()
+        ws = self.ws
+        try:
+            yield from self._receive(poll_timeout=0.1)
+        finally:
+            ws.close()
+    def _reset_state(self):
+        """Discard queues and flags left over from any previous session."""
+        self.audio_queue = queue.Queue()
+        self.error_queue = queue.Queue()
+        self.is_complete = False
+        self.is_connected = False
+        self.request_id = None

smallestai-4.0.0/smallestai/waves/utils.py ADDED Viewed

@@ -0,0 +1,58 @@
+from typing import List
+from typing import Optional
+from dataclasses import dataclass
+from smallestai.waves.exceptions import ValidationError
+from smallestai.waves.models import TTSModels, TTSLanguages_lightning, TTSLanguages_lightning_large, TTSLanguages_lightning_v2
+# Base URL of the REST API; AsyncWavesClient.synthesize posts to
+# f"{API_BASE_URL}/{model}/get_speech".
+API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
+# Streaming endpoint for lightning-v2. WavesStreamingTTS currently hard-codes
+# the same value as its ws_url rather than importing this constant.
+WEBSOCKET_URL = "wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream"
+# NOTE(review): presumably bytes per sample and channel count of the audio;
+# neither is referenced in the visible code — confirm before relying on them.
+SAMPLE_WIDTH = 2
+CHANNELS = 1
+# Imported by async_waves_client; its use is outside the visible hunks.
+ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
+@dataclass
+class TTSOptions:
+    """Synthesis settings held by a Waves client.
+    All fields are required; the dataclass itself performs no validation.
+    The ranges noted below are the ones enforced by validate_input.
+    """
+    model: str  # must be one of TTSModels
+    sample_rate: int  # 8000 to 24000
+    voice_id: str
+    api_key: str
+    speed: float  # 0.5 to 2.0
+    consistency: float  # 0.0 to 1.0
+    similarity: float  # 0.0 to 1.0
+    enhancement: int  # 0 to 2
+    language: str  # sent as "language" in the request payload; not validated here
+    output_format: str  # sent as "output_format" in the request payload; not validated here
+def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[int] = None):
+    """Check synthesis arguments and raise ValidationError on the first bad one.
+    Checks run in this order: text is non-empty, model is in TTSModels,
+    sample_rate is 8000 to 24000, speed is 0.5 to 2.0, then the optional
+    consistency and similarity (0.0 to 1.0) and enhancement (0 to 2).
+    Optional values passed as None are not checked.
+    """
+    if not text:
+        raise ValidationError("Text cannot be empty.")
+    if model not in TTSModels:
+        raise ValidationError(f"Invalid model: {model}. Must be one of {TTSModels}")
+    sample_rate_ok = 8000 <= sample_rate <= 24000
+    if not sample_rate_ok:
+        raise ValidationError(f"Invalid sample rate: {sample_rate}. Must be between 8000 and 24000")
+    speed_ok = 0.5 <= speed <= 2.0
+    if not speed_ok:
+        raise ValidationError(f"Invalid speed: {speed}. Must be between 0.5 and 2.0")
+    # consistency and similarity share the same unit-interval rule and wording.
+    for label, value in (("consistency", consistency), ("similarity", similarity)):
+        if value is None:
+            continue
+        if not 0.0 <= value <= 1.0:
+            raise ValidationError(f"Invalid {label}: {value}. Must be between 0.0 and 1.0")
+    if enhancement is None:
+        return
+    if not 0 <= enhancement <= 2:
+        raise ValidationError(f"Invalid enhancement: {enhancement}. Must be between 0 and 2.")
+def get_smallest_languages(model: str = 'lightning') -> List[str]:
+    """Return the language codes supported by ``model``.
+    A new list is returned on every call, so callers may modify the result
+    without changing the module-level language tables.
+    Raises:
+        ValidationError: if ``model`` is not a known model name.
+    """
+    languages_by_model = {
+        'lightning': TTSLanguages_lightning,
+        'lightning-large': TTSLanguages_lightning_large,
+        'lightning-v2': TTSLanguages_lightning_v2,
+    }
+    try:
+        supported = languages_by_model[model]
+    except (KeyError, TypeError):
+        # TypeError covers unhashable arguments, which previously also fell
+        # through to the "invalid model" branch.
+        raise ValidationError(f"Invalid model: {model}. Must be one of {TTSModels}") from None
+    return list(supported)
+def get_smallest_models() -> List[str]:
+    """Return the available model names.
+    A copy is returned: validate_input checks membership in TTSModels, so
+    handing out the shared list would let a caller change which models pass
+    validation by mutating the result.
+    """
+    return list(TTSModels)

smallestai 3.0.3__tar.gz → 4.0.0__tar.gz

Potentially problematic release.

smallestai 3.0.3__tar.gz → 4.0.0__tar.gz