smallestai 3.0.3__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of smallestai has been flagged as potentially problematic; consult the package registry's advisory page for details.

smallestai/__init__.py CHANGED
@@ -84,7 +84,7 @@ from smallestai.atoms import (
84
84
  from smallestai.waves import (
85
85
  WavesClient,
86
86
  AsyncWavesClient,
87
- TextToAudioStream
87
+ WavesStreamingTTS
88
88
  )
89
89
 
90
90
  from smallestai.atoms import __all__ as atoms_all
@@ -1,5 +1,5 @@
1
1
  from smallestai.waves.waves_client import WavesClient
2
2
  from smallestai.waves.async_waves_client import AsyncWavesClient
3
- from smallestai.waves.stream_tts import TextToAudioStream
3
+ from smallestai.waves.stream_tts import WavesStreamingTTS, TTSConfig
4
4
 
5
- __all__ = ["WavesClient", "AsyncWavesClient", "TextToAudioStream"]
5
+ __all__ = ["WavesClient", "AsyncWavesClient", "WavesStreamingTTS", "TTSConfig"]
@@ -4,10 +4,10 @@ import json
4
4
  import aiohttp
5
5
  import aiofiles
6
6
  import requests
7
- from typing import Optional, Union, List, AsyncIterator
7
+ from typing import Optional, Union, List
8
8
 
9
9
  from smallestai.waves.exceptions import TTSError, APIError
10
- from smallestai.waves.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
10
+ from smallestai.waves.utils import (TTSOptions, validate_input,
11
11
  get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
12
12
 
13
13
 
@@ -22,7 +22,8 @@ class AsyncWavesClient:
22
22
  consistency: Optional[float] = 0.5,
23
23
  similarity: Optional[float] = 0.0,
24
24
  enhancement: Optional[int] = 1,
25
- add_wav_header: Optional[bool] = True
25
+ language: Optional[str] = "en",
26
+ output_format: Optional[str] = "wav"
26
27
  ) -> None:
27
28
  """
28
29
  AsyncSmallest Instance for asynchronous text-to-speech synthesis.
@@ -40,7 +41,8 @@ class AsyncWavesClient:
40
41
  - consistency (float): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. Range - [0, 1]
41
42
  - similarity (float): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. Range - [0, 1]
42
43
  - enhancement (int): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. Range - [0, 2].
43
- - add_wav_header (bool): Whether to add a WAV header to the output audio.
44
+ - language (str): The language for synthesis. Default is "en".
45
+ - output_format (str): The output audio format. Options: "pcm", "mp3", "wav", "mulaw". Default is "pcm".
44
46
 
45
47
  Methods:
46
48
  - get_languages: Returns a list of available languages for synthesis.
@@ -61,11 +63,12 @@ class AsyncWavesClient:
61
63
  sample_rate=sample_rate,
62
64
  voice_id=voice_id,
63
65
  api_key=self.api_key,
64
- add_wav_header=add_wav_header,
65
66
  speed=speed,
66
67
  consistency=consistency,
67
68
  similarity=similarity,
68
- enhancement=enhancement
69
+ enhancement=enhancement,
70
+ language=language,
71
+ output_format=output_format
69
72
  )
70
73
  self.session = None
71
74
 
@@ -89,9 +92,9 @@ class AsyncWavesClient:
89
92
  return False
90
93
 
91
94
 
92
def get_languages(self, model="lightning") -> List[str]:
    """Return the list of language codes supported by *model*."""
    return get_smallest_languages(model)
95
98
 
96
99
  def get_cloned_voices(self) -> str:
97
100
  """Returns a list of your cloned voices."""
@@ -130,18 +133,14 @@ class AsyncWavesClient:
130
133
  async def synthesize(
131
134
  self,
132
135
  text: str,
133
- stream: Optional[bool] = False,
134
- save_as: Optional[str] = None,
135
136
  **kwargs
136
- ) -> Union[bytes, None, AsyncIterator[bytes]]:
137
+ ) -> Union[bytes]:
137
138
  """
138
139
  Asynchronously synthesize speech from the provided text.
139
140
 
140
141
  Args:
141
142
  - text (str): The text to be converted to speech.
142
143
  - stream (Optional[bool]): If True, returns an iterator yielding audio chunks instead of a full byte array.
143
- - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
144
- The file must have a .wav extension.
145
144
  - kwargs: Additional optional parameters to override `__init__` options for this call.
146
145
 
147
146
  Returns:
@@ -151,7 +150,7 @@ class AsyncWavesClient:
151
150
  - Otherwise, returns the synthesized audio content as bytes.
152
151
 
153
152
  Raises:
154
- - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
153
+ - TTSError: If the provided file name does not have a .wav or .mp3 extension when `save_as` is specified.
155
154
  - APIError: If the API request fails or returns an error.
156
155
  - ValueError: If an unexpected parameter is passed in `kwargs`.
157
156
  """
@@ -172,65 +171,40 @@ class AsyncWavesClient:
172
171
  for key, value in kwargs.items():
173
172
  setattr(opts, key, value)
174
173
 
175
- text = preprocess_text(text)
176
174
  validate_input(text, opts.model, opts.sample_rate, opts.speed, opts.consistency, opts.similarity, opts.enhancement)
177
175
 
178
- self.chunk_size = 250
179
- if opts.model == 'lightning-large':
180
- self.chunk_size = 140
181
-
182
- chunks = chunk_text(text, self.chunk_size)
183
-
184
- async def audio_stream():
185
- for chunk in chunks:
186
- payload = {
187
- "text": chunk,
188
- "sample_rate": opts.sample_rate,
189
- "voice_id": opts.voice_id,
190
- "add_wav_header": False,
191
- "speed": opts.speed,
192
- "model": opts.model
193
- }
194
-
195
- if opts.model == "lightning-large":
196
- if opts.consistency is not None:
197
- payload["consistency"] = opts.consistency
198
- if opts.similarity is not None:
199
- payload["similarity"] = opts.similarity
200
- if opts.enhancement is not None:
201
- payload["enhancement"] = opts.enhancement
202
-
203
-
204
- headers = {
205
- "Authorization": f"Bearer {self.api_key}",
206
- "Content-Type": "application/json",
207
- }
208
-
209
- async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
210
- if res.status != 200:
211
- raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
212
-
213
- yield await res.read()
176
+ payload = {
177
+ "text": text,
178
+ "voice_id": opts.voice_id,
179
+ "sample_rate": opts.sample_rate,
180
+ "speed": opts.speed,
181
+ "consistency": opts.consistency,
182
+ "similarity": opts.similarity,
183
+ "enhancement": opts.enhancement,
184
+ "language": opts.language,
185
+ "output_format": opts.output_format
186
+ }
214
187
 
215
- if stream:
216
- return audio_stream()
217
-
218
- audio_content = b"".join([chunk async for chunk in audio_stream()])
219
-
220
- if save_as:
221
- if not save_as.endswith(".wav"):
222
- raise TTSError("Invalid file name. Extension must be .wav")
223
-
224
- async with aiofiles.open(save_as, mode='wb') as f:
225
- await f.write(add_wav_header(audio_content, opts.sample_rate))
226
-
227
- return None
228
-
229
- if opts.add_wav_header:
230
- return add_wav_header(audio_content, opts.sample_rate)
188
+ if opts.model == "lightning-large" or opts.model == "lightning-v2":
189
+ if opts.consistency is not None:
190
+ payload["consistency"] = opts.consistency
191
+ if opts.similarity is not None:
192
+ payload["similarity"] = opts.similarity
193
+ if opts.enhancement is not None:
194
+ payload["enhancement"] = opts.enhancement
195
+
196
+ headers = {
197
+ "Authorization": f"Bearer {self.api_key}",
198
+ "Content-Type": "application/json",
199
+ }
231
200
 
232
- return audio_content
201
+ async with self.session.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers) as res:
202
+ if res.status != 200:
203
+ raise APIError(f"Failed to synthesize speech: {await res.text()}. For more information, visit https://waves.smallest.ai/")
204
+
205
+ audio_bytes = await res.content.read()
233
206
 
207
+ return audio_bytes
234
208
  finally:
235
209
  if should_cleanup and self.session:
236
210
  await self.session.close()
@@ -316,9 +290,8 @@ class AsyncWavesClient:
316
290
  if res.status != 200:
317
291
  raise APIError(f"Failed to delete voice: {await res.text()}. For more information, visit https://waves.smallest.ai/")
318
292
 
319
- return await res.text()
320
-
293
+ return json.dumps(await res.json(), indent=4, ensure_ascii=False)
321
294
  finally:
322
295
  if should_cleanup and self.session:
323
296
  await self.session.close()
324
- self.session = None
297
+ self.session = None
@@ -1,5 +1,8 @@
1
# Language codes supported by each Waves TTS model.
TTSLanguages_lightning = ["en", "hi"]
TTSLanguages_lightning_large = ["en", "hi"]
TTSLanguages_lightning_v2 = [
    "en", "hi", "mr", "kn", "ta", "bn", "gu", "de",
    "fr", "es", "it", "pl", "nl", "ru", "ar", "he",
]

# Identifiers of the available TTS models.
TTSModels = [
    "lightning",
    "lightning-large",
    "lightning-v2",
]
@@ -1,272 +1,207 @@
1
- import asyncio
1
+ import json
2
+ import base64
2
3
  import time
3
- from threading import Thread
4
- from queue import Queue, Empty
5
- from typing import AsyncGenerator, Optional, Union, List, Dict, Any
6
-
7
- from smallestai.waves.waves_client import WavesClient
8
- from smallestai.waves.exceptions import APIError
9
- from smallestai.waves.async_waves_client import AsyncWavesClient
10
- from smallestai.waves.utils import SENTENCE_END_REGEX
11
-
12
- class TextToAudioStream:
13
- def __init__(
14
- self,
15
- tts_instance: Union[WavesClient, AsyncWavesClient],
16
- queue_timeout: Optional[float] = 5.0,
17
- max_retries: Optional[int] = 3,
18
- verbose: bool = False
19
- ):
20
- """
21
- A real-time text-to-speech processor that converts streaming text into audio output.
22
- Useful for applications requiring immediate audio feedback from text generation,
23
- such as voice assistants, live captioning, or interactive chatbots.
24
-
25
- ⚠️ `add_wav_header` is disabled by default for streaming efficiency. Refer to the README for more information.
26
-
27
- Features:
28
- - Streams audio chunks as soon as text is available.
29
- - Handles both sync and async text-to-speech engines.
30
- - Automatically retries failed synthesis attempts.
31
- - Low latency between text generation and speech output.
32
-
33
- Args:
34
- tts_instance: The text-to-speech engine to use (Smallest or AsyncSmallest)
35
- queue_timeout: How long to wait for new text (seconds, default: 1.0)
36
- max_retries: Number of retry attempts for failed synthesis (default: 3)
37
- verbose: Whether to log detailed metrics about TTS requests (default: False)
38
- """
39
- self.tts_instance = tts_instance
40
- self.tts_instance.opts.add_wav_header = False
41
- self.sentence_end_regex = SENTENCE_END_REGEX
42
- self.queue_timeout = queue_timeout
43
- self.max_retries = max_retries
44
- self.queue = Queue()
45
- self.buffer_size = 250
46
- self.stop_flag = False
47
- self.verbose = verbose
4
+ import threading
5
+ import queue
6
+ from typing import Generator
7
+ from dataclasses import dataclass
8
+ from websocket import WebSocketApp
9
+
10
@dataclass
class TTSConfig:
    """Synthesis options for the Waves streaming TTS websocket API."""
    voice_id: str
    api_key: str
    language: str = "en"
    sample_rate: int = 24000
    speed: float = 1.0
    consistency: float = 0.5
    enhancement: int = 1
    similarity: float = 0
    max_buffer_flush_ms: int = 0


class WavesStreamingTTS:
    """Streaming text-to-speech over the Waves `lightning-v2` websocket endpoint.

    The server pushes base64-encoded audio chunks; the public generator
    methods decode them and yield raw audio bytes as they arrive.
    """

    def __init__(self, config: TTSConfig):
        self.config = config
        self.ws_url = "wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream"
        self.ws = None
        self.audio_queue = queue.Queue()   # decoded audio chunks; None = end-of-stream sentinel
        self.error_queue = queue.Queue()   # exceptions reported by websocket callbacks
        self.is_complete = False
        self.is_connected = False
        self.request_id = None

    def _get_headers(self):
        """Headers for the websocket handshake (bearer auth)."""
        return [f"Authorization: Bearer {self.config.api_key}"]

    def _create_payload(self, text: str, continue_stream: bool = False, flush: bool = False):
        """Build one websocket message from the configured synthesis options."""
        return {
            "voice_id": self.config.voice_id,
            "text": text,
            "language": self.config.language,
            "sample_rate": self.config.sample_rate,
            "speed": self.config.speed,
            "consistency": self.config.consistency,
            "similarity": self.config.similarity,
            "enhancement": self.config.enhancement,
            "max_buffer_flush_ms": self.config.max_buffer_flush_ms,
            "continue": continue_stream,
            "flush": flush
        }

    def _on_open(self, ws):
        self.is_connected = True

    def _on_message(self, ws, message):
        """Decode a server message into the audio queue; report errors."""
        try:
            data = json.loads(message)
            status = data.get("status", "")

            if status == "error":
                self.error_queue.put(Exception(data.get("message", "Unknown error")))
                return

            if not self.request_id:
                self.request_id = data.get("request_id")

            audio_b64 = data.get("data", {}).get("audio")
            if audio_b64:
                self.audio_queue.put(base64.b64decode(audio_b64))

            if status == "complete":
                self.is_complete = True
                self.audio_queue.put(None)  # end-of-stream sentinel

        except Exception as e:
            self.error_queue.put(e)

    def _on_error(self, ws, error):
        self.error_queue.put(error)

    def _on_close(self, ws, *args):
        self.is_connected = False
        if not self.is_complete:
            # Unblock any consumer still waiting on the audio queue.
            self.audio_queue.put(None)

    def _connect(self):
        """Open the websocket in a daemon thread and wait for the handshake.

        Raises:
            Exception: if the connection is not established within 5 seconds.
        """
        if self.ws:
            self.ws.close()

        self.ws = WebSocketApp(
            self.ws_url,
            header=self._get_headers(),
            on_open=self._on_open,
            on_message=self._on_message,
            on_error=self._on_error,
            on_close=self._on_close
        )

        ws_thread = threading.Thread(target=self.ws.run_forever)
        ws_thread.daemon = True
        ws_thread.start()

        timeout = 5.0
        start_time = time.time()
        while not self.is_connected and time.time() - start_time < timeout:
            time.sleep(0.1)

        if not self.is_connected:
            raise Exception("Failed to connect to WebSocket")

    def _drain_audio(self, poll_timeout: float):
        """Yield decoded audio chunks until the end-of-stream sentinel.

        Re-raises the first error reported by a websocket callback.
        """
        while True:
            if not self.error_queue.empty():
                raise self.error_queue.get()

            try:
                chunk = self.audio_queue.get(timeout=poll_timeout)
                if chunk is None:
                    break
                yield chunk
            except queue.Empty:
                if self.is_complete:
                    break
                continue

    def synthesize(self, text: str) -> Generator[bytes, None, None]:
        """Synthesize *text* in a single request, yielding audio chunks as bytes."""
        self._reset_state()
        self._connect()

        try:
            payload = self._create_payload(text)
            self.ws.send(json.dumps(payload))
            yield from self._drain_audio(1.0)
        finally:
            # Fix: always release the connection — previously an error raised
            # from the consume loop left the websocket open.
            self.ws.close()

    def synthesize_streaming(self, text_stream: Generator[str, None, None],
                             continue_stream: bool = True,
                             auto_flush: bool = True) -> Generator[bytes, None, None]:
        """Synthesize text arriving from *text_stream*, yielding audio bytes.

        Text chunks are forwarded on a background thread; when *auto_flush*
        is true a final flush message is sent once the stream is exhausted.
        """
        self._reset_state()
        self._connect()

        def send_text():
            try:
                for text_chunk in text_stream:
                    if text_chunk.strip():
                        payload = self._create_payload(text_chunk, continue_stream=continue_stream)
                        self.ws.send(json.dumps(payload))

                if auto_flush:
                    flush_payload = self._create_payload("", flush=True)
                    self.ws.send(json.dumps(flush_payload))
            except Exception as e:
                self.error_queue.put(e)

        sender_thread = threading.Thread(target=send_text)
        sender_thread.daemon = True
        sender_thread.start()

        try:
            yield from self._drain_audio(1.0)
        finally:
            # Fix: always release the connection, even on the error path.
            self.ws.close()

    def send_text_chunk(self, text: str, continue_stream: bool = True, flush: bool = False):
        """Send one text chunk on an already-open streaming session.

        Raises:
            Exception: if no websocket connection is open.
        """
        if not self.is_connected:
            raise Exception("WebSocket not connected")
        payload = self._create_payload(text, continue_stream=continue_stream, flush=flush)
        self.ws.send(json.dumps(payload))

    def flush_buffer(self):
        """Ask the server to flush any buffered text on the open session.

        Raises:
            Exception: if no websocket connection is open.
        """
        if not self.is_connected:
            raise Exception("WebSocket not connected")
        payload = self._create_payload("", flush=True)
        self.ws.send(json.dumps(payload))

    def start_streaming_session(self) -> Generator[bytes, None, None]:
        """Open a session and yield audio for chunks sent via send_text_chunk.

        NOTE(review): the connection is intentionally left open when this
        generator finishes so the caller can keep using the session.
        """
        self._reset_state()
        self._connect()
        yield from self._drain_audio(0.1)

    def _reset_state(self):
        """Discard per-request state before opening a new connection."""
        self.audio_queue = queue.Queue()
        self.error_queue = queue.Queue()
        self.is_complete = False
        self.is_connected = False
        self.request_id = None
smallestai/waves/utils.py CHANGED
@@ -1,16 +1,13 @@
1
- import re
2
- import io
3
1
  from typing import List
4
2
  from typing import Optional
5
- from pydub import AudioSegment
6
3
  from dataclasses import dataclass
7
4
 
8
5
  from smallestai.waves.exceptions import ValidationError
9
- from smallestai.waves.models import TTSModels, TTSLanguages
6
+ from smallestai.waves.models import TTSModels, TTSLanguages_lightning, TTSLanguages_lightning_large, TTSLanguages_lightning_v2
10
7
 
11
8
 
12
9
  API_BASE_URL = "https://waves-api.smallest.ai/api/v1"
13
- SENTENCE_END_REGEX = re.compile(r'.*[-.—!?,;:…।|]$')
10
+ WEBSOCKET_URL = "wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream"
14
11
  SAMPLE_WIDTH = 2
15
12
  CHANNELS = 1
16
13
  ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav']
@@ -22,11 +19,12 @@ class TTSOptions:
22
19
  sample_rate: int
23
20
  voice_id: str
24
21
  api_key: str
25
- add_wav_header: bool
26
22
  speed: float
27
23
  consistency: float
28
24
  similarity: float
29
25
  enhancement: int
26
+ language: str
27
+ output_format: str
30
28
 
31
29
 
32
30
  def validate_input(text: str, model: str, sample_rate: int, speed: float, consistency: Optional[float] = None, similarity: Optional[float] = None, enhancement: Optional[int] = None):
@@ -46,52 +44,15 @@ def validate_input(text: str, model: str, sample_rate: int, speed: float, consis
46
44
  raise ValidationError(f"Invalid enhancement: {enhancement}. Must be between 0 and 2.")
47
45
 
48
46
 
49
- def add_wav_header(frame_input: bytes, sample_rate: int = 24000, sample_width: int = 2, channels: int = 1) -> bytes:
50
- audio = AudioSegment(data=frame_input, sample_width=sample_width, frame_rate=sample_rate, channels=channels)
51
- wav_buf = io.BytesIO()
52
- audio.export(wav_buf, format="wav")
53
- wav_buf.seek(0)
54
- return wav_buf.read()
55
-
56
-
57
- def preprocess_text(text: str) -> str:
58
- text = text.replace("\n", " ").replace("\t", " ")
59
- text = re.sub(r'\s+', ' ', text)
60
- return text.strip()
61
-
62
-
63
- def chunk_text(text: str, chunk_size: int = 250) -> List[str]:
64
- chunks = []
65
- while text:
66
- if len(text) <= chunk_size:
67
- chunks.append(text.strip())
68
- break
69
-
70
- chunk_text = text[:chunk_size]
71
- last_break_index = -1
72
-
73
- # Find last sentence boundary using regex
74
- for i in range(len(chunk_text) - 1, -1, -1):
75
- if SENTENCE_END_REGEX.match(chunk_text[:i + 1]):
76
- last_break_index = i
77
- break
78
-
79
- if last_break_index == -1:
80
- # Fallback to space if no sentence boundary found
81
- last_space = chunk_text.rfind(' ')
82
- if last_space != -1:
83
- last_break_index = last_space
84
- else:
85
- last_break_index = chunk_size - 1
86
-
87
- chunks.append(text[:last_break_index + 1].strip())
88
- text = text[last_break_index + 1:].strip()
89
-
90
- return chunks
91
-
92
-
93
def get_smallest_languages(model: str = 'lightning') -> List[str]:
    """Return the language codes supported by the given TTS model.

    Raises:
        ValidationError: if *model* is not one of the known TTS models.
    """
    supported = {
        'lightning': TTSLanguages_lightning,
        'lightning-large': TTSLanguages_lightning_large,
        'lightning-v2': TTSLanguages_lightning_v2,
    }
    if model not in supported:
        raise ValidationError(f"Invalid model: {model}. Must be one of {TTSModels}")
    return supported[model]
95
56
 
96
57
  def get_smallest_models() -> List[str]:
97
58
  return TTSModels
@@ -1,13 +1,12 @@
1
1
  import os
2
2
  import json
3
- import wave
4
3
  import copy
5
4
  import requests
6
- from typing import Optional, Union, List, Iterator
5
+ from typing import Optional, Union, List
7
6
 
8
7
  from smallestai.waves.exceptions import TTSError, APIError
9
- from smallestai.waves.utils import (TTSOptions, validate_input, preprocess_text, add_wav_header, chunk_text,
10
- get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
8
+ from smallestai.waves.utils import (TTSOptions, validate_input,
9
+ get_smallest_languages, get_smallest_models, ALLOWED_AUDIO_EXTENSIONS, API_BASE_URL)
11
10
 
12
11
  class WavesClient:
13
12
  def __init__(
@@ -20,7 +19,8 @@ class WavesClient:
20
19
  consistency: Optional[float] = 0.5,
21
20
  similarity: Optional[float] = 0.0,
22
21
  enhancement: Optional[int] = 1,
23
- add_wav_header: Optional[bool] = True
22
+ language: Optional[str] = "en",
23
+ output_format: Optional[str] = "wav"
24
24
  ) -> None:
25
25
  """
26
26
  Smallest Instance for text-to-speech synthesis.
@@ -37,7 +37,8 @@ class WavesClient:
37
37
  - consistency (float): This parameter controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. Range - [0, 1]
38
38
  - similarity (float): This parameter controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. Range - [0, 1]
39
39
  - enhancement (int): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. Range - [0, 2].
40
- - add_wav_header (bool): Whether to add a WAV header to the output audio.
40
+ - language (str): The language for synthesis. Default is "en".
41
+ - output_format (str): The output audio format. Options: "pcm", "mp3", "wav", "mulaw". Default is "pcm".
41
42
 
42
43
  Methods:
43
44
  - get_languages: Returns a list of available languages for synthesis.
@@ -58,17 +59,18 @@ class WavesClient:
58
59
  sample_rate=sample_rate,
59
60
  voice_id=voice_id,
60
61
  api_key=self.api_key,
61
- add_wav_header=add_wav_header,
62
62
  speed=speed,
63
63
  consistency=consistency,
64
64
  similarity=similarity,
65
- enhancement=enhancement
65
+ enhancement=enhancement,
66
+ language=language,
67
+ output_format=output_format
66
68
  )
67
69
 
68
70
 
69
def get_languages(self, model: str = "lightning") -> List[str]:
    """Return the list of language codes supported by *model*."""
    return get_smallest_languages(model)
72
74
 
73
75
  def get_cloned_voices(self) -> str:
74
76
  """Returns a list of your cloned voices."""
@@ -107,17 +109,13 @@ class WavesClient:
107
109
  def synthesize(
108
110
  self,
109
111
  text: str,
110
- stream: Optional[bool] = False,
111
- save_as: Optional[str] = None,
112
112
  **kwargs
113
- ) -> Union[bytes, None, Iterator[bytes]]:
113
+ ) -> Union[bytes]:
114
114
  """
115
115
  Synthesize speech from the provided text.
116
116
 
117
117
  - text (str): The text to be converted to speech.
118
118
  - stream (Optional[bool]): If True, returns an iterator yielding audio chunks instead of a full byte array.
119
- - save_as (Optional[str]): If provided, the synthesized audio will be saved to this file path.
120
- The file must have a .wav extension.
121
119
  - kwargs: Additional optional parameters to override `__init__` options for this call.
122
120
 
123
121
  Returns:
@@ -127,7 +125,7 @@ class WavesClient:
127
125
  - Otherwise, returns the synthesized audio content as bytes.
128
126
 
129
127
  Raises:
130
- - TTSError: If the provided file name does not have a .wav extension when `save_as` is specified.
128
+ - TTSError: If the provided file name does not have a .wav or .mp3 extension when `save_as` is specified.
131
129
  - APIError: If the API request fails or returns an error.
132
130
  """
133
131
  opts = copy.deepcopy(self.opts)
@@ -140,64 +138,38 @@ class WavesClient:
140
138
  for key, value in kwargs.items():
141
139
  setattr(opts, key, value)
142
140
 
143
- text = preprocess_text(text)
144
141
  validate_input(text, opts.model, opts.sample_rate, opts.speed, opts.consistency, opts.similarity, opts.enhancement)
145
142
 
146
- self.chunk_size = 250
147
- if opts.model == "lightning-large":
148
- self.chunk_size = 140
149
-
150
- chunks = chunk_text(text, self.chunk_size)
151
-
152
- def audio_stream():
153
- for chunk in chunks:
154
- payload = {
155
- "text": chunk,
156
- "sample_rate": opts.sample_rate,
157
- "voice_id": opts.voice_id,
158
- "add_wav_header": False,
159
- "speed": opts.speed,
160
- }
161
-
162
- if opts.model == "lightning-large":
163
- if opts.consistency is not None:
164
- payload["consistency"] = opts.consistency
165
- if opts.similarity is not None:
166
- payload["similarity"] = opts.similarity
167
- if opts.enhancement is not None:
168
- payload["enhancement"] = opts.enhancement
169
-
170
- headers = {
171
- "Authorization": f"Bearer {self.api_key}",
172
- "Content-Type": "application/json",
173
- }
174
-
175
- res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
176
- if res.status_code != 200:
177
- raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
143
+ payload = {
144
+ "text": text,
145
+ "voice_id": opts.voice_id,
146
+ "sample_rate": opts.sample_rate,
147
+ "speed": opts.speed,
148
+ "consistency": opts.consistency,
149
+ "similarity": opts.similarity,
150
+ "enhancement": opts.enhancement,
151
+ "language": opts.language,
152
+ "output_format": opts.output_format
153
+ }
178
154
 
179
- yield res.content
180
-
181
- if stream:
182
- return audio_stream()
183
-
184
- audio_content = b"".join(audio_stream())
155
+ if opts.model == "lightning-large" or opts.model == "lightning-v2":
156
+ if opts.consistency is not None:
157
+ payload["consistency"] = opts.consistency
158
+ if opts.similarity is not None:
159
+ payload["similarity"] = opts.similarity
160
+ if opts.enhancement is not None:
161
+ payload["enhancement"] = opts.enhancement
162
+
163
+ headers = {
164
+ "Authorization": f"Bearer {self.api_key}",
165
+ "Content-Type": "application/json",
166
+ }
185
167
 
186
- if save_as:
187
- if not save_as.endswith(".wav"):
188
- raise TTSError("Invalid file name. Extension must be .wav")
189
-
190
- with wave.open(save_as, "wb") as wf:
191
- wf.setnchannels(1)
192
- wf.setsampwidth(2)
193
- wf.setframerate(opts.sample_rate)
194
- wf.writeframes(audio_content)
195
- return None
196
-
197
- if opts.add_wav_header:
198
- return add_wav_header(audio_content, opts.sample_rate)
199
-
200
- return audio_content
168
+ res = requests.post(f"{API_BASE_URL}/{opts.model}/get_speech", json=payload, headers=headers)
169
+ if res.status_code != 200:
170
+ raise APIError(f"Failed to synthesize speech: {res.text}. Please check if you have set the correct API key. For more information, visit https://waves.smallest.ai/")
171
+
172
+ return res.content
201
173
 
202
174
 
203
175
  def add_voice(self, display_name: str, file_path: str) -> str:
@@ -262,4 +234,4 @@ class WavesClient:
262
234
  if response.status_code != 200:
263
235
  raise APIError(f"Failed to delete voice: {response.text}. For more information, visit https://waves.smallest.ai/")
264
236
 
265
- return json.dumps(response.json(), indent=4, ensure_ascii=False)
237
+ return json.dumps(response.json(), indent=4, ensure_ascii=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: smallestai
3
- Version: 3.0.3
3
+ Version: 4.0.0
4
4
  Summary: Official Python client for the Smallest AI API
5
5
  Author-email: Smallest <support@smallest.ai>
6
6
  License: MIT
@@ -16,6 +16,7 @@ Requires-Dist: aiohttp
16
16
  Requires-Dist: aiofiles
17
17
  Requires-Dist: requests
18
18
  Requires-Dist: pydub
19
+ Requires-Dist: websocket-client
19
20
  Requires-Dist: urllib3<3.0.0,>=1.25.3
20
21
  Requires-Dist: python-dateutil>=2.8.2
21
22
  Requires-Dist: pydantic>=2
@@ -1,4 +1,4 @@
1
- smallestai/__init__.py,sha256=zVO8iaNFVgNErxEt58AuB1npc7MR8x8Oi9A-Z2t8Q6w,2624
1
+ smallestai/__init__.py,sha256=lY4DcFTosH2W0KVVN7pSbJmZBu067wG_y1u3GwGNru8,2624
2
2
  smallestai/atoms/__init__.py,sha256=cn5_9tVsUwFQ_zdAZv263P4ow4N7dxRWCYAz82GjwuI,9342
3
3
  smallestai/atoms/api_client.py,sha256=EcyN6nFp9U4u8TPJx3a9ZvbM2T4a9xrHGopQGLZuJpw,27448
4
4
  smallestai/atoms/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
@@ -73,15 +73,15 @@ smallestai/atoms/models/update_agent_request_synthesizer_voice_config.py,sha256=
73
73
  smallestai/atoms/models/update_agent_request_synthesizer_voice_config_one_of.py,sha256=8nGPcJ_CRUlXXjy3vCjpmbHWVBwQo2ebFP1K0MZPAsk,3955
74
74
  smallestai/atoms/models/update_agent_request_synthesizer_voice_config_one_of1.py,sha256=9AJxgngoNSMvDbceajIqnG23PY4rw84coTh7yUTNS3c,3487
75
75
  smallestai/atoms/models/upload_text_to_knowledge_base_request.py,sha256=Sxg0vRv_naT15odE8fBUeyjwLpEYOmQwGcJuzRRr90A,2587
76
- smallestai/waves/__init__.py,sha256=Hkq7N2nuz_wS7pC6QeUnIU1MzQnX_nrhfXGpjGSvFhQ,244
77
- smallestai/waves/async_waves_client.py,sha256=hv9rQ8-ykWuHoAcmZPhwtX_-AAQT4H4G3H8c4BhO5-0,12658
76
+ smallestai/waves/__init__.py,sha256=hxyqisgFiKiroxupuZeNXpXFIbnivmdgPrid3CnLhh0,268
77
+ smallestai/waves/async_waves_client.py,sha256=BgiSqd2UjwECCPwuh2dyhLSBP0inIsbPUEbduWTJrmI,11704
78
78
  smallestai/waves/exceptions.py,sha256=nY6I8fCXe2By54CytQ0-i3hFiYtt8TYAKj0g6OYsCjc,585
79
- smallestai/waves/models.py,sha256=egN4V_HiWIQBLKQdXt1ax1W-1tLK42xqx4FALHyMxh8,108
80
- smallestai/waves/stream_tts.py,sha256=Ppjwp1jXpUSpyNkwCnesMYQbAdyzKLMj_1o1iTb3jaA,10958
81
- smallestai/waves/utils.py,sha256=0VqMA4apJ-9U7abOznVXqUYEEAxQ2JkpLGyFhcJ_Kbw,3307
82
- smallestai/waves/waves_client.py,sha256=XKdPVWs-HZDzlxzF1x3cMdJQ_q71ZFS1P5oltzj2KO4,10740
83
- smallestai-3.0.3.dist-info/licenses/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
84
- smallestai-3.0.3.dist-info/METADATA,sha256=QG7FNlKA5cKXGGi1ay_bCRM4s4aWf64A70r1T2yM68I,20392
85
- smallestai-3.0.3.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
86
- smallestai-3.0.3.dist-info/top_level.txt,sha256=pdJzm1VC2J6RxoobATz45L9U3cki4AFLigsfvETz7Io,11
87
- smallestai-3.0.3.dist-info/RECORD,,
79
+ smallestai/waves/models.py,sha256=FaMVkOFyNCVpWvyMCmqkv3t1wmnfCs1HIULxLr1L8XE,283
80
+ smallestai/waves/stream_tts.py,sha256=c9r8mZuuFjbyWsUrlZ1jb0WNX7-lR39EXDUqyF-5g14,6792
81
+ smallestai/waves/utils.py,sha256=sqDpfa5SC60C_kJZo4MKxlDfkX7RRzO6aJ2hKpNMemE,2273
82
+ smallestai/waves/waves_client.py,sha256=U6aqClYL49cTtYisvpUVhas2miGZiCfqwTU0eDUY548,9770
83
+ smallestai-4.0.0.dist-info/licenses/LICENSE,sha256=kK3HNKhN7luQhkjkNWIvy9_gizbEDUM4mSv_HWq9uuM,1068
84
+ smallestai-4.0.0.dist-info/METADATA,sha256=bk0xBChPACeJiL8j6zxbHGnty60N5cjDH1sLPx32hLM,20424
85
+ smallestai-4.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
86
+ smallestai-4.0.0.dist-info/top_level.txt,sha256=pdJzm1VC2J6RxoobATz45L9U3cki4AFLigsfvETz7Io,11
87
+ smallestai-4.0.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5