PyPI - kugelaudio - Versions diffs - 0.1.0__tar.gz - Mend

kugelaudio 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

kugelaudio-0.1.0/.gitignore +202 -0
kugelaudio-0.1.0/CHANGELOG.md +20 -0
kugelaudio-0.1.0/LICENSE +21 -0
kugelaudio-0.1.0/PKG-INFO +433 -0
kugelaudio-0.1.0/README.md +403 -0
kugelaudio-0.1.0/kugelaudio/__init__.py +68 -0
kugelaudio-0.1.0/kugelaudio/client.py +588 -0
kugelaudio-0.1.0/kugelaudio/exceptions.py +48 -0
kugelaudio-0.1.0/kugelaudio/models.py +276 -0
kugelaudio-0.1.0/kugelaudio/py.typed +0 -0
kugelaudio-0.1.0/kugelaudio/streaming.py +235 -0
kugelaudio-0.1.0/pyproject.toml +62 -0

kugelaudio-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,202 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+.DS_Store
+node_modules/
+/voice_samples
+# C extensions
+*.so
+/emilia_de
+/data
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.env.local
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+/data
+.DS_Store

kugelaudio-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,20 @@
+# Changelog
+All notable changes to the KugelAudio Python SDK will be documented in this file.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.1.0] - 2024-12-17
+### Added
+- Initial release of the KugelAudio Python SDK
+- **Models API**: List available TTS models (`client.models.list()`)
+- **Voices API**: List voices (`client.voices.list()`) and get voice details (`client.voices.get()`)
+- **TTS Generation**: Generate complete audio (`client.tts.generate()`)
+- **Streaming**: Real-time audio streaming via WebSocket (`client.tts.stream()`)
+- **Async Support**: Full async/await support (`stream_async()`, `generate_async()`)
+- **Streaming Sessions**: LLM integration for real-time TTS (`client.tts.streaming_session()`)
+- **Audio Utilities**: Save to WAV, get duration, RTF calculation
+- **Error Handling**: Typed exceptions for auth, rate limits, validation errors
+- **Single URL Architecture**: Connect to the TTS server directly for minimal latency

kugelaudio-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 KugelAudio
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

kugelaudio-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,433 @@
+Metadata-Version: 2.4
+Name: kugelaudio
+Version: 0.1.0
+Summary: Official Python SDK for KugelAudio TTS API
+Project-URL: Homepage, https://kugelaudio.com
+Project-URL: Documentation, https://docs.kugelaudio.com
+Project-URL: Repository, https://github.com/kugelaudio/kugelaudio-python
+Author-email: KugelAudio <support@kugelaudio.com>
+License: MIT
+License-File: LICENSE
+Keywords: audio,streaming,text-to-speech,tts,websocket
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
+Requires-Python: >=3.9
+Requires-Dist: httpx>=0.24.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: websockets>=11.0
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
+Requires-Dist: pytest>=7.0.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# KugelAudio Python SDK
+Official Python SDK for the KugelAudio Text-to-Speech API.
+## Installation
+```bash
+pip install kugelaudio
+```
+Or with `uv`:
+```bash
+uv add kugelaudio
+```
+## Quick Start
+```python
+from kugelaudio import KugelAudio
+# Initialize the client - just needs an API key!
+client = KugelAudio(api_key="your_api_key")
+# Generate speech
+audio = client.tts.generate(
+    text="Hello, world!",
+    model="kugel-one-turbo",
+)
+# Save to file
+audio.save("output.wav")
+```
+## Client Configuration
+```python
+from kugelaudio import KugelAudio
+# Simple setup - single URL handles everything
+client = KugelAudio(api_key="your_api_key")
+# Or with custom options
+client = KugelAudio(
+    api_key="your_api_key",           # Required: Your API key
+    api_url="https://api.kugelaudio.com",  # Optional: API base URL (default)
+    timeout=60.0,                      # Optional: Request timeout in seconds
+)
+```
+### Single URL Architecture
+The SDK uses a **single URL** for both the REST API and WebSocket streaming. The TTS server provides both the REST endpoints (`/v1/models`, `/v1/voices`) and the WebSocket endpoint (`/ws/tts`), so no proxy is needed and latency stays minimal.
+### Local Development
+For local development, point directly to your TTS server:
+```python
+client = KugelAudio(
+    api_key="your_api_key",
+    api_url="http://localhost:8000",   # TTS server handles everything
+)
+```
+Or if you have separate backend and TTS servers:
+```python
+client = KugelAudio(
+    api_key="your_api_key",
+    api_url="http://localhost:8001",   # Backend for REST API
+    tts_url="http://localhost:8000",   # TTS server for WebSocket streaming
+)
+```
+## Available Models
+| Model ID | Name | Parameters | Description |
+|----------|------|------------|-------------|
+| `kugel-one-turbo` | Kugel One Turbo | 1.5B | Fast, low-latency model for real-time applications |
+| `kugel-one` | Kugel One | 7B | Premium quality model for pre-recorded content |
+### List Available Models
+```python
+models = client.models.list()
+for model in models:
+    print(f"{model.id}: {model.name}")
+    print(f"  Description: {model.description}")
+    print(f"  Parameters: {model.parameters}")
+    print(f"  Max Input: {model.max_input_length} characters")
+    print(f"  Sample Rate: {model.sample_rate} Hz")
+```
+## Voices
+### List Available Voices
+```python
+# List all available voices
+voices = client.voices.list()
+for voice in voices:
+    print(f"{voice.id}: {voice.name}")
+    print(f"  Category: {voice.category}")
+    print(f"  Languages: {', '.join(voice.supported_languages)}")
+# Filter by language
+german_voices = client.voices.list(language="de")
+# Get only public voices
+public_voices = client.voices.list(include_public=True)
+# Limit results
+first_10 = client.voices.list(limit=10)
+```
+### Get a Specific Voice
+```python
+voice = client.voices.get(voice_id=123)
+print(f"Voice: {voice.name}")
+print(f"Sample text: {voice.sample_text}")
+```
+## Text-to-Speech Generation
+### Basic Generation (Non-Streaming)
+Generate complete audio and receive it all at once:
+```python
+audio = client.tts.generate(
+    text="Hello, this is a test of the KugelAudio text-to-speech system.",
+    model="kugel-one-turbo",  # 'kugel-one-turbo' (fast) or 'kugel-one' (quality)
+    voice_id=123,              # Optional: specific voice ID
+    cfg_scale=2.0,             # Guidance scale (1.0-5.0)
+    max_new_tokens=2048,       # Maximum tokens to generate
+    sample_rate=24000,         # Output sample rate
+    speaker_prefix=True,       # Add speaker prefix for better quality
+)
+# Audio properties
+print(f"Duration: {audio.duration_seconds:.2f}s")
+print(f"Samples: {audio.samples}")
+print(f"Sample rate: {audio.sample_rate} Hz")
+print(f"Generation time: {audio.generation_ms:.0f}ms")
+print(f"RTF: {audio.rtf:.2f}")  # Real-time factor
+# Save to WAV file
+audio.save("output.wav")
+# Get raw PCM bytes
+pcm_data = audio.audio
+# Get WAV bytes (with header)
+wav_bytes = audio.to_wav_bytes()
+```
+### Streaming Audio Output
+Receive audio chunks as they are generated for lower latency:
+```python
+# Synchronous streaming
+for item in client.tts.stream(
+    text="Hello, this is streaming audio.",
+    model="kugel-one-turbo",
+):
+    if hasattr(item, 'audio'):  # AudioChunk
+        # Process audio chunk immediately
+        print(f"Chunk {item.index}: {len(item.audio)} bytes, {item.samples} samples")
+        # play_audio(item.audio)
+    elif isinstance(item, dict) and item.get('final'):
+        # Final stats
+        print(f"Total duration: {item.get('dur_ms', 0):.0f}ms")
+        print(f"Time to first audio: {item.get('ttfa_ms', 0):.0f}ms")
+```
+### Async Streaming
+For async applications:
+```python
+import asyncio
+async def generate_speech():
+    async for item in client.tts.stream_async(
+        text="Async streaming example.",
+        model="kugel-one-turbo",
+    ):
+        if hasattr(item, 'audio'):
+            # Process chunk
+            pass
+asyncio.run(generate_speech())
+```
+### Async Generation
+```python
+import asyncio
+async def main():
+    audio = await client.tts.generate_async(
+        text="Async generation example.",
+        model="kugel-one-turbo",
+    )
+    audio.save("async_output.wav")
+asyncio.run(main())
+```
+## LLM Integration: Streaming Text Input
+For real-time TTS when streaming text from an LLM (such as GPT-4 or Claude):
+### Async Streaming Session
+```python
+import asyncio
+async def stream_from_llm():
+    # Simulate LLM token stream
+    llm_tokens = ["Hello, ", "this ", "is ", "a ", "streamed ", "response."]
+    async with client.tts.streaming_session(
+        voice_id=123,
+        cfg_scale=2.0,
+        flush_timeout_ms=500,  # Auto-flush after 500ms of no input
+    ) as session:
+        # Send tokens as they arrive from LLM
+        for token in llm_tokens:
+            async for chunk in session.send(token):
+                # Play audio chunk immediately
+                play_audio(chunk.audio)
+        # Flush any remaining text
+        async for chunk in session.flush():
+            play_audio(chunk.audio)
+asyncio.run(stream_from_llm())
+```
+### Synchronous Streaming Session
+```python
+with client.tts.streaming_session_sync(voice_id=123) as session:
+    for token in llm_tokens:
+        for chunk in session.send(token):
+            play_audio(chunk.audio)
+    for chunk in session.flush():
+        play_audio(chunk.audio)
+```
+## Error Handling
+```python
+from kugelaudio import KugelAudio
+from kugelaudio.exceptions import (
+    KugelAudioError,
+    AuthenticationError,
+    RateLimitError,
+    InsufficientCreditsError,
+    ValidationError,
+    ConnectionError,
+)
+try:
+    audio = client.tts.generate(text="Hello!")
+except AuthenticationError:
+    print("Invalid API key")
+except RateLimitError:
+    print("Rate limit exceeded, please wait")
+except InsufficientCreditsError:
+    print("Not enough credits, please top up")
+except ValidationError as e:
+    print(f"Invalid request: {e}")
+except ConnectionError:
+    print("Failed to connect to server")
+except KugelAudioError as e:
+    print(f"API error: {e}")
+```
+## Data Models
+### AudioChunk
+Represents a single audio chunk from streaming:
+```python
+class AudioChunk:
+    audio: bytes          # Raw PCM16 audio data
+    encoding: str         # 'pcm_s16le'
+    index: int           # Chunk index (0-based)
+    sample_rate: int     # Sample rate (24000)
+    samples: int         # Number of samples in chunk
+    @property
+    def duration_seconds(self) -> float:
+        """Duration of this chunk in seconds."""
+```
+### AudioResponse
+Complete audio response from generation:
+```python
+class AudioResponse:
+    audio: bytes              # Complete PCM16 audio
+    sample_rate: int          # Sample rate (24000)
+    samples: int              # Total samples
+    duration_ms: float        # Duration in milliseconds
+    generation_ms: float      # Generation time in milliseconds
+    rtf: float               # Real-time factor
+    @property
+    def duration_seconds(self) -> float:
+        """Duration in seconds."""
+    def save(self, path: str) -> None:
+        """Save as WAV file."""
+    def to_wav_bytes(self) -> bytes:
+        """Get WAV file as bytes."""
+```
+### Model
+TTS model information:
+```python
+class Model:
+    id: str                   # 'kugel-one-turbo' or 'kugel-one'
+    name: str                 # Human-readable name
+    description: str          # Model description
+    parameters: str           # Parameter count ('1.5B', '7B')
+    max_input_length: int     # Maximum input characters
+    sample_rate: int          # Output sample rate
+```
+### Voice
+Voice information:
+```python
+class Voice:
+    id: int                          # Voice ID
+    name: str                        # Voice name
+    description: Optional[str]       # Description
+    category: Optional[VoiceCategory]  # 'premade', 'cloned', 'generated'
+    sex: Optional[VoiceSex]          # 'male', 'female', 'neutral'
+    age: Optional[VoiceAge]          # 'young', 'middle_aged', 'old'
+    supported_languages: List[str]   # ['en', 'de', ...]
+    sample_text: Optional[str]       # Sample text for preview
+    avatar_url: Optional[str]        # Avatar image URL
+    sample_url: Optional[str]        # Sample audio URL
+    is_public: bool                  # Whether voice is public
+    verified: bool                   # Whether voice is verified
+```
+## Complete Example
+```python
+from kugelaudio import KugelAudio
+# Initialize client
+client = KugelAudio(api_key="your_api_key")
+# List available models
+print("Available Models:")
+for model in client.models.list():
+    print(f"  - {model.id}: {model.name} ({model.parameters})")
+# List available voices
+print("\nAvailable Voices:")
+for voice in client.voices.list(limit=5):
+    print(f"  - {voice.id}: {voice.name}")
+# Generate audio
+print("\nGenerating audio...")
+audio = client.tts.generate(
+    text="Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.",
+    model="kugel-one-turbo",
+)
+print(f"Generated {audio.duration_seconds:.2f}s of audio in {audio.generation_ms:.0f}ms")
+print(f"Real-time factor: {audio.rtf:.2f}x")
+# Save to file
+audio.save("example.wav")
+print("Saved to example.wav")
+# Close client
+client.close()
+```
+## License
+MIT