kugelaudio 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,202 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ .DS_Store
7
+
8
+ node_modules/
9
+
10
+ /voice_samples
11
+ # C extensions
12
+ *.so
13
+ /emilia_de
14
+ /data
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Translations
60
+ *.mo
61
+ *.pot
62
+
63
+ # Django stuff:
64
+ *.log
65
+ local_settings.py
66
+ db.sqlite3
67
+ db.sqlite3-journal
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ .pybuilder/
81
+ target/
82
+
83
+ # Jupyter Notebook
84
+ .ipynb_checkpoints
85
+
86
+ # IPython
87
+ profile_default/
88
+ ipython_config.py
89
+
90
+ # pyenv
91
+ # For a library or package, you might want to ignore these files since the code is
92
+ # intended to run in multiple environments; otherwise, check them in:
93
+ # .python-version
94
+
95
+ # pipenv
96
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
98
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
99
+ # install all needed dependencies.
100
+ #Pipfile.lock
101
+
102
+ # UV
103
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ #uv.lock
107
+
108
+ # poetry
109
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
110
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
111
+ # commonly ignored for libraries.
112
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
113
+ #poetry.lock
114
+
115
+ # pdm
116
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
117
+ #pdm.lock
118
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
119
+ # in version control.
120
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
121
+ .pdm.toml
122
+ .pdm-python
123
+ .pdm-build/
124
+
125
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
126
+ __pypackages__/
127
+
128
+ # Celery stuff
129
+ celerybeat-schedule
130
+ celerybeat.pid
131
+
132
+ # SageMath parsed files
133
+ *.sage.py
134
+
135
+ # Environments
136
+ .env
137
+ .env.local
138
+ .venv
139
+ env/
140
+ venv/
141
+ ENV/
142
+ env.bak/
143
+ venv.bak/
144
+
145
+ # Spyder project settings
146
+ .spyderproject
147
+ .spyproject
148
+
149
+ # Rope project settings
150
+ .ropeproject
151
+
152
+ # mkdocs documentation
153
+ /site
154
+
155
+ # mypy
156
+ .mypy_cache/
157
+ .dmypy.json
158
+ dmypy.json
159
+
160
+ # Pyre type checker
161
+ .pyre/
162
+
163
+ # pytype static type analyzer
164
+ .pytype/
165
+
166
+ # Cython debug symbols
167
+ cython_debug/
168
+
169
+ # PyCharm
170
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
171
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
172
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
173
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
174
+ #.idea/
175
+
176
+ # Abstra
177
+ # Abstra is an AI-powered process automation framework.
178
+ # Ignore directories containing user credentials, local state, and settings.
179
+ # Learn more at https://abstra.io/docs
180
+ .abstra/
181
+
182
+ # Visual Studio Code
183
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
184
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
185
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
186
+ # you could uncomment the following to ignore the entire vscode folder
187
+ # .vscode/
188
+
189
+ # Ruff stuff:
190
+ .ruff_cache/
191
+
192
+ # PyPI configuration file
193
+ .pypirc
194
+
195
+ # Cursor
196
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
197
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data;
198
+ # refer to https://docs.cursor.com/context/ignore-files
199
+ .cursorignore
200
+ .cursorindexingignore
201
+ /data
202
+ .DS_Store
@@ -0,0 +1,20 @@
1
+ # Changelog
2
+
3
+ All notable changes to the KugelAudio Python SDK will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2024-12-17
9
+
10
+ ### Added
11
+ - Initial release of the KugelAudio Python SDK
12
+ - **Models API**: List available TTS models (`client.models.list()`)
13
+ - **Voices API**: List voices (`client.voices.list()`) and get voice details (`client.voices.get()`)
14
+ - **TTS Generation**: Generate complete audio (`client.tts.generate()`)
15
+ - **Streaming**: Real-time audio streaming via WebSocket (`client.tts.stream()`)
16
+ - **Async Support**: Full async/await support (`stream_async()`, `generate_async()`)
17
+ - **Streaming Sessions**: LLM integration for real-time TTS (`client.tts.streaming_session()`)
18
+ - **Audio Utilities**: Save to WAV, get duration, RTF calculation
19
+ - **Error Handling**: Typed exceptions for auth, rate limits, validation errors
20
+ - **Single URL Architecture**: Connect to TTS server directly for minimal latency
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 KugelAudio
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,433 @@
1
+ Metadata-Version: 2.4
2
+ Name: kugelaudio
3
+ Version: 0.1.0
4
+ Summary: Official Python SDK for KugelAudio TTS API
5
+ Project-URL: Homepage, https://kugelaudio.com
6
+ Project-URL: Documentation, https://docs.kugelaudio.com
7
+ Project-URL: Repository, https://github.com/kugelaudio/kugelaudio-python
8
+ Author-email: KugelAudio <support@kugelaudio.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: audio,streaming,text-to-speech,tts,websocket
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
22
+ Requires-Python: >=3.9
23
+ Requires-Dist: httpx>=0.24.0
24
+ Requires-Dist: pydantic>=2.0.0
25
+ Requires-Dist: websockets>=11.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
28
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
29
+ Description-Content-Type: text/markdown
30
+
31
+ # KugelAudio Python SDK
32
+
33
+ Official Python SDK for the KugelAudio Text-to-Speech API.
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install kugelaudio
39
+ ```
40
+
41
+ Or with `uv`:
42
+
43
+ ```bash
44
+ uv add kugelaudio
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ ```python
50
+ from kugelaudio import KugelAudio
51
+
52
+ # Initialize the client - just needs an API key!
53
+ client = KugelAudio(api_key="your_api_key")
54
+
55
+ # Generate speech
56
+ audio = client.tts.generate(
57
+ text="Hello, world!",
58
+ model="kugel-one-turbo",
59
+ )
60
+
61
+ # Save to file
62
+ audio.save("output.wav")
63
+ ```
64
+
65
+ ## Client Configuration
66
+
67
+ ```python
68
+ from kugelaudio import KugelAudio
69
+
70
+ # Simple setup - single URL handles everything
71
+ client = KugelAudio(api_key="your_api_key")
72
+
73
+ # Or with custom options
74
+ client = KugelAudio(
75
+ api_key="your_api_key", # Required: Your API key
76
+ api_url="https://api.kugelaudio.com", # Optional: API base URL (default)
77
+ timeout=60.0, # Optional: Request timeout in seconds
78
+ )
79
+ ```
80
+
81
+ ### Single URL Architecture
82
+
83
+ The SDK uses a **single URL** for both REST API and WebSocket streaming. The TTS server provides both REST endpoints (`/v1/models`, `/v1/voices`) and WebSocket (`/ws/tts`) - no proxy needed, minimal latency.
84
+
85
+ ### Local Development
86
+
87
+ For local development, point directly to your TTS server:
88
+
89
+ ```python
90
+ client = KugelAudio(
91
+ api_key="your_api_key",
92
+ api_url="http://localhost:8000", # TTS server handles everything
93
+ )
94
+ ```
95
+
96
+ Or if you have separate backend and TTS servers:
97
+
98
+ ```python
99
+ client = KugelAudio(
100
+ api_key="your_api_key",
101
+ api_url="http://localhost:8001", # Backend for REST API
102
+ tts_url="http://localhost:8000", # TTS server for WebSocket streaming
103
+ )
104
+ ```
105
+
106
+ ## Available Models
107
+
108
+ | Model ID | Name | Parameters | Description |
109
+ |----------|------|------------|-------------|
110
+ | `kugel-one-turbo` | Kugel One Turbo | 1.5B | Fast, low-latency model for real-time applications |
111
+ | `kugel-one` | Kugel One | 7B | Premium quality model for pre-recorded content |
112
+
113
+ ### List Available Models
114
+
115
+ ```python
116
+ models = client.models.list()
117
+
118
+ for model in models:
119
+ print(f"{model.id}: {model.name}")
120
+ print(f" Description: {model.description}")
121
+ print(f" Parameters: {model.parameters}")
122
+ print(f" Max Input: {model.max_input_length} characters")
123
+ print(f" Sample Rate: {model.sample_rate} Hz")
124
+ ```
125
+
126
+ ## Voices
127
+
128
+ ### List Available Voices
129
+
130
+ ```python
131
+ # List all available voices
132
+ voices = client.voices.list()
133
+
134
+ for voice in voices:
135
+ print(f"{voice.id}: {voice.name}")
136
+ print(f" Category: {voice.category}")
137
+ print(f" Languages: {', '.join(voice.supported_languages)}")
138
+
139
+ # Filter by language
140
+ german_voices = client.voices.list(language="de")
141
+
142
+ # Include public voices in the results
143
+ public_voices = client.voices.list(include_public=True)
144
+
145
+ # Limit results
146
+ first_10 = client.voices.list(limit=10)
147
+ ```
148
+
149
+ ### Get a Specific Voice
150
+
151
+ ```python
152
+ voice = client.voices.get(voice_id=123)
153
+ print(f"Voice: {voice.name}")
154
+ print(f"Sample text: {voice.sample_text}")
155
+ ```
156
+
157
+ ## Text-to-Speech Generation
158
+
159
+ ### Basic Generation (Non-Streaming)
160
+
161
+ Generate complete audio and receive it all at once:
162
+
163
+ ```python
164
+ audio = client.tts.generate(
165
+ text="Hello, this is a test of the KugelAudio text-to-speech system.",
166
+ model="kugel-one-turbo", # 'kugel-one-turbo' (fast) or 'kugel-one' (quality)
167
+ voice_id=123, # Optional: specific voice ID
168
+ cfg_scale=2.0, # Guidance scale (1.0-5.0)
169
+ max_new_tokens=2048, # Maximum tokens to generate
170
+ sample_rate=24000, # Output sample rate
171
+ speaker_prefix=True, # Add speaker prefix for better quality
172
+ )
173
+
174
+ # Audio properties
175
+ print(f"Duration: {audio.duration_seconds:.2f}s")
176
+ print(f"Samples: {audio.samples}")
177
+ print(f"Sample rate: {audio.sample_rate} Hz")
178
+ print(f"Generation time: {audio.generation_ms:.0f}ms")
179
+ print(f"RTF: {audio.rtf:.2f}") # Real-time factor
180
+
181
+ # Save to WAV file
182
+ audio.save("output.wav")
183
+
184
+ # Get raw PCM bytes
185
+ pcm_data = audio.audio
186
+
187
+ # Get WAV bytes (with header)
188
+ wav_bytes = audio.to_wav_bytes()
189
+ ```
190
+
191
+ ### Streaming Audio Output
192
+
193
+ Receive audio chunks as they are generated for lower latency:
194
+
195
+ ```python
196
+ # Synchronous streaming
197
+ for item in client.tts.stream(
198
+ text="Hello, this is streaming audio.",
199
+ model="kugel-one-turbo",
200
+ ):
201
+ if hasattr(item, 'audio'): # AudioChunk
202
+ # Process audio chunk immediately
203
+ print(f"Chunk {item.index}: {len(item.audio)} bytes, {item.samples} samples")
204
+ # play_audio(item.audio)
205
+ elif isinstance(item, dict) and item.get('final'):
206
+ # Final stats
207
+ print(f"Total duration: {item.get('dur_ms', 0):.0f}ms")
208
+ print(f"Time to first audio: {item.get('ttfa_ms', 0):.0f}ms")
209
+ ```
210
+
211
+ ### Async Streaming
212
+
213
+ For async applications:
214
+
215
+ ```python
216
+ import asyncio
217
+
218
+ async def generate_speech():
219
+ async for item in client.tts.stream_async(
220
+ text="Async streaming example.",
221
+ model="kugel-one-turbo",
222
+ ):
223
+ if hasattr(item, 'audio'):
224
+ # Process chunk
225
+ pass
226
+
227
+ asyncio.run(generate_speech())
228
+ ```
229
+
230
+ ### Async Generation
231
+
232
+ ```python
233
+ import asyncio
234
+
235
+ async def main():
236
+ audio = await client.tts.generate_async(
237
+ text="Async generation example.",
238
+ model="kugel-one-turbo",
239
+ )
240
+ audio.save("async_output.wav")
241
+
242
+ asyncio.run(main())
243
+ ```
244
+
245
+ ## LLM Integration: Streaming Text Input
246
+
247
+ For real-time TTS when streaming text from an LLM (like GPT-4, Claude, etc.):
248
+
249
+ ### Async Streaming Session
250
+
251
+ ```python
252
+ import asyncio
253
+
254
+ async def stream_from_llm():
255
+ # Simulate LLM token stream
256
+ llm_tokens = ["Hello, ", "this ", "is ", "a ", "streamed ", "response."]
257
+
258
+ async with client.tts.streaming_session(
259
+ voice_id=123,
260
+ cfg_scale=2.0,
261
+ flush_timeout_ms=500, # Auto-flush after 500ms of no input
262
+ ) as session:
263
+ # Send tokens as they arrive from LLM
264
+ for token in llm_tokens:
265
+ async for chunk in session.send(token):
266
+ # Play audio chunk immediately
267
+ play_audio(chunk.audio)
268
+
269
+ # Flush any remaining text
270
+ async for chunk in session.flush():
271
+ play_audio(chunk.audio)
272
+
273
+ asyncio.run(stream_from_llm())
274
+ ```
275
+
276
+ ### Synchronous Streaming Session
277
+
278
+ ```python
279
+ with client.tts.streaming_session_sync(voice_id=123) as session:
280
+ for token in llm_tokens:
281
+ for chunk in session.send(token):
282
+ play_audio(chunk.audio)
283
+
284
+ for chunk in session.flush():
285
+ play_audio(chunk.audio)
286
+ ```
287
+
288
+ ## Error Handling
289
+
290
+ ```python
291
+ from kugelaudio import KugelAudio
292
+ from kugelaudio.exceptions import (
293
+ KugelAudioError,
294
+ AuthenticationError,
295
+ RateLimitError,
296
+ InsufficientCreditsError,
297
+ ValidationError,
298
+ ConnectionError,
299
+ )
300
+
301
+ try:
302
+ audio = client.tts.generate(text="Hello!")
303
+ except AuthenticationError:
304
+ print("Invalid API key")
305
+ except RateLimitError:
306
+ print("Rate limit exceeded, please wait")
307
+ except InsufficientCreditsError:
308
+ print("Not enough credits, please top up")
309
+ except ValidationError as e:
310
+ print(f"Invalid request: {e}")
311
+ except ConnectionError:
312
+ print("Failed to connect to server")
313
+ except KugelAudioError as e:
314
+ print(f"API error: {e}")
315
+ ```
316
+
317
+ ## Data Models
318
+
319
+ ### AudioChunk
320
+
321
+ Represents a single audio chunk from streaming:
322
+
323
+ ```python
324
+ class AudioChunk:
325
+ audio: bytes # Raw PCM16 audio data
326
+ encoding: str # 'pcm_s16le'
327
+ index: int # Chunk index (0-based)
328
+ sample_rate: int # Sample rate (24000)
329
+ samples: int # Number of samples in chunk
330
+
331
+ @property
332
+ def duration_seconds(self) -> float:
333
+ """Duration of this chunk in seconds."""
334
+ ```
335
+
336
+ ### AudioResponse
337
+
338
+ Complete audio response from generation:
339
+
340
+ ```python
341
+ class AudioResponse:
342
+ audio: bytes # Complete PCM16 audio
343
+ sample_rate: int # Sample rate (24000)
344
+ samples: int # Total samples
345
+ duration_ms: float # Duration in milliseconds
346
+ generation_ms: float # Generation time in milliseconds
347
+ rtf: float # Real-time factor
348
+
349
+ @property
350
+ def duration_seconds(self) -> float:
351
+ """Duration in seconds."""
352
+
353
+ def save(self, path: str) -> None:
354
+ """Save as WAV file."""
355
+
356
+ def to_wav_bytes(self) -> bytes:
357
+ """Get WAV file as bytes."""
358
+ ```
359
+
360
+ ### Model
361
+
362
+ TTS model information:
363
+
364
+ ```python
365
+ class Model:
366
+ id: str # 'kugel-one-turbo' or 'kugel-one'
367
+ name: str # Human-readable name
368
+ description: str # Model description
369
+ parameters: str # Parameter count ('1.5B', '7B')
370
+ max_input_length: int # Maximum input characters
371
+ sample_rate: int # Output sample rate
372
+ ```
373
+
374
+ ### Voice
375
+
376
+ Voice information:
377
+
378
+ ```python
379
+ class Voice:
380
+ id: int # Voice ID
381
+ name: str # Voice name
382
+ description: Optional[str] # Description
383
+ category: Optional[VoiceCategory] # 'premade', 'cloned', 'generated'
384
+ sex: Optional[VoiceSex] # 'male', 'female', 'neutral'
385
+ age: Optional[VoiceAge] # 'young', 'middle_aged', 'old'
386
+ supported_languages: List[str] # ['en', 'de', ...]
387
+ sample_text: Optional[str] # Sample text for preview
388
+ avatar_url: Optional[str] # Avatar image URL
389
+ sample_url: Optional[str] # Sample audio URL
390
+ is_public: bool # Whether voice is public
391
+ verified: bool # Whether voice is verified
392
+ ```
393
+
394
+ ## Complete Example
395
+
396
+ ```python
397
+ from kugelaudio import KugelAudio
398
+
399
+ # Initialize client
400
+ client = KugelAudio(api_key="your_api_key")
401
+
402
+ # List available models
403
+ print("Available Models:")
404
+ for model in client.models.list():
405
+ print(f" - {model.id}: {model.name} ({model.parameters})")
406
+
407
+ # List available voices
408
+ print("\nAvailable Voices:")
409
+ for voice in client.voices.list(limit=5):
410
+ print(f" - {voice.id}: {voice.name}")
411
+
412
+ # Generate audio
413
+ print("\nGenerating audio...")
414
+ audio = client.tts.generate(
415
+ text="Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.",
416
+ model="kugel-one-turbo",
417
+ )
418
+
419
+ print(f"Generated {audio.duration_seconds:.2f}s of audio in {audio.generation_ms:.0f}ms")
420
+ print(f"Real-time factor: {audio.rtf:.2f}x")
421
+
422
+ # Save to file
423
+ audio.save("example.wav")
424
+ print("Saved to example.wav")
425
+
426
+ # Close client
427
+ client.close()
428
+ ```
429
+
430
+ ## License
431
+
432
+ MIT
433
+