kugelaudio 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,20 @@
1
+ # Changelog
2
+
3
+ All notable changes to the KugelAudio JavaScript/TypeScript SDK will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2024-12-17
9
+
10
+ ### Added
11
+ - Initial release of the KugelAudio JavaScript/TypeScript SDK
12
+ - **Models API**: List available TTS models (`client.models.list()`)
13
+ - **Voices API**: List voices (`client.voices.list()`) and get voice details (`client.voices.get()`)
14
+ - **TTS Generation**: Generate complete audio (`client.tts.generate()`)
15
+ - **Streaming**: Real-time audio streaming via WebSocket (`client.tts.stream()`)
16
+ - **Audio Utilities**: `createWavBlob()`, `createWavFile()`, `decodePCM16()`, `base64ToArrayBuffer()`
17
+ - **TypeScript**: Full type definitions for all APIs
18
+ - **Error Handling**: Typed exceptions for auth, rate limits, validation errors
19
+ - **Single URL Architecture**: Connect to TTS server directly for minimal latency
20
+ - **Browser Support**: Works in modern browsers with WebSocket support
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 KugelAudio
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,499 @@
1
+ # KugelAudio JavaScript/TypeScript SDK
2
+
3
+ Official JavaScript/TypeScript SDK for the KugelAudio Text-to-Speech API.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install kugelaudio
9
+ ```
10
+
11
+ Or with yarn:
12
+
13
+ ```bash
14
+ yarn add kugelaudio
15
+ ```
16
+
17
+ Or with pnpm:
18
+
19
+ ```bash
20
+ pnpm add kugelaudio
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ```typescript
26
+ import { KugelAudio, createWavBlob } from 'kugelaudio';
27
+
28
+ // Initialize the client - just needs an API key!
29
+ const client = new KugelAudio({ apiKey: 'your_api_key' });
30
+
31
+ // Generate speech
32
+ const audio = await client.tts.generate({
33
+ text: 'Hello, world!',
34
+ model: 'kugel-one-turbo',
35
+ });
36
+
37
+ // Create a playable blob (browser). audio.audio is raw PCM16, so wrap it in a WAV container first.
38
+ const blob = createWavBlob(audio.audio, audio.sampleRate);
39
+ const url = URL.createObjectURL(blob);
40
+ const audioElement = new Audio(url);
41
+ audioElement.play();
42
+ ```
43
+
44
+ ## Client Configuration
45
+
46
+ ```typescript
47
+ import { KugelAudio } from 'kugelaudio';
48
+
49
+ // Simple setup - single URL handles everything
50
+ const client = new KugelAudio({ apiKey: 'your_api_key' });
51
+
52
+ // Or with custom options
53
+ const customClient = new KugelAudio({
54
+ apiKey: 'your_api_key', // Required: Your API key
55
+ apiUrl: 'https://api.kugelaudio.com', // Optional: API base URL (default)
56
+ timeout: 60000, // Optional: Request timeout in ms
57
+ });
58
+ ```
59
+
60
+ ### Single URL Architecture
61
+
62
+ The SDK uses a **single URL** for both REST API and WebSocket streaming. The TTS server provides both REST endpoints (`/v1/models`, `/v1/voices`) and WebSocket (`/ws/tts`) - no proxy needed, minimal latency.
63
+
64
+ ### Local Development
65
+
66
+ For local development, point directly to your TTS server:
67
+
68
+ ```typescript
69
+ const client = new KugelAudio({
70
+ apiKey: 'your_api_key',
71
+ apiUrl: 'http://localhost:8000', // TTS server handles everything
72
+ });
73
+ ```
74
+
75
+ Or if you have separate backend and TTS servers:
76
+
77
+ ```typescript
78
+ const client = new KugelAudio({
79
+ apiKey: 'your_api_key',
80
+ apiUrl: 'http://localhost:8001', // Backend for REST API
81
+ ttsUrl: 'http://localhost:8000', // TTS server for WebSocket streaming
82
+ });
83
+ ```
84
+
85
+ ## Available Models
86
+
87
+ | Model ID | Name | Parameters | Description |
88
+ |----------|------|------------|-------------|
89
+ | `kugel-one-turbo` | Kugel One Turbo | 1.5B | Fast, low-latency model for real-time applications |
90
+ | `kugel-one` | Kugel One | 7B | Premium quality model for pre-recorded content |
91
+
92
+ ### List Available Models
93
+
94
+ ```typescript
95
+ const models = await client.models.list();
96
+
97
+ for (const model of models) {
98
+ console.log(`${model.id}: ${model.name}`);
99
+ console.log(` Description: ${model.description}`);
100
+ console.log(` Parameters: ${model.parameters}`);
101
+ console.log(` Max Input: ${model.maxInputLength} characters`);
102
+ console.log(` Sample Rate: ${model.sampleRate} Hz`);
103
+ }
104
+ ```
105
+
106
+ ## Voices
107
+
108
+ ### List Available Voices
109
+
110
+ ```typescript
111
+ // List all available voices
112
+ const voices = await client.voices.list();
113
+
114
+ for (const voice of voices) {
115
+ console.log(`${voice.id}: ${voice.name}`);
116
+ console.log(` Category: ${voice.category}`);
117
+ console.log(` Languages: ${voice.supportedLanguages.join(', ')}`);
118
+ }
119
+
120
+ // Filter by language
121
+ const germanVoices = await client.voices.list({ language: 'de' });
122
+
123
+ // Include public voices in the results
124
+ const publicVoices = await client.voices.list({ includePublic: true });
125
+
126
+ // Limit results
127
+ const first10 = await client.voices.list({ limit: 10 });
128
+ ```
129
+
130
+ ### Get a Specific Voice
131
+
132
+ ```typescript
133
+ const voice = await client.voices.get(123);
134
+ console.log(`Voice: ${voice.name}`);
135
+ console.log(`Sample text: ${voice.sampleText}`);
136
+ ```
137
+
138
+ ## Text-to-Speech Generation
139
+
140
+ ### Basic Generation (Non-Streaming)
141
+
142
+ Generate complete audio and receive it all at once:
143
+
144
+ ```typescript
145
+ const audio = await client.tts.generate({
146
+ text: 'Hello, this is a test of the KugelAudio text-to-speech system.',
147
+ model: 'kugel-one-turbo', // 'kugel-one-turbo' (fast) or 'kugel-one' (quality)
148
+ voiceId: 123, // Optional: specific voice ID
149
+ cfgScale: 2.0, // Guidance scale (1.0-5.0)
150
+ maxNewTokens: 2048, // Maximum tokens to generate
151
+ sampleRate: 24000, // Output sample rate
152
+ speakerPrefix: true, // Add speaker prefix for better quality
153
+ });
154
+
155
+ // Audio properties
156
+ console.log(`Duration: ${audio.durationMs}ms`);
157
+ console.log(`Samples: ${audio.samples}`);
158
+ console.log(`Sample rate: ${audio.sampleRate} Hz`);
159
+ console.log(`Generation time: ${audio.generationMs}ms`);
160
+ console.log(`RTF: ${audio.rtf}`); // Real-time factor
161
+
162
+ // audio.audio is an ArrayBuffer with PCM16 data
163
+ ```
164
+
165
+ ### Playing Audio in Browser
166
+
167
+ ```typescript
168
+ import { createWavBlob } from 'kugelaudio';
169
+
170
+ const audio = await client.tts.generate({
171
+ text: 'Hello, world!',
172
+ model: 'kugel-one-turbo',
173
+ });
174
+
175
+ // Create WAV blob for playback
176
+ const wavBlob = createWavBlob(audio.audio, audio.sampleRate);
177
+ const url = URL.createObjectURL(wavBlob);
178
+
179
+ // Play with Audio element
180
+ const audioElement = new Audio(url);
181
+ audioElement.play();
182
+
183
+ // Or with Web Audio API
184
+ const audioContext = new AudioContext();
185
+ const arrayBuffer = await wavBlob.arrayBuffer();
186
+ const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
187
+ const source = audioContext.createBufferSource();
188
+ source.buffer = audioBuffer;
189
+ source.connect(audioContext.destination);
190
+ source.start();
191
+ ```
192
+
193
+ ### Streaming Audio Output
194
+
195
+ Receive audio chunks as they are generated for lower latency:
196
+
197
+ ```typescript
198
+ await client.tts.stream(
199
+ {
200
+ text: 'Hello, this is streaming audio.',
201
+ model: 'kugel-one-turbo',
202
+ },
203
+ {
204
+ onOpen: () => {
205
+ console.log('WebSocket connected');
206
+ },
207
+ onChunk: (chunk) => {
208
+ console.log(`Chunk ${chunk.index}: ${chunk.samples} samples`);
209
+ // chunk.audio is base64-encoded PCM16 data
210
+ // Use base64ToArrayBuffer() to decode
211
+ playAudioChunk(chunk);
212
+ },
213
+ onFinal: (stats) => {
214
+ console.log(`Total duration: ${stats.durationMs}ms`);
215
+ console.log(`Time to first audio: ${stats.ttfaMs}ms`);
216
+ console.log(`Generation time: ${stats.generationMs}ms`);
217
+ console.log(`RTF: ${stats.rtf}`);
218
+ },
219
+ onError: (error) => {
220
+ console.error('TTS error:', error);
221
+ },
222
+ onClose: () => {
223
+ console.log('WebSocket closed');
224
+ },
225
+ }
226
+ );
227
+ ```
228
+
229
+ ### Processing Audio Chunks
230
+
231
+ ```typescript
232
+ import { base64ToArrayBuffer, decodePCM16 } from 'kugelaudio';
233
+
234
+ // In streaming callback:
235
+ onChunk: (chunk) => {
236
+ // Decode base64 to an ArrayBuffer of raw PCM16 bytes (e.g. to save or concatenate chunks)
237
+ const pcmBuffer = base64ToArrayBuffer(chunk.audio);
238
+
239
+ // Convert the base64 PCM16 payload directly to Float32 for the Web Audio API
240
+ const float32Data = decodePCM16(chunk.audio);
241
+
242
+ // Play with Web Audio API
243
+ const audioBuffer = audioContext.createBuffer(1, float32Data.length, chunk.sampleRate);
244
+ audioBuffer.copyToChannel(float32Data, 0);
245
+
246
+ const source = audioContext.createBufferSource();
247
+ source.buffer = audioBuffer;
248
+ source.connect(audioContext.destination);
249
+ source.start();
250
+ }
251
+ ```
252
+
253
+ ## Error Handling
254
+
255
+ ```typescript
256
+ import { KugelAudio } from 'kugelaudio';
257
+ import {
258
+ KugelAudioError,
259
+ AuthenticationError,
260
+ RateLimitError,
261
+ InsufficientCreditsError,
262
+ ValidationError,
263
+ ConnectionError,
264
+ } from 'kugelaudio';
265
+
266
+ try {
267
+ const audio = await client.tts.generate({ text: 'Hello!' });
268
+ } catch (error) {
269
+ if (error instanceof AuthenticationError) {
270
+ console.error('Invalid API key');
271
+ } else if (error instanceof RateLimitError) {
272
+ console.error('Rate limit exceeded, please wait');
273
+ } else if (error instanceof InsufficientCreditsError) {
274
+ console.error('Not enough credits, please top up');
275
+ } else if (error instanceof ValidationError) {
276
+ console.error(`Invalid request: ${error.message}`);
277
+ } else if (error instanceof ConnectionError) {
278
+ console.error('Failed to connect to server');
279
+ } else if (error instanceof KugelAudioError) {
280
+ console.error(`API error: ${error.message}`);
281
+ }
282
+ }
283
+ ```
284
+
285
+ ## TypeScript Types
286
+
287
+ ### KugelAudioOptions
288
+
289
+ ```typescript
290
+ interface KugelAudioOptions {
291
+ apiKey: string; // Required
292
+ apiUrl?: string; // Default: 'https://api.kugelaudio.com'
293
+ ttsUrl?: string; // Default: same as apiUrl (one server handles both REST and WebSocket)
294
+ timeout?: number; // Default: 60000 (ms)
295
+ }
296
+ ```
297
+
298
+ ### GenerateOptions
299
+
300
+ ```typescript
301
+ interface GenerateOptions {
302
+ text: string; // Required: Text to synthesize
303
+ model?: string; // Default: 'kugel-one-turbo'
304
+ voiceId?: number; // Optional: Voice ID
305
+ cfgScale?: number; // Default: 2.0
306
+ maxNewTokens?: number; // Default: 2048
307
+ sampleRate?: number; // Default: 24000
308
+ speakerPrefix?: boolean; // Default: true
309
+ }
310
+ ```
311
+
312
+ ### AudioChunk
313
+
314
+ ```typescript
315
+ interface AudioChunk {
316
+ audio: string; // Base64-encoded PCM16 audio
317
+ encoding: string; // 'pcm_s16le'
318
+ index: number; // Chunk index (0-based)
319
+ sampleRate: number; // Sample rate (24000)
320
+ samples: number; // Number of samples in chunk
321
+ }
322
+ ```
323
+
324
+ ### AudioResponse
325
+
326
+ ```typescript
327
+ interface AudioResponse {
328
+ audio: ArrayBuffer; // Complete PCM16 audio
329
+ sampleRate: number; // Sample rate (24000)
330
+ samples: number; // Total samples
331
+ durationMs: number; // Duration in milliseconds
332
+ generationMs: number; // Generation time in milliseconds
333
+ rtf: number; // Real-time factor
334
+ }
335
+ ```
336
+
337
+ ### GenerationStats
338
+
339
+ ```typescript
340
+ interface GenerationStats {
341
+ final: true;
342
+ chunks: number; // Number of chunks generated
343
+ totalSamples: number; // Total samples generated
344
+ durationMs: number; // Audio duration in ms
345
+ generationMs: number; // Generation time in ms
346
+ ttfaMs: number; // Time to first audio in ms
347
+ rtf: number; // Real-time factor
348
+ }
349
+ ```
350
+
351
+ ### StreamCallbacks
352
+
353
+ ```typescript
354
+ interface StreamCallbacks {
355
+ onOpen?: () => void;
356
+ onChunk?: (chunk: AudioChunk) => void;
357
+ onFinal?: (stats: GenerationStats) => void;
358
+ onError?: (error: Error) => void;
359
+ onClose?: () => void;
360
+ }
361
+ ```
362
+
363
+ ### Model
364
+
365
+ ```typescript
366
+ interface Model {
367
+ id: string; // 'kugel-one-turbo' or 'kugel-one'
368
+ name: string; // Human-readable name
369
+ description: string; // Model description
370
+ parameters: string; // Parameter count ('1.5B', '7B')
371
+ maxInputLength: number; // Maximum input characters
372
+ sampleRate: number; // Output sample rate
373
+ }
374
+ ```
375
+
376
+ ### Voice
377
+
378
+ ```typescript
379
+ interface Voice {
380
+ id: number; // Voice ID
381
+ name: string; // Voice name
382
+ description?: string; // Description
383
+ category?: VoiceCategory; // 'premade' | 'cloned' | 'generated'
384
+ sex?: VoiceSex; // 'male' | 'female' | 'neutral'
385
+ age?: VoiceAge; // 'young' | 'middle_aged' | 'old'
386
+ supportedLanguages: string[]; // ['en', 'de', ...]
387
+ sampleText?: string; // Sample text for preview
388
+ avatarUrl?: string; // Avatar image URL
389
+ sampleUrl?: string; // Sample audio URL
390
+ isPublic: boolean; // Whether voice is public
391
+ verified: boolean; // Whether voice is verified
392
+ }
393
+ ```
394
+
395
+ ## Utility Functions
396
+
397
+ ### base64ToArrayBuffer
398
+
399
+ Convert base64 string to ArrayBuffer:
400
+
401
+ ```typescript
402
+ import { base64ToArrayBuffer } from 'kugelaudio';
403
+
404
+ const buffer = base64ToArrayBuffer(chunk.audio);
405
+ ```
406
+
407
+ ### decodePCM16
408
+
409
+ Convert base64 PCM16 to Float32Array for Web Audio API:
410
+
411
+ ```typescript
412
+ import { decodePCM16 } from 'kugelaudio';
413
+
414
+ const floatData = decodePCM16(chunk.audio);
415
+ ```
416
+
417
+ ### createWavFile
418
+
419
+ Create a WAV file from PCM16 data:
420
+
421
+ ```typescript
422
+ import { createWavFile } from 'kugelaudio';
423
+
424
+ const wavBuffer = createWavFile(pcmArrayBuffer, 24000);
425
+ ```
426
+
427
+ ### createWavBlob
428
+
429
+ Create a playable Blob from PCM16 data:
430
+
431
+ ```typescript
432
+ import { createWavBlob } from 'kugelaudio';
433
+
434
+ const blob = createWavBlob(pcmArrayBuffer, 24000);
435
+ const url = URL.createObjectURL(blob);
436
+ ```
437
+
438
+ ## Complete Example
439
+
440
+ ```typescript
441
+ import { KugelAudio, base64ToArrayBuffer, createWavBlob } from 'kugelaudio';
442
+
443
+ async function main() {
444
+ // Initialize client
445
+ const client = new KugelAudio({ apiKey: 'your_api_key' });
446
+
447
+ // List available models
448
+ console.log('Available Models:');
449
+ const models = await client.models.list();
450
+ for (const model of models) {
451
+ console.log(` - ${model.id}: ${model.name} (${model.parameters})`);
452
+ }
453
+
454
+ // List available voices
455
+ console.log('\nAvailable Voices:');
456
+ const voices = await client.voices.list({ limit: 5 });
457
+ for (const voice of voices) {
458
+ console.log(` - ${voice.id}: ${voice.name}`);
459
+ }
460
+
461
+ // Generate audio with streaming
462
+ console.log('\nGenerating audio (streaming)...');
463
+ const chunks: ArrayBuffer[] = [];
464
+ let ttfa: number | undefined;
465
+ const startTime = Date.now();
466
+
467
+ await client.tts.stream(
468
+ {
469
+ text: 'Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.',
470
+ model: 'kugel-one-turbo',
471
+ },
472
+ {
473
+ onChunk: (chunk) => {
474
+ if (ttfa === undefined) {
475
+ ttfa = Date.now() - startTime;
476
+ console.log(`Time to first audio: ${ttfa}ms`);
477
+ }
478
+ chunks.push(base64ToArrayBuffer(chunk.audio));
479
+ },
480
+ onFinal: (stats) => {
481
+ console.log(`Generated ${stats.durationMs}ms of audio`);
482
+ console.log(`Generation time: ${stats.generationMs}ms`);
483
+ console.log(`RTF: ${stats.rtf}x`);
484
+ },
485
+ }
486
+ );
487
+ }
488
+
489
+ main();
490
+ ```
491
+
492
+ ## Browser Support
493
+
494
+ The SDK works in modern browsers with WebSocket support. For Node.js, ensure you have a WebSocket implementation available.
495
+
496
+ ## License
497
+
498
+ MIT
499
+