@lokutor/sdk 1.1.2 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -62,6 +62,41 @@ interface SynthesizeOptions {
62
62
  steps?: number;
63
63
  visemes?: boolean;
64
64
  }
65
+ /**
66
+ * Browser audio configuration options
67
+ */
68
+ interface BrowserAudioOptions {
69
+ inputSampleRate?: number;
70
+ outputSampleRate?: number;
71
+ autoGainControl?: boolean;
72
+ echoCancellation?: boolean;
73
+ noiseSuppression?: boolean;
74
+ analyserEnabled?: boolean;
75
+ onInputError?: (error: Error) => void;
76
+ }
77
+ /**
78
+ * Voice agent conversation options
79
+ */
80
+ interface VoiceAgentOptions {
81
+ prompt?: string;
82
+ voice?: VoiceStyle;
83
+ language?: Language;
84
+ serverUrl?: string;
85
+ visemes?: boolean;
86
+ onTranscription?: (text: string, isUser: boolean) => void;
87
+ onVisemes?: (visemes: Viseme[]) => void;
88
+ onStatusChange?: (status: string) => void;
89
+ onError?: (err: any) => void;
90
+ }
91
+ /**
92
+ * Viseme data for lip-sync animation
93
+ * Format: {"v": index, "c": character, "t": timestamp}
94
+ */
95
+ interface Viseme {
96
+ v: number;
97
+ c: string;
98
+ t: number;
99
+ }
65
100
 
66
101
  /**
67
102
  * Main client for Lokutor Voice Agent SDK
@@ -77,14 +112,19 @@ declare class VoiceAgentClient {
77
112
  private onTranscription?;
78
113
  private onResponse?;
79
114
  private onAudioCallback?;
115
+ private onVisemesCallback?;
80
116
  private onStatus?;
81
117
  private onError?;
82
118
  private isConnected;
83
119
  private messages;
120
+ private visemeListeners;
121
+ private wantVisemes;
84
122
  constructor(config: LokutorConfig & {
85
123
  prompt: string;
86
124
  voice?: VoiceStyle;
87
125
  language?: Language;
126
+ visemes?: boolean;
127
+ onVisemes?: (visemes: Viseme[]) => void;
88
128
  });
89
129
  /**
90
130
  * Connect to the Lokutor Voice Agent server
@@ -110,6 +150,7 @@ declare class VoiceAgentClient {
110
150
  private audioListeners;
111
151
  private emit;
112
152
  onAudio(callback: (data: Uint8Array) => void): void;
153
+ onVisemes(callback: (visemes: Viseme[]) => void): void;
113
154
  /**
114
155
  * Disconnect from the server
115
156
  */
@@ -171,4 +212,193 @@ declare function simpleTTS(options: SynthesizeOptions & {
171
212
  onAudio: (buf: Uint8Array) => void;
172
213
  }): Promise<void>;
173
214
 
174
- export { AUDIO_CONFIG, DEFAULT_URLS, Language, type LokutorConfig, type SynthesizeOptions, TTSClient, VoiceAgentClient, VoiceStyle, simpleConversation, simpleTTS };
215
+ /**
216
+ * Audio utility functions for format conversion, resampling, and PCM processing
217
+ */
218
+ /**
219
+ * Convert 16-bit PCM (Int16) to 32-bit Float
220
+ * @param int16Data Int16Array of PCM audio
221
+ * @returns Float32Array normalized to [-1, 1]
222
+ */
223
+ declare function pcm16ToFloat32(int16Data: Int16Array): Float32Array;
224
+ /**
225
+ * Convert 32-bit Float to 16-bit PCM (Int16)
226
+ * @param float32Data Float32Array normalized to [-1, 1]
227
+ * @returns Int16Array of PCM audio
228
+ */
229
+ declare function float32ToPcm16(float32Data: Float32Array): Int16Array;
230
+ /**
231
+ * Resample audio data from one sample rate to another using linear interpolation
232
+ * @param input Float32Array of input audio
233
+ * @param inputRate Original sample rate in Hz
234
+ * @param outputRate Target sample rate in Hz
235
+ * @returns Float32Array of resampled audio
236
+ */
237
+ declare function resample(input: Float32Array, inputRate: number, outputRate: number): Float32Array;
238
+ /**
239
+ * Apply a simple low-pass filter for anti-aliasing during downsampling
240
+ * @param data Float32Array of audio
241
+ * @param cutoffFreq Cutoff frequency in Hz
242
+ * @param sampleRate Sample rate in Hz
243
+ * @returns Filtered Float32Array
244
+ */
245
+ declare function applyLowPassFilter(data: Float32Array, cutoffFreq: number, sampleRate: number): Float32Array;
246
+ /**
247
+ * Resample audio with anti-aliasing low-pass filter
248
+ * Best used when downsampling to prevent aliasing artifacts
249
+ * @param input Float32Array of input audio
250
+ * @param inputRate Original sample rate in Hz
251
+ * @param outputRate Target sample rate in Hz
252
+ * @returns Float32Array of resampled and filtered audio
253
+ */
254
+ declare function resampleWithAntiAliasing(input: Float32Array, inputRate: number, outputRate: number): Float32Array;
255
+ /**
256
+ * Convert raw audio samples to Uint8Array (bytes)
257
+ * @param data Int16Array of PCM audio
258
+ * @returns Uint8Array containing PCM bytes
259
+ */
260
+ declare function pcm16ToBytes(data: Int16Array): Uint8Array;
261
+ /**
262
+ * Convert bytes to Int16Array
263
+ * @param bytes Uint8Array of PCM bytes
264
+ * @returns Int16Array of PCM audio
265
+ */
266
+ declare function bytesToPcm16(bytes: Uint8Array): Int16Array;
267
+ /**
268
+ * Normalize audio amplitude to prevent clipping
269
+ * @param data Float32Array of audio
270
+ * @param targetPeak Peak level to normalize to (0-1)
271
+ * @returns Normalized Float32Array
272
+ */
273
+ declare function normalizeAudio(data: Float32Array, targetPeak?: number): Float32Array;
274
+ /**
275
+ * Calculate RMS (Root Mean Square) amplitude
276
+ * @param data Float32Array or Uint8Array of audio
277
+ * @returns RMS value (0-1 for normalized float, 0-255 for byte data)
278
+ */
279
+ declare function calculateRMS(data: Float32Array | Uint8Array): number;
280
+ /**
281
+ * Resampler class for streaming audio
282
+ * Useful for processing audio in chunks
283
+ */
284
+ declare class StreamResampler {
285
+ private inputBuffer;
286
+ private inputRate;
287
+ private outputRate;
288
+ constructor(inputRate: number, outputRate: number);
289
+ /**
290
+ * Process a chunk of audio and return resampled data
291
+ * @param inputChunk Float32Array chunk to process
292
+ * @param flush If true, output remaining buffered samples
293
+ * @returns Resampled Float32Array (may be empty if more data needed)
294
+ */
295
+ process(inputChunk: Float32Array, flush?: boolean): Float32Array;
296
+ reset(): void;
297
+ }
298
+
299
+ /**
300
+ * Configuration for browser audio handling
301
+ */
302
+ interface BrowserAudioConfig {
303
+ inputSampleRate?: number;
304
+ outputSampleRate?: number;
305
+ autoGainControl?: boolean;
306
+ echoCancellation?: boolean;
307
+ noiseSuppression?: boolean;
308
+ onInputError?: (error: Error) => void;
309
+ }
310
+ /**
311
+ * Analyser configuration for audio visualization
312
+ */
313
+ interface AnalyserConfig {
314
+ enabled?: boolean;
315
+ fftSize?: number;
316
+ }
317
+ /**
318
+ * Browser-based audio manager for Web Audio API operations
319
+ * Handles microphone input, speaker output, and visualization
320
+ */
321
+ declare class BrowserAudioManager {
322
+ private audioContext;
323
+ private mediaStreamAudioSourceNode;
324
+ private scriptProcessor;
325
+ private analyserNode;
326
+ private mediaStream;
327
+ private nextPlaybackTime;
328
+ private activeSources;
329
+ private playbackQueue;
330
+ private inputSampleRate;
331
+ private outputSampleRate;
332
+ private autoGainControl;
333
+ private echoCancellation;
334
+ private noiseSuppression;
335
+ private onAudioInput?;
336
+ private onInputError?;
337
+ private isMuted;
338
+ private isListening;
339
+ constructor(config?: BrowserAudioConfig);
340
+ /**
341
+ * Initialize the AudioContext and analyser
342
+ */
343
+ init(analyserConfig?: AnalyserConfig): Promise<void>;
344
+ /**
345
+ * Start capturing audio from the microphone
346
+ */
347
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
348
+ /**
349
+ * Internal method to process microphone audio data
350
+ */
351
+ private _processAudioInput;
352
+ /**
353
+ * Stop capturing microphone input
354
+ */
355
+ stopMicrophone(): void;
356
+ /**
357
+ * Play back audio received from the server
358
+ * @param pcm16Data 16-bit PCM audio data as bytes (Uint8Array) at SPEAKER_SAMPLE_RATE
359
+ */
360
+ playAudio(pcm16Data: Uint8Array): void;
361
+ /**
362
+ * Internal method to schedule and play audio with sample-accurate timing
363
+ */
364
+ private _schedulePlayback;
365
+ /**
366
+ * Stop all currently playing audio and clear the queue
367
+ */
368
+ stopPlayback(): void;
369
+ /**
370
+ * Toggle mute state
371
+ */
372
+ setMuted(muted: boolean): void;
373
+ /**
374
+ * Get current mute state
375
+ */
376
+ isMicMuted(): boolean;
377
+ /**
378
+ * Get current amplitude from analyser (for visualization)
379
+ * Returns value between 0 and 1
380
+ */
381
+ getAmplitude(): number;
382
+ /**
383
+ * Get frequency data from analyser for visualization
384
+ */
385
+ getFrequencyData(): Uint8Array;
386
+ /**
387
+ * Get time-domain data from analyser for waveform visualization
388
+ */
389
+ getWaveformData(): Uint8Array;
390
+ /**
391
+ * Cleanup and close AudioContext
392
+ */
393
+ cleanup(): void;
394
+ /**
395
+ * Get current audio context state
396
+ */
397
+ getState(): 'running' | 'suspended' | 'closed' | 'interrupted' | null;
398
+ /**
399
+ * Check if microphone is currently listening
400
+ */
401
+ isRecording(): boolean;
402
+ }
403
+
404
+ export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.d.ts CHANGED
@@ -62,6 +62,41 @@ interface SynthesizeOptions {
62
62
  steps?: number;
63
63
  visemes?: boolean;
64
64
  }
65
+ /**
66
+ * Browser audio configuration options
67
+ */
68
+ interface BrowserAudioOptions {
69
+ inputSampleRate?: number;
70
+ outputSampleRate?: number;
71
+ autoGainControl?: boolean;
72
+ echoCancellation?: boolean;
73
+ noiseSuppression?: boolean;
74
+ analyserEnabled?: boolean;
75
+ onInputError?: (error: Error) => void;
76
+ }
77
+ /**
78
+ * Voice agent conversation options
79
+ */
80
+ interface VoiceAgentOptions {
81
+ prompt?: string;
82
+ voice?: VoiceStyle;
83
+ language?: Language;
84
+ serverUrl?: string;
85
+ visemes?: boolean;
86
+ onTranscription?: (text: string, isUser: boolean) => void;
87
+ onVisemes?: (visemes: Viseme[]) => void;
88
+ onStatusChange?: (status: string) => void;
89
+ onError?: (err: any) => void;
90
+ }
91
+ /**
92
+ * Viseme data for lip-sync animation
93
+ * Format: {"v": index, "c": character, "t": timestamp}
94
+ */
95
+ interface Viseme {
96
+ v: number;
97
+ c: string;
98
+ t: number;
99
+ }
65
100
 
66
101
  /**
67
102
  * Main client for Lokutor Voice Agent SDK
@@ -77,14 +112,19 @@ declare class VoiceAgentClient {
77
112
  private onTranscription?;
78
113
  private onResponse?;
79
114
  private onAudioCallback?;
115
+ private onVisemesCallback?;
80
116
  private onStatus?;
81
117
  private onError?;
82
118
  private isConnected;
83
119
  private messages;
120
+ private visemeListeners;
121
+ private wantVisemes;
84
122
  constructor(config: LokutorConfig & {
85
123
  prompt: string;
86
124
  voice?: VoiceStyle;
87
125
  language?: Language;
126
+ visemes?: boolean;
127
+ onVisemes?: (visemes: Viseme[]) => void;
88
128
  });
89
129
  /**
90
130
  * Connect to the Lokutor Voice Agent server
@@ -110,6 +150,7 @@ declare class VoiceAgentClient {
110
150
  private audioListeners;
111
151
  private emit;
112
152
  onAudio(callback: (data: Uint8Array) => void): void;
153
+ onVisemes(callback: (visemes: Viseme[]) => void): void;
113
154
  /**
114
155
  * Disconnect from the server
115
156
  */
@@ -171,4 +212,193 @@ declare function simpleTTS(options: SynthesizeOptions & {
171
212
  onAudio: (buf: Uint8Array) => void;
172
213
  }): Promise<void>;
173
214
 
174
- export { AUDIO_CONFIG, DEFAULT_URLS, Language, type LokutorConfig, type SynthesizeOptions, TTSClient, VoiceAgentClient, VoiceStyle, simpleConversation, simpleTTS };
215
+ /**
216
+ * Audio utility functions for format conversion, resampling, and PCM processing
217
+ */
218
+ /**
219
+ * Convert 16-bit PCM (Int16) to 32-bit Float
220
+ * @param int16Data Int16Array of PCM audio
221
+ * @returns Float32Array normalized to [-1, 1]
222
+ */
223
+ declare function pcm16ToFloat32(int16Data: Int16Array): Float32Array;
224
+ /**
225
+ * Convert 32-bit Float to 16-bit PCM (Int16)
226
+ * @param float32Data Float32Array normalized to [-1, 1]
227
+ * @returns Int16Array of PCM audio
228
+ */
229
+ declare function float32ToPcm16(float32Data: Float32Array): Int16Array;
230
+ /**
231
+ * Resample audio data from one sample rate to another using linear interpolation
232
+ * @param input Float32Array of input audio
233
+ * @param inputRate Original sample rate in Hz
234
+ * @param outputRate Target sample rate in Hz
235
+ * @returns Float32Array of resampled audio
236
+ */
237
+ declare function resample(input: Float32Array, inputRate: number, outputRate: number): Float32Array;
238
+ /**
239
+ * Apply a simple low-pass filter for anti-aliasing during downsampling
240
+ * @param data Float32Array of audio
241
+ * @param cutoffFreq Cutoff frequency in Hz
242
+ * @param sampleRate Sample rate in Hz
243
+ * @returns Filtered Float32Array
244
+ */
245
+ declare function applyLowPassFilter(data: Float32Array, cutoffFreq: number, sampleRate: number): Float32Array;
246
+ /**
247
+ * Resample audio with anti-aliasing low-pass filter
248
+ * Best used when downsampling to prevent aliasing artifacts
249
+ * @param input Float32Array of input audio
250
+ * @param inputRate Original sample rate in Hz
251
+ * @param outputRate Target sample rate in Hz
252
+ * @returns Float32Array of resampled and filtered audio
253
+ */
254
+ declare function resampleWithAntiAliasing(input: Float32Array, inputRate: number, outputRate: number): Float32Array;
255
+ /**
256
+ * Convert raw audio samples to Uint8Array (bytes)
257
+ * @param data Int16Array of PCM audio
258
+ * @returns Uint8Array containing PCM bytes
259
+ */
260
+ declare function pcm16ToBytes(data: Int16Array): Uint8Array;
261
+ /**
262
+ * Convert bytes to Int16Array
263
+ * @param bytes Uint8Array of PCM bytes
264
+ * @returns Int16Array of PCM audio
265
+ */
266
+ declare function bytesToPcm16(bytes: Uint8Array): Int16Array;
267
+ /**
268
+ * Normalize audio amplitude to prevent clipping
269
+ * @param data Float32Array of audio
270
+ * @param targetPeak Peak level to normalize to (0-1)
271
+ * @returns Normalized Float32Array
272
+ */
273
+ declare function normalizeAudio(data: Float32Array, targetPeak?: number): Float32Array;
274
+ /**
275
+ * Calculate RMS (Root Mean Square) amplitude
276
+ * @param data Float32Array or Uint8Array of audio
277
+ * @returns RMS value (0-1 for normalized float, 0-255 for byte data)
278
+ */
279
+ declare function calculateRMS(data: Float32Array | Uint8Array): number;
280
+ /**
281
+ * Resampler class for streaming audio
282
+ * Useful for processing audio in chunks
283
+ */
284
+ declare class StreamResampler {
285
+ private inputBuffer;
286
+ private inputRate;
287
+ private outputRate;
288
+ constructor(inputRate: number, outputRate: number);
289
+ /**
290
+ * Process a chunk of audio and return resampled data
291
+ * @param inputChunk Float32Array chunk to process
292
+ * @param flush If true, output remaining buffered samples
293
+ * @returns Resampled Float32Array (may be empty if more data needed)
294
+ */
295
+ process(inputChunk: Float32Array, flush?: boolean): Float32Array;
296
+ reset(): void;
297
+ }
298
+
299
+ /**
300
+ * Configuration for browser audio handling
301
+ */
302
+ interface BrowserAudioConfig {
303
+ inputSampleRate?: number;
304
+ outputSampleRate?: number;
305
+ autoGainControl?: boolean;
306
+ echoCancellation?: boolean;
307
+ noiseSuppression?: boolean;
308
+ onInputError?: (error: Error) => void;
309
+ }
310
+ /**
311
+ * Analyser configuration for audio visualization
312
+ */
313
+ interface AnalyserConfig {
314
+ enabled?: boolean;
315
+ fftSize?: number;
316
+ }
317
+ /**
318
+ * Browser-based audio manager for Web Audio API operations
319
+ * Handles microphone input, speaker output, and visualization
320
+ */
321
+ declare class BrowserAudioManager {
322
+ private audioContext;
323
+ private mediaStreamAudioSourceNode;
324
+ private scriptProcessor;
325
+ private analyserNode;
326
+ private mediaStream;
327
+ private nextPlaybackTime;
328
+ private activeSources;
329
+ private playbackQueue;
330
+ private inputSampleRate;
331
+ private outputSampleRate;
332
+ private autoGainControl;
333
+ private echoCancellation;
334
+ private noiseSuppression;
335
+ private onAudioInput?;
336
+ private onInputError?;
337
+ private isMuted;
338
+ private isListening;
339
+ constructor(config?: BrowserAudioConfig);
340
+ /**
341
+ * Initialize the AudioContext and analyser
342
+ */
343
+ init(analyserConfig?: AnalyserConfig): Promise<void>;
344
+ /**
345
+ * Start capturing audio from the microphone
346
+ */
347
+ startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
348
+ /**
349
+ * Internal method to process microphone audio data
350
+ */
351
+ private _processAudioInput;
352
+ /**
353
+ * Stop capturing microphone input
354
+ */
355
+ stopMicrophone(): void;
356
+ /**
357
+ * Play back audio received from the server
358
+ * @param pcm16Data 16-bit PCM audio data as bytes (Uint8Array) at SPEAKER_SAMPLE_RATE
359
+ */
360
+ playAudio(pcm16Data: Uint8Array): void;
361
+ /**
362
+ * Internal method to schedule and play audio with sample-accurate timing
363
+ */
364
+ private _schedulePlayback;
365
+ /**
366
+ * Stop all currently playing audio and clear the queue
367
+ */
368
+ stopPlayback(): void;
369
+ /**
370
+ * Toggle mute state
371
+ */
372
+ setMuted(muted: boolean): void;
373
+ /**
374
+ * Get current mute state
375
+ */
376
+ isMicMuted(): boolean;
377
+ /**
378
+ * Get current amplitude from analyser (for visualization)
379
+ * Returns value between 0 and 1
380
+ */
381
+ getAmplitude(): number;
382
+ /**
383
+ * Get frequency data from analyser for visualization
384
+ */
385
+ getFrequencyData(): Uint8Array;
386
+ /**
387
+ * Get time-domain data from analyser for waveform visualization
388
+ */
389
+ getWaveformData(): Uint8Array;
390
+ /**
391
+ * Cleanup and close AudioContext
392
+ */
393
+ cleanup(): void;
394
+ /**
395
+ * Get current audio context state
396
+ */
397
+ getState(): 'running' | 'suspended' | 'closed' | 'interrupted' | null;
398
+ /**
399
+ * Check if microphone is currently listening
400
+ */
401
+ isRecording(): boolean;
402
+ }
403
+
404
+ export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };