@livefantasia/speechengine-client 0.5.1-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 LiveFantasia.ai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,402 @@
1
+ # LiveFantasia SpeechEngine Client for Node.js
2
+
3
+ [![npm version](https://badge.fury.io/js/%40livefantasia%2Fspeechengine-client.svg)](https://badge.fury.io/js/%40livefantasia%2Fspeechengine-client)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+ [![Node.js Version](https://img.shields.io/badge/node-%3E%3D20.0.0-brightgreen.svg)](https://nodejs.org/)
6
+
7
+ A powerful Node.js client library for the LiveFantasia SpeechEngine platform, providing real-time speech recognition capabilities through WebSocket streaming.
8
+
9
+ ## Features
10
+
11
+ - 🎤 **Real-time Speech Recognition**: Stream audio data and receive live transcription results
12
+ - 🌐 **WebSocket Streaming**: Efficient real-time communication with the SpeechEngine API
13
+ - 🔄 **Multiple Sessions**: Support for concurrent streaming sessions
14
+ - 🎯 **TypeScript Support**: Full TypeScript definitions included
15
+ - 📊 **Session Management**: Built-in session lifecycle management and statistics
16
+ - 🛠️ **Utility Classes**: Helper classes like `TranscriptionManager` for easy result handling
17
+ - 🎵 **Audio Format Support**: Supports 16 kHz, 16-bit, mono WAV audio
18
+ - 🌍 **Multi-language**: Support for multiple languages
19
+ - 📝 **Comprehensive Examples**: Rich set of examples for different use cases
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ npm install @livefantasia/speechengine-client
25
+ ```
26
+
27
+ ### Optional Dependencies
28
+
29
+ For the real-time microphone examples, install an audio capture package such as `mic`:
30
+
31
+ ```bash
32
+ # For Apple Silicon compatibility (recommended)
33
+ npm install mic
34
+ ```
35
+
36
+ > **Note**: These packages are only required if you want to run the real-time microphone examples. They are not needed for the core library functionality.
37
+
38
+ #### Platform Compatibility Notes
39
+
40
+ **Apple Silicon (M1/M2/M3) Macs:**
41
+ - ✅ **Recommended**: Use `mic` module for real-time microphone examples
42
+ - ❌ **Avoid**: `naudiodon` can cause segmentation faults and build failures on ARM architecture
43
+
44
+ **Intel/x86 Systems:**
45
+ - ✅ Both `mic` and `naudiodon` should work
46
+ - 💡 **Tip**: `mic` is more universally compatible across platforms
47
+
48
+ **CI/CD Environments:**
49
+ - ⚠️ **Important**: `naudiodon` requires native compilation and may fail in containerized environments (Ubuntu, Alpine Linux)
50
+ - ✅ **Solution**: Use `mic` or exclude audio dependencies from CI builds if not needed
51
+
52
+ **Build Issues with `naudiodon`:**
53
+ If you encounter build failures related to `naudiodon`, this is typically due to:
54
+ - Missing system audio libraries (ALSA, PulseAudio on Linux)
55
+ - Incompatible architecture (ARM vs x86)
56
+ - Missing build tools (node-gyp, Python, C++ compiler)
57
+
58
+ **Recommended approach**: Use the `real-time-microphone-node-mic.ts` example with the `mic` module for better cross-platform compatibility.
59
+
60
+ ## Quick Start
61
+
62
+ ### 1. Set up your API key
63
+
64
+ ```bash
65
+ export SPEECHENGINE_API_KEY="your-api-key-here"
66
+ ```
67
+
68
+ ### 2. Basic streaming example
69
+
70
+ ```typescript
71
+ import { SpeechEngineClient, TranscriptionManager } from '@livefantasia/speechengine-client';
72
+ import * as fs from 'fs';
73
+
74
+ async function basicExample() {
75
+ // Initialize the client
76
+ const client = new SpeechEngineClient({
77
+ apiKey: process.env.SPEECHENGINE_API_KEY!
78
+ });
79
+
80
+ try {
81
+ // Create a streaming session
82
+ const session = await client.createSession({
83
+ language: 'en',
84
+ });
85
+
86
+ // Use TranscriptionManager for easy result handling
87
+ const transcriptionManager = new TranscriptionManager();
88
+
89
+ // Set up event handlers
90
+ session.on('ready', () => {
91
+ console.log('Session ready, starting audio stream...');
92
+ });
93
+
94
+ session.on('transcriptionUpdate', (message) => {
95
+ transcriptionManager.processUpdate(message);
96
+ console.log('Live transcription:', transcriptionManager.getCurrentTranscription());
97
+ });
98
+
99
+ session.on('end', () => {
100
+ console.log('Final transcription:', transcriptionManager.getFinalTranscription());
101
+ });
102
+
103
+ // Connect to the session
104
+ await session.connect();
105
+
106
+ // Stream audio data
107
+ const audioData = fs.readFileSync('path/to/your/audio.wav');
108
+ await session.sendAudio(audioData);
109
+ await session.endStream();
110
+
111
+ } catch (error) {
112
+ console.error('Error:', error);
113
+ } finally {
114
+ await client.closeAllSessions();
115
+ }
116
+ }
117
+
118
+ basicExample();
119
+ ```
120
+
121
+ ## Conventions
122
+
123
+ - All message payloads emitted to your handlers use camelCase, consistent with Node.js conventions.
124
+ - `segmentId`, `text`, `startMs`, `endMs`, `isFinal`, `utteranceOrder`, `words[]` with `word`, `startMs`, `endMs`.
125
+ - Stream start options are provided in camelCase via `startStream(options)` and are converted internally to the server’s snake_case.
126
+ - Configure defaults at session creation using `SessionConfig` camelCase fields.
127
+
128
+ ### Stream Start Options
129
+
130
+ Use `startStream(options)` to enable word timestamps and Voice Activity Detection (VAD):
131
+
132
+ ```typescript
133
+ await session.startStream({
134
+ wordTimestamp: true,
135
+ vadThreshold: 0.6, // number in (0,1)
136
+ vadMinSilenceDuration: 0.2, // number in (0,1)
137
+ vadMinSpeechDuration: 0.2 // number in (0,1)
138
+ });
139
+ ```
140
+
141
+ These options are validated locally; invalid values throw a `ClientErrorCode.INVALID_PARAMETER` error before any network call.
142
+
143
+ ## API Reference
144
+
145
+ ### SpeechEngineClient
146
+
147
+ The main client class for interacting with the SpeechEngine API.
148
+
149
+ #### Constructor
150
+
151
+ ```typescript
152
+ const client = new SpeechEngineClient(config: SpeechEngineClientConfig);
153
+ ```
154
+
155
+ **Configuration Options:**
156
+ - `apiKey: string` - Your SpeechEngine API key
157
+ - `baseUrl?: string` - Base URL for the API (optional)
158
+ - `timeout?: number` - Connection timeout in milliseconds (default: 10000)
159
+
160
+ #### Methods
161
+
162
+ ##### `createSession(config?: Partial<SessionConfig>): Promise<StreamingSession>`
163
+
164
+ Creates a new streaming session.
165
+
166
+ **Session Configuration:**
167
+ - `language: Language` - Language code (e.g., 'en', 'es', 'fr')
168
+ - `enableWordTimestamps?: boolean` - Enable word-level timestamps
169
+ - `maxAlternatives?: number` - Maximum number of alternative transcriptions
170
+
171
+ ##### `closeAllSessions(): Promise<void>`
172
+
173
+ Closes all active sessions managed by the client.
174
+
175
+ ### StreamingSession
176
+
177
+ Represents an active streaming session.
178
+
179
+ #### Events
180
+
181
+ - `ready` - Session is ready to receive audio
182
+ - `transcriptionUpdate` - New transcription data received
183
+ - `end` - Session ended
184
+ - `error` - Error occurred
185
+
186
+ #### Methods
187
+
188
+ ##### `connect(): Promise<void>`
189
+
190
+ Connects to the streaming session.
191
+
192
+ ##### `sendAudio(audioData: Buffer): Promise<void>`
193
+
194
+ Sends audio data to the session.
195
+
196
+ ##### `endStream(): Promise<void>`
197
+
198
+ Ends the audio stream and finalizes transcription.
199
+
200
+ ##### `disconnect(): Promise<void>`
201
+
202
+ Disconnects from the session.
203
+
204
+ ### TranscriptionManager
205
+
206
+ Utility class for managing transcription results.
207
+
208
+ #### Methods
209
+
210
+ ##### `processUpdate(message: TranscriptionUpdateMessage): void`
211
+
212
+ Processes a transcription update message.
213
+
214
+ ##### `getCurrentTranscription(): string`
215
+
216
+ Gets the current assembled transcription.
217
+
218
+ ##### `getFinalTranscription(): string`
219
+
220
+ Gets the final transcription result.
221
+
222
+ ##### `getSegments(): TranscriptionSegment[]`
223
+
224
+ Gets all transcription segments.
225
+
226
+ ## Examples
227
+
228
+ The library comes with comprehensive examples in the `examples/` directory:
229
+
230
+ ### Basic Examples
231
+
232
+ - **`simple-streaming.ts`** - Recommended streaming workflow using TranscriptionManager
233
+ - **`minimal-streaming.ts`** - Direct event handling without utilities
234
+
235
+ ### Advanced Examples
236
+
237
+ - **`multiple-sessions.ts`** - Managing multiple concurrent sessions
238
+ - **`error-handling.ts`** - Comprehensive error handling patterns
239
+
240
+ ### Streaming Examples
241
+
242
+ - **`real-time-microphone-node-mic.ts`** - Real-time microphone streaming with mic module (Apple Silicon compatible)
243
+ - **`file-streaming.ts`** - Streaming audio from files
244
+
245
+ ### Running Examples
246
+
247
+ ```bash
248
+ # Basic streaming example
249
+ npx ts-node examples/basic/simple-streaming.ts
250
+
251
+ # Streaming with VAD options
252
+ npx ts-node examples/basic/simple-streaming-vad.ts
253
+
254
+ # Real-time microphone (Apple Silicon compatible)
255
+ npm install mic
256
+ npx ts-node examples/streaming/real-time-microphone-node-mic.ts
257
+
258
+ # Multiple sessions
259
+ npx ts-node examples/advanced/multiple-sessions.ts
260
+ ```
261
+
262
+ ## Apple Silicon Compatibility
263
+
264
+ The real-time microphone example uses the `mic` module which provides excellent compatibility with Apple Silicon Macs (M1/M2/M3). This avoids the known issues with `naudiodon`/PortAudio that can cause segmentation faults on ARM-based Macs.
265
+
266
+ ```bash
267
+ npm install mic
268
+ npx ts-node examples/streaming/real-time-microphone-node-mic.ts
269
+ ```
270
+
271
+ ### Why `mic` over `naudiodon`?
272
+
273
+ **Cross-Platform Stability:**
274
+ - `mic` works reliably across macOS (Intel & Apple Silicon), Linux, and Windows
275
+ - `naudiodon` has known compatibility issues with ARM architecture and CI/CD environments
276
+
277
+ **Build Reliability:**
278
+ - `mic` has fewer native dependencies and simpler build requirements
279
+ - `naudiodon` requires PortAudio and can fail in containerized environments (Docker, CI/CD)
280
+
281
+ **Development Experience:**
282
+ - `mic` provides a simpler API for basic microphone access
283
+ - Less prone to segmentation faults and memory issues on Apple Silicon
284
+
285
+ If you encounter build issues with audio dependencies in your CI/CD pipeline, consider excluding them from your production dependencies or using the file-based streaming examples instead.
286
+
287
+
288
+
289
+ ## Supported Languages
290
+
291
+ - English (`en`)
292
+ - Spanish (`es`)
293
+ - French (`fr`)
294
+ - German (`de`)
295
+ - Italian (`it`)
296
+ - Portuguese (`pt`)
297
+ - And more...
298
+
299
+ ## Error Handling
300
+
301
+ The library provides comprehensive error handling with specific error types:
302
+
303
+ ```typescript
304
+ import { SpeechEngineError } from '@livefantasia/speechengine-client';
305
+
306
+ try {
307
+ await session.connect();
308
+ } catch (error) {
309
+ if (error instanceof SpeechEngineError) {
310
+ console.error('SpeechEngine Error:', error.code, error.message);
311
+ console.error('Category:', error.category);
312
+ console.error('Retryable:', error.retryable);
313
+ }
314
+ }
315
+ ```
316
+
317
+ ## Logging
318
+ The client’s logs can be routed into your application’s logger. By default, logs print to the console at `info` level.
319
+
320
+ ### Winston integration example
321
+
322
+ ```typescript
323
+ import { createSpeechEngineClient } from '@livefantasia/speechengine-client';
324
+ import winston from 'winston';
325
+
326
+ const appLogger = winston.createLogger({
327
+ level: 'info',
328
+ transports: [new winston.transports.Console()],
329
+ });
330
+
331
+ const client = createSpeechEngineClient({
332
+ baseUrl: 'https://api.livefantasia.com',
333
+ apiKey: process.env.SPEECHENGINE_API_KEY!,
334
+ logger: {
335
+ level: 'info',
336
+ enableConsole: false,
337
+ customHandler: (entry) => {
338
+ const level = entry.level.toLowerCase();
339
+ const prefix = `${entry.component}${entry.sessionId ? ':' + entry.sessionId : ''}`;
340
+ const message = `${prefix} - ${entry.message}`;
341
+ const meta = entry.data ? { data: entry.data, ts: entry.timestamp.toISOString() } : { ts: entry.timestamp.toISOString() };
342
+ appLogger.log({ level, message, ...meta });
343
+ },
344
+ },
345
+ });
346
+ ```
347
+
348
+ Notes:
349
+ - Set `enableConsole: false` to prevent duplicate console output.
350
+ - `customHandler` receives structured entries; you control formatting and routing.
351
+ - Sensitive auth data (JWTs and `Bearer` tokens) is redacted before logging.
352
+
353
+ ## Development
354
+
355
+ ### Building
356
+
357
+ ```bash
358
+ npm run build
359
+ ```
360
+
361
+ ### Testing
362
+
363
+ ```bash
364
+ npm test
365
+ npm run test:coverage
366
+ ```
367
+
368
+ ### Linting
369
+
370
+ ```bash
371
+ npm run lint
372
+ npm run lint:fix
373
+ ```
374
+
375
+ ### Type Checking
376
+
377
+ ```bash
378
+ npm run type-check
379
+ ```
380
+
381
+ ## Requirements
382
+
383
+ - Node.js >= 20.0.0
384
+ - TypeScript >= 5.1.0 (for development)
385
+
386
+ ## License
387
+
388
+ MIT License - see the [LICENSE](LICENSE) file for details.
389
+
390
+ ## Support
391
+
392
+ - **Documentation**: [API Documentation](docs/)
393
+ - **Issues**: [GitHub Issues](https://github.com/livefantasia/speechengine-client-node/issues)
394
+ - **Examples**: See the `examples/` directory for comprehensive usage examples
395
+
396
+ ## Contributing
397
+
398
+ We welcome contributions! Please see our contributing guidelines for more information.
399
+
400
+ ---
401
+
402
+ Made with ❤️ by [LiveFantasia](https://livefantasia.com)
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Main SpeechEngine Client for LiveFantasia SpeechEngine Platform
3
+ * Provides session management and WebSocket streaming capabilities
4
+ */
5
+ /// <reference types="node" />
6
+ import { EventEmitter } from 'events';
7
+ import { SpeechEngineClientConfig, SessionConfig } from '../types';
8
+ import { StreamingSession } from '../session/StreamingSession';
9
+ /**
10
+ * Main client class for interacting with LiveFantasia SpeechEngine
11
+ * Supports multiple concurrent sessions and thread-safe operations
12
+ */
13
+ export declare class SpeechEngineClient extends EventEmitter {
14
+ private readonly config;
15
+ private readonly activeSessions;
16
+ private readonly sessionStats;
17
+ private readonly logger;
18
+ constructor(config: SpeechEngineClientConfig);
19
+ /**
20
+ * Create a new streaming session by initiating with the Control Plane.
21
+ * This method handles token generation and WebSocket connection.
22
+ * @param config Configuration for the session, like language and sample rate.
23
+ * @returns A StreamingSession instance ready to be connected.
24
+ */
25
+ createSession(config?: Partial<SessionConfig>): Promise<StreamingSession>;
26
+ /**
27
+ * Validates an API response and returns detailed error information
28
+ */
29
+ private validateApiResponse;
30
+ /**
31
+ * Intelligently parse ControlPlane error responses and map to appropriate error codes
32
+ */
33
+ private parseControlPlaneError;
34
+ getSession(sessionId: string): StreamingSession | undefined;
35
+ getActiveSessions(): string[];
36
+ getActiveSessionCount(): number;
37
+ closeSession(sessionId: string): Promise<void>;
38
+ closeAllSessions(): Promise<void>;
39
+ getStats(): {
40
+ activeSessionCount: number;
41
+ totalSessionsCreated: number;
42
+ };
43
+ private validateAndSetDefaults;
44
+ private setupLogger;
45
+ private mapLogLevel;
46
+ private setupSessionEventHandlers;
47
+ private cleanupSession;
48
+ /**
49
+ * Validate VAD parameters to ensure they are within valid ranges
50
+ */
51
+ private validateVadParameters;
52
+ }
53
+ export declare function createSpeechEngineClient(config: SpeechEngineClientConfig): SpeechEngineClient;