@360labs/live-transcribe 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +743 -59
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,59 +5,104 @@
|
|
|
5
5
|
[](https://www.npmjs.com/package/@360labs/live-transcribe)
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
[](https://www.typescriptlang.org/)
|
|
8
|
-
[]()
|
|
9
9
|
|
|
10
10
|
**Built by 360labs**
|
|
11
11
|
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Table of Contents
|
|
15
|
+
|
|
16
|
+
- [Features](#features)
|
|
17
|
+
- [Installation](#installation)
|
|
18
|
+
- [Quick Start](#quick-start)
|
|
19
|
+
- [Providers](#providers)
|
|
20
|
+
- [Web Speech API](#web-speech-api-browser)
|
|
21
|
+
- [Deepgram](#deepgram)
|
|
22
|
+
- [AssemblyAI](#assemblyai)
|
|
23
|
+
- [Custom Provider](#custom-provider)
|
|
24
|
+
- [Session Management](#session-management)
|
|
25
|
+
- [Events](#events)
|
|
26
|
+
- [Export Formats](#export-formats)
|
|
27
|
+
- [Supported Languages](#supported-languages)
|
|
28
|
+
- [API Reference](#api-reference)
|
|
29
|
+
- [Examples](#examples)
|
|
30
|
+
- [Browser Support](#browser-support)
|
|
31
|
+
- [Contributing](#contributing)
|
|
32
|
+
- [License](#license)
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
12
36
|
## Features
|
|
13
37
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
38
|
+
| Feature | Description |
|
|
39
|
+
|---------|-------------|
|
|
40
|
+
| **Multi-Provider Support** | Web Speech API, Deepgram, AssemblyAI, and custom providers |
|
|
41
|
+
| **Real-time Transcription** | Live results with interim and final transcripts |
|
|
42
|
+
| **40+ Languages** | Extensive language support across all providers |
|
|
43
|
+
| **Session Management** | Full control with start, stop, pause, and resume |
|
|
44
|
+
| **Voice Activity Detection** | Automatic speech detection (VAD) |
|
|
45
|
+
| **Audio Recording** | Built-in recording capabilities |
|
|
46
|
+
| **Export Formats** | JSON, Plain Text, SRT, VTT, CSV |
|
|
47
|
+
| **TypeScript First** | Complete type definitions and IntelliSense support |
|
|
48
|
+
| **Event-Driven** | Subscribe to transcription events easily |
|
|
49
|
+
| **Lightweight** | ~200KB package size with zero runtime dependencies |
|
|
50
|
+
| **Cross-Platform** | Works in browsers and Node.js |
|
|
51
|
+
|
|
52
|
+
---
|
|
25
53
|
|
|
26
54
|
## Installation
|
|
27
55
|
|
|
28
56
|
```bash
|
|
57
|
+
# npm
|
|
29
58
|
npm install @360labs/live-transcribe
|
|
59
|
+
|
|
60
|
+
# yarn
|
|
61
|
+
yarn add @360labs/live-transcribe
|
|
62
|
+
|
|
63
|
+
# pnpm
|
|
64
|
+
pnpm add @360labs/live-transcribe
|
|
30
65
|
```
|
|
31
66
|
|
|
67
|
+
---
|
|
68
|
+
|
|
32
69
|
## Quick Start
|
|
33
70
|
|
|
71
|
+
### Basic Usage (Web Speech API)
|
|
72
|
+
|
|
34
73
|
```typescript
|
|
35
74
|
import { createTranscriber, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
36
75
|
|
|
37
|
-
// Create a transcriber
|
|
76
|
+
// Create a transcriber (Web Speech API - no API key required)
|
|
38
77
|
const transcriber = createTranscriber({
|
|
39
78
|
provider: TranscriptionProvider.WebSpeechAPI,
|
|
40
79
|
language: 'en-US',
|
|
80
|
+
interimResults: true,
|
|
41
81
|
});
|
|
42
82
|
|
|
43
|
-
// Listen for transcription
|
|
83
|
+
// Listen for transcription results
|
|
44
84
|
transcriber.on('transcript', (result) => {
|
|
45
|
-
|
|
46
|
-
|
|
85
|
+
if (result.isFinal) {
|
|
86
|
+
console.log('Final:', result.text);
|
|
87
|
+
} else {
|
|
88
|
+
console.log('Interim:', result.text);
|
|
89
|
+
}
|
|
47
90
|
});
|
|
48
91
|
|
|
92
|
+
// Handle errors
|
|
49
93
|
transcriber.on('error', (error) => {
|
|
50
|
-
console.error('
|
|
94
|
+
console.error('Error:', error.message);
|
|
51
95
|
});
|
|
52
96
|
|
|
53
97
|
// Start transcribing
|
|
98
|
+
await transcriber.initialize();
|
|
54
99
|
await transcriber.start();
|
|
55
100
|
|
|
56
101
|
// Stop when done
|
|
57
102
|
await transcriber.stop();
|
|
58
103
|
```
|
|
59
104
|
|
|
60
|
-
|
|
105
|
+
### Using Sessions (Recommended)
|
|
61
106
|
|
|
62
107
|
```typescript
|
|
63
108
|
import { createSession, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
@@ -68,98 +113,730 @@ const session = createSession({
|
|
|
68
113
|
language: 'en-US',
|
|
69
114
|
});
|
|
70
115
|
|
|
71
|
-
//
|
|
116
|
+
// Access the provider for events
|
|
117
|
+
session.provider.on('transcript', (result) => {
|
|
118
|
+
console.log(result.text);
|
|
119
|
+
|
|
120
|
+
// Add to session for later export
|
|
121
|
+
if (result.isFinal) {
|
|
122
|
+
session.addTranscript(result);
|
|
123
|
+
}
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
// Lifecycle control
|
|
72
127
|
await session.start();
|
|
73
|
-
session.pause();
|
|
74
|
-
session.resume();
|
|
128
|
+
session.pause(); // Pause transcription
|
|
129
|
+
session.resume(); // Resume transcription
|
|
75
130
|
await session.stop();
|
|
76
131
|
|
|
77
|
-
// Get
|
|
132
|
+
// Get results
|
|
78
133
|
const transcripts = session.getTranscripts();
|
|
79
|
-
const
|
|
134
|
+
const fullText = session.getFullText();
|
|
135
|
+
const stats = session.getStatistics();
|
|
136
|
+
|
|
137
|
+
// Export in various formats
|
|
138
|
+
const srtFile = session.export('srt');
|
|
139
|
+
const jsonFile = session.export('json');
|
|
80
140
|
```
|
|
81
141
|
|
|
82
|
-
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Providers
|
|
83
145
|
|
|
84
146
|
### Web Speech API (Browser)
|
|
85
147
|
|
|
148
|
+
The Web Speech API is built into modern browsers and requires no API key. It's perfect for quick prototypes and applications that don't need cloud-based accuracy.
|
|
149
|
+
|
|
86
150
|
```typescript
|
|
151
|
+
import { createTranscriber, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
152
|
+
|
|
87
153
|
const transcriber = createTranscriber({
|
|
88
154
|
provider: TranscriptionProvider.WebSpeechAPI,
|
|
89
155
|
language: 'en-US',
|
|
90
|
-
interimResults: true,
|
|
156
|
+
interimResults: true, // Get real-time interim results
|
|
91
157
|
});
|
|
158
|
+
|
|
159
|
+
// Check browser support
|
|
160
|
+
if (transcriber.isSupported()) {
|
|
161
|
+
await transcriber.initialize();
|
|
162
|
+
await transcriber.start();
|
|
163
|
+
}
|
|
92
164
|
```
|
|
93
165
|
|
|
166
|
+
**Pros:**
|
|
167
|
+
- No API key required
|
|
168
|
+
- Free to use
|
|
169
|
+
- Works offline (in some browsers)
|
|
170
|
+
- Low latency
|
|
171
|
+
|
|
172
|
+
**Cons:**
|
|
173
|
+
- Accuracy varies by browser
|
|
174
|
+
- Limited language support compared to cloud providers
|
|
175
|
+
- Requires internet connection in most browsers
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
94
179
|
### Deepgram
|
|
95
180
|
|
|
181
|
+
[Deepgram](https://deepgram.com) offers high-accuracy, real-time transcription with advanced features like speaker diarization and custom vocabularies.
|
|
182
|
+
|
|
96
183
|
```typescript
|
|
184
|
+
import { createTranscriber, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
185
|
+
|
|
97
186
|
const transcriber = createTranscriber({
|
|
98
187
|
provider: TranscriptionProvider.Deepgram,
|
|
99
188
|
apiKey: 'your-deepgram-api-key',
|
|
100
189
|
language: 'en-US',
|
|
101
|
-
model: 'nova-2',
|
|
190
|
+
model: 'nova-2', // Latest model
|
|
191
|
+
punctuate: true, // Auto-punctuation
|
|
192
|
+
interimResults: true,
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
transcriber.on('transcript', (result) => {
|
|
196
|
+
console.log(result.text);
|
|
197
|
+
console.log('Confidence:', result.confidence);
|
|
102
198
|
});
|
|
199
|
+
|
|
200
|
+
await transcriber.initialize();
|
|
201
|
+
await transcriber.start();
|
|
103
202
|
```
|
|
104
203
|
|
|
204
|
+
**Configuration Options:**
|
|
205
|
+
|
|
206
|
+
| Option | Type | Default | Description |
|
|
207
|
+
|--------|------|---------|-------------|
|
|
208
|
+
| `apiKey` | string | required | Your Deepgram API key |
|
|
209
|
+
| `model` | string | 'nova-2' | Model to use (nova-2, nova, enhanced, base) |
|
|
210
|
+
| `language` | string | 'en-US' | Language code |
|
|
211
|
+
| `punctuate` | boolean | true | Enable auto-punctuation |
|
|
212
|
+
| `interimResults` | boolean | true | Enable interim results |
|
|
213
|
+
| `smartFormat` | boolean | false | Enable smart formatting |
|
|
214
|
+
| `diarize` | boolean | false | Enable speaker diarization |
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
105
218
|
### AssemblyAI
|
|
106
219
|
|
|
220
|
+
[AssemblyAI](https://assemblyai.com) provides state-of-the-art transcription with features like automatic language detection and content moderation.
|
|
221
|
+
|
|
107
222
|
```typescript
|
|
223
|
+
import { createTranscriber, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
224
|
+
|
|
108
225
|
const transcriber = createTranscriber({
|
|
109
226
|
provider: TranscriptionProvider.AssemblyAI,
|
|
110
227
|
apiKey: 'your-assemblyai-api-key',
|
|
111
228
|
sampleRate: 16000,
|
|
112
229
|
});
|
|
230
|
+
|
|
231
|
+
transcriber.on('transcript', (result) => {
|
|
232
|
+
console.log(result.text);
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
await transcriber.initialize();
|
|
236
|
+
await transcriber.start();
|
|
113
237
|
```
|
|
114
238
|
|
|
115
|
-
|
|
239
|
+
**Configuration Options:**
|
|
240
|
+
|
|
241
|
+
| Option | Type | Default | Description |
|
|
242
|
+
|--------|------|---------|-------------|
|
|
243
|
+
| `apiKey` | string | required | Your AssemblyAI API key |
|
|
244
|
+
| `sampleRate` | number | 16000 | Audio sample rate in Hz |
|
|
245
|
+
| `wordBoost` | string[] | [] | Words to boost recognition |
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
### Custom Provider
|
|
250
|
+
|
|
251
|
+
You can create custom providers by extending the `BaseTranscriber` class:
|
|
252
|
+
|
|
253
|
+
```typescript
|
|
254
|
+
import { BaseTranscriber, TranscriptionConfig, SessionState } from '@360labs/live-transcribe';
|
|
255
|
+
|
|
256
|
+
class MyCustomProvider extends BaseTranscriber {
|
|
257
|
+
private recognition: any;
|
|
258
|
+
|
|
259
|
+
constructor(config: TranscriptionConfig) {
|
|
260
|
+
super(config);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
isSupported(): boolean {
|
|
264
|
+
return true; // Check if your provider is available
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
async initialize(): Promise<void> {
|
|
268
|
+
// Initialize your provider
|
|
269
|
+
this.setState(SessionState.INITIALIZING);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
async start(): Promise<void> {
|
|
273
|
+
this.setState(SessionState.ACTIVE);
|
|
274
|
+
this.emit('start');
|
|
275
|
+
// Start transcription
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
async stop(): Promise<void> {
|
|
279
|
+
this.setState(SessionState.STOPPED);
|
|
280
|
+
this.emit('stop');
|
|
281
|
+
// Stop transcription
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
pause(): void {
|
|
285
|
+
this.setState(SessionState.PAUSED);
|
|
286
|
+
this.emit('pause');
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
resume(): void {
|
|
290
|
+
this.setState(SessionState.ACTIVE);
|
|
291
|
+
this.emit('resume');
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
sendAudio(audioData: ArrayBuffer): void {
|
|
295
|
+
// Send audio to your provider
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
async cleanup(): Promise<void> {
|
|
299
|
+
// Clean up resources
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
```
|
|
116
303
|
|
|
117
|
-
|
|
304
|
+
---
|
|
118
305
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
306
|
+
## Session Management
|
|
307
|
+
|
|
308
|
+
Sessions provide a higher-level API for managing transcription with built-in transcript storage and export capabilities.
|
|
309
|
+
|
|
310
|
+
```typescript
|
|
311
|
+
import { createSession, SessionManager, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
312
|
+
|
|
313
|
+
// Single session
|
|
314
|
+
const session = createSession({
|
|
315
|
+
provider: TranscriptionProvider.WebSpeechAPI,
|
|
316
|
+
language: 'en-US',
|
|
317
|
+
});
|
|
318
|
+
|
|
319
|
+
// Session properties
|
|
320
|
+
console.log(session.id); // Unique session ID
|
|
321
|
+
console.log(session.getState()); // Current state
|
|
322
|
+
|
|
323
|
+
// Multiple sessions with SessionManager
|
|
324
|
+
const manager = new SessionManager();
|
|
325
|
+
|
|
326
|
+
const session1 = manager.createSession({
|
|
327
|
+
provider: TranscriptionProvider.WebSpeechAPI,
|
|
328
|
+
language: 'en-US',
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
const session2 = manager.createSession({
|
|
332
|
+
provider: TranscriptionProvider.WebSpeechAPI,
|
|
333
|
+
language: 'es-ES',
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
// Get all sessions
|
|
337
|
+
const allSessions = manager.getAllSessions();
|
|
338
|
+
|
|
339
|
+
// Get session by ID
|
|
340
|
+
const existingSession = manager.getSession('session-id');
|
|
341
|
+
|
|
342
|
+
// Get active sessions
|
|
343
|
+
const activeSessions = manager.getActiveSessions();
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### Session States
|
|
347
|
+
|
|
348
|
+
```typescript
|
|
349
|
+
import { SessionState } from '@360labs/live-transcribe';
|
|
350
|
+
|
|
351
|
+
// Available states
|
|
352
|
+
SessionState.IDLE // Initial state
|
|
353
|
+
SessionState.INITIALIZING // Provider initializing
|
|
354
|
+
SessionState.ACTIVE // Transcription in progress
|
|
355
|
+
SessionState.PAUSED // Transcription paused
|
|
356
|
+
SessionState.STOPPING // Stopping transcription
|
|
357
|
+
SessionState.STOPPED // Transcription stopped
|
|
358
|
+
SessionState.ERROR // Error occurred
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
---
|
|
129
362
|
|
|
130
363
|
## Events
|
|
131
364
|
|
|
365
|
+
Subscribe to events for real-time updates:
|
|
366
|
+
|
|
132
367
|
```typescript
|
|
133
|
-
transcriber
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
transcriber.on('
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
368
|
+
const transcriber = createTranscriber({ /* config */ });
|
|
369
|
+
|
|
370
|
+
// Transcript events
|
|
371
|
+
transcriber.on('transcript', (result) => {
|
|
372
|
+
console.log('Text:', result.text);
|
|
373
|
+
console.log('Is Final:', result.isFinal);
|
|
374
|
+
console.log('Confidence:', result.confidence);
|
|
375
|
+
console.log('Timestamp:', result.timestamp);
|
|
376
|
+
});
|
|
377
|
+
|
|
378
|
+
transcriber.on('final', (result) => {
|
|
379
|
+
// Only final transcripts
|
|
380
|
+
console.log('Final transcript:', result.text);
|
|
381
|
+
});
|
|
382
|
+
|
|
383
|
+
transcriber.on('interim', (result) => {
|
|
384
|
+
// Only interim transcripts
|
|
385
|
+
console.log('Interim:', result.text);
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
// Lifecycle events
|
|
389
|
+
transcriber.on('start', () => {
|
|
390
|
+
console.log('Transcription started');
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
transcriber.on('stop', () => {
|
|
394
|
+
console.log('Transcription stopped');
|
|
395
|
+
});
|
|
396
|
+
|
|
397
|
+
transcriber.on('pause', () => {
|
|
398
|
+
console.log('Transcription paused');
|
|
399
|
+
});
|
|
400
|
+
|
|
401
|
+
transcriber.on('resume', () => {
|
|
402
|
+
console.log('Transcription resumed');
|
|
403
|
+
});
|
|
404
|
+
|
|
405
|
+
// State changes
|
|
406
|
+
transcriber.on('stateChange', (state) => {
|
|
407
|
+
console.log('State changed to:', state);
|
|
408
|
+
});
|
|
409
|
+
|
|
410
|
+
// Error handling
|
|
411
|
+
transcriber.on('error', (error) => {
|
|
412
|
+
console.error('Error code:', error.code);
|
|
413
|
+
console.error('Error message:', error.message);
|
|
414
|
+
console.error('Provider:', error.provider);
|
|
415
|
+
});
|
|
416
|
+
|
|
417
|
+
// Remove listeners
|
|
418
|
+
transcriber.off('transcript', myHandler);
|
|
419
|
+
transcriber.removeAllListeners();
|
|
142
420
|
```
|
|
143
421
|
|
|
422
|
+
### TranscriptionResult Object
|
|
423
|
+
|
|
424
|
+
```typescript
|
|
425
|
+
interface TranscriptionResult {
|
|
426
|
+
text: string; // Transcribed text
|
|
427
|
+
isFinal: boolean; // Is this a final result?
|
|
428
|
+
confidence?: number; // Confidence score (0-1)
|
|
429
|
+
timestamp: number; // Unix timestamp
|
|
430
|
+
speaker?: string; // Speaker ID (if diarization enabled)
|
|
431
|
+
language?: string; // Detected language
|
|
432
|
+
words?: Word[]; // Word-level timing
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
interface Word {
|
|
436
|
+
text: string;
|
|
437
|
+
start: number; // Start time in ms
|
|
438
|
+
end: number; // End time in ms
|
|
439
|
+
confidence?: number;
|
|
440
|
+
}
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
---
|
|
444
|
+
|
|
445
|
+
## Export Formats
|
|
446
|
+
|
|
447
|
+
Export transcripts in multiple formats:
|
|
448
|
+
|
|
449
|
+
```typescript
|
|
450
|
+
const session = createSession({ /* config */ });
|
|
451
|
+
|
|
452
|
+
// Add transcripts during session
|
|
453
|
+
session.provider.on('transcript', (result) => {
|
|
454
|
+
if (result.isFinal) {
|
|
455
|
+
session.addTranscript(result);
|
|
456
|
+
}
|
|
457
|
+
});
|
|
458
|
+
|
|
459
|
+
// After transcription, export in various formats
|
|
460
|
+
|
|
461
|
+
// JSON - Full data with metadata
|
|
462
|
+
const jsonExport = session.export('json');
|
|
463
|
+
console.log(jsonExport.data); // JSON string
|
|
464
|
+
console.log(jsonExport.filename); // 'transcript-{id}.json'
|
|
465
|
+
console.log(jsonExport.mimeType); // 'application/json'
|
|
466
|
+
|
|
467
|
+
// Plain Text - Just the text
|
|
468
|
+
const textExport = session.export('text');
|
|
469
|
+
// Output: "Hello world. How are you today?"
|
|
470
|
+
|
|
471
|
+
// SRT - SubRip subtitles
|
|
472
|
+
const srtExport = session.export('srt');
|
|
473
|
+
// Output:
|
|
474
|
+
// 1
|
|
475
|
+
// 00:00:01,000 --> 00:00:03,500
|
|
476
|
+
// Hello world.
|
|
477
|
+
//
|
|
478
|
+
// 2
|
|
479
|
+
// 00:00:04,000 --> 00:00:06,500
|
|
480
|
+
// How are you today?
|
|
481
|
+
|
|
482
|
+
// VTT - WebVTT subtitles
|
|
483
|
+
const vttExport = session.export('vtt');
|
|
484
|
+
// Output:
|
|
485
|
+
// WEBVTT
|
|
486
|
+
//
|
|
487
|
+
// 00:00:01.000 --> 00:00:03.500
|
|
488
|
+
// Hello world.
|
|
489
|
+
//
|
|
490
|
+
// 00:00:04.000 --> 00:00:06.500
|
|
491
|
+
// How are you today?
|
|
492
|
+
|
|
493
|
+
// CSV - Spreadsheet format
|
|
494
|
+
const csvExport = session.export('csv');
|
|
495
|
+
// Output: timestamp,text,confidence,isFinal
|
|
496
|
+
// 1234567890,Hello world,0.95,true
|
|
497
|
+
|
|
498
|
+
// Download in browser
|
|
499
|
+
function downloadTranscript(format: string) {
|
|
500
|
+
const exported = session.export(format);
|
|
501
|
+
const blob = new Blob([exported.data], { type: exported.mimeType });
|
|
502
|
+
const url = URL.createObjectURL(blob);
|
|
503
|
+
const a = document.createElement('a');
|
|
504
|
+
a.href = url;
|
|
505
|
+
a.download = exported.filename;
|
|
506
|
+
a.click();
|
|
507
|
+
URL.revokeObjectURL(url);
|
|
508
|
+
}
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
---
|
|
512
|
+
|
|
513
|
+
## Supported Languages
|
|
514
|
+
|
|
515
|
+
The library supports 40+ languages. Language support varies by provider.
|
|
516
|
+
|
|
517
|
+
### Web Speech API Languages
|
|
518
|
+
|
|
519
|
+
| Language | Code | Language | Code |
|
|
520
|
+
|----------|------|----------|------|
|
|
521
|
+
| English (US) | `en-US` | English (UK) | `en-GB` |
|
|
522
|
+
| English (Australia) | `en-AU` | English (India) | `en-IN` |
|
|
523
|
+
| Spanish (Spain) | `es-ES` | Spanish (Mexico) | `es-MX` |
|
|
524
|
+
| French (France) | `fr-FR` | French (Canada) | `fr-CA` |
|
|
525
|
+
| German | `de-DE` | Italian | `it-IT` |
|
|
526
|
+
| Portuguese (Brazil) | `pt-BR` | Portuguese (Portugal) | `pt-PT` |
|
|
527
|
+
| Chinese (Simplified) | `zh-CN` | Chinese (Traditional) | `zh-TW` |
|
|
528
|
+
| Japanese | `ja-JP` | Korean | `ko-KR` |
|
|
529
|
+
| Hindi | `hi-IN` | Arabic (Saudi Arabia) | `ar-SA` |
|
|
530
|
+
| Russian | `ru-RU` | Dutch | `nl-NL` |
|
|
531
|
+
| Polish | `pl-PL` | Turkish | `tr-TR` |
|
|
532
|
+
| Thai | `th-TH` | Vietnamese | `vi-VN` |
|
|
533
|
+
| Indonesian | `id-ID` | Hebrew | `he-IL` |
|
|
534
|
+
| Czech | `cs-CZ` | Greek | `el-GR` |
|
|
535
|
+
| Swedish | `sv-SE` | Danish | `da-DK` |
|
|
536
|
+
| Finnish | `fi-FI` | Norwegian | `no-NO` |
|
|
537
|
+
| Ukrainian | `uk-UA` | Romanian | `ro-RO` |
|
|
538
|
+
| Hungarian | `hu-HU` | Malay | `ms-MY` |
|
|
539
|
+
|
|
540
|
+
### Setting Language
|
|
541
|
+
|
|
542
|
+
```typescript
|
|
543
|
+
// At creation
|
|
544
|
+
const transcriber = createTranscriber({
|
|
545
|
+
provider: TranscriptionProvider.WebSpeechAPI,
|
|
546
|
+
language: 'es-ES', // Spanish (Spain)
|
|
547
|
+
});
|
|
548
|
+
|
|
549
|
+
// Or use session
|
|
550
|
+
const session = createSession({
|
|
551
|
+
provider: TranscriptionProvider.WebSpeechAPI,
|
|
552
|
+
language: 'fr-FR', // French
|
|
553
|
+
});
|
|
554
|
+
```
|
|
555
|
+
|
|
556
|
+
---
|
|
557
|
+
|
|
558
|
+
## API Reference
|
|
559
|
+
|
|
560
|
+
### createTranscriber(config)
|
|
561
|
+
|
|
562
|
+
Creates a new transcriber instance.
|
|
563
|
+
|
|
564
|
+
```typescript
|
|
565
|
+
function createTranscriber(config: TranscriptionConfig): ITranscriptionProvider;
|
|
566
|
+
```
|
|
567
|
+
|
|
568
|
+
**Config Options:**
|
|
569
|
+
|
|
570
|
+
| Option | Type | Required | Default | Description |
|
|
571
|
+
|--------|------|----------|---------|-------------|
|
|
572
|
+
| `provider` | TranscriptionProvider | Yes | - | Provider to use |
|
|
573
|
+
| `apiKey` | string | For cloud | - | API key for cloud providers |
|
|
574
|
+
| `language` | string | No | 'en-US' | Language code |
|
|
575
|
+
| `interimResults` | boolean | No | true | Enable interim results |
|
|
576
|
+
| `punctuation` | boolean | No | true | Enable auto-punctuation |
|
|
577
|
+
| `profanityFilter` | boolean | No | false | Filter profanity |
|
|
578
|
+
|
|
579
|
+
### createSession(config)
|
|
580
|
+
|
|
581
|
+
Creates a new transcription session.
|
|
582
|
+
|
|
583
|
+
```typescript
|
|
584
|
+
function createSession(config: TranscriptionConfig): TranscriptionSession;
|
|
585
|
+
```
|
|
586
|
+
|
|
587
|
+
### TranscriptionSession
|
|
588
|
+
|
|
589
|
+
| Method | Returns | Description |
|
|
590
|
+
|--------|---------|-------------|
|
|
591
|
+
| `start()` | Promise<void> | Start transcription |
|
|
592
|
+
| `stop()` | Promise<void> | Stop transcription |
|
|
593
|
+
| `pause()` | void | Pause transcription |
|
|
594
|
+
| `resume()` | void | Resume transcription |
|
|
595
|
+
| `getState()` | SessionState | Get current state |
|
|
596
|
+
| `getTranscripts(finalOnly?)` | TranscriptionResult[] | Get all transcripts |
|
|
597
|
+
| `getFullText()` | string | Get concatenated text |
|
|
598
|
+
| `getStatistics()` | SessionStatistics | Get session stats |
|
|
599
|
+
| `addTranscript(result)` | void | Add a transcript |
|
|
600
|
+
| `export(format)` | ExportResult | Export transcripts |
|
|
601
|
+
|
|
602
|
+
### SessionStatistics
|
|
603
|
+
|
|
604
|
+
```typescript
|
|
605
|
+
interface SessionStatistics {
|
|
606
|
+
wordCount: number;
|
|
607
|
+
transcriptCount: number;
|
|
608
|
+
duration: number;
|
|
609
|
+
averageConfidence: number;
|
|
610
|
+
}
|
|
611
|
+
```
|
|
612
|
+
|
|
613
|
+
---
|
|
614
|
+
|
|
144
615
|
## Examples
|
|
145
616
|
|
|
146
|
-
|
|
617
|
+
### React Integration
|
|
618
|
+
|
|
619
|
+
```tsx
|
|
620
|
+
import React, { useState, useEffect, useRef } from 'react';
|
|
621
|
+
import { createSession, TranscriptionProvider, TranscriptionSession } from '@360labs/live-transcribe';
|
|
622
|
+
|
|
623
|
+
function TranscriptionComponent() {
|
|
624
|
+
const [isRecording, setIsRecording] = useState(false);
|
|
625
|
+
const [transcript, setTranscript] = useState('');
|
|
626
|
+
const sessionRef = useRef<TranscriptionSession | null>(null);
|
|
627
|
+
|
|
628
|
+
useEffect(() => {
|
|
629
|
+
return () => {
|
|
630
|
+
sessionRef.current?.stop();
|
|
631
|
+
};
|
|
632
|
+
}, []);
|
|
633
|
+
|
|
634
|
+
const startRecording = async () => {
|
|
635
|
+
const session = createSession({
|
|
636
|
+
provider: TranscriptionProvider.WebSpeechAPI,
|
|
637
|
+
language: 'en-US',
|
|
638
|
+
});
|
|
639
|
+
|
|
640
|
+
session.provider.on('transcript', (result) => {
|
|
641
|
+
if (result.isFinal) {
|
|
642
|
+
setTranscript(prev => prev + ' ' + result.text);
|
|
643
|
+
session.addTranscript(result);
|
|
644
|
+
}
|
|
645
|
+
});
|
|
646
|
+
|
|
647
|
+
sessionRef.current = session;
|
|
648
|
+
await session.start();
|
|
649
|
+
setIsRecording(true);
|
|
650
|
+
};
|
|
651
|
+
|
|
652
|
+
const stopRecording = async () => {
|
|
653
|
+
await sessionRef.current?.stop();
|
|
654
|
+
setIsRecording(false);
|
|
655
|
+
};
|
|
656
|
+
|
|
657
|
+
return (
|
|
658
|
+
<div>
|
|
659
|
+
<button onClick={isRecording ? stopRecording : startRecording}>
|
|
660
|
+
{isRecording ? 'Stop' : 'Start'} Recording
|
|
661
|
+
</button>
|
|
662
|
+
<p>{transcript}</p>
|
|
663
|
+
</div>
|
|
664
|
+
);
|
|
665
|
+
}
|
|
666
|
+
```
|
|
147
667
|
|
|
148
|
-
|
|
149
|
-
- React integration
|
|
150
|
-
- Vue integration
|
|
151
|
-
- Multiple providers
|
|
152
|
-
- Session management
|
|
153
|
-
- Export formats
|
|
154
|
-
- Custom providers
|
|
668
|
+
### Vue Integration
|
|
155
669
|
|
|
156
|
-
|
|
670
|
+
```vue
|
|
671
|
+
<template>
|
|
672
|
+
<div>
|
|
673
|
+
<button @click="toggleRecording">
|
|
674
|
+
{{ isRecording ? 'Stop' : 'Start' }} Recording
|
|
675
|
+
</button>
|
|
676
|
+
<p>{{ transcript }}</p>
|
|
677
|
+
</div>
|
|
678
|
+
</template>
|
|
157
679
|
|
|
158
|
-
|
|
680
|
+
<script setup lang="ts">
|
|
681
|
+
import { ref, onUnmounted } from 'vue';
|
|
682
|
+
import { createSession, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
683
|
+
|
|
684
|
+
const isRecording = ref(false);
|
|
685
|
+
const transcript = ref('');
|
|
686
|
+
let session: any = null;
|
|
687
|
+
|
|
688
|
+
const toggleRecording = async () => {
|
|
689
|
+
if (isRecording.value) {
|
|
690
|
+
await session?.stop();
|
|
691
|
+
isRecording.value = false;
|
|
692
|
+
} else {
|
|
693
|
+
session = createSession({
|
|
694
|
+
provider: TranscriptionProvider.WebSpeechAPI,
|
|
695
|
+
language: 'en-US',
|
|
696
|
+
});
|
|
697
|
+
|
|
698
|
+
session.provider.on('transcript', (result: any) => {
|
|
699
|
+
if (result.isFinal) {
|
|
700
|
+
transcript.value += ' ' + result.text;
|
|
701
|
+
}
|
|
702
|
+
});
|
|
703
|
+
|
|
704
|
+
await session.start();
|
|
705
|
+
isRecording.value = true;
|
|
706
|
+
}
|
|
707
|
+
};
|
|
708
|
+
|
|
709
|
+
onUnmounted(() => {
|
|
710
|
+
session?.stop();
|
|
711
|
+
});
|
|
712
|
+
</script>
|
|
713
|
+
```
|
|
714
|
+
|
|
715
|
+
### Node.js with Deepgram
|
|
716
|
+
|
|
717
|
+
```typescript
|
|
718
|
+
import { createTranscriber, TranscriptionProvider } from '@360labs/live-transcribe';
|
|
719
|
+
import { createReadStream } from 'fs';
|
|
720
|
+
|
|
721
|
+
const transcriber = createTranscriber({
|
|
722
|
+
provider: TranscriptionProvider.Deepgram,
|
|
723
|
+
apiKey: process.env.DEEPGRAM_API_KEY,
|
|
724
|
+
language: 'en-US',
|
|
725
|
+
});
|
|
726
|
+
|
|
727
|
+
transcriber.on('transcript', (result) => {
|
|
728
|
+
console.log(result.text);
|
|
729
|
+
});
|
|
730
|
+
|
|
731
|
+
await transcriber.initialize();
|
|
732
|
+
await transcriber.start();
|
|
733
|
+
|
|
734
|
+
// Send audio data
|
|
735
|
+
const audioStream = createReadStream('audio.wav');
|
|
736
|
+
audioStream.on('data', (chunk) => {
|
|
737
|
+
transcriber.sendAudio(chunk);
|
|
738
|
+
});
|
|
739
|
+
|
|
740
|
+
audioStream.on('end', async () => {
|
|
741
|
+
await transcriber.stop();
|
|
742
|
+
});
|
|
743
|
+
```
|
|
744
|
+
|
|
745
|
+
---
|
|
746
|
+
|
|
747
|
+
## Browser Support
|
|
748
|
+
|
|
749
|
+
| Browser | Web Speech API | WebSocket (Cloud) |
|
|
750
|
+
|---------|---------------|-------------------|
|
|
751
|
+
| Chrome 33+ | ✅ Full | ✅ Full |
|
|
752
|
+
| Edge 79+ | ✅ Full | ✅ Full |
|
|
753
|
+
| Safari 14.1+ | ✅ Partial | ✅ Full |
|
|
754
|
+
| Firefox | ❌ | ✅ Full |
|
|
755
|
+
| Opera 20+ | ✅ Full | ✅ Full |
|
|
756
|
+
|
|
757
|
+
**Note:** Web Speech API requires an internet connection in most browsers as it uses cloud-based recognition.
|
|
758
|
+
|
|
759
|
+
---
|
|
760
|
+
|
|
761
|
+
## Error Handling
|
|
762
|
+
|
|
763
|
+
```typescript
|
|
764
|
+
import { TranscriptionError, ErrorCode } from '@360labs/live-transcribe';
|
|
765
|
+
|
|
766
|
+
transcriber.on('error', (error: TranscriptionError) => {
|
|
767
|
+
switch (error.code) {
|
|
768
|
+
case ErrorCode.MICROPHONE_ACCESS_DENIED:
|
|
769
|
+
console.log('Please allow microphone access');
|
|
770
|
+
break;
|
|
771
|
+
case ErrorCode.NETWORK_ERROR:
|
|
772
|
+
console.log('Network error - check your connection');
|
|
773
|
+
break;
|
|
774
|
+
case ErrorCode.AUTHENTICATION_FAILED:
|
|
775
|
+
console.log('Invalid API key');
|
|
776
|
+
break;
|
|
777
|
+
case ErrorCode.UNSUPPORTED_BROWSER:
|
|
778
|
+
console.log('Browser not supported');
|
|
779
|
+
break;
|
|
780
|
+
default:
|
|
781
|
+
console.log('Error:', error.message);
|
|
782
|
+
}
|
|
783
|
+
});
|
|
784
|
+
```
|
|
785
|
+
|
|
786
|
+
### Error Codes
|
|
787
|
+
|
|
788
|
+
| Code | Description |
|
|
789
|
+
|------|-------------|
|
|
790
|
+
| `INITIALIZATION_FAILED` | Provider failed to initialize |
|
|
791
|
+
| `AUTHENTICATION_FAILED` | Invalid or missing API key |
|
|
792
|
+
| `NETWORK_ERROR` | Network connection error |
|
|
793
|
+
| `MICROPHONE_ACCESS_DENIED` | Microphone permission denied |
|
|
794
|
+
| `UNSUPPORTED_BROWSER` | Browser doesn't support required APIs |
|
|
795
|
+
| `INVALID_CONFIG` | Invalid configuration provided |
|
|
796
|
+
| `PROVIDER_ERROR` | Provider-specific error |
|
|
797
|
+
| `UNKNOWN_ERROR` | Unknown error occurred |
|
|
798
|
+
|
|
799
|
+
---
|
|
800
|
+
|
|
801
|
+
## Audio Processing Utilities
|
|
802
|
+
|
|
803
|
+
The library includes audio processing utilities:
|
|
804
|
+
|
|
805
|
+
```typescript
|
|
806
|
+
import { AudioProcessor } from '@360labs/live-transcribe';
|
|
807
|
+
|
|
808
|
+
// Convert Float32 to Int16 (for sending to APIs)
|
|
809
|
+
const int16Data = AudioProcessor.convertFloat32ToInt16(float32Array);
|
|
810
|
+
|
|
811
|
+
// Convert Int16 to Float32
|
|
812
|
+
const float32Data = AudioProcessor.convertInt16ToFloat32(int16Array);
|
|
813
|
+
|
|
814
|
+
// Resample audio
|
|
815
|
+
const resampled = AudioProcessor.resampleBuffer(buffer, 44100, 16000);
|
|
816
|
+
|
|
817
|
+
// Normalize audio levels
|
|
818
|
+
const normalized = AudioProcessor.normalizeBuffer(buffer);
|
|
819
|
+
|
|
820
|
+
// Apply gain
|
|
821
|
+
const amplified = AudioProcessor.applyGain(buffer, 1.5);
|
|
822
|
+
|
|
823
|
+
// Mix two audio buffers
|
|
824
|
+
const mixed = AudioProcessor.mixBuffers(buffer1, buffer2, 0.5);
|
|
825
|
+
```
|
|
826
|
+
|
|
827
|
+
---
|
|
159
828
|
|
|
160
829
|
## Contributing
|
|
161
830
|
|
|
162
|
-
|
|
831
|
+
We welcome contributions! Please see our [Contributing Guide](./CONTRIBUTING.md) for details.
|
|
832
|
+
|
|
833
|
+
1. Fork the repository
|
|
834
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
835
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
836
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
837
|
+
5. Open a Pull Request
|
|
838
|
+
|
|
839
|
+
---
|
|
163
840
|
|
|
164
841
|
## License
|
|
165
842
|
|
|
@@ -167,4 +844,11 @@ This project is licensed under the MIT License - see the [LICENSE](./LICENSE) fi
|
|
|
167
844
|
|
|
168
845
|
---
|
|
169
846
|
|
|
170
|
-
|
|
847
|
+
## Support
|
|
848
|
+
|
|
849
|
+
- **Issues:** [GitHub Issues](https://github.com/360labs/live-transcribe/issues)
|
|
850
|
+
- **Discussions:** [GitHub Discussions](https://github.com/360labs/live-transcribe/discussions)
|
|
851
|
+
|
|
852
|
+
---
|
|
853
|
+
|
|
854
|
+
**Made with ❤️ by 360labs**
|
package/package.json
CHANGED