whisper.rn 0.5.0-rc.1 → 0.5.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +128 -50
  2. package/android/src/main/CMakeLists.txt +1 -0
  3. package/android/src/main/java/com/rnwhisper/RNWhisper.java +35 -0
  4. package/android/src/main/java/com/rnwhisper/WhisperContext.java +33 -0
  5. package/android/src/main/jni.cpp +81 -0
  6. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
  7. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
  8. package/cpp/jsi/RNWhisperJSI.cpp +42 -6
  9. package/ios/RNWhisper.mm +11 -0
  10. package/ios/RNWhisperContext.h +1 -0
  11. package/ios/RNWhisperContext.mm +46 -0
  12. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  13. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  14. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  15. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  16. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  17. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  18. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  19. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  20. package/lib/commonjs/AudioSessionIos.js +2 -1
  21. package/lib/commonjs/AudioSessionIos.js.map +1 -1
  22. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  23. package/lib/commonjs/index.js +50 -10
  24. package/lib/commonjs/index.js.map +1 -1
  25. package/lib/commonjs/jest-mock.js +126 -0
  26. package/lib/commonjs/jest-mock.js.map +1 -0
  27. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +857 -0
  28. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -0
  29. package/lib/commonjs/realtime-transcription/SliceManager.js +233 -0
  30. package/lib/commonjs/realtime-transcription/SliceManager.js.map +1 -0
  31. package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js +133 -0
  32. package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
  33. package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js +201 -0
  34. package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
  35. package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +309 -0
  36. package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
  37. package/lib/commonjs/realtime-transcription/index.js +27 -0
  38. package/lib/commonjs/realtime-transcription/index.js.map +1 -0
  39. package/lib/commonjs/realtime-transcription/types.js +114 -0
  40. package/lib/commonjs/realtime-transcription/types.js.map +1 -0
  41. package/lib/commonjs/utils/WavFileReader.js +158 -0
  42. package/lib/commonjs/utils/WavFileReader.js.map +1 -0
  43. package/lib/commonjs/utils/WavFileWriter.js +181 -0
  44. package/lib/commonjs/utils/WavFileWriter.js.map +1 -0
  45. package/lib/commonjs/utils/common.js +25 -0
  46. package/lib/commonjs/utils/common.js.map +1 -0
  47. package/lib/module/AudioSessionIos.js +2 -1
  48. package/lib/module/AudioSessionIos.js.map +1 -1
  49. package/lib/module/NativeRNWhisper.js.map +1 -1
  50. package/lib/module/index.js +48 -10
  51. package/lib/module/index.js.map +1 -1
  52. package/lib/module/jest-mock.js +124 -0
  53. package/lib/module/jest-mock.js.map +1 -0
  54. package/lib/module/realtime-transcription/RealtimeTranscriber.js +851 -0
  55. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -0
  56. package/lib/module/realtime-transcription/SliceManager.js +226 -0
  57. package/lib/module/realtime-transcription/SliceManager.js.map +1 -0
  58. package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js +124 -0
  59. package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
  60. package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js +194 -0
  61. package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
  62. package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +302 -0
  63. package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
  64. package/lib/module/realtime-transcription/index.js +8 -0
  65. package/lib/module/realtime-transcription/index.js.map +1 -0
  66. package/lib/module/realtime-transcription/types.js +107 -0
  67. package/lib/module/realtime-transcription/types.js.map +1 -0
  68. package/lib/module/utils/WavFileReader.js +151 -0
  69. package/lib/module/utils/WavFileReader.js.map +1 -0
  70. package/lib/module/utils/WavFileWriter.js +174 -0
  71. package/lib/module/utils/WavFileWriter.js.map +1 -0
  72. package/lib/module/utils/common.js +18 -0
  73. package/lib/module/utils/common.js.map +1 -0
  74. package/lib/typescript/AudioSessionIos.d.ts +1 -1
  75. package/lib/typescript/AudioSessionIos.d.ts.map +1 -1
  76. package/lib/typescript/NativeRNWhisper.d.ts +1 -0
  77. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  78. package/lib/typescript/index.d.ts +8 -4
  79. package/lib/typescript/index.d.ts.map +1 -1
  80. package/lib/typescript/jest-mock.d.ts +2 -0
  81. package/lib/typescript/jest-mock.d.ts.map +1 -0
  82. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +166 -0
  83. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -0
  84. package/lib/typescript/realtime-transcription/SliceManager.d.ts +72 -0
  85. package/lib/typescript/realtime-transcription/SliceManager.d.ts.map +1 -0
  86. package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts +22 -0
  87. package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts.map +1 -0
  88. package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts +44 -0
  89. package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts.map +1 -0
  90. package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts +75 -0
  91. package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts.map +1 -0
  92. package/lib/typescript/realtime-transcription/index.d.ts +6 -0
  93. package/lib/typescript/realtime-transcription/index.d.ts.map +1 -0
  94. package/lib/typescript/realtime-transcription/types.d.ts +222 -0
  95. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -0
  96. package/lib/typescript/utils/WavFileReader.d.ts +61 -0
  97. package/lib/typescript/utils/WavFileReader.d.ts.map +1 -0
  98. package/lib/typescript/utils/WavFileWriter.d.ts +57 -0
  99. package/lib/typescript/utils/WavFileWriter.d.ts.map +1 -0
  100. package/lib/typescript/utils/common.d.ts +9 -0
  101. package/lib/typescript/utils/common.d.ts.map +1 -0
  102. package/package.json +18 -6
  103. package/src/AudioSessionIos.ts +3 -2
  104. package/src/NativeRNWhisper.ts +2 -0
  105. package/src/index.ts +74 -22
  106. package/{jest/mock.js → src/jest-mock.ts} +2 -2
  107. package/src/realtime-transcription/RealtimeTranscriber.ts +1015 -0
  108. package/src/realtime-transcription/SliceManager.ts +252 -0
  109. package/src/realtime-transcription/adapters/AudioPcmStreamAdapter.ts +143 -0
  110. package/src/realtime-transcription/adapters/JestAudioStreamAdapter.ts +251 -0
  111. package/src/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.ts +378 -0
  112. package/src/realtime-transcription/index.ts +34 -0
  113. package/src/realtime-transcription/types.ts +283 -0
  114. package/src/utils/WavFileReader.ts +202 -0
  115. package/src/utils/WavFileWriter.ts +206 -0
  116. package/src/utils/common.ts +17 -0
@@ -0,0 +1,378 @@
1
+ import type {
2
+ AudioStreamInterface,
3
+ AudioStreamConfig,
4
+ AudioStreamData,
5
+ } from '../types'
6
+ import { WavFileReader, WavFileReaderFs } from '../../utils/WavFileReader'
7
+
8
+ export interface SimulateFileOptions { // Construction options for SimulateFileAudioStreamAdapter
9
+ fs: WavFileReaderFs // File-system dependency handed to the WavFileReader constructor
10
+ filePath: string // Path of the WAV file to replay; handed to WavFileReader together with `fs`
11
+ playbackSpeed?: number // Default: 1.0 (real-time); 0.5 = half speed, 2.0 = double speed. The timer interval is chunkDurationMs / playbackSpeed
12
+ chunkDurationMs?: number // Default: 100. Duration of audio, in milliseconds, delivered per chunk
13
+ loop?: boolean // Default: false. When true, playback restarts from byte 0 at end of file instead of stopping
14
+ onEndOfFile?: () => void // Called when the end of the file is reached and `loop` is not set, just before the stream stops
15
+ logger?: (message: string) => void // Default: noop. Receives messages prefixed with "[SimulateFileAudioStreamAdapter] "
16
+ }
17
+
18
/**
 * Audio stream adapter that simulates a live recording by replaying a WAV
 * file in fixed-duration chunks on a timer.
 *
 * Chunks are read through `WavFileReader.getAudioSlice` and delivered to the
 * callback registered with `onData`, tagged with the file's own sample rate
 * and channel count. `playbackSpeed` only changes how often a chunk is
 * emitted; the amount of audio per chunk is fixed by `chunkDurationMs`.
 */
export class SimulateFileAudioStreamAdapter implements AudioStreamInterface {
  private fileReader: WavFileReader

  private config: AudioStreamConfig | null = null

  private options: SimulateFileOptions

  private isInitialized = false

  private recording = false

  private dataCallback?: (data: AudioStreamData) => void

  private errorCallback?: (error: string) => void

  private statusCallback?: (isRecording: boolean) => void

  private streamInterval?: ReturnType<typeof setInterval>

  private currentBytePosition = 0

  private startTime = 0

  private pausedTime = 0

  private hasReachedEnd = false

  /**
   * @param options File location, file-system dependency and playback
   * settings. Unspecified settings default to real-time speed, 100ms chunks,
   * no looping and a no-op logger.
   */
  constructor(options: SimulateFileOptions) {
    this.options = {
      playbackSpeed: 1.0,
      chunkDurationMs: 100,
      loop: false,
      logger: () => {},
      ...options,
    }
    this.fileReader = new WavFileReader(this.options.fs, this.options.filePath)
  }

  /**
   * Load the WAV file and remember the requested stream configuration.
   *
   * A format mismatch between the file and `config` is only logged; the
   * file's own format is what gets streamed. If the adapter was already
   * initialized it is released first.
   *
   * @param config Requested stream format (all fields optional).
   * @throws Error when the file cannot be initialized or has no header. The
   * registered error callback is invoked before the error is thrown.
   */
  async initialize(config: AudioStreamConfig): Promise<void> {
    if (this.isInitialized) {
      await this.release()
    }

    try {
      this.config = config

      // Initialize the WAV file reader
      await this.fileReader.initialize()

      const header = this.fileReader.getHeader()
      if (!header) {
        throw new Error('Failed to read WAV file header')
      }

      // Warn about mismatched formats but allow processing. Config fields are
      // optional, so only compare the ones the caller actually supplied.
      if (
        config.sampleRate !== undefined &&
        header.sampleRate !== config.sampleRate
      ) {
        this.log(
          `WAV file sample rate (${header.sampleRate}Hz) differs from config (${config.sampleRate}Hz)`,
        )
      }

      if (config.channels !== undefined && header.channels !== config.channels) {
        this.log(
          `WAV file channels (${header.channels}) differs from config (${config.channels})`,
        )
      }

      if (
        config.bitsPerSample !== undefined &&
        header.bitsPerSample !== config.bitsPerSample
      ) {
        this.log(
          `WAV file bits per sample (${header.bitsPerSample}) differs from config (${config.bitsPerSample})`,
        )
      }

      this.isInitialized = true
      this.log(
        `Simulate audio stream initialized: ${header.duration.toFixed(2)}s at ${
          this.options.playbackSpeed
        }x speed`,
      )
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown initialization error'
      this.errorCallback?.(errorMessage)
      throw new Error(
        `Failed to initialize SimulateFileAudioStreamAdapter: ${errorMessage}`,
      )
    }
  }

  /**
   * Start (or resume) emitting chunks from the current byte position.
   * Calling it while already recording is a no-op.
   *
   * @throws Error when the adapter has not been initialized.
   */
  async start(): Promise<void> {
    if (!this.isInitialized || !this.config) {
      throw new Error('Adapter not initialized')
    }

    if (this.recording) {
      return
    }

    try {
      this.recording = true
      this.hasReachedEnd = false
      this.startTime = Date.now() - this.pausedTime
      this.statusCallback?.(true)

      // Start streaming chunks
      this.startStreaming()

      this.log('File audio simulation started')
    } catch (error) {
      this.recording = false
      this.statusCallback?.(false)
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown start error'
      this.errorCallback?.(errorMessage)
      throw error
    }
  }

  /**
   * Stop emitting chunks while keeping the current byte position, so a later
   * `start()` resumes where playback stopped. Errors are reported through
   * the error callback instead of being thrown.
   */
  async stop(): Promise<void> {
    if (!this.recording) {
      return
    }

    try {
      this.recording = false
      this.pausedTime = Date.now() - this.startTime

      this.clearStreamInterval()

      this.statusCallback?.(false)
      this.log('File audio simulation stopped')
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : 'Unknown stop error'
      this.errorCallback?.(errorMessage)
    }
  }

  /** Whether chunks are currently being emitted. */
  isRecording(): boolean {
    return this.recording
  }

  /** Register the receiver for audio chunks (replaces any previous one). */
  onData(callback: (data: AudioStreamData) => void): void {
    this.dataCallback = callback
  }

  /** Register the receiver for error messages (replaces any previous one). */
  onError(callback: (error: string) => void): void {
    this.errorCallback = callback
  }

  /** Register the receiver for recording-state changes (replaces any previous one). */
  onStatusChange(callback: (isRecording: boolean) => void): void {
    this.statusCallback = callback
  }

  /**
   * Stop streaming and reset the playback state. The adapter must be
   * initialized again before the next `start()`.
   */
  async release(): Promise<void> {
    await this.stop()
    this.isInitialized = false
    this.currentBytePosition = 0
    this.pausedTime = 0
    // Keep statistics consistent with the rewound position.
    this.hasReachedEnd = false
    this.log('SimulateFileAudioStreamAdapter released')
  }

  /**
   * Compute how many bytes to read per tick for the given chunk duration.
   *
   * The result is rounded down to a whole number of sample frames
   * (channels x bytes per sample) so that a chunk never ends in the middle of
   * a sample, and is never smaller than one frame so that a very short chunk
   * duration cannot produce a zero-byte read.
   */
  private static computeChunkSizeBytes(
    header: { sampleRate: number; channels: number; bitsPerSample: number },
    chunkDurationMs: number,
  ): number {
    const bytesPerSecond =
      header.sampleRate * header.channels * (header.bitsPerSample / 8)
    const rawChunkBytes = Math.floor((chunkDurationMs / 1000) * bytesPerSecond)
    const frameBytes = Math.max(
      1,
      header.channels * Math.ceil(header.bitsPerSample / 8),
    )
    const alignedBytes = rawChunkBytes - (rawChunkBytes % frameBytes)
    return Math.max(frameBytes, alignedBytes)
  }

  /** Cancel the chunk timer, if one is running. */
  private clearStreamInterval(): void {
    if (this.streamInterval) {
      clearInterval(this.streamInterval)
      this.streamInterval = undefined
    }
  }

  /**
   * Stop and start again so that changed settings take effect. A failure to
   * start is logged rather than left as an unhandled promise rejection
   * (`start()` already reports its own runtime errors via the error callback).
   */
  private restartStreaming(): void {
    this.stop()
      .then(() => this.start())
      .catch((error: unknown) => {
        const errorMessage =
          error instanceof Error ? error.message : 'Unknown restart error'
        this.log(`Failed to restart streaming: ${errorMessage}`)
      })
  }

  /**
   * Start the streaming process
   */
  private startStreaming(): void {
    if (!this.config || !this.isInitialized) {
      return
    }

    const header = this.fileReader.getHeader()
    if (!header) {
      this.errorCallback?.('WAV file header not available')
      return
    }

    // `||` (not `??`) on purpose: a duration or speed of 0 falls back to the default.
    const chunkDurationMs = this.options.chunkDurationMs || 100
    const chunkSizeBytes = SimulateFileAudioStreamAdapter.computeChunkSizeBytes(
      header,
      chunkDurationMs,
    )

    // Adjust interval timing based on playback speed
    const intervalMs = chunkDurationMs / (this.options.playbackSpeed || 1.0)

    // Never leave a previous timer running alongside the new one.
    this.clearStreamInterval()

    this.streamInterval = setInterval(() => {
      if (!this.recording) {
        return
      }

      try {
        this.streamNextChunk(chunkSizeBytes)
      } catch (error) {
        const errorMessage =
          error instanceof Error ? error.message : 'Streaming error'
        this.errorCallback?.(errorMessage)
        // stop() handles its own errors, so the promise is intentionally not awaited.
        void this.stop()
      }
    }, intervalMs)
  }

  /**
   * Stream the next audio chunk
   */
  private streamNextChunk(chunkSizeBytes: number): void {
    if (!this.dataCallback || !this.config) {
      return
    }

    const header = this.fileReader.getHeader()
    if (!header) {
      return
    }

    // Get the next chunk of audio data
    const audioChunk = this.fileReader.getAudioSlice(
      this.currentBytePosition,
      chunkSizeBytes,
    )

    if (!audioChunk || audioChunk.length === 0) {
      // End of file reached
      if (this.options.loop) {
        // Reset to beginning for looping; the first chunk follows on the next tick
        this.currentBytePosition = 0
        this.startTime = Date.now()
        this.pausedTime = 0
        this.hasReachedEnd = false
        this.log('Looping audio file simulation')
        return
      }

      // Stop streaming due to no new buffer
      this.log('Audio file simulation completed - no new buffer available')
      this.hasReachedEnd = true

      // Call the end-of-file callback if provided
      if (this.options.onEndOfFile) {
        this.log('Calling onEndOfFile callback')
        this.options.onEndOfFile()
      }

      // Stop the stream
      void this.stop()
      return
    }

    // Update position
    this.currentBytePosition += audioChunk.length

    // Create stream data using the original file's format
    const streamData: AudioStreamData = {
      data: audioChunk,
      sampleRate: header.sampleRate,
      channels: header.channels,
      timestamp: Date.now(),
    }

    // Send the chunk
    this.dataCallback(streamData)
  }

  /**
   * Get current playback statistics
   *
   * @returns A snapshot of position, duration and state. `progress` is
   * `currentTime / totalDuration`, or 0 when the header is unavailable or
   * reports a non-positive duration (avoids NaN for empty files).
   */
  getStatistics() {
    const header = this.fileReader.getHeader()
    const currentTime = this.fileReader.byteToTime(this.currentBytePosition)

    return {
      filePath: this.options.filePath,
      isRecording: this.recording,
      currentTime,
      totalDuration: header?.duration || 0,
      progress:
        header && header.duration > 0 ? currentTime / header.duration : 0,
      playbackSpeed: this.options.playbackSpeed,
      currentBytePosition: this.currentBytePosition,
      totalBytes: this.fileReader.getTotalDataSize(),
      hasReachedEnd: this.hasReachedEnd,
      header,
    }
  }

  /**
   * Seek to a specific time position
   *
   * @param timeSeconds Target position in seconds; clamped to
   * `[0, duration]`. A NaN value is ignored so the byte position can never
   * become NaN. Does nothing when no header is available.
   */
  seekToTime(timeSeconds: number): void {
    const header = this.fileReader.getHeader()
    if (!header) {
      return
    }

    if (Number.isNaN(timeSeconds)) {
      this.log('Ignoring seek to NaN position')
      return
    }

    const clampedTime = Math.max(0, Math.min(timeSeconds, header.duration))
    this.currentBytePosition = this.fileReader.timeToByte(clampedTime)

    // Reset timing if we're currently playing
    if (this.recording) {
      this.startTime =
        Date.now() - (clampedTime * 1000) / (this.options.playbackSpeed || 1.0)
      this.pausedTime = 0
    }

    this.log(`Seeked to ${clampedTime.toFixed(2)}s`)
  }

  /**
   * Set playback speed
   *
   * @param speed Positive, finite multiplier (1.0 = real-time).
   * @throws Error when `speed` is not a finite number greater than 0.
   */
  setPlaybackSpeed(speed: number): void {
    // `speed <= 0` alone would let NaN and Infinity through.
    if (!Number.isFinite(speed) || speed <= 0) {
      throw new Error('Playback speed must be greater than 0')
    }

    this.options.playbackSpeed = speed

    // If currently playing, restart streaming with new speed
    if (this.recording) {
      this.restartStreaming()
    }

    this.log(`Playback speed set to ${speed}x`)
  }

  /**
   * Reset file buffer to beginning
   */
  resetBuffer(): void {
    this.log('Resetting file buffer to beginning')

    // Reset position and timing
    this.currentBytePosition = 0
    this.startTime = Date.now()
    this.pausedTime = 0
    this.hasReachedEnd = false

    // If currently playing, restart streaming from beginning
    if (this.recording) {
      this.log('Restarting streaming from beginning')
      // Stop and restart to apply the reset
      this.restartStreaming()
    }
  }

  /**
   * Logger function
   */
  private log(message: string): void {
    this.options.logger?.(`[SimulateFileAudioStreamAdapter] ${message}`)
  }
}
@@ -0,0 +1,34 @@
1
+ // Main transcriber class
2
+ export { RealtimeTranscriber } from './RealtimeTranscriber'
3
+
4
+ // Slice manager (for advanced use cases)
5
+ export { SliceManager } from './SliceManager'
6
+
7
+ export type { WavFileWriterFs } from '../utils/WavFileWriter' // Type of the optional `fs` dependency (see RealtimeTranscriberDependencies)
8
+
9
+ // Types and interfaces (type-only re-exports from ./types)
10
+ export type {
11
+ // Audio Stream types
12
+ AudioStreamData,
13
+ AudioStreamConfig,
14
+ AudioStreamInterface,
15
+
16
+ // VAD and event types
17
+ RealtimeVadEvent,
18
+ RealtimeTranscribeEvent,
19
+ RealtimeStatsEvent,
20
+
21
+ // Configuration types
22
+ RealtimeTranscriberDependencies,
23
+ RealtimeOptions,
24
+ RealtimeTranscriberCallbacks,
25
+
26
+ // Audio slice types
27
+ AudioSlice,
28
+ AudioSliceNoData,
29
+ MemoryUsage,
30
+
31
+ } from './types'
32
+
33
+ // VAD presets constant (a runtime value, so it is exported separately from the type-only list)
34
+ export { VAD_PRESETS } from './types'
@@ -0,0 +1,283 @@
1
+ import type { TranscribeOptions, TranscribeResult, VadOptions } from '../index'
2
+ import type { WavFileWriterFs } from '../utils/WavFileWriter'
3
+
4
+ // === Audio Stream Interfaces ===
5
+
6
+ export interface AudioStreamData { // One chunk of audio handed to the callback registered via AudioStreamInterface.onData
7
+ data: Uint8Array // Raw audio bytes of this chunk
8
+ sampleRate: number // Sample rate of `data`, in Hz
9
+ channels: number // Number of audio channels in `data`
10
+ timestamp: number // Time attached by the producer; NOTE(review): presumably epoch milliseconds (Date.now()) — confirm for each adapter
11
+ }
12
+
13
+ export interface AudioStreamConfig { // Requested stream format passed to AudioStreamInterface.initialize; every field is optional
14
+ sampleRate?: number // Requested sample rate, in Hz
15
+ channels?: number // Requested number of channels
16
+ bitsPerSample?: number // Requested bit depth per sample
17
+ bufferSize?: number // NOTE(review): unit (bytes vs samples) is not visible in this file — confirm against the stream adapter
18
+ audioSource?: number // NOTE(review): numeric source identifier; its meaning is defined by the adapter — confirm
19
+ }
20
+
21
+ export interface AudioStreamInterface { // Contract for audio sources; supplied as RealtimeTranscriberDependencies.audioStream
22
+ initialize(config: AudioStreamConfig): Promise<void> // Prepare the stream with the requested format
23
+ start(): Promise<void> // Begin delivering audio
24
+ stop(): Promise<void> // Stop delivering audio
25
+ isRecording(): boolean // Whether the stream is currently delivering audio
26
+ onData(callback: (data: AudioStreamData) => void): void // Register the receiver for audio chunks
27
+ onError(callback: (error: string) => void): void // Register the receiver for error messages
28
+ onStatusChange(callback: (isRecording: boolean) => void): void // Register the receiver for recording-state changes
29
+ release(): Promise<void> // Release the stream; the exact cleanup is implementation-defined
30
+ }
31
+
32
+ // === Enhanced VAD Options ===
33
+
34
+ // Pre-defined VAD configurations for different use cases
35
+ /**
36
+ * VAD Presets Overview (threshold, minSpeechDurationMs, intended use):
37
+ *
38
+ * General presets
39
+ * - default: 0.5 thresh, 250ms min - balanced performance
40
+ * - conservative: 0.7 thresh, 500ms min - clear speech, avoids false positives
41
+ * - very-conservative: 0.8 thresh, 750ms min - only very clear speech
42
+ * - sensitive: 0.3 thresh, 100ms min - quiet environments
43
+ * - very-sensitive: 0.2 thresh, 100ms min - catches whispers / quiet speech
44
+ *
45
+ * Specialized presets
46
+ * - continuous: 60s max speech - presentations/lectures
47
+ * - meeting: 45s max speech - multiple speakers
48
+ * - noisy: 0.75 thresh - strict thresholds for noisy environments
49
+ *
50
+ * A preset is selected by key through RealtimeOptions.vadPreset
51
+ * (typed as keyof typeof VAD_PRESETS).
52
+ *
53
+ * Key Parameters:
54
+ * - threshold: 0.0-1.0 (lower = more sensitive)
55
+ * - minSpeechDurationMs: Min duration to consider speech
56
+ * - minSilenceDurationMs: Min silence before ending speech
57
+ * - maxSpeechDurationS: Max continuous speech duration
58
+ * - speechPadMs: Padding around detected speech
59
+ * - samplesOverlap: Analysis window overlap (0.0-1.0)
60
+ */
61
+ export const VAD_PRESETS = {
62
+ // Default - balanced performance
63
+ default: {
64
+ threshold: 0.5,
65
+ minSpeechDurationMs: 250,
66
+ minSilenceDurationMs: 100,
67
+ maxSpeechDurationS: 30,
68
+ speechPadMs: 30,
69
+ samplesOverlap: 0.1,
70
+ },
71
+
72
+ // Sensitive - good for quiet environments
73
+ sensitive: {
74
+ threshold: 0.3,
75
+ minSpeechDurationMs: 100,
76
+ minSilenceDurationMs: 50,
77
+ maxSpeechDurationS: 15,
78
+ speechPadMs: 50,
79
+ samplesOverlap: 0.2,
80
+ },
81
+
82
+ // Very sensitive - catches even quiet speech
83
+ 'very-sensitive': {
84
+ threshold: 0.2,
85
+ minSpeechDurationMs: 100,
86
+ minSilenceDurationMs: 50,
87
+ maxSpeechDurationS: 15,
88
+ speechPadMs: 100,
89
+ samplesOverlap: 0.3,
90
+ },
91
+
92
+ // Conservative - avoids false positives
93
+ conservative: {
94
+ threshold: 0.7,
95
+ minSpeechDurationMs: 500,
96
+ minSilenceDurationMs: 200,
97
+ maxSpeechDurationS: 25,
98
+ speechPadMs: 20,
99
+ samplesOverlap: 0.05,
100
+ },
101
+
102
+ // Very conservative - only clear speech
103
+ 'very-conservative': {
104
+ threshold: 0.8,
105
+ minSpeechDurationMs: 750,
106
+ minSilenceDurationMs: 300,
107
+ maxSpeechDurationS: 20,
108
+ speechPadMs: 10,
109
+ samplesOverlap: 0.05,
110
+ },
111
+
112
+ // Continuous speech - for presentations/lectures
113
+ continuous: {
114
+ threshold: 0.4,
115
+ minSpeechDurationMs: 200,
116
+ minSilenceDurationMs: 300,
117
+ maxSpeechDurationS: 60, // Longer segments
118
+ speechPadMs: 50,
119
+ samplesOverlap: 0.15,
120
+ },
121
+
122
+ // Meeting mode - handles multiple speakers
123
+ meeting: {
124
+ threshold: 0.45,
125
+ minSpeechDurationMs: 300,
126
+ minSilenceDurationMs: 150,
127
+ maxSpeechDurationS: 45,
128
+ speechPadMs: 75,
129
+ samplesOverlap: 0.2,
130
+ },
131
+
132
+ // Noisy environment - more strict thresholds
133
+ noisy: {
134
+ threshold: 0.75,
135
+ minSpeechDurationMs: 400,
136
+ minSilenceDurationMs: 100,
137
+ maxSpeechDurationS: 25,
138
+ speechPadMs: 40,
139
+ samplesOverlap: 0.1,
140
+ },
141
+ }
142
+
143
+ export interface RealtimeVadEvent { // VAD event delivered to RealtimeTranscriberCallbacks.onVad; may also be attached to transcribe events
144
+ type: 'speech_start' | 'speech_end' | 'speech_continue' | 'silence' // Kind of VAD transition or state being reported
145
+ timestamp: number // NOTE(review): time base/unit is set by the producer, which is not visible here — confirm
146
+ lastSpeechDetectedTime: number // NOTE(review): same time base as `timestamp`, presumably — confirm
147
+ confidence: number // NOTE(review): presumably on the same 0.0-1.0 scale as the VAD threshold — confirm
148
+ duration: number // NOTE(review): unit not visible in this file — confirm
149
+ sliceIndex: number // Index of the audio slice this event refers to
150
+
151
+ // Additional context (optional signal analysis)
152
+ analysis?: {
153
+ averageAmplitude: number
154
+ peakAmplitude: number
155
+ spectralCentroid?: number
156
+ zeroCrossingRate?: number
157
+ }
158
+
159
+ // Adaptive threshold info (optional)
160
+ currentThreshold?: number
161
+ environmentNoise?: number
162
+ }
163
+
164
+ export interface RealtimeTranscribeEvent { // Event delivered to RealtimeTranscriberCallbacks.onTranscribe
165
+ type: 'start' | 'transcribe' | 'end' | 'error' // Lifecycle stage being reported
166
+ sliceIndex: number // Index of the audio slice this event refers to
167
+ data?: TranscribeResult // Transcription result, when one is available for this event
168
+ isCapturing: boolean // NOTE(review): presumably whether audio capture is still running — confirm
169
+ processTime: number // NOTE(review): unit not visible in this file — confirm
170
+ recordingTime: number // NOTE(review): unit not visible in this file — confirm
171
+ memoryUsage?: { // Same shape as the MemoryUsage interface
172
+ slicesInMemory: number
173
+ totalSamples: number
174
+ estimatedMB: number
175
+ }
176
+ vadEvent?: RealtimeVadEvent // VAD event associated with this transcription, if any
177
+ }
178
+
179
+ export interface RealtimeOptions { // Tuning options for realtime transcription; every field is optional
180
+ // Audio settings
181
+ audioSliceSec?: number // default: 25
182
+ audioMinSec?: number // default: 1
183
+ maxSlicesInMemory?: number // default: 3
184
+
185
+ // VAD settings - now using extended options
186
+ vadOptions?: VadOptions
187
+ vadPreset?: keyof typeof VAD_PRESETS // Quick preset selection by VAD_PRESETS key
188
+
189
+ // Auto-slice settings
190
+ autoSliceOnSpeechEnd?: boolean // default: false - automatically slice when speech ends and duration thresholds are met
191
+ autoSliceThreshold?: number // default: 0.85 - fraction of audioSliceSec (0.85 = 85%) that triggers auto-slice
192
+
193
+ // Transcription settings
194
+ transcribeOptions?: TranscribeOptions
195
+
196
+ // Prompt settings
197
+ initialPrompt?: string // Initial prompt to use for transcription
198
+ promptPreviousSlices?: boolean // Add transcription results from previous slices as prompt (default: true)
199
+
200
+ // File settings (Only used if fs dependency is provided)
201
+ audioOutputPath?: string
202
+
203
+ // Audio stream configuration (same type that AudioStreamInterface.initialize accepts)
204
+ audioStreamConfig?: AudioStreamConfig
205
+
206
+ // Logger settings
207
+ logger?: (message: string) => void // default: noop - custom logger function
208
+ }
209
+
210
+ export interface AudioSlice { // One slice of buffered audio together with its bookkeeping flags
211
+ index: number // Slice index
212
+ data: Uint8Array // Raw audio bytes of the slice
213
+ sampleCount: number // Number of samples held in `data`
214
+ startTime: number // NOTE(review): time base/unit not visible in this file — confirm
215
+ endTime: number // NOTE(review): time base/unit not visible in this file — confirm
216
+ isProcessed: boolean // NOTE(review): presumably set once the slice has been transcribed — confirm
217
+ isReleased: boolean // NOTE(review): presumably set once the slice's data has been freed — confirm
218
+ }
219
+
220
+ export interface AudioSliceNoData extends Omit<AudioSlice, 'data'> {} // AudioSlice metadata without the `data` payload
221
+
222
+ export interface MemoryUsage { // Summary of buffered audio; same shape as RealtimeTranscribeEvent.memoryUsage
223
+ slicesInMemory: number // Number of slices currently held
224
+ totalSamples: number // Total sample count across those slices
225
+ estimatedMB: number // Estimated size in megabytes (an estimate, per the name)
226
+ }
227
+
228
+ export interface RealtimeStatsEvent { // Event delivered to RealtimeTranscriberCallbacks.onStatsUpdate
229
+ timestamp: number // NOTE(review): time base/unit not visible in this file — confirm
230
+ type: // What triggered this stats update
231
+ | 'slice_processed'
232
+ | 'vad_change'
233
+ | 'memory_change'
234
+ | 'status_change'
235
+ data: {
236
+ isActive: boolean
237
+ isTranscribing: boolean
238
+ vadEnabled: boolean
239
+ audioStats: any // NOTE(review): untyped; the shape is defined by the producer, not visible here
240
+ vadStats: any // NOTE(review): untyped; the shape is defined by the producer, not visible here
241
+ sliceStats: any // NOTE(review): untyped; the shape is defined by the producer, not visible here
242
+ }
243
+ }
244
+
245
+ export interface RealtimeTranscriberCallbacks { // Optional hooks a caller can register; every callback may be omitted
246
+ onBeginTranscribe?: (sliceInfo: { // Async hook that receives the slice about to be transcribed
247
+ audioData: Uint8Array
248
+ sliceIndex: number
249
+ duration: number
250
+ vadEvent?: RealtimeVadEvent
251
+ }) => Promise<boolean> // NOTE(review): presumably resolving false skips transcription of the slice — confirm in RealtimeTranscriber
252
+ onTranscribe?: (event: RealtimeTranscribeEvent) => void // Receives transcription lifecycle events
253
+ onVad?: (event: RealtimeVadEvent) => void // Receives VAD events
254
+ onError?: (error: string) => void // Receives error messages
255
+ onStatusChange?: (isActive: boolean) => void // Receives active-state changes
256
+ onStatsUpdate?: (event: RealtimeStatsEvent) => void // Receives statistics events
257
+ }
258
+
259
+ // === Context Interfaces ===
260
+
261
+ export type WhisperContextLike = { // Structural type: any object exposing this transcribeData method can serve as the whisper context
262
+ transcribeData: ( // Starts transcription of a buffer; returns a handle instead of a bare promise
263
+ data: ArrayBuffer,
264
+ options: TranscribeOptions,
265
+ ) => {
266
+ stop: () => Promise<void> // Requests that the running transcription stop
267
+ promise: Promise<TranscribeResult> // Resolves with the transcription result
268
+ }
269
+ }
270
+
271
+ export type WhisperVadContextLike = { // Structural type: any object exposing this detectSpeechData method can serve as the VAD context
272
+ detectSpeechData: ( // Runs speech detection over a buffer
273
+ data: ArrayBuffer,
274
+ options: VadOptions,
275
+ ) => Promise<Array<{ t0: number; t1: number }>> // Detected segments as start/end pairs; NOTE(review): unit of t0/t1 not visible here — confirm
276
+ }
277
+
278
+ export interface RealtimeTranscriberDependencies { // Collaborators supplied to the realtime transcriber by the caller
279
+ whisperContext: WhisperContextLike // Required: performs the transcription
280
+ vadContext?: WhisperVadContextLike // Optional: speech detection
281
+ audioStream: AudioStreamInterface // Required: source of audio chunks
282
+ fs?: WavFileWriterFs // Optional: file-system dependency; RealtimeOptions.audioOutputPath is only used when this is provided
283
+ }