whisper.rn 0.5.0-rc.0 → 0.5.0-rc.10

This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (117)
  1. package/README.md +128 -50
  2. package/android/build.gradle +1 -0
  3. package/android/src/main/CMakeLists.txt +1 -0
  4. package/android/src/main/java/com/rnwhisper/RNWhisper.java +35 -0
  5. package/android/src/main/java/com/rnwhisper/WhisperContext.java +33 -0
  6. package/android/src/main/jni.cpp +81 -0
  7. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
  8. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
  9. package/cpp/jsi/RNWhisperJSI.cpp +42 -6
  10. package/ios/RNWhisper.mm +11 -0
  11. package/ios/RNWhisperContext.h +1 -0
  12. package/ios/RNWhisperContext.mm +46 -0
  13. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  14. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  15. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  16. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  17. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  18. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  19. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  20. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  21. package/lib/commonjs/AudioSessionIos.js +2 -1
  22. package/lib/commonjs/AudioSessionIos.js.map +1 -1
  23. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  24. package/lib/commonjs/index.js +50 -10
  25. package/lib/commonjs/index.js.map +1 -1
  26. package/lib/commonjs/jest-mock.js +126 -0
  27. package/lib/commonjs/jest-mock.js.map +1 -0
  28. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +857 -0
  29. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -0
  30. package/lib/commonjs/realtime-transcription/SliceManager.js +233 -0
  31. package/lib/commonjs/realtime-transcription/SliceManager.js.map +1 -0
  32. package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js +133 -0
  33. package/lib/commonjs/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
  34. package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js +201 -0
  35. package/lib/commonjs/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
  36. package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +309 -0
  37. package/lib/commonjs/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
  38. package/lib/commonjs/realtime-transcription/index.js +27 -0
  39. package/lib/commonjs/realtime-transcription/index.js.map +1 -0
  40. package/lib/commonjs/realtime-transcription/types.js +114 -0
  41. package/lib/commonjs/realtime-transcription/types.js.map +1 -0
  42. package/lib/commonjs/utils/WavFileReader.js +158 -0
  43. package/lib/commonjs/utils/WavFileReader.js.map +1 -0
  44. package/lib/commonjs/utils/WavFileWriter.js +181 -0
  45. package/lib/commonjs/utils/WavFileWriter.js.map +1 -0
  46. package/lib/commonjs/utils/common.js +25 -0
  47. package/lib/commonjs/utils/common.js.map +1 -0
  48. package/lib/module/AudioSessionIos.js +2 -1
  49. package/lib/module/AudioSessionIos.js.map +1 -1
  50. package/lib/module/NativeRNWhisper.js.map +1 -1
  51. package/lib/module/index.js +48 -10
  52. package/lib/module/index.js.map +1 -1
  53. package/lib/module/jest-mock.js +124 -0
  54. package/lib/module/jest-mock.js.map +1 -0
  55. package/lib/module/realtime-transcription/RealtimeTranscriber.js +851 -0
  56. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -0
  57. package/lib/module/realtime-transcription/SliceManager.js +226 -0
  58. package/lib/module/realtime-transcription/SliceManager.js.map +1 -0
  59. package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js +124 -0
  60. package/lib/module/realtime-transcription/adapters/AudioPcmStreamAdapter.js.map +1 -0
  61. package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js +194 -0
  62. package/lib/module/realtime-transcription/adapters/JestAudioStreamAdapter.js.map +1 -0
  63. package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js +302 -0
  64. package/lib/module/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.js.map +1 -0
  65. package/lib/module/realtime-transcription/index.js +8 -0
  66. package/lib/module/realtime-transcription/index.js.map +1 -0
  67. package/lib/module/realtime-transcription/types.js +107 -0
  68. package/lib/module/realtime-transcription/types.js.map +1 -0
  69. package/lib/module/utils/WavFileReader.js +151 -0
  70. package/lib/module/utils/WavFileReader.js.map +1 -0
  71. package/lib/module/utils/WavFileWriter.js +174 -0
  72. package/lib/module/utils/WavFileWriter.js.map +1 -0
  73. package/lib/module/utils/common.js +18 -0
  74. package/lib/module/utils/common.js.map +1 -0
  75. package/lib/typescript/AudioSessionIos.d.ts +1 -1
  76. package/lib/typescript/AudioSessionIos.d.ts.map +1 -1
  77. package/lib/typescript/NativeRNWhisper.d.ts +1 -0
  78. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  79. package/lib/typescript/index.d.ts +8 -4
  80. package/lib/typescript/index.d.ts.map +1 -1
  81. package/lib/typescript/jest-mock.d.ts +2 -0
  82. package/lib/typescript/jest-mock.d.ts.map +1 -0
  83. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +166 -0
  84. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -0
  85. package/lib/typescript/realtime-transcription/SliceManager.d.ts +72 -0
  86. package/lib/typescript/realtime-transcription/SliceManager.d.ts.map +1 -0
  87. package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts +22 -0
  88. package/lib/typescript/realtime-transcription/adapters/AudioPcmStreamAdapter.d.ts.map +1 -0
  89. package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts +44 -0
  90. package/lib/typescript/realtime-transcription/adapters/JestAudioStreamAdapter.d.ts.map +1 -0
  91. package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts +75 -0
  92. package/lib/typescript/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.d.ts.map +1 -0
  93. package/lib/typescript/realtime-transcription/index.d.ts +6 -0
  94. package/lib/typescript/realtime-transcription/index.d.ts.map +1 -0
  95. package/lib/typescript/realtime-transcription/types.d.ts +222 -0
  96. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -0
  97. package/lib/typescript/utils/WavFileReader.d.ts +61 -0
  98. package/lib/typescript/utils/WavFileReader.d.ts.map +1 -0
  99. package/lib/typescript/utils/WavFileWriter.d.ts +57 -0
  100. package/lib/typescript/utils/WavFileWriter.d.ts.map +1 -0
  101. package/lib/typescript/utils/common.d.ts +9 -0
  102. package/lib/typescript/utils/common.d.ts.map +1 -0
  103. package/package.json +23 -11
  104. package/src/AudioSessionIos.ts +3 -2
  105. package/src/NativeRNWhisper.ts +2 -0
  106. package/src/index.ts +74 -22
  107. package/{jest/mock.js → src/jest-mock.ts} +2 -2
  108. package/src/realtime-transcription/RealtimeTranscriber.ts +1015 -0
  109. package/src/realtime-transcription/SliceManager.ts +252 -0
  110. package/src/realtime-transcription/adapters/AudioPcmStreamAdapter.ts +143 -0
  111. package/src/realtime-transcription/adapters/JestAudioStreamAdapter.ts +251 -0
  112. package/src/realtime-transcription/adapters/SimulateFileAudioStreamAdapter.ts +378 -0
  113. package/src/realtime-transcription/index.ts +34 -0
  114. package/src/realtime-transcription/types.ts +283 -0
  115. package/src/utils/WavFileReader.ts +202 -0
  116. package/src/utils/WavFileWriter.ts +206 -0
  117. package/src/utils/common.ts +17 -0
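Of the files above, the bulk of the change is the new realtime-transcription module (source under package/src/realtime-transcription/ plus its CommonJS, ESM, and TypeScript-declaration builds). The hunk below reproduces the compiled CommonJS build of RealtimeTranscriber in full. For orientation, here is a rough sketch of the audio-stream adapter contract the transcriber drives; the method names are taken from the calls visible in the compiled code below, while the interface name and exact signatures are illustrative assumptions — the published typings live under package/lib/typescript/realtime-transcription/.

// Illustrative adapter contract (names inferred from the compiled code below).
interface AudioStreamAdapter {
  initialize(config: {
    sampleRate?: number    // transcriber defaults to 16000
    channels?: number      // defaults to 1
    bitsPerSample?: number // defaults to 16
    audioSource?: number   // defaults to 6 (Android audio source)
    bufferSize?: number    // defaults to 16 * 1024
  }): Promise<void>
  start(): Promise<void>
  stop(): Promise<void>
  release(): Promise<void>
  isRecording(): boolean
  onData(callback: (chunk: { data: Uint8Array | number[] }) => void): void
  onError(callback: (error: string) => void): void
  onStatusChange(callback: (isRecording: boolean) => void): void
}

AudioPcmStreamAdapter, JestAudioStreamAdapter, and SimulateFileAudioStreamAdapter in the list above are the stream adapters shipped in this release; they presumably implement the same contract.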
package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js
@@ -0,0 +1,857 @@
+ "use strict";
+
+ Object.defineProperty(exports, "__esModule", {
+   value: true
+ });
+ exports.RealtimeTranscriber = void 0;
+ var _SliceManager = require("./SliceManager");
+ var _WavFileWriter = require("../utils/WavFileWriter");
+ var _types = require("./types");
+ /* eslint-disable class-methods-use-this */
+
+ /**
+  * RealtimeTranscriber provides real-time audio transcription with VAD support.
+  *
+  * Features:
+  * - Automatic slice management based on duration
+  * - VAD-based speech detection and auto-slicing
+  * - Configurable auto-slice mechanism that triggers on speech_end/silence events
+  * - Memory management for audio slices
+  * - Queue-based transcription processing
+  */
+ class RealtimeTranscriber {
+   callbacks = {};
+   isActive = false;
+   isTranscribing = false;
+   vadEnabled = false;
+   transcriptionQueue = [];
+   accumulatedData = new Uint8Array(0);
+   wavFileWriter = null;
+
+   // Simplified VAD state management
+   lastSpeechDetectedTime = 0;
+
+   // Track VAD state for proper event transitions
+   lastVadState = 'silence';
+
+   // Track last stats to emit only when changed
+   lastStatsSnapshot = null;
+
+   // Store transcription results by slice index
+   transcriptionResults = new Map();
+
+   // Store VAD events by slice index for inclusion in transcribe events
+   vadEvents = new Map();
+   constructor(dependencies) {
+     let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
+     let callbacks = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
+     this.whisperContext = dependencies.whisperContext;
+     this.vadContext = dependencies.vadContext;
+     this.audioStream = dependencies.audioStream;
+     this.fs = dependencies.fs;
+     this.callbacks = callbacks;
+
+     // Set default options with proper types
+     this.options = {
+       audioSliceSec: options.audioSliceSec || 30,
+       audioMinSec: options.audioMinSec || 1,
+       maxSlicesInMemory: options.maxSlicesInMemory || 3,
+       vadOptions: options.vadOptions || _types.VAD_PRESETS.default,
+       vadPreset: options.vadPreset,
+       autoSliceOnSpeechEnd: options.autoSliceOnSpeechEnd || true,
+       autoSliceThreshold: options.autoSliceThreshold || 0.5,
+       transcribeOptions: options.transcribeOptions || {},
+       initialPrompt: options.initialPrompt,
+       promptPreviousSlices: options.promptPreviousSlices ?? true,
+       audioOutputPath: options.audioOutputPath,
+       logger: options.logger || (() => {})
+     };
+
+     // Apply VAD preset if specified
+     if (this.options.vadPreset && _types.VAD_PRESETS[this.options.vadPreset]) {
+       this.options.vadOptions = {
+         ..._types.VAD_PRESETS[this.options.vadPreset],
+         ...this.options.vadOptions
+       };
+     }
+
+     // Enable VAD if context is provided and not explicitly disabled
+     this.vadEnabled = !!this.vadContext;
+
+     // Initialize managers
+     this.sliceManager = new _SliceManager.SliceManager(this.options.audioSliceSec, this.options.maxSlicesInMemory);
+
+     // Set up audio stream callbacks
+     this.audioStream.onData(this.handleAudioData.bind(this));
+     this.audioStream.onError(this.handleError.bind(this));
+     this.audioStream.onStatusChange(this.handleAudioStatusChange.bind(this));
+   }
+
+   /**
+    * Start realtime transcription
+    */
+   async start() {
+     if (this.isActive) {
+       throw new Error('Realtime transcription is already active');
+     }
+     try {
+       var _this$callbacks$onSta, _this$callbacks, _this$options$audioSt4, _this$options$audioSt5, _this$options$audioSt6, _this$options$audioSt7, _this$options$audioSt8;
+       this.isActive = true;
+       (_this$callbacks$onSta = (_this$callbacks = this.callbacks).onStatusChange) === null || _this$callbacks$onSta === void 0 ? void 0 : _this$callbacks$onSta.call(_this$callbacks, true);
+
+       // Reset all state to ensure clean start
+       this.reset();
+
+       // Initialize WAV file writer if output path is specified
+       if (this.fs && this.options.audioOutputPath) {
+         var _this$options$audioSt, _this$options$audioSt2, _this$options$audioSt3;
+         this.wavFileWriter = new _WavFileWriter.WavFileWriter(this.fs, this.options.audioOutputPath, {
+           sampleRate: ((_this$options$audioSt = this.options.audioStreamConfig) === null || _this$options$audioSt === void 0 ? void 0 : _this$options$audioSt.sampleRate) || 16000,
+           channels: ((_this$options$audioSt2 = this.options.audioStreamConfig) === null || _this$options$audioSt2 === void 0 ? void 0 : _this$options$audioSt2.channels) || 1,
+           bitsPerSample: ((_this$options$audioSt3 = this.options.audioStreamConfig) === null || _this$options$audioSt3 === void 0 ? void 0 : _this$options$audioSt3.bitsPerSample) || 16
+         });
+         await this.wavFileWriter.initialize();
+       }
+
+       // Start audio recording
+       await this.audioStream.initialize({
+         sampleRate: ((_this$options$audioSt4 = this.options.audioStreamConfig) === null || _this$options$audioSt4 === void 0 ? void 0 : _this$options$audioSt4.sampleRate) || 16000,
+         channels: ((_this$options$audioSt5 = this.options.audioStreamConfig) === null || _this$options$audioSt5 === void 0 ? void 0 : _this$options$audioSt5.channels) || 1,
+         bitsPerSample: ((_this$options$audioSt6 = this.options.audioStreamConfig) === null || _this$options$audioSt6 === void 0 ? void 0 : _this$options$audioSt6.bitsPerSample) || 16,
+         audioSource: ((_this$options$audioSt7 = this.options.audioStreamConfig) === null || _this$options$audioSt7 === void 0 ? void 0 : _this$options$audioSt7.audioSource) || 6,
+         bufferSize: ((_this$options$audioSt8 = this.options.audioStreamConfig) === null || _this$options$audioSt8 === void 0 ? void 0 : _this$options$audioSt8.bufferSize) || 16 * 1024
+       });
+       await this.audioStream.start();
+
+       // Emit stats update for status change
+       this.emitStatsUpdate('status_change');
+       this.log('Realtime transcription started');
+     } catch (error) {
+       var _this$callbacks$onSta2, _this$callbacks2;
+       this.isActive = false;
+       (_this$callbacks$onSta2 = (_this$callbacks2 = this.callbacks).onStatusChange) === null || _this$callbacks$onSta2 === void 0 ? void 0 : _this$callbacks$onSta2.call(_this$callbacks2, false);
+       throw error;
+     }
+   }
+
+   /**
+    * Stop realtime transcription
+    */
+   async stop() {
+     if (!this.isActive) {
+       return;
+     }
+     try {
+       var _this$callbacks$onSta3, _this$callbacks3;
+       this.isActive = false;
+
+       // Stop audio recording
+       await this.audioStream.stop();
+
+       // Process any remaining accumulated data
+       if (this.accumulatedData.length > 0) {
+         this.processAccumulatedDataForSliceManagement();
+       }
+
+       // Process any remaining queued transcriptions
+       await this.processTranscriptionQueue();
+
+       // Finalize WAV file
+       if (this.wavFileWriter) {
+         await this.wavFileWriter.finalize();
+         this.wavFileWriter = null;
+       }
+
+       // Reset all state completely
+       this.reset();
+       (_this$callbacks$onSta3 = (_this$callbacks3 = this.callbacks).onStatusChange) === null || _this$callbacks$onSta3 === void 0 ? void 0 : _this$callbacks$onSta3.call(_this$callbacks3, false);
+
+       // Emit stats update for status change
+       this.emitStatsUpdate('status_change');
+       this.log('Realtime transcription stopped');
+     } catch (error) {
+       this.handleError(`Stop error: ${error}`);
+     }
+   }
+
+   /**
+    * Handle incoming audio data from audio stream
+    */
+   handleAudioData(streamData) {
+     if (!this.isActive) {
+       return;
+     }
+     try {
+       // Write to WAV file if enabled (convert to Uint8Array for WavFileWriter)
+       if (this.wavFileWriter) {
+         this.wavFileWriter.appendAudioData(streamData.data).catch(error => {
+           this.log(`Failed to write audio to WAV file: ${error}`);
+         });
+       }
+
+       // Always accumulate data for slice management
+       this.accumulateAudioData(streamData.data);
+     } catch (error) {
+       const errorMessage = error instanceof Error ? error.message : 'Audio processing error';
+       this.handleError(errorMessage);
+     }
+   }
+
+   /**
+    * Accumulate audio data for slice management
+    */
+   accumulateAudioData(newData) {
+     const combined = new Uint8Array(this.accumulatedData.length + newData.length);
+     combined.set(this.accumulatedData);
+     combined.set(new Uint8Array(newData), this.accumulatedData.length);
+     this.accumulatedData = combined;
+
+     // Process accumulated data when we have enough for slice management
+     const minBufferSamples = 16000 * 1; // 1 second for slice management
+     if (this.accumulatedData.length >= minBufferSamples) {
+       this.processAccumulatedDataForSliceManagement();
+     }
+   }
+
+   /**
+    * Process accumulated audio data through SliceManager
+    */
+   processAccumulatedDataForSliceManagement() {
+     if (this.accumulatedData.length === 0) {
+       return;
+     }
+
+     // Process through slice manager directly with Uint8Array
+     const result = this.sliceManager.addAudioData(this.accumulatedData);
+     if (result.slice) {
+       this.log(`Slice ${result.slice.index} ready (${result.slice.data.length} bytes)`);
+
+       // Process VAD for the slice if enabled
+       if (!this.isTranscribing && this.vadEnabled) {
+         this.processSliceVAD(result.slice).catch(error => {
+           this.handleError(`VAD processing error: ${error}`);
+         });
+       } else if (!this.isTranscribing) {
+         // If VAD is disabled, transcribe slices as they become ready
+         this.queueSliceForTranscription(result.slice).catch(error => {
+           this.handleError(`Failed to queue slice for transcription: ${error}`);
+         });
+       } else {
+         this.log(`Skipping slice ${result.slice.index} - already transcribing`);
+       }
+       this.emitStatsUpdate('memory_change');
+     }
+
+     // Clear accumulated data
+     this.accumulatedData = new Uint8Array(0);
+   }
+
+   /**
+    * Check if auto-slice should be triggered based on VAD event and timing
+    */
+   async checkAutoSlice(vadEvent, _slice) {
+     if (!this.options.autoSliceOnSpeechEnd || !this.vadEnabled) {
+       return;
+     }
+
+     // Only trigger on speech_end or silence events
+     const shouldTriggerAutoSlice = vadEvent.type === 'speech_end' || vadEvent.type === 'silence';
+     if (!shouldTriggerAutoSlice) {
+       return;
+     }
+
+     // Get current slice info from SliceManager
+     const currentSliceInfo = this.sliceManager.getCurrentSliceInfo();
+     const currentSlice = this.sliceManager.getSliceByIndex(currentSliceInfo.currentSliceIndex);
+     if (!currentSlice) {
+       return;
+     }
+
+     // Calculate current slice duration
+     const currentDuration = (Date.now() - currentSlice.startTime) / 1000; // Convert to seconds
+     const targetDuration = this.options.audioSliceSec;
+     const minDuration = this.options.audioMinSec;
+     const autoSliceThreshold = targetDuration * this.options.autoSliceThreshold;
+
+     // Check if conditions are met for auto-slice
+     const meetsMinDuration = currentDuration >= minDuration;
+     const meetsThreshold = currentDuration >= autoSliceThreshold;
+     if (meetsMinDuration && meetsThreshold) {
+       this.log(`Auto-slicing on ${vadEvent.type} at ${currentDuration.toFixed(1)}s ` + `(min: ${minDuration}s, threshold: ${autoSliceThreshold.toFixed(1)}s, target: ${targetDuration}s)`);

+       // Force next slice
+       await this.nextSlice();
+     } else {
+       this.log(`Auto-slice conditions not met on ${vadEvent.type}: ` + `duration=${currentDuration.toFixed(1)}s, min=${minDuration}s, threshold=${autoSliceThreshold.toFixed(1)}s ` + `(minOk=${meetsMinDuration}, thresholdOk=${meetsThreshold})`);
+     }
+   }
+
+   /**
+    * Process VAD for a completed slice
+    */
+   async processSliceVAD(slice) {
+     try {
+       var _this$callbacks$onVad, _this$callbacks4;
+       // Get audio data from the slice for VAD processing
+       const audioData = this.sliceManager.getAudioDataForTranscription(slice.index);
+       if (!audioData) {
+         this.log(`No audio data available for VAD processing of slice ${slice.index}`);
+         return;
+       }
+
+       // Convert base64 back to Uint8Array for VAD processing
+
+       // Detect speech in the slice
+       const vadEvent = await this.detectSpeech(audioData, slice.index);
+       vadEvent.timestamp = Date.now();
+
+       // Store VAD event for inclusion in transcribe event
+       this.vadEvents.set(slice.index, vadEvent);
+
+       // Emit VAD event
+       (_this$callbacks$onVad = (_this$callbacks4 = this.callbacks).onVad) === null || _this$callbacks$onVad === void 0 ? void 0 : _this$callbacks$onVad.call(_this$callbacks4, vadEvent);
+
+       // Check if auto-slice should be triggered
+       await this.checkAutoSlice(vadEvent, slice);
+
+       // Check if speech was detected and if we should transcribe
+       const isSpeech = vadEvent.type === 'speech_start' || vadEvent.type === 'speech_continue';
+       const isSpeechEnd = vadEvent.type === 'speech_end';
+       if (isSpeech) {
+         const minDuration = this.options.audioMinSec;
+         // Check minimum duration requirement
+         const speechDuration = slice.data.length / 16000 / 2; // Convert bytes to seconds (16kHz, 16-bit)
+
+         if (speechDuration >= minDuration) {
+           this.log(`Speech detected in slice ${slice.index}, queueing for transcription`);
+           await this.queueSliceForTranscription(slice);
+         } else {
+           this.log(`Speech too short in slice ${slice.index} (${speechDuration.toFixed(2)}s < ${minDuration}s), skipping`);
+         }
+       } else if (isSpeechEnd) {
+         this.log(`Speech ended in slice ${slice.index}`);
+         // For speech_end events, we might want to queue the slice for transcription
+         // to capture the final part of the speech segment
+         const speechDuration = slice.data.length / 16000 / 2; // Convert bytes to seconds
+         const minDuration = this.options.audioMinSec;
+         if (speechDuration >= minDuration) {
+           this.log(`Speech end detected in slice ${slice.index}, queueing final segment for transcription`);
+           await this.queueSliceForTranscription(slice);
+         } else {
+           this.log(`Speech end segment too short in slice ${slice.index} (${speechDuration.toFixed(2)}s < ${minDuration}s), skipping`);
+         }
+       } else {
+         this.log(`No speech detected in slice ${slice.index}`);
+       }
+
+       // Emit stats update for VAD change
+       this.emitStatsUpdate('vad_change');
+     } catch (error) {
+       this.handleError(`VAD processing error for slice ${slice.index}: ${error}`);
+     }
+   }
+
+   /**
+    * Queue a slice for transcription
+    */
+   async queueSliceForTranscription(slice) {
+     try {
+       // Get audio data from the slice
+       const audioData = this.sliceManager.getAudioDataForTranscription(slice.index);
+       if (!audioData) {
+         this.log(`No audio data available for slice ${slice.index}`);
+         return;
+       }
+       if (this.callbacks.onBeginTranscribe) {
+         const shouldTranscribe = (await this.callbacks.onBeginTranscribe({
+           sliceIndex: slice.index,
+           audioData,
+           duration: slice.data.length / 16000 / 2 * 1000,
+           // Convert to milliseconds
+           vadEvent: this.vadEvents.get(slice.index)
+         })) ?? true;
+         if (!shouldTranscribe) {
+           this.log(`User callback declined transcription for slice ${slice.index}`);
+           return;
+         }
+       }
+
+       // Add to transcription queue
+       this.transcriptionQueue.unshift({
+         sliceIndex: slice.index,
+         audioData
+       });
+       this.log(`Queued slice ${slice.index} for transcription (${slice.data.length} samples)`);
+       await this.processTranscriptionQueue();
+     } catch (error) {
+       this.handleError(`Failed to queue slice for transcription: ${error}`);
+     }
+   }
+
+   /**
+    * Detect speech using VAD context
+    */
+   async detectSpeech(audioData, sliceIndex) {
+     if (!this.vadContext) {
+       // When no VAD context is available, assume speech is always detected
+       // but still follow the state machine pattern
+       const currentTimestamp = Date.now();
+
+       // Assume speech is always detected when no VAD context
+       const vadEventType = this.lastVadState === 'silence' ? 'speech_start' : 'speech_continue';
+
+       // Update VAD state
+       this.lastVadState = 'speech';
+       const {
+         sampleRate = 16000
+       } = this.options.audioStreamConfig || {};
+       return {
+         type: vadEventType,
+         lastSpeechDetectedTime: 0,
+         timestamp: currentTimestamp,
+         confidence: 1.0,
+         duration: audioData.length / sampleRate / 2,
+         // Convert bytes to seconds
+         sliceIndex
+       };
+     }
+     try {
+       const audioBuffer = audioData.buffer;
+
+       // Use VAD context to detect speech segments
+       const vadSegments = await this.vadContext.detectSpeechData(audioBuffer, this.options.vadOptions);
+
+       // Calculate confidence based on speech segments
+       let confidence = 0.0;
+       let lastSpeechDetectedTime = 0;
+       if (vadSegments && vadSegments.length > 0) {
+         var _vadSegments;
+         // If there are speech segments, calculate average confidence
+         const totalTime = vadSegments.reduce((sum, segment) => sum + (segment.t1 - segment.t0), 0);
+         const audioDuration = audioData.length / 16000 / 2; // Convert bytes to seconds
+         confidence = totalTime > 0 ? Math.min(totalTime / audioDuration, 1.0) : 0.0;
+         lastSpeechDetectedTime = ((_vadSegments = vadSegments[vadSegments.length - 1]) === null || _vadSegments === void 0 ? void 0 : _vadSegments.t1) || -1;
+       }
+       const threshold = this.options.vadOptions.threshold || 0.5;
+       let isSpeech = confidence > threshold;
+       const currentTimestamp = Date.now();
+
+       // Determine VAD event type based on current and previous state
+       let vadEventType;
+       if (isSpeech) {
+         vadEventType = this.lastVadState === 'silence' ? 'speech_start' : 'speech_continue';
+         const minDuration = this.options.audioMinSec;
+         // Check if this is a new speech detection (different from last detected time)
+         if (lastSpeechDetectedTime === this.lastSpeechDetectedTime || (lastSpeechDetectedTime - this.lastSpeechDetectedTime) / 100 < minDuration) {
+           if (this.lastVadState === 'silence') vadEventType = 'silence';
+           if (this.lastVadState === 'speech') vadEventType = 'speech_end';
+           isSpeech = false;
+           confidence = 0.0;
+         }
+         this.lastSpeechDetectedTime = lastSpeechDetectedTime;
+       } else {
+         vadEventType = this.lastVadState === 'speech' ? 'speech_end' : 'silence';
+       }
+
+       // Update VAD state for next detection
+       this.lastVadState = isSpeech ? 'speech' : 'silence';
+       const {
+         sampleRate = 16000
+       } = this.options.audioStreamConfig || {};
+       return {
+         type: vadEventType,
+         lastSpeechDetectedTime,
+         timestamp: currentTimestamp,
+         confidence,
+         duration: audioData.length / sampleRate / 2,
+         // Convert bytes to seconds
+         sliceIndex,
+         currentThreshold: threshold
+       };
+     } catch (error) {
+       this.log(`VAD detection error: ${error}`);
+       // Re-throw the error so it can be handled by the caller
+       throw error;
+     }
+   }
+   isProcessingTranscriptionQueue = false;
+
+   /**
+    * Process the transcription queue
+    */
+   async processTranscriptionQueue() {
+     if (this.isProcessingTranscriptionQueue) return;
+     this.isProcessingTranscriptionQueue = true;
+     while (this.transcriptionQueue.length > 0) {
+       const item = this.transcriptionQueue.shift();
+       this.transcriptionQueue = []; // Old items are not needed anymore
+       if (item) {
+         // eslint-disable-next-line no-await-in-loop
+         await this.processTranscription(item).catch(error => {
+           this.handleError(`Transcription error: ${error}`);
+         });
+       }
+     }
+     this.isProcessingTranscriptionQueue = false;
+   }
+
+   /**
+    * Build prompt from initial prompt and previous slices
+    */
+   buildPrompt(currentSliceIndex) {
+     const promptParts = [];
+
+     // Add initial prompt if provided
+     if (this.options.initialPrompt) {
+       promptParts.push(this.options.initialPrompt);
+     }
+
+     // Add previous slice results if enabled
+     if (this.options.promptPreviousSlices) {
+       // Get transcription results from previous slices (up to the current slice)
+       const previousResults = Array.from(this.transcriptionResults.entries()).filter(_ref => {
+         let [sliceIndex] = _ref;
+         return sliceIndex < currentSliceIndex;
+       }).sort((_ref2, _ref3) => {
+         let [a] = _ref2;
+         let [b] = _ref3;
+         return a - b;
+       }) // Sort by slice index
+       .map(_ref4 => {
+         var _result$transcribeEve;
+         let [, result] = _ref4;
+         return (_result$transcribeEve = result.transcribeEvent.data) === null || _result$transcribeEve === void 0 ? void 0 : _result$transcribeEve.result;
+       }).filter(result => Boolean(result)); // Filter out empty results with type guard
+
+       if (previousResults.length > 0) {
+         promptParts.push(...previousResults);
+       }
+     }
+     return promptParts.join(' ') || undefined;
+   }
+
+   /**
+    * Process a single transcription
+    */
+   async processTranscription(item) {
+     if (!this.isActive) {
+       return;
+     }
+     this.isTranscribing = true;
+
+     // Emit stats update for status change
+     this.emitStatsUpdate('status_change');
+     const startTime = Date.now();
+     try {
+       var _this$callbacks$onTra, _this$callbacks5;
+       // Build prompt from initial prompt and previous slices
+       const prompt = this.buildPrompt(item.sliceIndex);
+       const audioBuffer = item.audioData.buffer;
+       const {
+         promise
+       } = this.whisperContext.transcribeData(audioBuffer, {
+         ...this.options.transcribeOptions,
+         prompt,
+         // Include the constructed prompt
+         onProgress: undefined // Disable progress for realtime
+       });
+
+       const result = await promise;
+       const endTime = Date.now();
+
+       // Create transcribe event
+       const {
+         sampleRate = 16000
+       } = this.options.audioStreamConfig || {};
+       const transcribeEvent = {
+         type: 'transcribe',
+         sliceIndex: item.sliceIndex,
+         data: result,
+         isCapturing: this.audioStream.isRecording(),
+         processTime: endTime - startTime,
+         recordingTime: item.audioData.length / (sampleRate / 1000) / 2,
+         // ms,
+         memoryUsage: this.sliceManager.getMemoryUsage(),
+         vadEvent: this.vadEvents.get(item.sliceIndex)
+       };
+
+       // Save transcription results
+       const slice = this.sliceManager.getSliceByIndex(item.sliceIndex);
+       if (slice) {
+         this.transcriptionResults.set(item.sliceIndex, {
+           slice: {
+             // Don't keep data in the slice
+             index: slice.index,
+             sampleCount: slice.sampleCount,
+             startTime: slice.startTime,
+             endTime: slice.endTime,
+             isProcessed: slice.isProcessed,
+             isReleased: slice.isReleased
+           },
+           transcribeEvent
+         });
+       }
+
+       // Emit transcribe event
+       (_this$callbacks$onTra = (_this$callbacks5 = this.callbacks).onTranscribe) === null || _this$callbacks$onTra === void 0 ? void 0 : _this$callbacks$onTra.call(_this$callbacks5, transcribeEvent);
+       this.vadEvents.delete(item.sliceIndex);
+
+       // Emit stats update for memory/slice changes
+       this.emitStatsUpdate('memory_change');
+       this.log(`Transcribed speech segment ${item.sliceIndex}: "${result.result}"`);
+     } catch (error) {
+       var _this$callbacks$onTra2, _this$callbacks6;
+       // Emit error event to transcribe callback
+       const errorEvent = {
+         type: 'error',
+         sliceIndex: item.sliceIndex,
+         data: undefined,
+         isCapturing: this.audioStream.isRecording(),
+         processTime: Date.now() - startTime,
+         recordingTime: 0,
+         memoryUsage: this.sliceManager.getMemoryUsage(),
+         vadEvent: this.vadEvents.get(item.sliceIndex)
+       };
+       (_this$callbacks$onTra2 = (_this$callbacks6 = this.callbacks).onTranscribe) === null || _this$callbacks$onTra2 === void 0 ? void 0 : _this$callbacks$onTra2.call(_this$callbacks6, errorEvent);
+       this.vadEvents.delete(item.sliceIndex);
+       this.handleError(`Transcription failed for speech segment ${item.sliceIndex}: ${error}`);
+     } finally {
+       // Check if we should continue processing queue
+       if (this.transcriptionQueue.length > 0) {
+         await this.processTranscriptionQueue();
+       } else {
+         this.isTranscribing = false;
+       }
+     }
+   }
+
+   /**
+    * Handle audio status changes
+    */
+   handleAudioStatusChange(isRecording) {
+     this.log(`Audio recording: ${isRecording ? 'started' : 'stopped'}`);
+   }
+
+   /**
+    * Handle errors from components
+    */
+   handleError(error) {
+     var _this$callbacks$onErr, _this$callbacks7;
+     this.log(`Error: ${error}`);
+     (_this$callbacks$onErr = (_this$callbacks7 = this.callbacks).onError) === null || _this$callbacks$onErr === void 0 ? void 0 : _this$callbacks$onErr.call(_this$callbacks7, error);
+   }
+
+   /**
+    * Update callbacks
+    */
+   updateCallbacks(callbacks) {
+     this.callbacks = {
+       ...this.callbacks,
+       ...callbacks
+     };
+   }
+
+   /**
+    * Update VAD options dynamically
+    */
+   updateVadOptions(options) {
+     this.options.vadOptions = {
+       ...this.options.vadOptions,
+       ...options
+     };
+   }
+
+   /**
+    * Update auto-slice options dynamically
+    */
+   updateAutoSliceOptions(options) {
+     if (options.autoSliceOnSpeechEnd !== undefined) {
+       this.options.autoSliceOnSpeechEnd = options.autoSliceOnSpeechEnd;
+     }
+     if (options.autoSliceThreshold !== undefined) {
+       this.options.autoSliceThreshold = options.autoSliceThreshold;
+     }
+     this.log(`Auto-slice options updated: enabled=${this.options.autoSliceOnSpeechEnd}, threshold=${this.options.autoSliceThreshold}`);
+   }
+
+   /**
+    * Get current statistics
+    */
+   getStatistics() {
+     return {
+       isActive: this.isActive,
+       isTranscribing: this.isTranscribing,
+       vadEnabled: this.vadEnabled,
+       audioStats: {
+         isRecording: this.audioStream.isRecording(),
+         accumulatedSamples: this.accumulatedData.length
+       },
+       vadStats: this.vadEnabled ? {
+         enabled: true,
+         contextAvailable: !!this.vadContext,
+         lastSpeechDetectedTime: this.lastSpeechDetectedTime
+       } : null,
+       sliceStats: this.sliceManager.getCurrentSliceInfo(),
+       autoSliceConfig: {
+         enabled: this.options.autoSliceOnSpeechEnd,
+         threshold: this.options.autoSliceThreshold,
+         targetDuration: this.options.audioSliceSec,
+         minDuration: this.options.audioMinSec
+       }
+     };
+   }
+
+   /**
+    * Get all transcription results
+    */
+   getTranscriptionResults() {
+     return Array.from(this.transcriptionResults.values());
+   }
+
+   /**
+    * Force move to the next slice, finalizing the current one regardless of capacity
+    */
+   async nextSlice() {
+     var _this$callbacks$onTra3, _this$callbacks8;
+     if (!this.isActive) {
+       this.log('Cannot force next slice - transcriber is not active');
+       return;
+     }
+
+     // Emit start event to indicate slice processing has started
+     const startEvent = {
+       type: 'start',
+       sliceIndex: -1,
+       // Use -1 to indicate forced slice
+       data: undefined,
+       isCapturing: this.audioStream.isRecording(),
+       processTime: 0,
+       recordingTime: 0,
+       memoryUsage: this.sliceManager.getMemoryUsage()
+     };
+     (_this$callbacks$onTra3 = (_this$callbacks8 = this.callbacks).onTranscribe) === null || _this$callbacks$onTra3 === void 0 ? void 0 : _this$callbacks$onTra3.call(_this$callbacks8, startEvent);
+
+     // Check if there are pending transcriptions or currently transcribing
+     if (this.isTranscribing || this.transcriptionQueue.length > 0) {
+       this.log('Waiting for pending transcriptions to complete before forcing next slice...');
+
+       // Wait for current transcription queue to be processed
+       await this.processTranscriptionQueue();
+     }
+     const result = this.sliceManager.forceNextSlice();
+     if (result.slice) {
+       this.log(`Forced slice ${result.slice.index} ready (${result.slice.data.length} bytes)`);
+
+       // Process VAD for the slice if enabled
+       if (!this.isTranscribing && this.vadEnabled) {
+         this.processSliceVAD(result.slice).catch(error => {
+           this.handleError(`VAD processing error: ${error}`);
+         });
+       } else if (!this.isTranscribing) {
+         // If VAD is disabled, transcribe slices as they become ready
+         this.queueSliceForTranscription(result.slice).catch(error => {
+           this.handleError(`Failed to queue slice for transcription: ${error}`);
+         });
+       } else {
+         this.log(`Skipping slice ${result.slice.index} - already transcribing`);
+       }
+       this.emitStatsUpdate('memory_change');
+     } else {
+       this.log('Forced next slice but no slice data to process');
+     }
+   }
+
+   /**
+    * Reset all components
+    */
+   reset() {
+     this.sliceManager.reset();
+     this.transcriptionQueue = [];
+     this.isTranscribing = false;
+     this.accumulatedData = new Uint8Array(0);
+
+     // Reset simplified VAD state
+     this.lastSpeechDetectedTime = -1;
+     this.lastVadState = 'silence';
+
+     // Reset stats snapshot for clean start
+     this.lastStatsSnapshot = null;
+
+     // Cancel WAV file writing if in progress
+     if (this.wavFileWriter) {
+       this.wavFileWriter.cancel().catch(error => {
+         this.log(`Failed to cancel WAV file writing: ${error}`);
+       });
+       this.wavFileWriter = null;
+     }
+
+     // Clear transcription results
+     this.transcriptionResults.clear();
+
+     // Clear VAD events
+     this.vadEvents.clear();
+   }
+
+   /**
+    * Release all resources
+    */
+   async release() {
+     var _this$wavFileWriter;
+     if (this.isActive) {
+       await this.stop();
+     }
+     await this.audioStream.release();
+     await ((_this$wavFileWriter = this.wavFileWriter) === null || _this$wavFileWriter === void 0 ? void 0 : _this$wavFileWriter.finalize());
+     this.vadContext = undefined;
+   }
+
+   /**
+    * Emit stats update event if stats have changed significantly
+    */
+   emitStatsUpdate(eventType) {
+     const currentStats = this.getStatistics();
+
+     // Check if stats have changed significantly
+     if (!this.lastStatsSnapshot || RealtimeTranscriber.shouldEmitStatsUpdate(currentStats, this.lastStatsSnapshot)) {
+       var _this$callbacks$onSta4, _this$callbacks9;
+       const statsEvent = {
+         timestamp: Date.now(),
+         type: eventType,
+         data: currentStats
+       };
+       (_this$callbacks$onSta4 = (_this$callbacks9 = this.callbacks).onStatsUpdate) === null || _this$callbacks$onSta4 === void 0 ? void 0 : _this$callbacks$onSta4.call(_this$callbacks9, statsEvent);
+       this.lastStatsSnapshot = {
+         ...currentStats
+       };
+     }
+   }
+
+   /**
+    * Determine if stats update should be emitted
+    */
+   static shouldEmitStatsUpdate(current, previous) {
+     var _current$sliceStats, _current$sliceStats$m, _previous$sliceStats, _previous$sliceStats$;
+     // Always emit on status changes
+     if (current.isActive !== previous.isActive || current.isTranscribing !== previous.isTranscribing) {
+       return true;
+     }
+
+     // Emit on significant memory changes (>10% or >5MB)
+     const currentMemory = ((_current$sliceStats = current.sliceStats) === null || _current$sliceStats === void 0 ? void 0 : (_current$sliceStats$m = _current$sliceStats.memoryUsage) === null || _current$sliceStats$m === void 0 ? void 0 : _current$sliceStats$m.estimatedMB) || 0;
+     const previousMemory = ((_previous$sliceStats = previous.sliceStats) === null || _previous$sliceStats === void 0 ? void 0 : (_previous$sliceStats$ = _previous$sliceStats.memoryUsage) === null || _previous$sliceStats$ === void 0 ? void 0 : _previous$sliceStats$.estimatedMB) || 0;
+     const memoryDiff = Math.abs(currentMemory - previousMemory);
+     if (memoryDiff > 5 || previousMemory > 0 && memoryDiff / previousMemory > 0.1) {
+       return true;
+     }
+     return false;
+   }
+
+   /**
+    * Logger function
+    */
+   log(message) {
+     this.options.logger(`[RealtimeTranscriber] ${message}`);
+   }
+ }
+ exports.RealtimeTranscriber = RealtimeTranscriber;
+ //# sourceMappingURL=RealtimeTranscriber.js.map
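
Taken together, the compiled class above implies the following usage pattern. This is a minimal sketch reconstructed from the source in this diff: the dependency, option, and callback names come from the constructor and methods shown, while the import path and the creation of the whisper/VAD contexts, audio stream, and fs module are assumptions to be checked against the README and the typings under package/lib/typescript/.

import { RealtimeTranscriber } from 'whisper.rn/realtime-transcription' // import path assumed

// Assumed to be provided by the host app; creating them is outside this sketch.
declare const whisperContext: any // initialized whisper.rn context (provides transcribeData)
declare const vadContext: any     // optional VAD context (provides detectSpeechData)
declare const audioStream: any    // e.g. the AudioPcmStreamAdapter added in this release
declare const fs: any             // file-system module, only needed for audioOutputPath

const transcriber = new RealtimeTranscriber(
  { whisperContext, vadContext, audioStream, fs },
  {
    audioSliceSec: 30,          // target slice length in seconds
    audioMinSec: 1,             // segments shorter than this are skipped
    autoSliceOnSpeechEnd: true, // cut a slice early on speech_end / silence...
    autoSliceThreshold: 0.5,    // ...once it reaches 50% of audioSliceSec
    audioOutputPath: '/tmp/realtime.wav', // optional WAV dump (path is illustrative)
  },
  {
    onTranscribe: (ev: any) => console.log(ev.sliceIndex, ev.data?.result),
    onVad: (ev: any) => console.log('vad', ev.type, ev.confidence),
    onStatsUpdate: (ev: any) => console.log('stats', ev.type),
    onError: (err: any) => console.warn(err),
  },
)

await transcriber.start()
// ... later
await transcriber.stop()
await transcriber.release()

With the defaults shown in the constructor (audioSliceSec 30, audioMinSec 1, autoSliceThreshold 0.5), checkAutoSlice forces a new slice on a speech_end or silence event once the current slice is at least 30 × 0.5 = 15 seconds old; nextSlice() exposes the same cut manually.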