whisper.rn 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/README.md +1 -1
  2. package/android/src/main/java/com/rnwhisper/WhisperContext.java +5 -0
  3. package/android/src/main/jni.cpp +13 -0
  4. package/cpp/ggml-alloc.c +78 -26
  5. package/cpp/ggml-alloc.h +9 -0
  6. package/cpp/ggml-backend-impl.h +1 -1
  7. package/cpp/ggml-backend-reg.cpp +19 -3
  8. package/cpp/ggml-backend.cpp +72 -20
  9. package/cpp/ggml-backend.h +2 -1
  10. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  11. package/cpp/ggml-cpu/arch/arm/repack.cpp +1004 -0
  12. package/cpp/ggml-cpu/arch/x86/repack.cpp +6 -6
  13. package/cpp/ggml-cpu/arch-fallback.h +50 -2
  14. package/cpp/ggml-cpu/ggml-cpu-impl.h +1 -1
  15. package/cpp/ggml-cpu/ggml-cpu.c +139 -58
  16. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  17. package/cpp/ggml-cpu/ops.cpp +170 -18
  18. package/cpp/ggml-cpu/ops.h +1 -0
  19. package/cpp/ggml-cpu/repack.cpp +531 -5
  20. package/cpp/ggml-cpu/repack.h +14 -0
  21. package/cpp/ggml-cpu/simd-mappings.h +16 -18
  22. package/cpp/ggml-cpu/vec.cpp +41 -1
  23. package/cpp/ggml-cpu/vec.h +241 -138
  24. package/cpp/ggml-cpu.h +1 -0
  25. package/cpp/ggml-impl.h +0 -4
  26. package/cpp/ggml-metal/ggml-metal-context.m +26 -16
  27. package/cpp/ggml-metal/ggml-metal-device.cpp +452 -371
  28. package/cpp/ggml-metal/ggml-metal-device.h +87 -65
  29. package/cpp/ggml-metal/ggml-metal-device.m +263 -104
  30. package/cpp/ggml-metal/ggml-metal-impl.h +58 -4
  31. package/cpp/ggml-metal/ggml-metal-ops.cpp +415 -98
  32. package/cpp/ggml-metal/ggml-metal-ops.h +4 -0
  33. package/cpp/ggml-metal/ggml-metal.cpp +6 -5
  34. package/cpp/ggml-metal/ggml-metal.metal +404 -34
  35. package/cpp/ggml.c +110 -31
  36. package/cpp/ggml.h +51 -12
  37. package/cpp/jsi/RNWhisperJSI.cpp +1 -0
  38. package/cpp/whisper.cpp +17 -4
  39. package/ios/CMakeLists.txt +21 -1
  40. package/ios/RNWhisperContext.mm +5 -0
  41. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  42. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  43. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  44. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  45. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  46. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  47. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Info.plist +0 -0
  48. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  49. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  50. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  51. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  52. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  53. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  55. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  56. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  57. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  58. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  59. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  60. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  61. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  62. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  63. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  64. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  65. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +51 -12
  66. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Info.plist +0 -0
  67. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-metal.metal +404 -34
  68. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  69. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-alloc.h +9 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend-impl.h +1 -1
  71. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +2 -1
  72. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +1 -0
  73. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +0 -4
  74. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +51 -12
  75. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Info.plist +0 -0
  76. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/_CodeSignature/CodeResources +1 -1
  77. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-metal.metal +404 -34
  78. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  79. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  80. package/lib/commonjs/jest-mock.js +2 -0
  81. package/lib/commonjs/jest-mock.js.map +1 -1
  82. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js +156 -12
  83. package/lib/commonjs/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  84. package/lib/commonjs/version.json +1 -1
  85. package/lib/module/NativeRNWhisper.js.map +1 -1
  86. package/lib/module/jest-mock.js +2 -0
  87. package/lib/module/jest-mock.js.map +1 -1
  88. package/lib/module/realtime-transcription/RealtimeTranscriber.js +155 -12
  89. package/lib/module/realtime-transcription/RealtimeTranscriber.js.map +1 -1
  90. package/lib/module/version.json +1 -1
  91. package/lib/typescript/NativeRNWhisper.d.ts +1 -0
  92. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  93. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts +29 -0
  94. package/lib/typescript/realtime-transcription/RealtimeTranscriber.d.ts.map +1 -1
  95. package/lib/typescript/realtime-transcription/types.d.ts +7 -0
  96. package/lib/typescript/realtime-transcription/types.d.ts.map +1 -1
  97. package/package.json +1 -1
  98. package/src/NativeRNWhisper.ts +1 -0
  99. package/src/jest-mock.ts +2 -0
  100. package/src/realtime-transcription/RealtimeTranscriber.ts +179 -9
  101. package/src/realtime-transcription/types.ts +9 -0
  102. package/src/version.json +1 -1
@@ -18,6 +18,8 @@ import type {
18
18
  } from './types'
19
19
  import { VAD_PRESETS } from './types'
20
20
 
21
+ const SILENCE_SEGMENT_REGEX = /\[(\s*\w+\s*)]/i
22
+
21
23
  /**
22
24
  * RealtimeTranscriber provides real-time audio transcription with VAD support.
23
25
  *
@@ -55,6 +57,9 @@ export class RealtimeTranscriber {
55
57
  audioOutputPath?: string
56
58
  audioStreamConfig?: AudioStreamConfig
57
59
  logger: (message: string) => void
60
+ // VAD optimization options for low-end CPU
61
+ vadThrottleMs: number // Minimum time between VAD calls (ms)
62
+ vadSkipRatio: number // VAD runs only on slices whose index is a multiple of (vadSkipRatio + 1); the rest bypass VAD (0 = no skipping)
58
63
  }
59
64
 
60
65
  private isActive = false
@@ -81,6 +86,15 @@ export class RealtimeTranscriber {
81
86
  // Track last stats to emit only when changed
82
87
  private lastStatsSnapshot: any = null
83
88
 
89
+ // VAD throttling for low-end CPU optimization
90
+ private isProcessingVAD = false
91
+
92
+ private lastVadProcessTime = 0
93
+
94
+ private vadProcessingQueue: any[] = []
95
+
96
+ private skippedVadCount = 0
97
+
84
98
  // Store transcription results by slice index
85
99
  private transcriptionResults: Map<
86
100
  number,
@@ -115,6 +129,9 @@ export class RealtimeTranscriber {
115
129
  promptPreviousSlices: options.promptPreviousSlices ?? true,
116
130
  audioOutputPath: options.audioOutputPath,
117
131
  logger: options.logger || (() => {}),
132
+ // VAD optimization options for low-end CPU
133
+ vadThrottleMs: options.vadThrottleMs ?? 1500, // Minimum time between VAD calls (ms)
134
+ vadSkipRatio: options.vadSkipRatio ?? 0, // VAD runs only on slices whose index is a multiple of (vadSkipRatio + 1); the rest bypass VAD (0 = no skipping)
118
135
  }
119
136
 
120
137
  // Apply VAD preset if specified
@@ -291,11 +308,9 @@ export class RealtimeTranscriber {
291
308
  `Slice ${result.slice.index} ready (${result.slice.data.length} bytes)`,
292
309
  )
293
310
 
294
- // Process VAD for the slice if enabled
311
+ // Process VAD for the slice if enabled (with throttling for low-end CPU)
295
312
  if (!this.isTranscribing && this.vadEnabled) {
296
- this.processSliceVAD(result.slice).catch((error: any) => {
297
- this.handleError(`VAD processing error: ${error}`)
298
- })
313
+ this.queueVADProcessing(result.slice)
299
314
  } else if (!this.isTranscribing) {
300
315
  // If VAD is disabled, transcribe slices as they become ready
301
316
  this.queueSliceForTranscription(result.slice).catch((error: any) => {
@@ -374,6 +389,106 @@ export class RealtimeTranscriber {
374
389
  }
375
390
  }
376
391
 
392
+ /**
393
+ * Queue VAD processing with throttling for low-end CPU systems
394
+ * This prevents VAD from blocking the audio pipeline
395
+ */
396
+ private queueVADProcessing(slice: any): void {
397
+ // Check if we should skip this slice based on skip ratio
398
+ if (this.options.vadSkipRatio > 0) {
399
+ const shouldSkip = slice.index % (this.options.vadSkipRatio + 1) !== 0
400
+ if (shouldSkip) {
401
+ this.skippedVadCount += 1
402
+ this.log(
403
+ `Skipping VAD for slice ${slice.index} (skip ratio: ${this.options.vadSkipRatio})`,
404
+ )
405
+ // VAD is bypassed for this slice, so queue it for transcription unconditionally
406
+ this.queueSliceForTranscription(slice).catch((error: any) => {
407
+ this.handleError(`Failed to queue skipped slice for transcription: ${error}`)
408
+ })
409
+ return
410
+ }
411
+ }
412
+
413
+ // Check throttling - don't process if we recently processed VAD
414
+ const now = Date.now()
415
+ const timeSinceLastVad = now - this.lastVadProcessTime
416
+
417
+ if (this.isProcessingVAD) {
418
+ // VAD is already running, queue this slice
419
+ this.vadProcessingQueue.push(slice)
420
+ this.log(
421
+ `VAD busy, queued slice ${slice.index} (queue size: ${this.vadProcessingQueue.length})`,
422
+ )
423
+ return
424
+ }
425
+
426
+ if (timeSinceLastVad < this.options.vadThrottleMs) {
427
+ // Too soon since last VAD, queue it
428
+ this.vadProcessingQueue.push(slice)
429
+ this.log(
430
+ `VAD throttled, queued slice ${slice.index} (will process in ${
431
+ this.options.vadThrottleMs - timeSinceLastVad
432
+ }ms)`,
433
+ )
434
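+ // Schedule processing once the remaining throttle time has elapsed (NOTE(review): the timer handle is discarded, so this timer cannot be cancelled later)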
435
+ setTimeout(() => {
436
+ this.processVADQueue()
437
+ }, this.options.vadThrottleMs - timeSinceLastVad)
438
+ return
439
+ }
440
+
441
+ this.processSliceVADThrottled(slice)
442
+ }
443
+
444
+ /**
445
+ * Process the VAD queue
446
+ */
447
+ private processVADQueue(): void {
448
+ if (this.isProcessingVAD || this.vadProcessingQueue.length === 0) {
449
+ return
450
+ }
451
+
452
+ // Get the most recent slice from queue (discard older ones for real-time performance)
453
+ const slice = this.vadProcessingQueue.pop()
454
+ this.vadProcessingQueue = [] // Clear queue, we only care about latest
455
+
456
+ if (slice) {
457
+ this.log(`Processing queued VAD for slice ${slice.index}`)
458
+ this.processSliceVADThrottled(slice)
459
+ }
460
+ }
461
+
462
+ /**
463
+ * Throttled wrapper for processSliceVAD
464
+ */
465
+ private async processSliceVADThrottled(slice: any): Promise<void> {
466
+ if (this.isProcessingVAD) {
467
+ // Already processing, re-queue
468
+ this.vadProcessingQueue.push(slice)
469
+ return
470
+ }
471
+
472
+ this.isProcessingVAD = true
473
+ this.lastVadProcessTime = Date.now()
474
+
475
+ try {
476
+ await this.processSliceVAD(slice)
477
+ } catch (error) {
478
+ this.handleError(`VAD processing error: ${error}`)
479
+ } finally {
480
+ this.isProcessingVAD = false
481
+
482
+ // Process next item in queue if available
483
+ if (this.vadProcessingQueue.length > 0) {
484
+ // Schedule next processing with a small delay to yield to event loop
485
+ setTimeout(() => {
486
+ this.processVADQueue()
487
+ }, 50) // 50ms delay between VAD processings
488
+ }
489
+ }
490
+ }
491
+
377
492
  /**
378
493
  * Process VAD for a completed slice
379
494
  */
@@ -391,7 +506,25 @@ export class RealtimeTranscriber {
391
506
  return
392
507
  }
393
508
 
394
- // Convert base64 back to Uint8Array for VAD processing
509
+ // Check if user callback allows VAD processing
510
+ if (this.callbacks.onBeginVad) {
511
+ const {
512
+ sampleRate = 16000,
513
+ channels = 1,
514
+ } = this.options.audioStreamConfig || {}
515
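+ const duration = audioData.length / sampleRate / channels * 1000 // Convert to milliseconds (NOTE(review): audioData.length is a byte count if audioData is a Uint8Array - confirm whether bytes per sample must also be divided out)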
516
+ const shouldProcessVad =
517
+ (await this.callbacks.onBeginVad({
518
+ sliceIndex: slice.index,
519
+ audioData,
520
+ duration,
521
+ })) ?? true
522
+
523
+ if (!shouldProcessVad) {
524
+ this.log(`User callback declined VAD processing for slice ${slice.index}`)
525
+ return
526
+ }
527
+ }
395
528
 
396
529
  // Detect speech in the slice
397
530
  const vadEvent = await this.detectSpeech(audioData, slice.index)
@@ -694,6 +827,9 @@ export class RealtimeTranscriber {
694
827
  const result = await promise
695
828
  const endTime = Date.now()
696
829
 
830
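+ // Normalize the result text: remove the first bracketed single-word marker such as "[ silence ]" or "[BLANK]" (the regex is not global; only result.result is rewritten here)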
831
+ result.result = result.result.replace(SILENCE_SEGMENT_REGEX, '').trim()
832
+
697
833
  // Create transcribe event
698
834
  const { sampleRate = 16000 } = this.options.audioStreamConfig || {}
699
835
  const transcribeEvent: RealtimeTranscribeEvent = {
@@ -707,6 +843,13 @@ export class RealtimeTranscriber {
707
843
  vadEvent: this.vadEvents.get(item.sliceIndex),
708
844
  }
709
845
 
846
+ // If the new result is just "." and this slice already has a stored transcription, reuse the previous event's data
847
+ const previousTranscribe = this.transcriptionResults.get(item.sliceIndex)
848
+ ?.transcribeEvent
849
+ if (previousTranscribe && result.result.trim() === '.') {
850
+ transcribeEvent.data = previousTranscribe.data
851
+ }
852
+
710
853
  // Save transcription results
711
854
  const slice = this.sliceManager.getSliceByIndex(item.sliceIndex)
712
855
  if (slice) {
@@ -812,6 +955,24 @@ export class RealtimeTranscriber {
812
955
  )
813
956
  }
814
957
 
958
+ /**
959
+ * Update VAD throttling options dynamically for low-end CPU optimization
960
+ */
961
+ updateVadThrottleOptions(options: {
962
+ vadThrottleMs?: number
963
+ vadSkipRatio?: number
964
+ }): void {
965
+ if (options.vadThrottleMs !== undefined) {
966
+ this.options.vadThrottleMs = options.vadThrottleMs
967
+ }
968
+ if (options.vadSkipRatio !== undefined) {
969
+ this.options.vadSkipRatio = options.vadSkipRatio
970
+ }
971
+ this.log(
972
+ `VAD throttle options updated: throttleMs=${this.options.vadThrottleMs}, skipRatio=${this.options.vadSkipRatio}`,
973
+ )
974
+ }
975
+
815
976
  /**
816
977
  * Get current statistics
817
978
  */
@@ -829,6 +990,11 @@ export class RealtimeTranscriber {
829
990
  enabled: true,
830
991
  contextAvailable: !!this.vadContext,
831
992
  lastSpeechDetectedTime: this.lastSpeechDetectedTime,
993
+ isProcessing: this.isProcessingVAD,
994
+ queueSize: this.vadProcessingQueue.length,
995
+ skippedCount: this.skippedVadCount,
996
+ throttleMs: this.options.vadThrottleMs,
997
+ skipRatio: this.options.vadSkipRatio,
832
998
  }
833
999
  : null,
834
1000
  sliceStats: this.sliceManager.getCurrentSliceInfo(),
@@ -890,11 +1056,9 @@ export class RealtimeTranscriber {
890
1056
  `Forced slice ${result.slice.index} ready (${result.slice.data.length} bytes)`,
891
1057
  )
892
1058
 
893
- // Process VAD for the slice if enabled
1059
+ // Process VAD for the slice if enabled (with throttling for low-end CPU)
894
1060
  if (!this.isTranscribing && this.vadEnabled) {
895
- this.processSliceVAD(result.slice).catch((error: any) => {
896
- this.handleError(`VAD processing error: ${error}`)
897
- })
1061
+ this.queueVADProcessing(result.slice)
898
1062
  } else if (!this.isTranscribing) {
899
1063
  // If VAD is disabled, transcribe slices as they become ready
900
1064
  this.queueSliceForTranscription(result.slice).catch((error: any) => {
@@ -923,6 +1087,12 @@ export class RealtimeTranscriber {
923
1087
  this.lastSpeechDetectedTime = -1
924
1088
  this.lastVadState = 'silence'
925
1089
 
1090
+ // Reset VAD throttling state
1091
+ this.isProcessingVAD = false
1092
+ this.lastVadProcessTime = 0
1093
+ this.vadProcessingQueue = []
1094
+ this.skippedVadCount = 0
1095
+
926
1096
  // Reset stats snapshot for clean start
927
1097
  this.lastStatsSnapshot = null
928
1098
 
@@ -190,6 +190,10 @@ export interface RealtimeOptions {
190
190
  autoSliceOnSpeechEnd?: boolean // default: false - automatically slice when speech ends and duration thresholds are met
191
191
  autoSliceThreshold?: number // default: 0.85 - percentage of audioSliceSec to trigger auto-slice
192
192
 
193
+ // VAD optimization options for low-end CPU
194
+ vadThrottleMs?: number // default: 1500 - Minimum time between VAD calls (ms)
195
+ vadSkipRatio?: number // default: 0 - VAD runs only on slices whose index is a multiple of (vadSkipRatio + 1); the rest bypass VAD (0 = no skipping)
196
+
193
197
  // Transcription settings
194
198
  transcribeOptions?: TranscribeOptions
195
199
 
@@ -250,6 +254,11 @@ export interface RealtimeTranscriberCallbacks {
250
254
  vadEvent?: RealtimeVadEvent
251
255
  }) => Promise<boolean>
252
256
  onTranscribe?: (event: RealtimeTranscribeEvent) => void
257
+ onBeginVad?: (sliceInfo: {
258
+ audioData: Uint8Array
259
+ sliceIndex: number
260
+ duration: number
261
+ }) => Promise<boolean>
253
262
  onVad?: (event: RealtimeVadEvent) => void
254
263
  onError?: (error: string) => void
255
264
  onStatusChange?: (isActive: boolean) => void
package/src/version.json CHANGED
@@ -1 +1 @@
1
- {"version":"1.8.2"}
1
+ {"version":"1.8.3"}