whisper.rn 0.4.2 → 0.5.0-rc.0

This diff compares the publicly available contents of the two package versions as published to their registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (98)
  1. package/README.md +1 -3
  2. package/android/build.gradle +70 -11
  3. package/android/src/main/CMakeLists.txt +28 -1
  4. package/android/src/main/java/com/rnwhisper/JSCallInvokerResolver.java +40 -0
  5. package/android/src/main/java/com/rnwhisper/RNWhisper.java +80 -27
  6. package/android/src/main/java/com/rnwhisper/WhisperContext.java +21 -9
  7. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +1 -1
  8. package/android/src/main/jni.cpp +79 -2
  9. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  12. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
  16. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +5 -0
  17. package/cpp/ggml-backend.cpp +36 -18
  18. package/cpp/ggml-backend.h +1 -1
  19. package/cpp/ggml-cpu/amx/mmq.cpp +10 -9
  20. package/cpp/ggml-cpu/arch/arm/quants.c +109 -108
  21. package/cpp/ggml-cpu/arch/arm/repack.cpp +13 -12
  22. package/cpp/ggml-cpu/arch/x86/quants.c +83 -82
  23. package/cpp/ggml-cpu/arch/x86/repack.cpp +20 -19
  24. package/cpp/ggml-cpu/common.h +3 -2
  25. package/cpp/ggml-cpu/ggml-cpu-impl.h +9 -3
  26. package/cpp/ggml-cpu/ggml-cpu.c +95 -17
  27. package/cpp/ggml-cpu/ggml-cpu.cpp +4 -0
  28. package/cpp/ggml-cpu/ops.cpp +775 -74
  29. package/cpp/ggml-cpu/ops.h +7 -0
  30. package/cpp/ggml-cpu/quants.c +25 -24
  31. package/cpp/ggml-cpu/repack.cpp +15 -14
  32. package/cpp/ggml-cpu/simd-mappings.h +211 -33
  33. package/cpp/ggml-cpu/vec.cpp +26 -2
  34. package/cpp/ggml-cpu/vec.h +99 -45
  35. package/cpp/ggml-cpu.h +2 -0
  36. package/cpp/ggml-impl.h +125 -183
  37. package/cpp/ggml-metal-impl.h +27 -0
  38. package/cpp/ggml-metal.m +298 -41
  39. package/cpp/ggml-quants.c +6 -6
  40. package/cpp/ggml-whisper-sim.metallib +0 -0
  41. package/cpp/ggml-whisper.metallib +0 -0
  42. package/cpp/ggml.c +269 -40
  43. package/cpp/ggml.h +122 -2
  44. package/cpp/gguf.cpp +5 -1
  45. package/cpp/jsi/RNWhisperJSI.cpp +681 -0
  46. package/cpp/jsi/RNWhisperJSI.h +44 -0
  47. package/cpp/jsi/ThreadPool.h +100 -0
  48. package/cpp/whisper.cpp +4 -0
  49. package/cpp/whisper.h +2 -0
  50. package/ios/RNWhisper.h +3 -0
  51. package/ios/RNWhisper.mm +66 -31
  52. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  53. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  54. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  55. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  56. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  57. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  58. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  59. package/ios/rnwhisper.xcframework/ios-arm64/rnwhisper.framework/rnwhisper +0 -0
  60. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  61. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  62. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  63. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  64. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  65. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  66. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  67. package/ios/rnwhisper.xcframework/ios-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  68. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  69. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  70. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  71. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  72. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/ggml.h +122 -2
  73. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/Headers/whisper.h +2 -0
  74. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/ggml-whisper.metallib +0 -0
  75. package/ios/rnwhisper.xcframework/tvos-arm64/rnwhisper.framework/rnwhisper +0 -0
  76. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-backend.h +1 -1
  77. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-cpu.h +2 -0
  78. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-impl.h +125 -183
  79. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml-metal-impl.h +27 -0
  80. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/ggml.h +122 -2
  81. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/Headers/whisper.h +2 -0
  82. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/ggml-whisper-sim.metallib +0 -0
  83. package/ios/rnwhisper.xcframework/tvos-arm64_x86_64-simulator/rnwhisper.framework/rnwhisper +0 -0
  84. package/jest/mock.js +1 -0
  85. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  86. package/lib/commonjs/index.js +83 -2
  87. package/lib/commonjs/index.js.map +1 -1
  88. package/lib/module/NativeRNWhisper.js.map +1 -1
  89. package/lib/module/index.js +83 -2
  90. package/lib/module/index.js.map +1 -1
  91. package/lib/typescript/NativeRNWhisper.d.ts +4 -0
  92. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  93. package/lib/typescript/index.d.ts +18 -6
  94. package/lib/typescript/index.d.ts.map +1 -1
  95. package/package.json +2 -3
  96. package/src/NativeRNWhisper.ts +2 -0
  97. package/src/index.ts +162 -33
  98. package/whisper-rn.podspec +6 -3
package/src/index.ts CHANGED
@@ -5,7 +5,10 @@ import {
   DeviceEventEmitterStatic,
   Image,
 } from 'react-native'
-import RNWhisper, { NativeWhisperContext, NativeWhisperVadContext } from './NativeRNWhisper'
+import RNWhisper, {
+  NativeWhisperContext,
+  NativeWhisperVadContext,
+} from './NativeRNWhisper'
 import type {
   TranscribeOptions,
   TranscribeResult,
@@ -21,6 +24,43 @@ import type {
 } from './AudioSessionIos'
 import { version } from './version.json'
 
+declare global {
+  // eslint-disable-next-line no-var
+  var whisperTranscribeData: (
+    contextId: number,
+    options: TranscribeOptions,
+    data: ArrayBuffer | SharedArrayBuffer,
+  ) => Promise<TranscribeResult>
+  // eslint-disable-next-line no-var
+  var whisperVadDetectSpeech: (
+    contextId: number,
+    options: VadOptions,
+    audioData: ArrayBuffer | SharedArrayBuffer,
+  ) => Promise<{ hasSpeech: boolean; segments: VadSegment[] }>
+}
+
+let jsiWhisperTranscribeData: (
+  contextId: number,
+  options: TranscribeOptions,
+  data: ArrayBuffer | SharedArrayBuffer,
+) => Promise<TranscribeResult>
+let jsiWhisperVadDetectSpeech: (
+  contextId: number,
+  options: VadOptions,
+  audioData: ArrayBuffer | SharedArrayBuffer,
+) => Promise<{ hasSpeech: boolean; segments: VadSegment[] }>
+
+RNWhisper.installJSIBindings()
+  .then(() => {
+    jsiWhisperTranscribeData = global.whisperTranscribeData
+    delete (global as any).whisperTranscribeData
+    jsiWhisperVadDetectSpeech = global.whisperVadDetectSpeech
+    delete (global as any).whisperVadDetectSpeech
+  })
+  .catch((e) => {
+    console.warn('Failed to install JSI bindings', e)
+  })
+
 let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
 if (Platform.OS === 'ios') {
   // @ts-ignore
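
Because installJSIBindings() resolves asynchronously at module load, an ArrayBuffer passed to transcribeData() very early in app startup can still hit the "JSI binding not installed" error guarded later in this diff. A minimal retry sketch, assuming a context has already been created; the helper name and retry policy are hypothetical and not part of whisper.rn:

  import type { WhisperContext } from 'whisper.rn'

  // Hypothetical helper: retry briefly if the JSI bindings are not installed yet.
  async function transcribePcmWithRetry(
    ctx: WhisperContext,
    pcm: ArrayBuffer,
    retries = 3,
  ) {
    for (let attempt = 0; ; attempt += 1) {
      try {
        // ArrayBuffer input routes through the JSI fast path added in 0.5.0
        return await ctx.transcribeData(pcm).promise
      } catch (e: any) {
        const notReady = String(e?.message).includes('not installed')
        if (!notReady || attempt >= retries) throw e
        // Give installJSIBindings() a moment to finish, then try again
        await new Promise((resolve) => setTimeout(resolve, 100))
      }
    }
  }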
@@ -188,10 +228,7 @@ export type BenchResult = {
 }
 
 const updateAudioSession = async (setting: AudioSessionSettingIos) => {
-  await AudioSessionIos.setCategory(
-    setting.category,
-    setting.options || [],
-  )
+  await AudioSessionIos.setCategory(setting.category, setting.options || [])
   if (setting.mode) {
     await AudioSessionIos.setMode(setting.mode)
   }
@@ -199,6 +236,8 @@ const updateAudioSession = async (setting: AudioSessionSettingIos) => {
 }
 
 export class WhisperContext {
+  ptr: number
+
   id: number
 
   gpu: boolean = false
@@ -206,16 +245,22 @@ export class WhisperContext {
   reasonNoGPU: string = ''
 
   constructor({
+    contextPtr,
     contextId,
     gpu,
     reasonNoGPU,
   }: NativeWhisperContext) {
+    this.ptr = contextPtr
     this.id = contextId
     this.gpu = gpu
     this.reasonNoGPU = reasonNoGPU
   }
 
-  private transcribeWithNativeMethod(method: 'transcribeFile' | 'transcribeData', data: string, options: TranscribeFileOptions = {}): {
+  private transcribeWithNativeMethod(
+    method: 'transcribeFile' | 'transcribeData',
+    data: string,
+    options: TranscribeFileOptions = {},
+  ): {
     stop: () => Promise<void>
     promise: Promise<TranscribeResult>
   } {
@@ -322,15 +367,76 @@
   }
 
   /**
-   * Transcribe audio data (base64 encoded float32 PCM data)
+   * Transcribe audio data (base64 encoded float32 PCM data or ArrayBuffer)
    */
-  transcribeData(data: string, options: TranscribeFileOptions = {}): {
+  transcribeData(
+    data: string | ArrayBuffer | SharedArrayBuffer,
+    options: TranscribeFileOptions = {},
+  ): {
     stop: () => Promise<void>
     promise: Promise<TranscribeResult>
   } {
+    if (data instanceof ArrayBuffer || data instanceof SharedArrayBuffer) {
+      // Use JSI function for ArrayBuffer
+      if (!jsiWhisperTranscribeData) {
+        throw new Error('JSI binding `whisperTranscribeData` not installed')
+      }
+      return this.transcribeDataArrayBuffer(data, options)
+    }
     return this.transcribeWithNativeMethod('transcribeData', data, options)
   }
 
+  /**
+   * Transcribe audio data from ArrayBuffer (16-bit PCM, mono, 16kHz)
+   */
+  private transcribeDataArrayBuffer(
+    data: ArrayBuffer | SharedArrayBuffer,
+    options: TranscribeFileOptions = {},
+  ): {
+    stop: () => Promise<void>
+    promise: Promise<TranscribeResult>
+  } {
+    const { onProgress, onNewSegments, ...rest } = options
+
+    // Generate a unique jobId for this transcription
+    const jobId = Math.floor(Math.random() * 10000)
+
+    const jsiOptions = {
+      ...rest,
+      onProgress: onProgress || undefined,
+      onNewSegments: onNewSegments || undefined,
+      jobId, // Pass jobId to native implementation
+    }
+
+    let isAborted = false
+    const promise = jsiWhisperTranscribeData(this.id, jsiOptions, data)
+      .then((result: any) => {
+        if (isAborted) {
+          return { ...result, isAborted: true }
+        }
+        return result
+      })
+      .catch((error: any) => {
+        if (isAborted) {
+          return { isAborted: true, error: 'Transcription aborted' }
+        }
+        throw error
+      })
+
+    return {
+      stop: async () => {
+        isAborted = true
+        try {
+          // Use the existing native abort method
+          await RNWhisper.abortTranscribe(this.id, jobId)
+        } catch (error) {
+          // Ignore errors if context is already released or job doesn't exist
+        }
+      },
+      promise,
+    }
+  }
+
   /** Transcribe the microphone audio stream, the microphone user permission is required */
   async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
     /** Stop the realtime transcribe */
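
A usage sketch of the new ArrayBuffer overload shown above, assuming a model loaded via initWhisper (the model path is a placeholder) and 16-bit mono 16 kHz PCM as stated in the doc comment:

  import { initWhisper } from 'whisper.rn'

  async function transcribePcmSketch(pcm16: Int16Array) {
    // Placeholder model path; any ggml Whisper model file works here.
    const ctx = await initWhisper({ filePath: 'path/to/ggml-base.bin' })

    // Passing an ArrayBuffer skips base64 encoding and goes through the JSI binding.
    const job = ctx.transcribeData(pcm16.buffer, { language: 'en' })
    // job.stop() aborts via the generated jobId, same as the base64 path.
    const result = await job.promise
    console.log(result.result)

    await ctx.release()
    return result
  }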
@@ -361,7 +467,7 @@ export class WhisperContext {
           t0: segment.t0 + tOffset,
           t1: segment.t1 + tOffset,
         })) || [],
-      }
+      },
     }
   }
 
@@ -404,7 +510,10 @@
       // iOS: Update audio session state
       await updateAudioSession(options?.audioSessionOnStartIos)
     }
-    if (Platform.OS === 'ios' && typeof options?.audioSessionOnStopIos === 'object') {
+    if (
+      Platform.OS === 'ios' &&
+      typeof options?.audioSessionOnStopIos === 'object'
+    ) {
       prevAudioSession = options?.audioSessionOnStopIos
     }
 
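
For context on the audioSessionOnStartIos/audioSessionOnStopIos handling above, a realtime usage sketch; the AudioSessionIos constant names follow the library's documented iOS audio-session helpers and should be treated as assumptions, and the model path is a placeholder:

  import { initWhisper, AudioSessionIos } from 'whisper.rn'

  async function realtimeSketch() {
    const ctx = await initWhisper({ filePath: 'path/to/ggml-base.bin' })

    const { stop, subscribe } = await ctx.transcribeRealtime({
      language: 'en',
      // iOS only: configure the audio session when capture starts...
      audioSessionOnStartIos: {
        category: AudioSessionIos.Category.PlayAndRecord,
        options: [AudioSessionIos.CategoryOption.MixWithOthers],
        mode: AudioSessionIos.Mode.Default,
      },
      // ...and restore the previous session state when capture stops.
      audioSessionOnStopIos: 'restore',
    })

    subscribe((evt) => {
      const { isCapturing, data } = evt
      if (data?.result) console.log(data.result)
      if (!isCapturing) console.log('capture finished')
    })

    return stop // call stop() to end the realtime session
  }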
@@ -468,8 +577,16 @@
 
   async bench(maxThreads: number): Promise<BenchResult> {
     const result = await RNWhisper.bench(this.id, maxThreads)
-    const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] = JSON.parse(result)
-    return { config, nThreads, encodeMs, decodeMs, batchMs, promptMs } as BenchResult
+    const [config, nThreads, encodeMs, decodeMs, batchMs, promptMs] =
+      JSON.parse(result)
+    return {
+      config,
+      nThreads,
+      encodeMs,
+      decodeMs,
+      batchMs,
+      promptMs,
+    } as BenchResult
   }
 
   async release(): Promise<void> {
@@ -495,7 +612,7 @@ export type ContextOptions = {
   /** Use GPU if available. Currently iOS only, if it's enabled, Core ML option will be ignored. */
   useGpu?: boolean
   /** Use Flash Attention, only recommended if GPU available */
-  useFlashAttn?: boolean,
+  useFlashAttn?: boolean
 }
 
 const coreMLModelAssetPaths = [
@@ -557,17 +674,18 @@ export async function initWhisper({
     path = filePath
   }
   if (path.startsWith('file://')) path = path.slice(7)
-  const { contextId, gpu, reasonNoGPU } = await RNWhisper.initContext({
-    filePath: path,
-    isBundleAsset: !!isBundleAsset,
-    useFlashAttn,
-    useGpu,
-    useCoreMLIos,
-    // Only development mode need download Core ML model assets (from packager server)
-    downloadCoreMLAssets: __DEV__ && !!coreMLAssets,
-    coreMLAssets,
-  })
-  return new WhisperContext({ contextId, gpu, reasonNoGPU })
+  const { contextPtr, contextId, gpu, reasonNoGPU } =
+    await RNWhisper.initContext({
+      filePath: path,
+      isBundleAsset: !!isBundleAsset,
+      useFlashAttn,
+      useGpu,
+      useCoreMLIos,
+      // Only development mode need download Core ML model assets (from packager server)
+      downloadCoreMLAssets: __DEV__ && !!coreMLAssets,
+      coreMLAssets,
+    })
+  return new WhisperContext({ contextPtr, contextId, gpu, reasonNoGPU })
 }
 
 export async function releaseAllWhisper(): Promise<void> {
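
A sketch of how the new contextPtr surfaces to callers: initWhisper now forwards it to WhisperContext, where it is exposed as ptr (see the constructor change earlier in this diff). The model path and option values below are placeholders:

  import { initWhisper, releaseAllWhisper } from 'whisper.rn'

  async function initSketch() {
    const ctx = await initWhisper({
      filePath: 'path/to/ggml-tiny.en.bin',
      useGpu: true, // iOS only; when enabled, the Core ML option is ignored
    })

    // ptr is the raw native context pointer (presumably for sharing the loaded
    // context with other native modules); id remains the JS-side handle.
    console.log('id:', ctx.id, 'ptr:', ctx.ptr)
    console.log('gpu:', ctx.gpu, ctx.gpu ? '' : ctx.reasonNoGPU)

    await releaseAllWhisper()
  }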
@@ -608,11 +726,7 @@ export class WhisperVadContext {
 
   reasonNoGPU: string = ''
 
-  constructor({
-    contextId,
-    gpu,
-    reasonNoGPU,
-  }: NativeWhisperVadContext) {
+  constructor({ contextId, gpu, reasonNoGPU }: NativeWhisperVadContext) {
     this.id = contextId
     this.gpu = gpu
     this.reasonNoGPU = reasonNoGPU
@@ -624,7 +738,7 @@ export class WhisperVadContext {
    */
   async detectSpeech(
     filePathOrBase64: string | number,
-    options: VadOptions = {}
+    options: VadOptions = {},
   ): Promise<VadSegment[]> {
     let path = ''
     if (typeof filePathOrBase64 === 'number') {
@@ -654,12 +768,27 @@
   }
 
   /**
-   * Detect speech segments in raw audio data (base64 encoded float32 PCM data)
+   * Detect speech segments in raw audio data (base64 encoded float32 PCM data or ArrayBuffer)
    */
   async detectSpeechData(
-    audioData: string,
-    options: VadOptions = {}
+    audioData: string | ArrayBuffer | SharedArrayBuffer,
+    options: VadOptions = {},
   ): Promise<VadSegment[]> {
+    if (
+      audioData instanceof ArrayBuffer ||
+      audioData instanceof SharedArrayBuffer
+    ) {
+      // Use JSI function for ArrayBuffer
+      if (!jsiWhisperVadDetectSpeech) {
+        throw new Error('JSI binding `whisperVadDetectSpeech` not installed')
+      }
+      const result = await jsiWhisperVadDetectSpeech(
+        this.id,
+        options,
+        audioData,
+      )
+      return result.segments || []
+    }
     return RNWhisper.vadDetectSpeech(this.id, audioData, options)
   }
 
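
A matching VAD sketch for the ArrayBuffer overload above, assuming the same 16-bit mono 16 kHz PCM layout as the transcription path, an existing WhisperVadContext, and VadSegment exposing t0/t1 timestamps (assumptions not confirmed by this diff):

  import type { WhisperVadContext } from 'whisper.rn'

  async function vadSketch(vad: WhisperVadContext, pcm16: Int16Array) {
    // ArrayBuffer input routes through the whisperVadDetectSpeech JSI binding;
    // string input still takes the base64 native path.
    const segments = await vad.detectSpeechData(pcm16.buffer)
    segments.forEach((s) => {
      console.log(`speech segment: t0=${s.t0} t1=${s.t1}`)
    })
    return segments
  }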
package/whisper-rn.podspec CHANGED
@@ -2,7 +2,7 @@ require "json"
 
 package = JSON.parse(File.read(File.join(__dir__, "package.json")))
 base_ld_flags = "-framework Accelerate -framework Foundation -framework Metal -framework MetalKit"
-base_compiler_flags = "-DWSP_GGML_USE_CPU -DWSP_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
+base_compiler_flags = "-DWSP_GGML_USE_CPU -DWSP_GGML_USE_ACCELERATE -pthread -Wno-shorten-64-to-32"
 folly_compiler_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -Wno-comma"
 
 # Use base_optimizer_flags = "" for debug builds
@@ -36,13 +36,12 @@ Pod::Spec.new do |s|
     s.resources = "cpp/**/*.{metallib}"
     base_compiler_flags += " -DRNWHISPER_BUILD_FROM_SOURCE"
   else
-    s.source_files = "ios/**/*.{h,m,mm}"
+    s.source_files = "ios/**/*.{h,m,mm}", "cpp/jsi/*.{h,cpp}"
     s.vendored_frameworks = "ios/rnwhisper.xcframework"
   end
 
   s.requires_arc = true
 
-  s.dependency "React-Core"
 
   s.compiler_flags = base_compiler_flags
   s.pod_target_xcconfig = {
@@ -51,9 +50,13 @@
     "OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags + " -std=c++17"
   }
 
+  s.dependency "React-callinvoker"
+  s.dependency "React"
   # Don't install the dependencies when we run `pod install` in the old architecture.
   if ENV['RCT_NEW_ARCH_ENABLED'] == '1' then
     install_modules_dependencies(s)
+  else
+    s.dependency "React-Core"
   end
 
   s.subspec "no-require-arc" do |ss|