whisper.rn 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/README.md +69 -0
  2. package/android/src/main/java/com/rnwhisper/RNWhisper.java +211 -0
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +34 -4
  4. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +157 -0
  5. package/android/src/main/jni.cpp +196 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  12. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  13. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  14. package/ios/RNWhisper.mm +147 -0
  15. package/ios/RNWhisperContext.mm +18 -24
  16. package/ios/RNWhisperVadContext.h +29 -0
  17. package/ios/RNWhisperVadContext.mm +152 -0
  18. package/jest/mock.js +19 -0
  19. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  20. package/lib/commonjs/index.js +111 -1
  21. package/lib/commonjs/index.js.map +1 -1
  22. package/lib/module/NativeRNWhisper.js.map +1 -1
  23. package/lib/module/index.js +112 -0
  24. package/lib/module/index.js.map +1 -1
  25. package/lib/typescript/NativeRNWhisper.d.ts +35 -0
  26. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  27. package/lib/typescript/index.d.ts +39 -3
  28. package/lib/typescript/index.d.ts.map +1 -1
  29. package/package.json +1 -1
  30. package/src/NativeRNWhisper.ts +48 -0
  31. package/src/index.ts +132 -1
package/src/index.ts CHANGED
@@ -5,11 +5,13 @@ import {
5
5
  DeviceEventEmitterStatic,
6
6
  Image,
7
7
  } from 'react-native'
8
- import RNWhisper, { NativeWhisperContext } from './NativeRNWhisper'
8
+ import RNWhisper, { NativeWhisperContext, NativeWhisperVadContext } from './NativeRNWhisper'
9
9
  import type {
10
10
  TranscribeOptions,
11
11
  TranscribeResult,
12
12
  CoreMLAsset,
13
+ VadOptions,
14
+ VadSegment,
13
15
  } from './NativeRNWhisper'
14
16
  import AudioSessionIos from './AudioSessionIos'
15
17
  import type {
@@ -34,6 +36,8 @@ export type {
34
36
  AudioSessionCategoryIos,
35
37
  AudioSessionCategoryOptionIos,
36
38
  AudioSessionModeIos,
39
+ VadOptions,
40
+ VadSegment,
37
41
  }
38
42
 
39
43
  const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress'
@@ -582,3 +586,130 @@ export const isUseCoreML: boolean = !!useCoreML
582
586
  export const isCoreMLAllowFallback: boolean = !!coreMLAllowFallback
583
587
 
584
588
  export { AudioSessionIos }
589
+
590
+ //
591
+ // VAD (Voice Activity Detection) Context
592
+ //
593
+
594
+ export type VadContextOptions = {
595
+ filePath: string | number
596
+ /** Is the file path a bundle asset for pure string filePath */
597
+ isBundleAsset?: boolean
598
+ /** Use GPU if available. Currently iOS only */
599
+ useGpu?: boolean
600
+ /** Number of threads to use during computation (Default: 2 for 4-core devices, 4 for more cores) */
601
+ nThreads?: number
602
+ }
603
+
604
+ export class WhisperVadContext {
605
+ id: number
606
+
607
+ gpu: boolean = false
608
+
609
+ reasonNoGPU: string = ''
610
+
611
+ constructor({
612
+ contextId,
613
+ gpu,
614
+ reasonNoGPU,
615
+ }: NativeWhisperVadContext) {
616
+ this.id = contextId
617
+ this.gpu = gpu
618
+ this.reasonNoGPU = reasonNoGPU
619
+ }
620
+
621
+ /**
622
+ * Detect speech segments in audio file (path or base64 encoded wav file)
623
+ * base64: need add `data:audio/wav;base64,` prefix
624
+ */
625
+ async detectSpeech(
626
+ filePathOrBase64: string | number,
627
+ options: VadOptions = {}
628
+ ): Promise<VadSegment[]> {
629
+ let path = ''
630
+ if (typeof filePathOrBase64 === 'number') {
631
+ try {
632
+ const source = Image.resolveAssetSource(filePathOrBase64)
633
+ if (source) path = source.uri
634
+ } catch (e) {
635
+ throw new Error(`Invalid asset: ${filePathOrBase64}`)
636
+ }
637
+ } else {
638
+ if (filePathOrBase64.startsWith('http'))
639
+ throw new Error(
640
+ 'VAD remote file is not supported, please download it first',
641
+ )
642
+ path = filePathOrBase64
643
+ }
644
+ if (path.startsWith('file://')) path = path.slice(7)
645
+
646
+ // Check if this is base64 encoded audio data
647
+ if (path.startsWith('data:audio/')) {
648
+ // This is base64 encoded audio data, use the raw data method
649
+ return RNWhisper.vadDetectSpeech(this.id, path, options)
650
+ } else {
651
+ // This is a file path, use the file method
652
+ return RNWhisper.vadDetectSpeechFile(this.id, path, options)
653
+ }
654
+ }
655
+
656
+ /**
657
+ * Detect speech segments in raw audio data (base64 encoded float32 PCM data)
658
+ */
659
+ async detectSpeechData(
660
+ audioData: string,
661
+ options: VadOptions = {}
662
+ ): Promise<VadSegment[]> {
663
+ return RNWhisper.vadDetectSpeech(this.id, audioData, options)
664
+ }
665
+
666
+ async release(): Promise<void> {
667
+ return RNWhisper.releaseVadContext(this.id)
668
+ }
669
+ }
670
+
671
+ /**
672
+ * Initialize a VAD context for voice activity detection
673
+ * @param options VAD context options
674
+ * @returns Promise resolving to WhisperVadContext instance
675
+ */
676
+ export async function initWhisperVad({
677
+ filePath,
678
+ isBundleAsset,
679
+ useGpu = true,
680
+ nThreads,
681
+ }: VadContextOptions): Promise<WhisperVadContext> {
682
+ let path = ''
683
+ if (typeof filePath === 'number') {
684
+ try {
685
+ const source = Image.resolveAssetSource(filePath)
686
+ if (source) {
687
+ path = source.uri
688
+ }
689
+ } catch (e) {
690
+ throw new Error(`Invalid asset: ${filePath}`)
691
+ }
692
+ } else {
693
+ if (!isBundleAsset && filePath.startsWith('http'))
694
+ throw new Error(
695
+ 'VAD remote file is not supported, please download it first',
696
+ )
697
+ path = filePath
698
+ }
699
+ if (path.startsWith('file://')) path = path.slice(7)
700
+ const { contextId, gpu, reasonNoGPU } = await RNWhisper.initVadContext({
701
+ filePath: path,
702
+ isBundleAsset: !!isBundleAsset,
703
+ useGpu,
704
+ nThreads,
705
+ })
706
+ return new WhisperVadContext({ contextId, gpu, reasonNoGPU })
707
+ }
708
+
709
+ /**
710
+ * Release all VAD contexts and free their memory
711
+ * @returns Promise resolving when all contexts are released
712
+ */
713
+ export async function releaseAllWhisperVad(): Promise<void> {
714
+ return RNWhisper.releaseAllVadContexts()
715
+ }