whisper.rn 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -0
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +211 -0
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +34 -4
- package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +157 -0
- package/android/src/main/jni.cpp +196 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
- package/ios/RNWhisper.mm +147 -0
- package/ios/RNWhisperContext.mm +18 -24
- package/ios/RNWhisperVadContext.h +29 -0
- package/ios/RNWhisperVadContext.mm +152 -0
- package/jest/mock.js +19 -0
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +111 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +112 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +35 -0
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +39 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +48 -0
- package/src/index.ts +132 -1
package/src/index.ts
CHANGED
|
@@ -5,11 +5,13 @@ import {
|
|
|
5
5
|
DeviceEventEmitterStatic,
|
|
6
6
|
Image,
|
|
7
7
|
} from 'react-native'
|
|
8
|
-
import RNWhisper, { NativeWhisperContext } from './NativeRNWhisper'
|
|
8
|
+
import RNWhisper, { NativeWhisperContext, NativeWhisperVadContext } from './NativeRNWhisper'
|
|
9
9
|
import type {
|
|
10
10
|
TranscribeOptions,
|
|
11
11
|
TranscribeResult,
|
|
12
12
|
CoreMLAsset,
|
|
13
|
+
VadOptions,
|
|
14
|
+
VadSegment,
|
|
13
15
|
} from './NativeRNWhisper'
|
|
14
16
|
import AudioSessionIos from './AudioSessionIos'
|
|
15
17
|
import type {
|
|
@@ -34,6 +36,8 @@ export type {
|
|
|
34
36
|
AudioSessionCategoryIos,
|
|
35
37
|
AudioSessionCategoryOptionIos,
|
|
36
38
|
AudioSessionModeIos,
|
|
39
|
+
VadOptions,
|
|
40
|
+
VadSegment,
|
|
37
41
|
}
|
|
38
42
|
|
|
39
43
|
const EVENT_ON_TRANSCRIBE_PROGRESS = '@RNWhisper_onTranscribeProgress'
|
|
@@ -582,3 +586,130 @@ export const isUseCoreML: boolean = !!useCoreML
|
|
|
582
586
|
export const isCoreMLAllowFallback: boolean = !!coreMLAllowFallback
|
|
583
587
|
|
|
584
588
|
export { AudioSessionIos }
|
|
589
|
+
|
|
590
|
+
//
|
|
591
|
+
// VAD (Voice Activity Detection) Context
|
|
592
|
+
//
|
|
593
|
+
|
|
594
|
+
/**
 * Options accepted by `initWhisperVad` when creating a VAD context.
 */
export type VadContextOptions = {
  /**
   * Location of the VAD model: either a path string or a numeric asset id
   * (numeric ids are resolved through `Image.resolveAssetSource`).
   */
  filePath: number | string
  /** Marks a plain string `filePath` as referring to a bundle asset */
  isBundleAsset?: boolean
  /** Prefer the GPU when one is available. Currently iOS only */
  useGpu?: boolean
  /** Thread count used for computation (Default: 2 for 4-core devices, 4 for more cores) */
  nThreads?: number
}
|
|
603
|
+
|
|
604
|
+
/**
 * JS-side handle for a native VAD (Voice Activity Detection) context.
 *
 * Every method forwards to the `RNWhisper` native module, identifying the
 * context by `id`.
 */
export class WhisperVadContext {
  /** Identifier forwarded to the native module on every call. */
  id: number

  /** GPU flag as supplied to the constructor (presumably whether the GPU is in use). */
  gpu: boolean = false

  /** Reason text supplied to the constructor alongside `gpu`. */
  reasonNoGPU: string = ''

  constructor({ contextId, gpu, reasonNoGPU }: NativeWhisperVadContext) {
    this.id = contextId
    this.gpu = gpu
    this.reasonNoGPU = reasonNoGPU
  }

  /**
   * Detect speech segments in audio file (path or base64 encoded wav file)
   * base64: need to add the `data:audio/wav;base64,` prefix
   *
   * @param filePathOrBase64 File path, `data:audio/...` URI, or a numeric
   *   asset id that is resolved through `Image.resolveAssetSource`.
   * @param options VAD options forwarded unchanged to the native module.
   * @returns The segments reported by the native module.
   * @throws Error `Invalid asset: <id>` when a numeric asset cannot be
   *   resolved to a URI.
   * @throws Error when given an `http://` / `https://` URL; remote files
   *   must be downloaded first.
   */
  async detectSpeech(
    filePathOrBase64: string | number,
    options: VadOptions = {}
  ): Promise<VadSegment[]> {
    let path: string
    if (typeof filePathOrBase64 === 'number') {
      let uri: string | undefined
      try {
        uri = Image.resolveAssetSource(filePathOrBase64)?.uri
      } catch (e) {
        throw new Error(`Invalid asset: ${filePathOrBase64}`)
      }
      // An unresolvable asset must fail here rather than send an empty path
      // to the native module.
      if (!uri) throw new Error(`Invalid asset: ${filePathOrBase64}`)
      path = uri
    } else {
      // Match the URL scheme only, so local names that merely begin with
      // "http" (e.g. `httpdata.wav`) are still accepted.
      if (/^https?:\/\//i.test(filePathOrBase64))
        throw new Error(
          'VAD remote file is not supported, please download it first',
        )
      path = filePathOrBase64
    }
    // The native side receives plain filesystem paths, so drop the scheme.
    if (path.startsWith('file://')) path = path.slice(7)

    if (path.startsWith('data:audio/')) {
      // Base64 encoded audio data: use the raw data method.
      // NOTE(review): the full data URI (prefix included) is handed to the
      // same native method that `detectSpeechData` feeds raw float32 PCM
      // base64 - confirm the native side accepts both forms.
      return RNWhisper.vadDetectSpeech(this.id, path, options)
    }
    // Anything else is treated as a file path.
    return RNWhisper.vadDetectSpeechFile(this.id, path, options)
  }

  /**
   * Detect speech segments in raw audio data (base64 encoded float32 PCM data)
   *
   * @param audioData Base64 string forwarded unchanged to the native module.
   * @param options VAD options forwarded unchanged to the native module.
   */
  async detectSpeechData(
    audioData: string,
    options: VadOptions = {}
  ): Promise<VadSegment[]> {
    return RNWhisper.vadDetectSpeech(this.id, audioData, options)
  }

  /** Ask the native module to release this context. */
  async release(): Promise<void> {
    return RNWhisper.releaseVadContext(this.id)
  }
}
|
|
670
|
+
|
|
671
|
+
/**
 * Initialize a VAD context for voice activity detection
 *
 * A numeric `filePath` is resolved through `Image.resolveAssetSource`; a
 * leading `file://` is stripped before the path is handed to the native
 * module.
 *
 * @param options VAD context options (`useGpu` defaults to `true`)
 * @returns Promise resolving to WhisperVadContext instance
 * @throws Error `Invalid asset: <id>` when a numeric asset cannot be resolved
 *   to a URI.
 * @throws Error when `filePath` is an `http://` / `https://` URL and
 *   `isBundleAsset` is not set; remote files must be downloaded first.
 */
export async function initWhisperVad({
  filePath,
  isBundleAsset,
  useGpu = true,
  nThreads,
}: VadContextOptions): Promise<WhisperVadContext> {
  let path: string
  if (typeof filePath === 'number') {
    let uri: string | undefined
    try {
      uri = Image.resolveAssetSource(filePath)?.uri
    } catch (e) {
      throw new Error(`Invalid asset: ${filePath}`)
    }
    // An unresolvable asset must fail here rather than send an empty path
    // to the native module.
    if (!uri) throw new Error(`Invalid asset: ${filePath}`)
    path = uri
  } else {
    // Match the URL scheme only, so local names that merely begin with
    // "http" are still accepted.
    if (!isBundleAsset && /^https?:\/\//i.test(filePath))
      throw new Error(
        'VAD remote file is not supported, please download it first',
      )
    path = filePath
  }
  if (path.startsWith('file://')) path = path.slice(7)

  const { contextId, gpu, reasonNoGPU } = await RNWhisper.initVadContext({
    filePath: path,
    // The native call always receives a real boolean, never undefined.
    isBundleAsset: !!isBundleAsset,
    useGpu,
    nThreads,
  })
  return new WhisperVadContext({ contextId, gpu, reasonNoGPU })
}
|
|
708
|
+
|
|
709
|
+
/**
 * Release every VAD context by delegating to the native module's
 * `releaseAllVadContexts`.
 *
 * @returns Promise that settles with the native call
 */
export async function releaseAllWhisperVad(): Promise<void> {
  const pendingRelease = RNWhisper.releaseAllVadContexts()
  return pendingRelease
}
|