whisper.rn 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +43 -4
- package/android/build.gradle +2 -4
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +47 -7
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +196 -7
- package/android/src/main/jni/whisper/Whisper.mk +1 -1
- package/android/src/main/jni/whisper/jni.cpp +33 -9
- package/cpp/rn-whisper.cpp +26 -0
- package/cpp/rn-whisper.h +5 -0
- package/cpp/whisper.cpp +603 -412
- package/cpp/whisper.h +120 -40
- package/ios/RNWhisper.h +2 -2
- package/ios/RNWhisper.mm +78 -111
- package/ios/RNWhisperContext.h +53 -0
- package/ios/RNWhisperContext.mm +303 -0
- package/jest/mock.js +38 -2
- package/lib/commonjs/index.js +63 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +64 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +61 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/index.tsx +121 -4
- package/whisper-rn.podspec +15 -8
package/src/index.tsx
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
NativeEventEmitter,
|
|
3
|
+
DeviceEventEmitter,
|
|
4
|
+
NativeModules,
|
|
5
|
+
Platform,
|
|
6
|
+
DeviceEventEmitterStatic,
|
|
7
|
+
} from 'react-native'
|
|
2
8
|
|
|
3
9
|
const LINKING_ERROR =
|
|
4
10
|
`The package 'whisper.rn' doesn't seem to be linked. Make sure: \n\n${Platform.select({ ios: "- You have run 'pod install'\n", default: '' })
|
|
@@ -15,24 +21,58 @@ const RNWhisper = NativeModules.RNWhisper
|
|
|
15
21
|
},
|
|
16
22
|
)
|
|
17
23
|
|
|
24
|
+
let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
|
|
25
|
+
if (Platform.OS === 'ios') {
|
|
26
|
+
EventEmitter = new NativeEventEmitter(RNWhisper)
|
|
27
|
+
}
|
|
28
|
+
if (Platform.OS === 'android') {
|
|
29
|
+
EventEmitter = DeviceEventEmitter
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const EVENT_ON_REALTIME_TRANSCRIBE = '@RNWhisper_onRealtimeTranscribe'
|
|
33
|
+
const EVENT_ON_REALTIME_TRANSCRIBE_END = '@RNWhisper_onRealtimeTranscribeEnd'
|
|
34
|
+
|
|
18
35
|
export type TranscribeOptions = {
|
|
36
|
+
/** Spoken language (Default: 'auto' for auto-detect) */
|
|
19
37
|
language?: string,
|
|
38
|
+
/** Translate from source language to english (Default: false) */
|
|
20
39
|
translate?: boolean,
|
|
40
|
+
/** Number of threads to use during computation (Default: 4) */
|
|
21
41
|
maxThreads?: number,
|
|
42
|
+
/** Maximum number of text context tokens to store */
|
|
22
43
|
maxContext?: number,
|
|
44
|
+
/** Maximum segment length in characters */
|
|
23
45
|
maxLen?: number,
|
|
46
|
+
/** Enable token-level timestamps */
|
|
24
47
|
tokenTimestamps?: boolean,
|
|
48
|
+
/** Word timestamp probability threshold */
|
|
49
|
+
wordThold?: number,
|
|
50
|
+
/** Time offset in milliseconds */
|
|
25
51
|
offset?: number,
|
|
52
|
+
/** Duration of audio to process in milliseconds */
|
|
26
53
|
duration?: number,
|
|
27
|
-
|
|
54
|
+
/** Initial decoding temperature */
|
|
28
55
|
temperature?: number,
|
|
29
56
|
temperatureInc?: number,
|
|
57
|
+
/** Beam size for beam search */
|
|
30
58
|
beamSize?: number,
|
|
59
|
+
/** Number of best candidates to keep */
|
|
31
60
|
bestOf?: number,
|
|
61
|
+
/** Speed up audio by x2 (reduced accuracy) */
|
|
32
62
|
speedUp?: boolean,
|
|
63
|
+
/** Initial Prompt */
|
|
33
64
|
prompt?: string,
|
|
34
65
|
}
|
|
35
66
|
|
|
67
|
+
export type TranscribeRealtimeOptions = TranscribeOptions & {
|
|
68
|
+
/**
|
|
69
|
+
* Realtime record max duration in seconds.
|
|
70
|
+
* Because whisper.cpp has a hard constraint of processing the audio in chunks of 30 seconds,
|
|
71
|
+
* the recommended value will be <= 30 seconds. (Default: 30)
|
|
72
|
+
*/
|
|
73
|
+
realtimeAudioSec?: number,
|
|
74
|
+
}
|
|
75
|
+
|
|
36
76
|
export type TranscribeResult = {
|
|
37
77
|
result: string,
|
|
38
78
|
segments: Array<{
|
|
@@ -42,6 +82,32 @@ export type TranscribeResult = {
|
|
|
42
82
|
}>,
|
|
43
83
|
}
|
|
44
84
|
|
|
85
|
+
export type TranscribeRealtimeEvent = {
|
|
86
|
+
contextId: number,
|
|
87
|
+
jobId: number,
|
|
88
|
+
/** Is capturing audio, when false, the event is the final result */
|
|
89
|
+
isCapturing: boolean,
|
|
90
|
+
code: number,
|
|
91
|
+
processTime: number,
|
|
92
|
+
recordingTime: number,
|
|
93
|
+
data?: TranscribeResult,
|
|
94
|
+
error?: string,
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
export type TranscribeRealtimeNativeEvent = {
|
|
98
|
+
contextId: number,
|
|
99
|
+
jobId: number,
|
|
100
|
+
payload: {
|
|
101
|
+
/** Is capturing audio, when false, the event is the final result */
|
|
102
|
+
isCapturing: boolean,
|
|
103
|
+
code: number,
|
|
104
|
+
processTime: number,
|
|
105
|
+
recordingTime: number,
|
|
106
|
+
data?: TranscribeResult,
|
|
107
|
+
error?: string,
|
|
108
|
+
},
|
|
109
|
+
}
|
|
110
|
+
|
|
45
111
|
class WhisperContext {
|
|
46
112
|
id: number
|
|
47
113
|
|
|
@@ -49,8 +115,59 @@ class WhisperContext {
|
|
|
49
115
|
this.id = id
|
|
50
116
|
}
|
|
51
117
|
|
|
52
|
-
|
|
53
|
-
|
|
118
|
+
/** Transcribe audio file */
|
|
119
|
+
transcribe(path: string, options: TranscribeOptions = {}): {
|
|
120
|
+
/** Stop the transcribe */
|
|
121
|
+
stop: () => void,
|
|
122
|
+
/** Transcribe result promise */
|
|
123
|
+
promise: Promise<TranscribeResult>,
|
|
124
|
+
} {
|
|
125
|
+
const jobId: number = Math.floor(Math.random() * 10000)
|
|
126
|
+
return {
|
|
127
|
+
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
128
|
+
promise: RNWhisper.transcribeFile(this.id, jobId, path, options),
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
/** Transcribe the microphone audio stream, the microphone user permission is required */
|
|
133
|
+
async transcribeRealtime(options: TranscribeRealtimeOptions = {}): Promise<{
|
|
134
|
+
/** Stop the realtime transcribe */
|
|
135
|
+
stop: () => void,
|
|
136
|
+
/** Subscribe to realtime transcribe events */
|
|
137
|
+
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void,
|
|
138
|
+
}> {
|
|
139
|
+
const jobId: number = Math.floor(Math.random() * 10000)
|
|
140
|
+
await RNWhisper.startRealtimeTranscribe(this.id, jobId, options)
|
|
141
|
+
let removeTranscribe: () => void
|
|
142
|
+
let removeEnd: () => void
|
|
143
|
+
let lastTranscribePayload: TranscribeRealtimeNativeEvent['payload']
|
|
144
|
+
return {
|
|
145
|
+
stop: () => RNWhisper.abortTranscribe(this.id, jobId),
|
|
146
|
+
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => {
|
|
147
|
+
const transcribeListener = EventEmitter.addListener(
|
|
148
|
+
EVENT_ON_REALTIME_TRANSCRIBE,
|
|
149
|
+
(evt: TranscribeRealtimeNativeEvent) => {
|
|
150
|
+
const { contextId, payload } = evt
|
|
151
|
+
if (contextId !== this.id || evt.jobId !== jobId) return
|
|
152
|
+
lastTranscribePayload = payload
|
|
153
|
+
callback({ contextId, jobId: evt.jobId, ...payload })
|
|
154
|
+
if (!payload.isCapturing) removeTranscribe()
|
|
155
|
+
}
|
|
156
|
+
)
|
|
157
|
+
removeTranscribe = transcribeListener.remove
|
|
158
|
+
const endListener = EventEmitter.addListener(
|
|
159
|
+
EVENT_ON_REALTIME_TRANSCRIBE_END,
|
|
160
|
+
(evt: TranscribeRealtimeNativeEvent) => {
|
|
161
|
+
const { contextId } = evt
|
|
162
|
+
if (contextId !== this.id || evt.jobId !== jobId) return
|
|
163
|
+
callback({ contextId, jobId: evt.jobId, ...lastTranscribePayload, isCapturing: false })
|
|
164
|
+
removeTranscribe?.()
|
|
165
|
+
removeEnd()
|
|
166
|
+
}
|
|
167
|
+
)
|
|
168
|
+
removeEnd = endListener.remove
|
|
169
|
+
},
|
|
170
|
+
}
|
|
54
171
|
}
|
|
55
172
|
|
|
56
173
|
async release() {
|
package/whisper-rn.podspec
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
require "json"
|
|
2
2
|
|
|
3
3
|
package = JSON.parse(File.read(File.join(__dir__, "package.json")))
|
|
4
|
-
base_compiler_flags =
|
|
5
|
-
folly_compiler_flags =
|
|
4
|
+
base_compiler_flags = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"
|
|
5
|
+
folly_compiler_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -Wno-comma"
|
|
6
|
+
base_optimizer_flags = "-O3 -DNDEBUG"
|
|
6
7
|
|
|
7
8
|
Pod::Spec.new do |s|
|
|
8
9
|
s.name = "whisper-rn"
|
|
@@ -20,17 +21,23 @@ Pod::Spec.new do |s|
|
|
|
20
21
|
s.dependency "React-Core"
|
|
21
22
|
|
|
22
23
|
s.compiler_flags = base_compiler_flags
|
|
23
|
-
s.
|
|
24
|
-
|
|
24
|
+
s.pod_target_xcconfig = {
|
|
25
|
+
"OTHER_LDFLAGS" => "-framework Accelerate",
|
|
26
|
+
"OTHER_CFLAGS[config=Release]" => base_optimizer_flags,
|
|
27
|
+
"OTHER_CPLUSPLUSFLAGS[config=Release]" => base_optimizer_flags
|
|
25
28
|
}
|
|
26
29
|
|
|
27
30
|
# Don't install the dependencies when we run `pod install` in the old architecture.
|
|
28
31
|
if ENV['RCT_NEW_ARCH_ENABLED'] == '1' then
|
|
29
32
|
s.compiler_flags = base_compiler_flags + " " + folly_compiler_flags + " -DRCT_NEW_ARCH_ENABLED=1"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
new_arch_cpp_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1"
|
|
34
|
+
s.pod_target_xcconfig = {
|
|
35
|
+
"CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
|
|
36
|
+
"HEADER_SEARCH_PATHS" => "\"$(PODS_ROOT)/boost\"",
|
|
37
|
+
"OTHER_LDFLAGS" => "-framework Accelerate",
|
|
38
|
+
"OTHER_CFLAGS[config=Release]" => base_optimizer_flags,
|
|
39
|
+
"OTHER_CPLUSPLUSFLAGS[config=Debug]" => new_arch_cpp_flags,
|
|
40
|
+
"OTHER_CPLUSPLUSFLAGS[config=Release]" => new_arch_cpp_flags + " " + base_optimizer_flags
|
|
34
41
|
}
|
|
35
42
|
s.dependency "React-Codegen"
|
|
36
43
|
s.dependency "RCT-Folly"
|