whisper.rn 0.1.5 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -4
- package/android/build.gradle +2 -4
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +47 -7
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +224 -7
- package/android/src/main/jni/whisper/Whisper.mk +1 -1
- package/android/src/main/jni/whisper/jni.cpp +34 -5
- package/cpp/rn-whisper.cpp +26 -0
- package/cpp/rn-whisper.h +5 -0
- package/ios/RNWhisper.h +2 -2
- package/ios/RNWhisper.mm +78 -111
- package/ios/RNWhisperContext.h +55 -0
- package/ios/RNWhisperContext.mm +326 -0
- package/jest/mock.js +43 -2
- package/lib/commonjs/index.js +59 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +60 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +63 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.tsx +124 -4
package/README.md
CHANGED
@@ -20,6 +20,23 @@ npm install whisper.rn
 
 Then re-run `npx pod-install` again for iOS.
 
+## Add Microphone Permissions (Optional)
+
+If you want to use realtime transcribe, you need to add the microphone permission to your app.
+
+### iOS
+Add these lines to ```ios/[YOUR_APP_NAME]/Info.plist```
+```xml
+<key>NSMicrophoneUsageDescription</key>
+<string>This app requires microphone access in order to transcribe speech</string>
+```
+
+### Android
+Add the following line to ```android/app/src/main/AndroidManifest.xml```
+```xml
+<uses-permission android:name="android.permission.RECORD_AUDIO" />
+```
+
 ## Usage
 
 ```js
@@ -30,13 +47,35 @@ const sampleFilePath = 'file://.../sample.wav'
 
 const whisperContext = await initWhisper({ filePath })
 
-const
-
-
-}
+const options = { language: 'en' }
+const { stop, promise } = whisperContext.transcribe(sampleFilePath, options)
+
+const { result } = await promise
 // result: (The inference text result from audio file)
 ```
 
+Use realtime transcribe:
+
+```js
+const { stop, subscribe } = await whisperContext.transcribeRealtime(options)
+
+subscribe(evt => {
+  const { isCapturing, data, processTime, recordingTime } = evt
+  console.log(
+    `Realtime transcribing: ${isCapturing ? 'ON' : 'OFF'}\n` +
+      // The inference text result from audio record:
+      `Result: ${data.result}\n\n` +
+      `Process time: ${processTime}ms\n` +
+      `Recording time: ${recordingTime}ms`,
+  )
+  if (!isCapturing) console.log('Finished realtime transcribing')
+})
+```
+
+In Android, you may need to request the microphone permission via [`PermissionsAndroid`](https://reactnative.dev/docs/permissionsandroid).
+
+The documentation is not ready yet; please see the comments in the [index](./src/index.tsx) file for more details at the moment.
+
 ## Run with example
 
 The example app is using [react-native-fs](https://github.com/itinance/react-native-fs) to download the model file and audio file.
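
The README points at React Native's permissions API for realtime use but stops short of showing it. A minimal sketch (not part of this diff; the rationale strings are illustrative) of requesting the runtime microphone permission with `PermissionsAndroid`:

```ts
import { PermissionsAndroid } from 'react-native'

// Ask for RECORD_AUDIO at runtime before starting transcribeRealtime on Android.
async function requestMicPermission(): Promise<boolean> {
  const granted = await PermissionsAndroid.request(
    PermissionsAndroid.PERMISSIONS.RECORD_AUDIO,
    {
      title: 'Microphone Permission',
      message: 'This app needs microphone access to transcribe speech.',
      buttonPositive: 'OK',
    },
  )
  return granted === PermissionsAndroid.RESULTS.GRANTED
}
```
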
package/android/build.gradle
CHANGED
@@ -40,10 +40,8 @@ android {
     buildConfigField "boolean", "IS_NEW_ARCHITECTURE_ENABLED", isNewArchitectureEnabled().toString()
   }
   externalNativeBuild {
-
-
-      path 'src/main/jni/whisper/Android.mk'
-    }
+    ndkBuild {
+      path 'src/main/jni/whisper/Android.mk'
     }
   }
   buildTypes {

package/android/src/main/java/com/rnwhisper/RNWhisperModule.java
CHANGED

@@ -5,6 +5,7 @@ import android.util.Log;
 import android.os.Build;
 import android.os.Handler;
 import android.os.AsyncTask;
+import android.media.AudioRecord;
 
 import com.facebook.react.bridge.Promise;
 import com.facebook.react.bridge.ReactApplicationContext;
@@ -51,7 +52,7 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
       throw new Exception("Failed to initialize context");
     }
     int id = Math.abs(new Random().nextInt());
-    WhisperContext whisperContext = new WhisperContext(context);
+    WhisperContext whisperContext = new WhisperContext(id, reactContext, context);
     contexts.put(id, whisperContext);
     return id;
   } catch (Exception e) {
@@ -72,18 +73,27 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
   }
 
   @ReactMethod
-  public void
+  public void transcribeFile(int id, int jobId, String filePath, ReadableMap options, Promise promise) {
+    final WhisperContext context = contexts.get(id);
+    if (context == null) {
+      promise.reject("Context not found");
+      return;
+    }
+    if (context.isCapturing()) {
+      promise.reject("The context is in realtime transcribe mode");
+      return;
+    }
+    if (context.isTranscribing()) {
+      promise.reject("Context is already transcribing");
+      return;
+    }
     new AsyncTask<Void, Void, WritableMap>() {
       private Exception exception;
 
       @Override
       protected WritableMap doInBackground(Void... voids) {
         try {
-
-          if (context == null) {
-            throw new Exception("Context " + id + " not found");
-          }
-          return context.transcribe(filePath, options);
+          return context.transcribeFile(jobId, filePath, options);
         } catch (Exception e) {
           exception = e;
           return null;
@@ -101,6 +111,35 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
     }.execute();
   }
 
+  @ReactMethod
+  public void startRealtimeTranscribe(int id, int jobId, ReadableMap options, Promise promise) {
+    final WhisperContext context = contexts.get(id);
+    if (context == null) {
+      promise.reject("Context not found");
+      return;
+    }
+    if (context.isCapturing()) {
+      promise.reject("Context is already in capturing");
+      return;
+    }
+    int state = context.startRealtimeTranscribe(jobId, options);
+    if (state == AudioRecord.STATE_INITIALIZED) {
+      promise.resolve(null);
+      return;
+    }
+    promise.reject("Failed to start realtime transcribe. State: " + state);
+  }
+
+  @ReactMethod
+  public void abortTranscribe(int contextId, int jobId, Promise promise) {
+    WhisperContext context = contexts.get(contextId);
+    if (context == null) {
+      promise.reject("Context not found");
+      return;
+    }
+    context.stopTranscribe(jobId);
+  }
+
   @ReactMethod
   public void releaseContext(int id, Promise promise) {
     new AsyncTask<Void, Void, Void>() {
@@ -168,6 +207,7 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
 
   @Override
   public void onHostDestroy() {
+    WhisperContext.abortAllTranscribe();
     for (WhisperContext context : contexts.values()) {
       context.release();
     }
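
The new `@ReactMethod`s replace the single transcribe entry point with three: `transcribeFile`, `startRealtimeTranscribe`, and `abortTranscribe`. A hypothetical sketch of driving them through `NativeModules`; the method names and argument order come from the declarations above, but the wrapper itself is illustrative, not the package's actual `src/index.tsx`:

```ts
import { NativeModules } from 'react-native'

const { RNWhisper } = NativeModules

async function demo(contextId: number) {
  const jobId = Math.floor(Math.random() * 10000) // job ids are caller-chosen ints

  // One-shot file transcription: resolves with the text segments.
  const data = await RNWhisper.transcribeFile(
    contextId, jobId, '/path/to/sample.wav', { language: 'en' },
  )
  console.log(data.result)

  // Realtime capture: the promise resolves once AudioRecord reaches
  // STATE_INITIALIZED; results then arrive as device events instead.
  await RNWhisper.startRealtimeTranscribe(contextId, jobId, { realtimeAudioSec: 30 })

  // Abort either kind of job by id.
  RNWhisper.abortTranscribe(contextId, jobId)
}
```
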

package/android/src/main/java/com/rnwhisper/WhisperContext.java
CHANGED

@@ -4,10 +4,15 @@ import com.facebook.react.bridge.Arguments;
 import com.facebook.react.bridge.WritableArray;
 import com.facebook.react.bridge.WritableMap;
 import com.facebook.react.bridge.ReadableMap;
+import com.facebook.react.bridge.ReactApplicationContext;
+import com.facebook.react.modules.core.DeviceEventManagerModule;
 
 import android.util.Log;
 import android.os.Build;
 import android.content.res.AssetManager;
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder.AudioSource;
 
 import java.util.Random;
 import java.lang.StringBuilder;
@@ -26,16 +31,202 @@ import java.nio.ShortBuffer;
 
 public class WhisperContext {
   public static final String NAME = "RNWhisperContext";
+
+  private static final int SAMPLE_RATE = 16000;
+  private static final int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO;
+  private static final int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT;
+  private static final int AUDIO_SOURCE = AudioSource.VOICE_RECOGNITION;
+  private static final int DEFAULT_MAX_AUDIO_SEC = 30;
+
+  private int id;
+  private ReactApplicationContext reactContext;
   private long context;
 
-
+  private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
+
+  private int jobId = -1;
+  private AudioRecord recorder = null;
+  private int bufferSize;
+  private short[] buffer16;
+  private int nSamples = 0;
+  private int nSamplesTranscribing = 0;
+  private boolean isCapturing = false;
+  private boolean isStoppedByAction = false;
+  private boolean isTranscribing = false;
+  private boolean isRealtime = false;
+  private Thread fullHandler = null;
+
+  public WhisperContext(int id, ReactApplicationContext reactContext, long context) {
+    this.id = id;
     this.context = context;
+    this.reactContext = reactContext;
+    eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
+    bufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT);
   }
 
-  public
-
+  public int startRealtimeTranscribe(int jobId, ReadableMap options) {
+    if (isCapturing || isTranscribing) {
+      return -100;
+    }
+
+    recorder = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT, bufferSize);
+
+    int state = recorder.getState();
+    if (state != AudioRecord.STATE_INITIALIZED) {
+      recorder.release();
+      return state;
+    }
+
+    int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
+    final int maxAudioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
+
+    buffer16 = new short[maxAudioSec * SAMPLE_RATE * Short.BYTES];
+
+    this.jobId = jobId;
+    isCapturing = true;
+    isStoppedByAction = false;
+    isRealtime = true;
+    nSamples = 0;
+    nSamplesTranscribing = 0;
+    fullHandler = null;
+
+    recorder.startRecording();
+
+    new Thread(new Runnable() {
+      @Override
+      public void run() {
+        try {
+          short[] buffer = new short[bufferSize];
+          while (isCapturing) {
+            try {
+              int n = recorder.read(buffer, 0, bufferSize);
+              if (n == 0) continue;
+
+              if (nSamples + n > maxAudioSec * SAMPLE_RATE) {
+                // Full, stop capturing
+                isCapturing = false;
+                if (!isTranscribing && nSamples == nSamplesTranscribing) {
+                  emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
+                } else {
+                  // wait previous handler to finish
+                  fullHandler.join();
+                  fullTranscribeSamples(options, true);
+                }
+                break;
+              }
+
+              // Append to buffer
+              nSamples += n;
+              for (int i = 0; i < n; i++) {
+                buffer16[nSamples + i] = buffer[i];
+              }
+
+              fullTranscribeSamples(options, false);
+            } catch (Exception e) {
+              Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
+            }
+          }
+          if (!isTranscribing) {
+            emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
+          }
+          if (fullHandler != null) {
+            fullHandler.join(); // Wait for full transcribe to finish
+          }
+          recorder.stop();
+        } catch (Exception e) {
+          e.printStackTrace();
+        } finally {
+          recorder.release();
+          recorder = null;
+        }
+      }
+    }).start();
+    return state;
+  }
+
+  private void fullTranscribeSamples(ReadableMap options, boolean skipCapturingCheck) {
+    if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
+      isTranscribing = true;
+      fullHandler = new Thread(new Runnable() {
+        @Override
+        public void run() {
+          if (!isCapturing && !skipCapturingCheck) return;
+
+          nSamplesTranscribing = nSamples;
+
+          // convert I16 to F32
+          float[] nSamplesBuffer32 = new float[nSamplesTranscribing];
+          for (int i = 0; i < nSamplesTranscribing; i++) {
+            nSamplesBuffer32[i] = buffer16[i] / 32768.0f;
+          }
+
+          Log.d(NAME, "Start transcribing realtime: " + nSamplesTranscribing);
+
+          int timeStart = (int) System.currentTimeMillis();
+          int code = full(jobId, options, nSamplesBuffer32, nSamplesTranscribing);
+          int timeEnd = (int) System.currentTimeMillis();
+          int timeRecording = (int) (nSamplesTranscribing / SAMPLE_RATE * 1000);
+
+          WritableMap payload = Arguments.createMap();
+          payload.putInt("code", code);
+          payload.putInt("processTime", timeEnd - timeStart);
+          payload.putInt("recordingTime", timeRecording);
+
+          if (code == 0) {
+            payload.putMap("data", getTextSegments());
+          } else {
+            payload.putString("error", "Transcribe failed with code " + code);
+          }
+
+          if (isStoppedByAction || !isCapturing && nSamplesTranscribing == nSamples) {
+            payload.putBoolean("isCapturing", false);
+            payload.putBoolean("isStoppedByAction", isStoppedByAction);
+            emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", payload);
+          } else if (code == 0) {
+            payload.putBoolean("isCapturing", true);
+            emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
+          } else {
+            payload.putBoolean("isCapturing", true);
+            emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
+          }
+          isTranscribing = false;
+        }
+      });
+      fullHandler.start();
+    }
+  }
+
+  private void emitTranscribeEvent(final String eventName, final WritableMap payload) {
+    WritableMap event = Arguments.createMap();
+    event.putInt("contextId", WhisperContext.this.id);
+    event.putInt("jobId", jobId);
+    event.putMap("payload", payload);
+    eventEmitter.emit(eventName, event);
+  }
+
+  public WritableMap transcribeFile(int jobId, String filePath, ReadableMap options) throws IOException, Exception {
+    this.jobId = jobId;
+    isTranscribing = true;
+    float[] audioData = decodeWaveFile(new File(filePath));
+    int code = full(jobId, options, audioData, audioData.length);
+    isTranscribing = false;
+    this.jobId = -1;
+    if (code != 0) {
+      throw new Exception("Failed to transcribe the file. Code: " + code);
+    }
+    return getTextSegments();
+  }
+
+  private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
+    return fullTranscribe(
+      jobId,
       context,
-
+      // jboolean realtime,
+      isRealtime,
+      // float[] audio_data,
+      audioData,
+      // jint audio_data_len,
+      audioDataLen,
       // jint n_threads,
       options.hasKey("maxThreads") ? options.getInt("maxThreads") : -1,
       // jint max_context,
@@ -69,9 +260,9 @@ public class WhisperContext {
       // jstring prompt
       options.hasKey("prompt") ? options.getString("prompt") : null
     );
-
-
-
+  }
+
+  private WritableMap getTextSegments() {
     Integer count = getTextSegmentCount(context);
     StringBuilder builder = new StringBuilder();
 
@@ -92,7 +283,28 @@ public class WhisperContext {
     return data;
   }
 
+
+  public boolean isCapturing() {
+    return isCapturing;
+  }
+
+  public boolean isTranscribing() {
+    return isTranscribing;
+  }
+
+  public void stopTranscribe(int jobId) {
+    abortTranscribe(jobId);
+    isCapturing = false;
+    isTranscribing = false;
+    isStoppedByAction = true;
+  }
+
+  public void stopCurrentTranscribe() {
+    stopTranscribe(this.jobId);
+  }
+
   public void release() {
+    stopCurrentTranscribe();
     freeContext(context);
   }
 
@@ -185,8 +397,11 @@ public class WhisperContext {
 
   protected static native long initContext(String modelPath);
   protected static native int fullTranscribe(
+    int job_id,
     long context,
+    boolean realtime,
     float[] audio_data,
+    int audio_data_len,
     int n_threads,
     int max_context,
     int word_thold,
@@ -203,6 +418,8 @@ public class WhisperContext {
     String language,
     String prompt
   );
+  protected static native void abortTranscribe(int jobId);
+  protected static native void abortAllTranscribe();
   protected static native int getTextSegmentCount(long context);
   protected static native String getTextSegment(long context, int index);
   protected static native int getTextSegmentT0(long context, int index);
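
The realtime path reports results through two device events rather than a promise. The event names and payload fields below are read directly from `emitTranscribeEvent` and `fullTranscribeSamples` above; receiving them via `DeviceEventEmitter`, and the exact shape of `data`, are assumptions about the JS side:

```ts
import { DeviceEventEmitter } from 'react-native'

type RealtimeTranscribeEvent = {
  contextId: number
  jobId: number
  payload: {
    code: number
    processTime: number   // ms spent in the native full() call
    recordingTime: number // ms of audio covered by this pass
    data?: { result: string } // text segments, set when code == 0
    error?: string            // set when code != 0
    isCapturing?: boolean
    isStoppedByAction?: boolean
  }
}

DeviceEventEmitter.addListener(
  '@RNWhisper_onRealtimeTranscribe',
  (evt: RealtimeTranscribeEvent) => console.log('partial:', evt.payload.data?.result),
)
DeviceEventEmitter.addListener(
  '@RNWhisper_onRealtimeTranscribeEnd',
  (evt: RealtimeTranscribeEvent) =>
    console.log('finished; stopped by action:', evt.payload.isStoppedByAction === true),
)
```
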

package/android/src/main/jni/whisper/Whisper.mk
CHANGED

@@ -12,7 +12,7 @@ ifneq ($(APP_OPTIM),debug)
 endif
 
 LOCAL_CFLAGS += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
-LOCAL_CPPFLAGS += -std=c++11
+LOCAL_CPPFLAGS += -std=c++11 -I $(WHISPER_LIB_DIR)
 LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
 	$(WHISPER_LIB_DIR)/whisper.cpp \
 	$(WHISPER_LIB_DIR)/rn-whisper.cpp \

package/android/src/main/jni/whisper/jni.cpp
CHANGED

@@ -5,6 +5,7 @@
 #include <cstdlib>
 #include <sys/sysinfo.h>
 #include <string>
+#include <thread>
 #include "whisper.h"
 #include "rn-whisper.h"
 #include "ggml.h"
@@ -36,8 +37,11 @@ JNIEXPORT jint JNICALL
 Java_com_rnwhisper_WhisperContext_fullTranscribe(
     JNIEnv *env,
     jobject thiz,
+    jint job_id,
     jlong context_ptr,
+    jboolean realtime,
     jfloatArray audio_data,
+    jint audio_data_len,
     jint n_threads,
     jint max_context,
     int word_thold,
@@ -57,9 +61,8 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
     UNUSED(thiz);
     struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
     jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
-    const jsize audio_data_length = env->GetArrayLength(audio_data);
 
-    int max_threads = min(4,
+    int max_threads = min(4, std::thread::hardware_concurrency());
 
     LOGI("About to create params");
 
@@ -81,7 +84,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
     params.speed_up = speed_up;
     params.offset_ms = 0;
     params.no_context = true;
-    params.single_segment =
+    params.single_segment = realtime;
 
     if (max_len > -1) {
         params.max_len = max_len;
@@ -117,19 +120,45 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
        );
    }
 
+    params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
+        bool is_aborted = *(bool*)user_data;
+        return !is_aborted;
+    };
+    params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
+
     LOGI("About to reset timings");
     whisper_reset_timings(context);
 
     LOGI("About to run whisper_full");
-    int code = whisper_full(context, params, audio_data_arr,
+    int code = whisper_full(context, params, audio_data_arr, audio_data_len);
     if (code == 0) {
         // whisper_print_timings(context);
     }
     env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
     env->ReleaseStringUTFChars(language, language_chars);
+    rn_whisper_remove_abort_map(job_id);
     return code;
 }
 
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_abortTranscribe(
+    JNIEnv *env,
+    jobject thiz,
+    jint job_id
+) {
+    UNUSED(thiz);
+    rn_whisper_abort_transcribe(job_id);
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_abortAllTranscribe(
+    JNIEnv *env,
+    jobject thiz
+) {
+    UNUSED(thiz);
+    rn_whisper_abort_all_transcribe();
+}
+
 JNIEXPORT jint JNICALL
 Java_com_rnwhisper_WhisperContext_getTextSegmentCount(
     JNIEnv *env, jobject thiz, jlong context_ptr) {
@@ -176,4 +205,4 @@ Java_com_rnwhisper_WhisperContext_freeContext(
     whisper_free(context);
 }
 
-} // extern "C"
+} // extern "C"
package/cpp/rn-whisper.cpp
CHANGED
@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <string>
 #include <vector>
+#include <unordered_map>
 #include "whisper.h"
 
 extern "C" {
@@ -28,4 +29,29 @@ void rn_whisper_convert_prompt(
   }
 }
 
+std::unordered_map<int, bool> abort_map;
+
+bool* rn_whisper_assign_abort_map(int job_id) {
+  abort_map[job_id] = false;
+  return &abort_map[job_id];
+}
+
+void rn_whisper_remove_abort_map(int job_id) {
+  if (abort_map.find(job_id) != abort_map.end()) {
+    abort_map.erase(job_id);
+  }
+}
+
+void rn_whisper_abort_transcribe(int job_id) {
+  if (abort_map.find(job_id) != abort_map.end()) {
+    abort_map[job_id] = true;
+  }
+}
+
+void rn_whisper_abort_all_transcribe() {
+  for (auto it = abort_map.begin(); it != abort_map.end(); ++it) {
+    it->second = true;
+  }
+}
+
 }
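
Together with `jni.cpp` above, this gives each job a dedicated abort flag: `rn_whisper_assign_abort_map` hands whisper.cpp a pointer to the flag as `encoder_begin_callback_user_data`, and returning `false` from the callback makes `whisper_full` stop before the next encoder pass. The same pattern in TypeScript, purely as an illustration (the real implementation is the C++ above):

```ts
// One mutable abort flag per job id, mirroring abort_map in rn-whisper.cpp.
const abortFlags = new Map<number, { aborted: boolean }>()

function assignAbortFlag(jobId: number) {
  const flag = { aborted: false }
  abortFlags.set(jobId, flag) // the C++ version returns a bool* into the map
  return flag
}

function abortTranscribe(jobId: number) {
  const flag = abortFlags.get(jobId)
  if (flag) flag.aborted = true // rn_whisper_abort_transcribe
}

function abortAllTranscribe() {
  for (const flag of abortFlags.values()) flag.aborted = true
}

// whisper_full consults the flag before each encoder pass via
// encoder_begin_callback; returning false aborts the job.
function encoderBeginCallback(flag: { aborted: boolean }): boolean {
  return !flag.aborted
}
```
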
package/cpp/rn-whisper.h
CHANGED
@@ -11,6 +11,11 @@ void rn_whisper_convert_prompt(
   std::string * prompt
 );
 
+bool* rn_whisper_assign_abort_map(int job_id);
+void rn_whisper_remove_abort_map(int job_id);
+void rn_whisper_abort_transcribe(int job_id);
+void rn_whisper_abort_all_transcribe();
+
 #ifdef __cplusplus
 }
 #endif
package/ios/RNWhisper.h
CHANGED