whisper.rn 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -4
- package/android/build.gradle +2 -4
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +47 -7
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +196 -7
- package/android/src/main/jni/whisper/Whisper.mk +1 -1
- package/android/src/main/jni/whisper/jni.cpp +32 -4
- package/cpp/rn-whisper.cpp +26 -0
- package/cpp/rn-whisper.h +5 -0
- package/ios/RNWhisper.h +2 -2
- package/ios/RNWhisper.mm +78 -111
- package/ios/RNWhisperContext.h +53 -0
- package/ios/RNWhisperContext.mm +303 -0
- package/jest/mock.js +38 -2
- package/lib/commonjs/index.js +63 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +64 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +61 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.tsx +121 -4
package/README.md
CHANGED
|
@@ -20,6 +20,23 @@ npm install whisper.rn
|
|
|
20
20
|
|
|
21
21
|
Then re-run `npx pod-install` again for iOS.
|
|
22
22
|
|
|
23
|
+
## Add Microphone Permissions (Optional)
|
|
24
|
+
|
|
25
|
+
If you want to use realtime transcription, you need to add the microphone permission to your app.
|
|
26
|
+
|
|
27
|
+
### iOS
|
|
28
|
+
Add these lines to ```ios/[YOUR_APP_NAME]/Info.plist```
|
|
29
|
+
```xml
|
|
30
|
+
<key>NSMicrophoneUsageDescription</key>
|
|
31
|
+
<string>This app requires microphone access in order to transcribe speech</string>
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Android
|
|
35
|
+
Add the following line to ```android/app/src/main/AndroidManifest.xml```
|
|
36
|
+
```xml
|
|
37
|
+
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
|
38
|
+
```
|
|
39
|
+
|
|
23
40
|
## Usage
|
|
24
41
|
|
|
25
42
|
```js
|
|
@@ -30,13 +47,35 @@ const sampleFilePath = 'file://.../sample.wav'
|
|
|
30
47
|
|
|
31
48
|
const whisperContext = await initWhisper({ filePath })
|
|
32
49
|
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
}
|
|
50
|
+
const options = { language: 'en' }
|
|
51
|
+
const { stop, promise } = whisperContext.transcribe(sampleFilePath, options)
|
|
52
|
+
|
|
53
|
+
const { result } = await promise
|
|
37
54
|
// result: (The inference text result from audio file)
|
|
38
55
|
```
|
|
39
56
|
|
|
57
|
+
Use realtime transcribe:
|
|
58
|
+
|
|
59
|
+
```js
|
|
60
|
+
const { stop, subscribe } = whisperContext.transcribeRealtime(options)
|
|
61
|
+
|
|
62
|
+
subscribe(evt => {
|
|
63
|
+
const { isCapturing, data, processTime, recordingTime } = evt
|
|
64
|
+
console.log(
|
|
65
|
+
`Realtime transcribing: ${isCapturing ? 'ON' : 'OFF'}\n` +
|
|
66
|
+
// The inference text result from audio record:
|
|
67
|
+
`Result: ${data.result}\n\n` +
|
|
68
|
+
`Process time: ${processTime}ms\n` +
|
|
69
|
+
`Recording time: ${recordingTime}ms`,
|
|
70
|
+
)
|
|
71
|
+
if (!isCapturing) console.log('Finished realtime transcribing')
|
|
72
|
+
})
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
On Android, you may need to request the microphone permission using [`PermissionsAndroid`](https://reactnative.dev/docs/permissionsandroid).
|
|
76
|
+
|
|
77
|
+
The documentation is not ready yet; for now, please see the comments in the [index](./src/index.tsx) file for more details.
|
|
78
|
+
|
|
40
79
|
## Run with example
|
|
41
80
|
|
|
42
81
|
The example app is using [react-native-fs](https://github.com/itinance/react-native-fs) to download the model file and audio file.
|
package/android/build.gradle
CHANGED
|
@@ -40,10 +40,8 @@ android {
|
|
|
40
40
|
buildConfigField "boolean", "IS_NEW_ARCHITECTURE_ENABLED", isNewArchitectureEnabled().toString()
|
|
41
41
|
}
|
|
42
42
|
externalNativeBuild {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
path 'src/main/jni/whisper/Android.mk'
|
|
46
|
-
}
|
|
43
|
+
ndkBuild {
|
|
44
|
+
path 'src/main/jni/whisper/Android.mk'
|
|
47
45
|
}
|
|
48
46
|
}
|
|
49
47
|
buildTypes {
|
|
@@ -5,6 +5,7 @@ import android.util.Log;
|
|
|
5
5
|
import android.os.Build;
|
|
6
6
|
import android.os.Handler;
|
|
7
7
|
import android.os.AsyncTask;
|
|
8
|
+
import android.media.AudioRecord;
|
|
8
9
|
|
|
9
10
|
import com.facebook.react.bridge.Promise;
|
|
10
11
|
import com.facebook.react.bridge.ReactApplicationContext;
|
|
@@ -51,7 +52,7 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
|
|
|
51
52
|
throw new Exception("Failed to initialize context");
|
|
52
53
|
}
|
|
53
54
|
int id = Math.abs(new Random().nextInt());
|
|
54
|
-
WhisperContext whisperContext = new WhisperContext(context);
|
|
55
|
+
WhisperContext whisperContext = new WhisperContext(id, reactContext, context);
|
|
55
56
|
contexts.put(id, whisperContext);
|
|
56
57
|
return id;
|
|
57
58
|
} catch (Exception e) {
|
|
@@ -72,18 +73,27 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
|
|
|
72
73
|
}
|
|
73
74
|
|
|
74
75
|
@ReactMethod
|
|
75
|
-
public void
|
|
76
|
+
public void transcribeFile(int id, int jobId, String filePath, ReadableMap options, Promise promise) {
|
|
77
|
+
final WhisperContext context = contexts.get(id);
|
|
78
|
+
if (context == null) {
|
|
79
|
+
promise.reject("Context not found");
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
if (context.isCapturing()) {
|
|
83
|
+
promise.reject("The context is in realtime transcribe mode");
|
|
84
|
+
return;
|
|
85
|
+
}
|
|
86
|
+
if (context.isTranscribing()) {
|
|
87
|
+
promise.reject("Context is already transcribing");
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
76
90
|
new AsyncTask<Void, Void, WritableMap>() {
|
|
77
91
|
private Exception exception;
|
|
78
92
|
|
|
79
93
|
@Override
|
|
80
94
|
protected WritableMap doInBackground(Void... voids) {
|
|
81
95
|
try {
|
|
82
|
-
|
|
83
|
-
if (context == null) {
|
|
84
|
-
throw new Exception("Context " + id + " not found");
|
|
85
|
-
}
|
|
86
|
-
return context.transcribe(filePath, options);
|
|
96
|
+
return context.transcribeFile(jobId, filePath, options);
|
|
87
97
|
} catch (Exception e) {
|
|
88
98
|
exception = e;
|
|
89
99
|
return null;
|
|
@@ -101,6 +111,35 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
|
|
|
101
111
|
}.execute();
|
|
102
112
|
}
|
|
103
113
|
|
|
114
|
+
@ReactMethod
|
|
115
|
+
public void startRealtimeTranscribe(int id, int jobId, ReadableMap options, Promise promise) {
|
|
116
|
+
final WhisperContext context = contexts.get(id);
|
|
117
|
+
if (context == null) {
|
|
118
|
+
promise.reject("Context not found");
|
|
119
|
+
return;
|
|
120
|
+
}
|
|
121
|
+
if (context.isCapturing()) {
|
|
122
|
+
promise.reject("Context is already in capturing");
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
int state = context.startRealtimeTranscribe(jobId, options);
|
|
126
|
+
if (state == AudioRecord.STATE_INITIALIZED) {
|
|
127
|
+
promise.resolve(null);
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
promise.reject("Failed to start realtime transcribe. State: " + state);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
@ReactMethod
|
|
134
|
+
public void abortTranscribe(int contextId, int jobId, Promise promise) {
|
|
135
|
+
WhisperContext context = contexts.get(contextId);
|
|
136
|
+
if (context == null) {
|
|
137
|
+
promise.reject("Context not found");
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
context.stopTranscribe(jobId);
|
|
141
|
+
}
|
|
142
|
+
|
|
104
143
|
@ReactMethod
|
|
105
144
|
public void releaseContext(int id, Promise promise) {
|
|
106
145
|
new AsyncTask<Void, Void, Void>() {
|
|
@@ -168,6 +207,7 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
|
|
|
168
207
|
|
|
169
208
|
@Override
|
|
170
209
|
public void onHostDestroy() {
|
|
210
|
+
WhisperContext.abortAllTranscribe();
|
|
171
211
|
for (WhisperContext context : contexts.values()) {
|
|
172
212
|
context.release();
|
|
173
213
|
}
|
|
@@ -4,10 +4,15 @@ import com.facebook.react.bridge.Arguments;
|
|
|
4
4
|
import com.facebook.react.bridge.WritableArray;
|
|
5
5
|
import com.facebook.react.bridge.WritableMap;
|
|
6
6
|
import com.facebook.react.bridge.ReadableMap;
|
|
7
|
+
import com.facebook.react.bridge.ReactApplicationContext;
|
|
8
|
+
import com.facebook.react.modules.core.DeviceEventManagerModule;
|
|
7
9
|
|
|
8
10
|
import android.util.Log;
|
|
9
11
|
import android.os.Build;
|
|
10
12
|
import android.content.res.AssetManager;
|
|
13
|
+
import android.media.AudioFormat;
|
|
14
|
+
import android.media.AudioRecord;
|
|
15
|
+
import android.media.MediaRecorder.AudioSource;
|
|
11
16
|
|
|
12
17
|
import java.util.Random;
|
|
13
18
|
import java.lang.StringBuilder;
|
|
@@ -26,16 +31,175 @@ import java.nio.ShortBuffer;
|
|
|
26
31
|
|
|
27
32
|
public class WhisperContext {
|
|
28
33
|
public static final String NAME = "RNWhisperContext";
|
|
34
|
+
|
|
35
|
+
private static final int SAMPLE_RATE = 16000;
|
|
36
|
+
private static final int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO;
|
|
37
|
+
private static final int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT;
|
|
38
|
+
private static final int AUDIO_SOURCE = AudioSource.VOICE_RECOGNITION;
|
|
39
|
+
private static final int DEFAULT_MAX_AUDIO_SEC = 30;
|
|
40
|
+
|
|
41
|
+
private int id;
|
|
42
|
+
private ReactApplicationContext reactContext;
|
|
29
43
|
private long context;
|
|
30
44
|
|
|
31
|
-
|
|
45
|
+
private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
|
|
46
|
+
|
|
47
|
+
private int jobId = -1;
|
|
48
|
+
private AudioRecord recorder = null;
|
|
49
|
+
private int bufferSize;
|
|
50
|
+
private short[] buffer16;
|
|
51
|
+
private int nSamples = 0;
|
|
52
|
+
private boolean isCapturing = false;
|
|
53
|
+
private boolean isTranscribing = false;
|
|
54
|
+
private boolean isRealtime = false;
|
|
55
|
+
|
|
56
|
+
public WhisperContext(int id, ReactApplicationContext reactContext, long context) {
|
|
57
|
+
this.id = id;
|
|
32
58
|
this.context = context;
|
|
59
|
+
this.reactContext = reactContext;
|
|
60
|
+
eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
|
|
61
|
+
bufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
public int startRealtimeTranscribe(int jobId, ReadableMap options) {
|
|
65
|
+
if (isCapturing || isTranscribing) {
|
|
66
|
+
return -100;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
recorder = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT, bufferSize);
|
|
70
|
+
|
|
71
|
+
int state = recorder.getState();
|
|
72
|
+
if (state != AudioRecord.STATE_INITIALIZED) {
|
|
73
|
+
recorder.release();
|
|
74
|
+
return state;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
|
|
78
|
+
final int maxAudioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
|
|
79
|
+
|
|
80
|
+
buffer16 = new short[maxAudioSec * SAMPLE_RATE * Short.BYTES];
|
|
81
|
+
|
|
82
|
+
this.jobId = jobId;
|
|
83
|
+
isCapturing = true;
|
|
84
|
+
isRealtime = true;
|
|
85
|
+
nSamples = 0;
|
|
86
|
+
|
|
87
|
+
recorder.startRecording();
|
|
88
|
+
|
|
89
|
+
new Thread(new Runnable() {
|
|
90
|
+
@Override
|
|
91
|
+
public void run() {
|
|
92
|
+
try {
|
|
93
|
+
short[] buffer = new short[bufferSize];
|
|
94
|
+
Thread fullHandler = null;
|
|
95
|
+
while (isCapturing) {
|
|
96
|
+
try {
|
|
97
|
+
int n = recorder.read(buffer, 0, bufferSize);
|
|
98
|
+
if (n == 0) continue;
|
|
99
|
+
|
|
100
|
+
if (nSamples + n > maxAudioSec * SAMPLE_RATE) {
|
|
101
|
+
// Full, ignore data
|
|
102
|
+
isCapturing = false;
|
|
103
|
+
if (!isTranscribing)
|
|
104
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
105
|
+
break;
|
|
106
|
+
}
|
|
107
|
+
nSamples += n;
|
|
108
|
+
for (int i = 0; i < n; i++) {
|
|
109
|
+
buffer16[nSamples + i] = buffer[i];
|
|
110
|
+
}
|
|
111
|
+
if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
|
|
112
|
+
isTranscribing = true;
|
|
113
|
+
Log.d(NAME, "Start transcribing realtime: " + nSamples);
|
|
114
|
+
fullHandler = new Thread(new Runnable() {
|
|
115
|
+
@Override
|
|
116
|
+
public void run() {
|
|
117
|
+
if (!isCapturing) return;
|
|
118
|
+
|
|
119
|
+
// convert I16 to F32
|
|
120
|
+
float[] nSamplesBuffer32 = new float[nSamples];
|
|
121
|
+
for (int i = 0; i < nSamples; i++) {
|
|
122
|
+
nSamplesBuffer32[i] = buffer16[i] / 32768.0f;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
int timeStart = (int) System.currentTimeMillis();
|
|
126
|
+
int code = full(jobId, options, nSamplesBuffer32, nSamples);
|
|
127
|
+
int timeEnd = (int) System.currentTimeMillis();
|
|
128
|
+
int timeRecording = (int) (nSamples / SAMPLE_RATE * 1000);
|
|
129
|
+
|
|
130
|
+
WritableMap payload = Arguments.createMap();
|
|
131
|
+
payload.putBoolean("isCapturing", isCapturing);
|
|
132
|
+
payload.putInt("code", code);
|
|
133
|
+
payload.putInt("processTime", timeEnd - timeStart);
|
|
134
|
+
payload.putInt("recordingTime", timeRecording);
|
|
135
|
+
|
|
136
|
+
if (code == 0) {
|
|
137
|
+
payload.putMap("data", getTextSegments());
|
|
138
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
139
|
+
} else {
|
|
140
|
+
payload.putString("error", "Transcribe failed with code " + code);
|
|
141
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
if (!isCapturing) {
|
|
145
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
146
|
+
}
|
|
147
|
+
isTranscribing = false;
|
|
148
|
+
}
|
|
149
|
+
});
|
|
150
|
+
fullHandler.start();
|
|
151
|
+
}
|
|
152
|
+
} catch (Exception e) {
|
|
153
|
+
Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
if (fullHandler != null) {
|
|
157
|
+
fullHandler.join(); // Wait for full transcribe to finish
|
|
158
|
+
}
|
|
159
|
+
recorder.stop();
|
|
160
|
+
} catch (Exception e) {
|
|
161
|
+
e.printStackTrace();
|
|
162
|
+
} finally {
|
|
163
|
+
recorder.release();
|
|
164
|
+
recorder = null;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}).start();
|
|
168
|
+
|
|
169
|
+
return state;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
private void emitTranscribeEvent(final String eventName, final WritableMap payload) {
|
|
173
|
+
WritableMap event = Arguments.createMap();
|
|
174
|
+
event.putInt("contextId", WhisperContext.this.id);
|
|
175
|
+
event.putInt("jobId", jobId);
|
|
176
|
+
event.putMap("payload", payload);
|
|
177
|
+
eventEmitter.emit(eventName, event);
|
|
33
178
|
}
|
|
34
179
|
|
|
35
|
-
public WritableMap
|
|
36
|
-
|
|
180
|
+
public WritableMap transcribeFile(int jobId, String filePath, ReadableMap options) throws IOException, Exception {
|
|
181
|
+
this.jobId = jobId;
|
|
182
|
+
isTranscribing = true;
|
|
183
|
+
float[] audioData = decodeWaveFile(new File(filePath));
|
|
184
|
+
int code = full(jobId, options, audioData, audioData.length);
|
|
185
|
+
isTranscribing = false;
|
|
186
|
+
this.jobId = -1;
|
|
187
|
+
if (code != 0) {
|
|
188
|
+
throw new Exception("Failed to transcribe the file. Code: " + code);
|
|
189
|
+
}
|
|
190
|
+
return getTextSegments();
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
|
|
194
|
+
return fullTranscribe(
|
|
195
|
+
jobId,
|
|
37
196
|
context,
|
|
38
|
-
|
|
197
|
+
// jboolean realtime,
|
|
198
|
+
isRealtime,
|
|
199
|
+
// float[] audio_data,
|
|
200
|
+
audioData,
|
|
201
|
+
// jint audio_data_len,
|
|
202
|
+
audioDataLen,
|
|
39
203
|
// jint n_threads,
|
|
40
204
|
options.hasKey("maxThreads") ? options.getInt("maxThreads") : -1,
|
|
41
205
|
// jint max_context,
|
|
@@ -69,9 +233,9 @@ public class WhisperContext {
|
|
|
69
233
|
// jstring prompt
|
|
70
234
|
options.hasKey("prompt") ? options.getString("prompt") : null
|
|
71
235
|
);
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
private WritableMap getTextSegments() {
|
|
75
239
|
Integer count = getTextSegmentCount(context);
|
|
76
240
|
StringBuilder builder = new StringBuilder();
|
|
77
241
|
|
|
@@ -92,7 +256,27 @@ public class WhisperContext {
|
|
|
92
256
|
return data;
|
|
93
257
|
}
|
|
94
258
|
|
|
259
|
+
|
|
260
|
+
public boolean isCapturing() {
|
|
261
|
+
return isCapturing;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
public boolean isTranscribing() {
|
|
265
|
+
return isTranscribing;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
public void stopTranscribe(int jobId) {
|
|
269
|
+
abortTranscribe(jobId);
|
|
270
|
+
isCapturing = false;
|
|
271
|
+
isTranscribing = false;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
public void stopCurrentTranscribe() {
|
|
275
|
+
stopTranscribe(this.jobId);
|
|
276
|
+
}
|
|
277
|
+
|
|
95
278
|
public void release() {
|
|
279
|
+
stopCurrentTranscribe();
|
|
96
280
|
freeContext(context);
|
|
97
281
|
}
|
|
98
282
|
|
|
@@ -185,8 +369,11 @@ public class WhisperContext {
|
|
|
185
369
|
|
|
186
370
|
protected static native long initContext(String modelPath);
|
|
187
371
|
protected static native int fullTranscribe(
|
|
372
|
+
int job_id,
|
|
188
373
|
long context,
|
|
374
|
+
boolean realtime,
|
|
189
375
|
float[] audio_data,
|
|
376
|
+
int audio_data_len,
|
|
190
377
|
int n_threads,
|
|
191
378
|
int max_context,
|
|
192
379
|
int word_thold,
|
|
@@ -203,6 +390,8 @@ public class WhisperContext {
|
|
|
203
390
|
String language,
|
|
204
391
|
String prompt
|
|
205
392
|
);
|
|
393
|
+
protected static native void abortTranscribe(int jobId);
|
|
394
|
+
protected static native void abortAllTranscribe();
|
|
206
395
|
protected static native int getTextSegmentCount(long context);
|
|
207
396
|
protected static native String getTextSegment(long context, int index);
|
|
208
397
|
protected static native int getTextSegmentT0(long context, int index);
|
|
@@ -12,7 +12,7 @@ ifneq ($(APP_OPTIM),debug)
|
|
|
12
12
|
endif
|
|
13
13
|
|
|
14
14
|
LOCAL_CFLAGS += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
|
|
15
|
-
LOCAL_CPPFLAGS += -std=c++11
|
|
15
|
+
LOCAL_CPPFLAGS += -std=c++11 -I $(WHISPER_LIB_DIR)
|
|
16
16
|
LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
|
|
17
17
|
$(WHISPER_LIB_DIR)/whisper.cpp \
|
|
18
18
|
$(WHISPER_LIB_DIR)/rn-whisper.cpp \
|
|
@@ -36,8 +36,11 @@ JNIEXPORT jint JNICALL
|
|
|
36
36
|
Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
37
37
|
JNIEnv *env,
|
|
38
38
|
jobject thiz,
|
|
39
|
+
jint job_id,
|
|
39
40
|
jlong context_ptr,
|
|
41
|
+
jboolean realtime,
|
|
40
42
|
jfloatArray audio_data,
|
|
43
|
+
jint audio_data_len,
|
|
41
44
|
jint n_threads,
|
|
42
45
|
jint max_context,
|
|
43
46
|
int word_thold,
|
|
@@ -57,7 +60,6 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
57
60
|
UNUSED(thiz);
|
|
58
61
|
struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
|
|
59
62
|
jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
|
|
60
|
-
const jsize audio_data_length = env->GetArrayLength(audio_data);
|
|
61
63
|
|
|
62
64
|
int max_threads = min(4, get_nprocs());
|
|
63
65
|
|
|
@@ -81,7 +83,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
81
83
|
params.speed_up = speed_up;
|
|
82
84
|
params.offset_ms = 0;
|
|
83
85
|
params.no_context = true;
|
|
84
|
-
params.single_segment =
|
|
86
|
+
params.single_segment = realtime;
|
|
85
87
|
|
|
86
88
|
if (max_len > -1) {
|
|
87
89
|
params.max_len = max_len;
|
|
@@ -117,19 +119,45 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
117
119
|
);
|
|
118
120
|
}
|
|
119
121
|
|
|
122
|
+
params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
|
123
|
+
bool is_aborted = *(bool*)user_data;
|
|
124
|
+
return !is_aborted;
|
|
125
|
+
};
|
|
126
|
+
params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
|
|
127
|
+
|
|
120
128
|
LOGI("About to reset timings");
|
|
121
129
|
whisper_reset_timings(context);
|
|
122
130
|
|
|
123
131
|
LOGI("About to run whisper_full");
|
|
124
|
-
int code = whisper_full(context, params, audio_data_arr,
|
|
132
|
+
int code = whisper_full(context, params, audio_data_arr, audio_data_len);
|
|
125
133
|
if (code == 0) {
|
|
126
134
|
// whisper_print_timings(context);
|
|
127
135
|
}
|
|
128
136
|
env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
|
|
129
137
|
env->ReleaseStringUTFChars(language, language_chars);
|
|
138
|
+
rn_whisper_remove_abort_map(job_id);
|
|
130
139
|
return code;
|
|
131
140
|
}
|
|
132
141
|
|
|
142
|
+
JNIEXPORT void JNICALL
|
|
143
|
+
Java_com_rnwhisper_WhisperContext_abortTranscribe(
|
|
144
|
+
JNIEnv *env,
|
|
145
|
+
jobject thiz,
|
|
146
|
+
jint job_id
|
|
147
|
+
) {
|
|
148
|
+
UNUSED(thiz);
|
|
149
|
+
rn_whisper_abort_transcribe(job_id);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
JNIEXPORT void JNICALL
|
|
153
|
+
Java_com_rnwhisper_WhisperContext_abortAllTranscribe(
|
|
154
|
+
JNIEnv *env,
|
|
155
|
+
jobject thiz
|
|
156
|
+
) {
|
|
157
|
+
UNUSED(thiz);
|
|
158
|
+
rn_whisper_abort_all_transcribe();
|
|
159
|
+
}
|
|
160
|
+
|
|
133
161
|
JNIEXPORT jint JNICALL
|
|
134
162
|
Java_com_rnwhisper_WhisperContext_getTextSegmentCount(
|
|
135
163
|
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
|
@@ -176,4 +204,4 @@ Java_com_rnwhisper_WhisperContext_freeContext(
|
|
|
176
204
|
whisper_free(context);
|
|
177
205
|
}
|
|
178
206
|
|
|
179
|
-
} // extern "C"
|
|
207
|
+
} // extern "C"
|
package/cpp/rn-whisper.cpp
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include <cstdio>
|
|
2
2
|
#include <string>
|
|
3
3
|
#include <vector>
|
|
4
|
+
#include <unordered_map>
|
|
4
5
|
#include "whisper.h"
|
|
5
6
|
|
|
6
7
|
extern "C" {
|
|
@@ -28,4 +29,29 @@ void rn_whisper_convert_prompt(
|
|
|
28
29
|
}
|
|
29
30
|
}
|
|
30
31
|
|
|
32
|
+
std::unordered_map<int, bool> abort_map;
|
|
33
|
+
|
|
34
|
+
bool* rn_whisper_assign_abort_map(int job_id) {
|
|
35
|
+
abort_map[job_id] = false;
|
|
36
|
+
return &abort_map[job_id];
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
void rn_whisper_remove_abort_map(int job_id) {
|
|
40
|
+
if (abort_map.find(job_id) != abort_map.end()) {
|
|
41
|
+
abort_map.erase(job_id);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
void rn_whisper_abort_transcribe(int job_id) {
|
|
46
|
+
if (abort_map.find(job_id) != abort_map.end()) {
|
|
47
|
+
abort_map[job_id] = true;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
void rn_whisper_abort_all_transcribe() {
|
|
52
|
+
for (auto it = abort_map.begin(); it != abort_map.end(); ++it) {
|
|
53
|
+
it->second = true;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
31
57
|
}
|
package/cpp/rn-whisper.h
CHANGED
|
@@ -11,6 +11,11 @@ void rn_whisper_convert_prompt(
|
|
|
11
11
|
std::string * prompt
|
|
12
12
|
);
|
|
13
13
|
|
|
14
|
+
bool* rn_whisper_assign_abort_map(int job_id);
|
|
15
|
+
void rn_whisper_remove_abort_map(int job_id);
|
|
16
|
+
void rn_whisper_abort_transcribe(int job_id);
|
|
17
|
+
void rn_whisper_abort_all_transcribe();
|
|
18
|
+
|
|
14
19
|
#ifdef __cplusplus
|
|
15
20
|
}
|
|
16
21
|
#endif
|
package/ios/RNWhisper.h
CHANGED