whisper.rn 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +43 -4
- package/android/build.gradle +2 -4
- package/android/src/main/java/com/rnwhisper/RNWhisperModule.java +47 -7
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +196 -7
- package/android/src/main/jni/whisper/Whisper.mk +1 -1
- package/android/src/main/jni/whisper/jni.cpp +33 -9
- package/cpp/rn-whisper.cpp +26 -0
- package/cpp/rn-whisper.h +5 -0
- package/cpp/whisper.cpp +603 -412
- package/cpp/whisper.h +120 -40
- package/ios/RNWhisper.h +2 -2
- package/ios/RNWhisper.mm +78 -111
- package/ios/RNWhisperContext.h +53 -0
- package/ios/RNWhisperContext.mm +303 -0
- package/jest/mock.js +38 -2
- package/lib/commonjs/index.js +63 -2
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +64 -3
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/index.d.ts +61 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/index.tsx +121 -4
- package/whisper-rn.podspec +15 -8
package/LICENSE
CHANGED

@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2023 Jhen
+Copyright (c) 2023 Jhen-Jie Hong
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
package/README.md
CHANGED

@@ -20,6 +20,23 @@ npm install whisper.rn
 
 Then re-run `npx pod-install` again for iOS.
 
+## Add Microphone Permissions (Optional)
+
+If you want to use realtime transcribe, you need to add the microphone permission to your app.
+
+### iOS
+Add these lines to ```ios/[YOU_APP_NAME]/info.plist```
+```xml
+<key>NSMicrophoneUsageDescription</key>
+<string>This app requires microphone access in order to transcribe speech</string>
+```
+
+### Android
+Add the following line to ```android/app/src/main/AndroidManifest.xml```
+```xml
+<uses-permission android:name="android.permission.RECORD_AUDIO" />
+```
+
 ## Usage
 
 ```js
@@ -30,13 +47,35 @@ const sampleFilePath = 'file://.../sample.wav'
 
 const whisperContext = await initWhisper({ filePath })
 
-const
-
-
-}
+const options = { language: 'en' }
+const { stop, promise } = whisperContext.transcribe(sampleFilePath, options)
+
+const { result } = await promise
 // result: (The inference text result from audio file)
 ```
 
+Use realtime transcribe:
+
+```js
+const { stop, subscribe } = whisperContext.transcribeRealtime(options)
+
+subscribe(evt => {
+  const { isCapturing, data, processTime, recordingTime } = evt
+  console.log(
+    `Realtime transcribing: ${isCapturing ? 'ON' : 'OFF'}\n` +
+      // The inference text result from audio record:
+      `Result: ${data.result}\n\n` +
+      `Process time: ${processTime}ms\n` +
+      `Recording time: ${recordingTime}ms`,
+  )
+  if (!isCapturing) console.log('Finished realtime transcribing')
+})
+```
+
+In Android, you may need to request the microphone permission by [`PermissionAndroid`](https://reactnative.dev/docs/permissionsandroid).
+
+The documentation is not ready yet, please see the comments of [index](./src/index.tsx) file for more details at the moment.
+
 ## Run with example
 
 The example app is using [react-native-fs](https://github.com/itinance/react-native-fs) to download the model file and audio file.
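One behavioral change worth noting from the README diff above: `transcribe` no longer resolves to the result directly; it now returns `{ stop, promise }` so a long-running job can be cancelled. A hedged sketch, reusing `whisperContext` and `sampleFilePath` from the example above; how the promise settles after `stop()` is not shown in this excerpt, so that part is an assumption:

```ts
const { stop, promise } = whisperContext.transcribe(sampleFilePath, { language: 'en' })

// Cancel later, e.g. on unmount. Presumably this reaches the native
// abortTranscribe(contextId, jobId) method added in this release.
setTimeout(stop, 5000)

// Assumption: the promise still settles after stop(), carrying whatever
// text was produced before the abort.
const { result } = await promise
```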
package/android/build.gradle
CHANGED

@@ -40,10 +40,8 @@ android {
         buildConfigField "boolean", "IS_NEW_ARCHITECTURE_ENABLED", isNewArchitectureEnabled().toString()
     }
     externalNativeBuild {
-
-
-            path 'src/main/jni/whisper/Android.mk'
-        }
+        ndkBuild {
+            path 'src/main/jni/whisper/Android.mk'
         }
     }
     buildTypes {
package/android/src/main/java/com/rnwhisper/RNWhisperModule.java
CHANGED

@@ -5,6 +5,7 @@ import android.util.Log;
 import android.os.Build;
 import android.os.Handler;
 import android.os.AsyncTask;
+import android.media.AudioRecord;
 
 import com.facebook.react.bridge.Promise;
 import com.facebook.react.bridge.ReactApplicationContext;
@@ -51,7 +52,7 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
         throw new Exception("Failed to initialize context");
       }
       int id = Math.abs(new Random().nextInt());
-      WhisperContext whisperContext = new WhisperContext(context);
+      WhisperContext whisperContext = new WhisperContext(id, reactContext, context);
       contexts.put(id, whisperContext);
       return id;
     } catch (Exception e) {
@@ -72,18 +73,27 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
   }
 
   @ReactMethod
-  public void
+  public void transcribeFile(int id, int jobId, String filePath, ReadableMap options, Promise promise) {
+    final WhisperContext context = contexts.get(id);
+    if (context == null) {
+      promise.reject("Context not found");
+      return;
+    }
+    if (context.isCapturing()) {
+      promise.reject("The context is in realtime transcribe mode");
+      return;
+    }
+    if (context.isTranscribing()) {
+      promise.reject("Context is already transcribing");
+      return;
+    }
     new AsyncTask<Void, Void, WritableMap>() {
       private Exception exception;
 
       @Override
       protected WritableMap doInBackground(Void... voids) {
         try {
-
-          if (context == null) {
-            throw new Exception("Context " + id + " not found");
-          }
-          return context.transcribe(filePath, options);
+          return context.transcribeFile(jobId, filePath, options);
         } catch (Exception e) {
           exception = e;
           return null;
@@ -101,6 +111,35 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
     }.execute();
   }
 
+  @ReactMethod
+  public void startRealtimeTranscribe(int id, int jobId, ReadableMap options, Promise promise) {
+    final WhisperContext context = contexts.get(id);
+    if (context == null) {
+      promise.reject("Context not found");
+      return;
+    }
+    if (context.isCapturing()) {
+      promise.reject("Context is already in capturing");
+      return;
+    }
+    int state = context.startRealtimeTranscribe(jobId, options);
+    if (state == AudioRecord.STATE_INITIALIZED) {
+      promise.resolve(null);
+      return;
+    }
+    promise.reject("Failed to start realtime transcribe. State: " + state);
+  }
+
+  @ReactMethod
+  public void abortTranscribe(int contextId, int jobId, Promise promise) {
+    WhisperContext context = contexts.get(contextId);
+    if (context == null) {
+      promise.reject("Context not found");
+      return;
+    }
+    context.stopTranscribe(jobId);
+  }
+
   @ReactMethod
   public void releaseContext(int id, Promise promise) {
     new AsyncTask<Void, Void, Void>() {
@@ -168,6 +207,7 @@ public class RNWhisperModule extends ReactContextBaseJavaModule implements Lifec
 
   @Override
   public void onHostDestroy() {
+    WhisperContext.abortAllTranscribe();
     for (WhisperContext context : contexts.values()) {
       context.release();
     }
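The three `@ReactMethod` entry points above (`transcribeFile`, `startRealtimeTranscribe`, `abortTranscribe`) are the whole Android-native surface this release adds. A hedged sketch of driving them directly from JS; the real wrapper lives in `package/src/index.tsx` (changed in this release but not shown in this excerpt), and the `RNWhisper` module name and the job-id scheme are assumptions:

```ts
import { NativeModules } from 'react-native'

// Assumed module name; argument lists are read off the @ReactMethod
// declarations above.
const { RNWhisper } = NativeModules

async function transcribeOnce(contextId: number, filePath: string) {
  const jobId = Math.floor(Math.random() * 100000) // hypothetical id scheme
  // Rejects if the context is mid-realtime capture or already transcribing.
  return RNWhisper.transcribeFile(contextId, jobId, filePath, { language: 'en' })
}

async function startAndAbortRealtime(contextId: number, jobId: number) {
  // Resolves null once Android's AudioRecord reaches STATE_INITIALIZED.
  await RNWhisper.startRealtimeTranscribe(contextId, jobId, { realtimeAudioSec: 30 })
  // Abort maps to WhisperContext.stopTranscribe(jobId). Note the Java above
  // never resolves this promise on success, so it is safer not to await it.
  RNWhisper.abortTranscribe(contextId, jobId)
}
```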
package/android/src/main/java/com/rnwhisper/WhisperContext.java
CHANGED

@@ -4,10 +4,15 @@ import com.facebook.react.bridge.Arguments;
 import com.facebook.react.bridge.WritableArray;
 import com.facebook.react.bridge.WritableMap;
 import com.facebook.react.bridge.ReadableMap;
+import com.facebook.react.bridge.ReactApplicationContext;
+import com.facebook.react.modules.core.DeviceEventManagerModule;
 
 import android.util.Log;
 import android.os.Build;
 import android.content.res.AssetManager;
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder.AudioSource;
 
 import java.util.Random;
 import java.lang.StringBuilder;
@@ -26,16 +31,175 @@ import java.nio.ShortBuffer;
 
 public class WhisperContext {
   public static final String NAME = "RNWhisperContext";
+
+  private static final int SAMPLE_RATE = 16000;
+  private static final int CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO;
+  private static final int AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT;
+  private static final int AUDIO_SOURCE = AudioSource.VOICE_RECOGNITION;
+  private static final int DEFAULT_MAX_AUDIO_SEC = 30;
+
+  private int id;
+  private ReactApplicationContext reactContext;
   private long context;
 
-
+  private DeviceEventManagerModule.RCTDeviceEventEmitter eventEmitter;
+
+  private int jobId = -1;
+  private AudioRecord recorder = null;
+  private int bufferSize;
+  private short[] buffer16;
+  private int nSamples = 0;
+  private boolean isCapturing = false;
+  private boolean isTranscribing = false;
+  private boolean isRealtime = false;
+
+  public WhisperContext(int id, ReactApplicationContext reactContext, long context) {
+    this.id = id;
     this.context = context;
+    this.reactContext = reactContext;
+    eventEmitter = reactContext.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class);
+    bufferSize = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT);
+  }
+
+  public int startRealtimeTranscribe(int jobId, ReadableMap options) {
+    if (isCapturing || isTranscribing) {
+      return -100;
+    }
+
+    recorder = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT, bufferSize);
+
+    int state = recorder.getState();
+    if (state != AudioRecord.STATE_INITIALIZED) {
+      recorder.release();
+      return state;
+    }
+
+    int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
+    final int maxAudioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
+
+    buffer16 = new short[maxAudioSec * SAMPLE_RATE * Short.BYTES];
+
+    this.jobId = jobId;
+    isCapturing = true;
+    isRealtime = true;
+    nSamples = 0;
+
+    recorder.startRecording();
+
+    new Thread(new Runnable() {
+      @Override
+      public void run() {
+        try {
+          short[] buffer = new short[bufferSize];
+          Thread fullHandler = null;
+          while (isCapturing) {
+            try {
+              int n = recorder.read(buffer, 0, bufferSize);
+              if (n == 0) continue;
+
+              if (nSamples + n > maxAudioSec * SAMPLE_RATE) {
+                // Full, ignore data
+                isCapturing = false;
+                if (!isTranscribing)
+                  emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
+                break;
+              }
+              nSamples += n;
+              for (int i = 0; i < n; i++) {
+                buffer16[nSamples + i] = buffer[i];
+              }
+              if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
+                isTranscribing = true;
+                Log.d(NAME, "Start transcribing realtime: " + nSamples);
+                fullHandler = new Thread(new Runnable() {
+                  @Override
+                  public void run() {
+                    if (!isCapturing) return;
+
+                    // convert I16 to F32
+                    float[] nSamplesBuffer32 = new float[nSamples];
+                    for (int i = 0; i < nSamples; i++) {
+                      nSamplesBuffer32[i] = buffer16[i] / 32768.0f;
+                    }
+
+                    int timeStart = (int) System.currentTimeMillis();
+                    int code = full(jobId, options, nSamplesBuffer32, nSamples);
+                    int timeEnd = (int) System.currentTimeMillis();
+                    int timeRecording = (int) (nSamples / SAMPLE_RATE * 1000);
+
+                    WritableMap payload = Arguments.createMap();
+                    payload.putBoolean("isCapturing", isCapturing);
+                    payload.putInt("code", code);
+                    payload.putInt("processTime", timeEnd - timeStart);
+                    payload.putInt("recordingTime", timeRecording);
+
+                    if (code == 0) {
+                      payload.putMap("data", getTextSegments());
+                      emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
+                    } else {
+                      payload.putString("error", "Transcribe failed with code " + code);
+                      emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
+                    }
+
+                    if (!isCapturing) {
+                      emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
+                    }
+                    isTranscribing = false;
+                  }
+                });
+                fullHandler.start();
+              }
+            } catch (Exception e) {
+              Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
+            }
+          }
+          if (fullHandler != null) {
+            fullHandler.join(); // Wait for full transcribe to finish
+          }
+          recorder.stop();
+        } catch (Exception e) {
+          e.printStackTrace();
+        } finally {
+          recorder.release();
+          recorder = null;
+        }
+      }
+    }).start();
+
+    return state;
+  }
+
+  private void emitTranscribeEvent(final String eventName, final WritableMap payload) {
+    WritableMap event = Arguments.createMap();
+    event.putInt("contextId", WhisperContext.this.id);
+    event.putInt("jobId", jobId);
+    event.putMap("payload", payload);
+    eventEmitter.emit(eventName, event);
   }
 
-  public WritableMap
-
+  public WritableMap transcribeFile(int jobId, String filePath, ReadableMap options) throws IOException, Exception {
+    this.jobId = jobId;
+    isTranscribing = true;
+    float[] audioData = decodeWaveFile(new File(filePath));
+    int code = full(jobId, options, audioData, audioData.length);
+    isTranscribing = false;
+    this.jobId = -1;
+    if (code != 0) {
+      throw new Exception("Failed to transcribe the file. Code: " + code);
+    }
+    return getTextSegments();
+  }
+
+  private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
+    return fullTranscribe(
+      jobId,
       context,
-
+      // jboolean realtime,
+      isRealtime,
+      // float[] audio_data,
+      audioData,
+      // jint audio_data_len,
+      audioDataLen,
       // jint n_threads,
       options.hasKey("maxThreads") ? options.getInt("maxThreads") : -1,
       // jint max_context,
@@ -69,9 +233,9 @@ public class WhisperContext {
       // jstring prompt
       options.hasKey("prompt") ? options.getString("prompt") : null
     );
-
-
-
+  }
+
+  private WritableMap getTextSegments() {
     Integer count = getTextSegmentCount(context);
     StringBuilder builder = new StringBuilder();
 
@@ -92,7 +256,27 @@ public class WhisperContext {
     return data;
   }
 
+
+  public boolean isCapturing() {
+    return isCapturing;
+  }
+
+  public boolean isTranscribing() {
+    return isTranscribing;
+  }
+
+  public void stopTranscribe(int jobId) {
+    abortTranscribe(jobId);
+    isCapturing = false;
+    isTranscribing = false;
+  }
+
+  public void stopCurrentTranscribe() {
+    stopTranscribe(this.jobId);
+  }
+
   public void release() {
+    stopCurrentTranscribe();
     freeContext(context);
   }
 
@@ -185,8 +369,11 @@ public class WhisperContext {
 
   protected static native long initContext(String modelPath);
   protected static native int fullTranscribe(
+    int job_id,
     long context,
+    boolean realtime,
     float[] audio_data,
+    int audio_data_len,
     int n_threads,
     int max_context,
     int word_thold,
@@ -203,6 +390,8 @@ public class WhisperContext {
     String language,
     String prompt
   );
+  protected static native void abortTranscribe(int jobId);
+  protected static native void abortAllTranscribe();
  protected static native int getTextSegmentCount(long context);
  protected static native String getTextSegment(long context, int index);
  protected static native int getTextSegmentT0(long context, int index);
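On the JS side, everything `emitTranscribeEvent` sends arrives as a device event wrapped in a `{ contextId, jobId, payload }` envelope. A minimal listener sketch; the event names, envelope, and payload keys are taken from the Java above, and `data.result` from the README example (the shipped `src/index.tsx` presumably wraps this for you):

```ts
import { DeviceEventEmitter } from 'react-native'

const sub = DeviceEventEmitter.addListener(
  '@RNWhisper_onRealtimeTranscribe',
  ({ contextId, jobId, payload }) => {
    if (payload.code !== 0) {
      console.warn(`Job ${jobId} (context ${contextId}): ${payload.error}`)
      return
    }
    // payload.data carries the text segments; data.result is the full text.
    console.log(
      `${payload.processTime}ms / ${payload.recordingTime}ms:`,
      payload.data.result,
    )
  },
)

const endSub = DeviceEventEmitter.addListener(
  '@RNWhisper_onRealtimeTranscribeEnd',
  ({ jobId }) => {
    console.log(`Job ${jobId} stopped capturing`)
    sub.remove()
    endSub.remove()
  },
)
```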
package/android/src/main/jni/whisper/Whisper.mk
CHANGED

@@ -12,7 +12,7 @@ ifneq ($(APP_OPTIM),debug)
 endif
 
 LOCAL_CFLAGS += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
-LOCAL_CPPFLAGS += -std=c++11
+LOCAL_CPPFLAGS += -std=c++11 -I $(WHISPER_LIB_DIR)
 LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
     $(WHISPER_LIB_DIR)/whisper.cpp \
     $(WHISPER_LIB_DIR)/rn-whisper.cpp \
package/android/src/main/jni/whisper/jni.cpp
CHANGED

@@ -19,10 +19,6 @@ static inline int min(int a, int b) {
   return (a < b) ? a : b;
 }
 
-static inline int max(int a, int b) {
-  return (a > b) ? a : b;
-}
-
 extern "C" {
 
 JNIEXPORT jlong JNICALL
@@ -40,8 +36,11 @@ JNIEXPORT jint JNICALL
 Java_com_rnwhisper_WhisperContext_fullTranscribe(
   JNIEnv *env,
   jobject thiz,
+  jint job_id,
   jlong context_ptr,
+  jboolean realtime,
   jfloatArray audio_data,
+  jint audio_data_len,
   jint n_threads,
   jint max_context,
   int word_thold,
@@ -61,9 +60,8 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
   UNUSED(thiz);
   struct whisper_context *context = reinterpret_cast<struct whisper_context *>(context_ptr);
   jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
-  const jsize audio_data_length = env->GetArrayLength(audio_data);
 
-  int max_threads =
+  int max_threads = min(4, get_nprocs());
 
   LOGI("About to create params");
 
@@ -85,7 +83,7 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
   params.speed_up = speed_up;
   params.offset_ms = 0;
   params.no_context = true;
-  params.single_segment =
+  params.single_segment = realtime;
 
   if (max_len > -1) {
     params.max_len = max_len;
@@ -121,19 +119,45 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
     );
   }
 
+  params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
+    bool is_aborted = *(bool*)user_data;
+    return !is_aborted;
+  };
+  params.encoder_begin_callback_user_data = rn_whisper_assign_abort_map(job_id);
+
   LOGI("About to reset timings");
   whisper_reset_timings(context);
 
   LOGI("About to run whisper_full");
-  int code = whisper_full(context, params, audio_data_arr,
+  int code = whisper_full(context, params, audio_data_arr, audio_data_len);
   if (code == 0) {
     // whisper_print_timings(context);
   }
   env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
   env->ReleaseStringUTFChars(language, language_chars);
+  rn_whisper_remove_abort_map(job_id);
   return code;
 }
 
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_abortTranscribe(
+  JNIEnv *env,
+  jobject thiz,
+  jint job_id
+) {
+  UNUSED(thiz);
+  rn_whisper_abort_transcribe(job_id);
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnwhisper_WhisperContext_abortAllTranscribe(
+  JNIEnv *env,
+  jobject thiz
+) {
+  UNUSED(thiz);
+  rn_whisper_abort_all_transcribe();
+}
+
 JNIEXPORT jint JNICALL
 Java_com_rnwhisper_WhisperContext_getTextSegmentCount(
   JNIEnv *env, jobject thiz, jlong context_ptr) {
@@ -180,4 +204,4 @@ Java_com_rnwhisper_WhisperContext_freeContext(
   whisper_free(context);
 }
 
-} // extern "C"
+} // extern "C"
package/cpp/rn-whisper.cpp
CHANGED

@@ -1,6 +1,7 @@
 #include <cstdio>
 #include <string>
 #include <vector>
+#include <unordered_map>
 #include "whisper.h"
 
 extern "C" {
@@ -28,4 +29,29 @@ void rn_whisper_convert_prompt(
   }
 }
 
+std::unordered_map<int, bool> abort_map;
+
+bool* rn_whisper_assign_abort_map(int job_id) {
+  abort_map[job_id] = false;
+  return &abort_map[job_id];
+}
+
+void rn_whisper_remove_abort_map(int job_id) {
+  if (abort_map.find(job_id) != abort_map.end()) {
+    abort_map.erase(job_id);
+  }
+}
+
+void rn_whisper_abort_transcribe(int job_id) {
+  if (abort_map.find(job_id) != abort_map.end()) {
+    abort_map[job_id] = true;
+  }
+}
+
+void rn_whisper_abort_all_transcribe() {
+  for (auto it = abort_map.begin(); it != abort_map.end(); ++it) {
+    it->second = true;
+  }
+}
+
 }
package/cpp/rn-whisper.h
CHANGED

@@ -11,6 +11,11 @@ void rn_whisper_convert_prompt(
   std::string * prompt
 );
 
+bool* rn_whisper_assign_abort_map(int job_id);
+void rn_whisper_remove_abort_map(int job_id);
+void rn_whisper_abort_transcribe(int job_id);
+void rn_whisper_abort_all_transcribe();
+
 #ifdef __cplusplus
 }
 #endif
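Taken together, `rn-whisper.cpp` and `rn-whisper.h` implement cooperative cancellation: `rn_whisper_assign_abort_map` hands out a pointer to a per-job flag (stable because `std::unordered_map` does not invalidate references to surviving elements on insert), whisper's `encoder_begin_callback` polls that flag before each encoder pass (see `jni.cpp` above), and the abort functions simply flip it. An illustrative TypeScript analogue of the same contract, not part of the package:

```ts
// One mutable flag per job, polled cooperatively by the worker: the same
// contract as abort_map / rn_whisper_* above, transposed to TS.
const abortMap = new Map<number, { aborted: boolean }>()

const decodeChunk = async (_chunk: Float32Array) => {} // stub for the sketch

function assignAbortFlag(jobId: number) {
  const flag = { aborted: false } // rn_whisper_assign_abort_map
  abortMap.set(jobId, flag)
  return flag
}

function abortJob(jobId: number) {
  const flag = abortMap.get(jobId)
  if (flag) flag.aborted = true // rn_whisper_abort_transcribe
}

async function transcribeJob(jobId: number, chunks: Float32Array[]) {
  const flag = assignAbortFlag(jobId)
  for (const chunk of chunks) {
    // Mirrors encoder_begin_callback returning false once the flag is set.
    if (flag.aborted) break
    await decodeChunk(chunk) // hypothetical per-chunk work
  }
  abortMap.delete(jobId) // rn_whisper_remove_abort_map
}

// Usage: start transcribeJob(1, chunks), then abortJob(1) to stop it early.
```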