whisper.rn 0.4.0-rc.2 → 0.4.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +2 -0
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +6 -1
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +29 -15
- package/android/src/main/jni.cpp +6 -2
- package/cpp/ggml-alloc.c +413 -280
- package/cpp/ggml-alloc.h +67 -8
- package/cpp/ggml-backend-impl.h +87 -0
- package/cpp/ggml-backend.c +950 -0
- package/cpp/ggml-backend.h +136 -0
- package/cpp/ggml-impl.h +243 -0
- package/cpp/{ggml-metal.metal → ggml-metal-whisper.metal} +591 -121
- package/cpp/ggml-metal.h +21 -0
- package/cpp/ggml-metal.m +623 -234
- package/cpp/ggml-quants.c +7377 -0
- package/cpp/ggml-quants.h +224 -0
- package/cpp/ggml.c +3773 -4455
- package/cpp/ggml.h +279 -146
- package/cpp/whisper.cpp +182 -103
- package/cpp/whisper.h +48 -11
- package/ios/RNWhisper.mm +8 -2
- package/ios/RNWhisperContext.h +6 -2
- package/ios/RNWhisperContext.mm +97 -26
- package/jest/mock.js +1 -1
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +28 -9
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +28 -9
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +7 -1
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +8 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNWhisper.ts +8 -1
- package/src/index.ts +30 -18
- package/src/version.json +1 -1
- package/whisper-rn.podspec +1 -2
|
@@ -9,6 +9,8 @@ set(
|
|
|
9
9
|
SOURCE_FILES
|
|
10
10
|
${RNWHISPER_LIB_DIR}/ggml.c
|
|
11
11
|
${RNWHISPER_LIB_DIR}/ggml-alloc.c
|
|
12
|
+
${RNWHISPER_LIB_DIR}/ggml-backend.c
|
|
13
|
+
${RNWHISPER_LIB_DIR}/ggml-quants.c
|
|
12
14
|
${RNWHISPER_LIB_DIR}/whisper.cpp
|
|
13
15
|
${RNWHISPER_LIB_DIR}/rn-whisper.cpp
|
|
14
16
|
${CMAKE_SOURCE_DIR}/jni.cpp
|
|
@@ -13,6 +13,7 @@ import com.facebook.react.bridge.ReactMethod;
|
|
|
13
13
|
import com.facebook.react.bridge.LifecycleEventListener;
|
|
14
14
|
import com.facebook.react.bridge.ReadableMap;
|
|
15
15
|
import com.facebook.react.bridge.WritableMap;
|
|
16
|
+
import com.facebook.react.bridge.Arguments;
|
|
16
17
|
|
|
17
18
|
import java.util.HashMap;
|
|
18
19
|
import java.util.Random;
|
|
@@ -107,7 +108,11 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
107
108
|
promise.reject(exception);
|
|
108
109
|
return;
|
|
109
110
|
}
|
|
110
|
-
|
|
111
|
+
WritableMap result = Arguments.createMap();
|
|
112
|
+
result.putInt("contextId", id);
|
|
113
|
+
result.putBoolean("gpu", false);
|
|
114
|
+
result.putString("reasonNoGPU", "Currently not supported");
|
|
115
|
+
promise.resolve(result);
|
|
111
116
|
tasks.remove(this);
|
|
112
117
|
}
|
|
113
118
|
}.execute();
|
|
@@ -82,8 +82,9 @@ public class WhisperContext {
|
|
|
82
82
|
private boolean vad(ReadableMap options, short[] shortBuffer, int nSamples, int n) {
|
|
83
83
|
boolean isSpeech = true;
|
|
84
84
|
if (!isTranscribing && options.hasKey("useVad") && options.getBoolean("useVad")) {
|
|
85
|
-
int
|
|
86
|
-
|
|
85
|
+
int vadMs = options.hasKey("vadMs") ? options.getInt("vadMs") : 2000;
|
|
86
|
+
if (vadMs < 2000) vadMs = 2000;
|
|
87
|
+
int sampleSize = (int) (SAMPLE_RATE * vadMs / 1000);
|
|
87
88
|
if (nSamples + n > sampleSize) {
|
|
88
89
|
int start = nSamples + n - sampleSize;
|
|
89
90
|
float[] audioData = new float[sampleSize];
|
|
@@ -100,6 +101,21 @@ public class WhisperContext {
|
|
|
100
101
|
return isSpeech;
|
|
101
102
|
}
|
|
102
103
|
|
|
104
|
+
private void finishRealtimeTranscribe(ReadableMap options, WritableMap result) {
|
|
105
|
+
String audioOutputPath = options.hasKey("audioOutputPath") ? options.getString("audioOutputPath") : null;
|
|
106
|
+
if (audioOutputPath != null) {
|
|
107
|
+
// TODO: Append in real time so we don't need to keep all slices & also reduce memory usage
|
|
108
|
+
Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
|
|
109
|
+
try {
|
|
110
|
+
AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
|
|
111
|
+
} catch (IOException e) {
|
|
112
|
+
Log.e(NAME, "Error saving wav file: " + e.getMessage());
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
117
|
+
}
|
|
118
|
+
|
|
103
119
|
public int startRealtimeTranscribe(int jobId, ReadableMap options) {
|
|
104
120
|
if (isCapturing || isTranscribing) {
|
|
105
121
|
return -100;
|
|
@@ -131,7 +147,7 @@ public class WhisperContext {
|
|
|
131
147
|
shortBufferSlices.add(new short[audioSliceSec * SAMPLE_RATE]);
|
|
132
148
|
sliceNSamples = new ArrayList<Integer>();
|
|
133
149
|
sliceNSamples.add(0);
|
|
134
|
-
|
|
150
|
+
|
|
135
151
|
isCapturing = true;
|
|
136
152
|
recorder.startRecording();
|
|
137
153
|
|
|
@@ -159,12 +175,12 @@ public class WhisperContext {
|
|
|
159
175
|
nSamples == nSamplesTranscribing &&
|
|
160
176
|
sliceIndex == transcribeSliceIndex
|
|
161
177
|
) {
|
|
162
|
-
|
|
178
|
+
finishRealtimeTranscribe(options, Arguments.createMap());
|
|
163
179
|
} else if (!isTranscribing) {
|
|
164
180
|
short[] shortBuffer = shortBufferSlices.get(sliceIndex);
|
|
165
181
|
boolean isSpeech = vad(options, shortBuffer, nSamples, 0);
|
|
166
182
|
if (!isSpeech) {
|
|
167
|
-
|
|
183
|
+
finishRealtimeTranscribe(options, Arguments.createMap());
|
|
168
184
|
break;
|
|
169
185
|
}
|
|
170
186
|
isTranscribing = true;
|
|
@@ -210,11 +226,9 @@ public class WhisperContext {
|
|
|
210
226
|
Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
|
|
211
227
|
}
|
|
212
228
|
}
|
|
213
|
-
|
|
214
|
-
Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
|
|
215
|
-
AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
|
|
229
|
+
|
|
216
230
|
if (!isTranscribing) {
|
|
217
|
-
|
|
231
|
+
finishRealtimeTranscribe(options, Arguments.createMap());
|
|
218
232
|
}
|
|
219
233
|
if (fullHandler != null) {
|
|
220
234
|
fullHandler.join(); // Wait for full transcribe to finish
|
|
@@ -264,7 +278,7 @@ public class WhisperContext {
|
|
|
264
278
|
|
|
265
279
|
if (code == 0) {
|
|
266
280
|
payload.putMap("data", getTextSegments(0, getTextSegmentCount(context)));
|
|
267
|
-
} else {
|
|
281
|
+
} else if (code != -999) { // Not aborted
|
|
268
282
|
payload.putString("error", "Transcribe failed with code " + code);
|
|
269
283
|
}
|
|
270
284
|
|
|
@@ -283,12 +297,12 @@ public class WhisperContext {
|
|
|
283
297
|
nSamplesTranscribing = 0;
|
|
284
298
|
}
|
|
285
299
|
|
|
286
|
-
boolean continueNeeded = !isCapturing && nSamplesTranscribing != nSamplesOfIndex;
|
|
300
|
+
boolean continueNeeded = !isCapturing && nSamplesTranscribing != nSamplesOfIndex && code != -999;
|
|
287
301
|
|
|
288
302
|
if (isStopped && !continueNeeded) {
|
|
289
303
|
payload.putBoolean("isCapturing", false);
|
|
290
304
|
payload.putBoolean("isStoppedByAction", isStoppedByAction);
|
|
291
|
-
|
|
305
|
+
finishRealtimeTranscribe(options, payload);
|
|
292
306
|
} else if (code == 0) {
|
|
293
307
|
payload.putBoolean("isCapturing", true);
|
|
294
308
|
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
@@ -372,7 +386,7 @@ public class WhisperContext {
|
|
|
372
386
|
int code = full(jobId, options, audioData, audioData.length);
|
|
373
387
|
isTranscribing = false;
|
|
374
388
|
this.jobId = -1;
|
|
375
|
-
if (code != 0) {
|
|
389
|
+
if (code != 0 && code != 999) {
|
|
376
390
|
throw new Exception("Failed to transcribe the file. Code: " + code);
|
|
377
391
|
}
|
|
378
392
|
WritableMap result = getTextSegments(0, getTextSegmentCount(context));
|
|
@@ -401,7 +415,7 @@ public class WhisperContext {
|
|
|
401
415
|
options.hasKey("maxLen") ? options.getInt("maxLen") : -1,
|
|
402
416
|
// jboolean token_timestamps,
|
|
403
417
|
options.hasKey("tokenTimestamps") ? options.getBoolean("tokenTimestamps") : false,
|
|
404
|
-
|
|
418
|
+
|
|
405
419
|
// jint offset,
|
|
406
420
|
options.hasKey("offset") ? options.getInt("offset") : -1,
|
|
407
421
|
// jint duration,
|
|
@@ -577,4 +591,4 @@ public class WhisperContext {
|
|
|
577
591
|
protected static native int getTextSegmentT0(long context, int index);
|
|
578
592
|
protected static native int getTextSegmentT1(long context, int index);
|
|
579
593
|
protected static native void freeContext(long contextPtr);
|
|
580
|
-
}
|
|
594
|
+
}
|
package/android/src/main/jni.cpp
CHANGED
|
@@ -297,16 +297,17 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
297
297
|
}
|
|
298
298
|
|
|
299
299
|
// abort handlers
|
|
300
|
+
bool* abort_ptr = rn_whisper_assign_abort_map(job_id);
|
|
300
301
|
params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
|
301
302
|
bool is_aborted = *(bool*)user_data;
|
|
302
303
|
return !is_aborted;
|
|
303
304
|
};
|
|
304
|
-
params.encoder_begin_callback_user_data =
|
|
305
|
+
params.encoder_begin_callback_user_data = abort_ptr;
|
|
305
306
|
params.abort_callback = [](void * user_data) {
|
|
306
307
|
bool is_aborted = *(bool*)user_data;
|
|
307
308
|
return is_aborted;
|
|
308
309
|
};
|
|
309
|
-
params.abort_callback_user_data =
|
|
310
|
+
params.abort_callback_user_data = abort_ptr;
|
|
310
311
|
|
|
311
312
|
if (callback_instance != nullptr) {
|
|
312
313
|
callback_context *cb_ctx = new callback_context;
|
|
@@ -344,6 +345,9 @@ Java_com_rnwhisper_WhisperContext_fullTranscribe(
|
|
|
344
345
|
}
|
|
345
346
|
env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
|
|
346
347
|
env->ReleaseStringUTFChars(language, language_chars);
|
|
348
|
+
if (rn_whisper_transcribe_is_aborted(job_id)) {
|
|
349
|
+
code = -999;
|
|
350
|
+
}
|
|
347
351
|
rn_whisper_remove_abort_map(job_id);
|
|
348
352
|
return code;
|
|
349
353
|
}
|