whisper.rn 0.4.0-rc.1 → 0.4.0-rc.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/android/build.gradle +4 -0
- package/android/src/main/CMakeLists.txt +21 -1
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -92
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +86 -40
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +85 -131
- package/android/src/main/jni-utils.h +76 -0
- package/android/src/main/jni.cpp +226 -109
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/cpp/coreml/whisper-encoder-impl.h +1 -1
- package/cpp/coreml/whisper-encoder.h +4 -0
- package/cpp/coreml/whisper-encoder.mm +5 -3
- package/cpp/ggml-alloc.c +797 -400
- package/cpp/ggml-alloc.h +60 -10
- package/cpp/ggml-backend-impl.h +255 -0
- package/cpp/ggml-backend-reg.cpp +582 -0
- package/cpp/ggml-backend.cpp +2002 -0
- package/cpp/ggml-backend.h +354 -0
- package/cpp/ggml-common.h +1851 -0
- package/cpp/ggml-cpp.h +39 -0
- package/cpp/ggml-cpu-aarch64.cpp +4247 -0
- package/cpp/ggml-cpu-aarch64.h +8 -0
- package/cpp/ggml-cpu-impl.h +531 -0
- package/cpp/ggml-cpu-quants.c +12245 -0
- package/cpp/ggml-cpu-quants.h +63 -0
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +14792 -0
- package/cpp/ggml-cpu.cpp +653 -0
- package/cpp/ggml-cpu.h +137 -0
- package/cpp/ggml-impl.h +567 -0
- package/cpp/ggml-metal-impl.h +288 -0
- package/cpp/ggml-metal.h +24 -43
- package/cpp/ggml-metal.m +4867 -1080
- package/cpp/ggml-opt.cpp +854 -0
- package/cpp/ggml-opt.h +216 -0
- package/cpp/ggml-quants.c +5238 -0
- package/cpp/ggml-quants.h +100 -0
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +14 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +5106 -19431
- package/cpp/ggml.h +847 -669
- package/cpp/gguf.cpp +1329 -0
- package/cpp/gguf.h +202 -0
- package/cpp/rn-audioutils.cpp +68 -0
- package/cpp/rn-audioutils.h +14 -0
- package/cpp/rn-whisper-log.h +11 -0
- package/cpp/rn-whisper.cpp +221 -52
- package/cpp/rn-whisper.h +50 -15
- package/cpp/whisper.cpp +3174 -1533
- package/cpp/whisper.h +176 -44
- package/ios/RNWhisper.mm +139 -46
- package/ios/RNWhisperAudioUtils.h +1 -2
- package/ios/RNWhisperAudioUtils.m +18 -67
- package/ios/RNWhisperContext.h +11 -8
- package/ios/RNWhisperContext.mm +195 -150
- package/jest/mock.js +15 -2
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +76 -28
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +76 -28
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +13 -4
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +37 -5
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +9 -7
- package/src/NativeRNWhisper.ts +20 -4
- package/src/index.ts +98 -42
- package/src/version.json +1 -1
- package/whisper-rn.podspec +13 -20
- package/cpp/README.md +0 -4
- package/cpp/ggml-metal.metal +0 -2353
package/README.md
CHANGED
|
@@ -25,19 +25,19 @@ npm install whisper.rn
|
|
|
25
25
|
|
|
26
26
|
Please re-run `npx pod-install` again.
|
|
27
27
|
|
|
28
|
-
#### Android
|
|
29
|
-
|
|
30
28
|
If you want to use `medium` or `large` model, the [Extended Virtual Addressing](https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_developer_kernel_extended-virtual-addressing) capability is recommended to enable on iOS project.
|
|
31
29
|
|
|
32
|
-
|
|
30
|
+
#### Android
|
|
33
31
|
|
|
34
|
-
|
|
32
|
+
Add proguard rule if it's enabled in project (android/app/proguard-rules.pro):
|
|
35
33
|
|
|
36
34
|
```proguard
|
|
37
35
|
# whisper.rn
|
|
38
36
|
-keep class com.rnwhisper.** { *; }
|
|
39
37
|
```
|
|
40
38
|
|
|
39
|
+
For build, it's recommended to use `ndkVersion = "24.0.8215888"` (or above) in your root project build configuration for Apple Silicon Macs. Otherwise please follow this troubleshooting [issue](./TROUBLESHOOTING.md#android-got-build-error-unknown-host-cpu-architecture-arm64-on-apple-silicon-macs).
|
|
40
|
+
|
|
41
41
|
#### Expo
|
|
42
42
|
|
|
43
43
|
You will need to prebuild the project before using it. See [Expo guide](https://docs.expo.io/guides/using-libraries/#using-a-library-in-a-expo-project) for more details.
|
|
@@ -91,7 +91,7 @@ subscribe(evt => {
|
|
|
91
91
|
console.log(
|
|
92
92
|
`Realtime transcribing: ${isCapturing ? 'ON' : 'OFF'}\n` +
|
|
93
93
|
// The inference text result from audio record:
|
|
94
|
-
`Result: ${data.result}\n\n` +
|
|
94
|
+
`Result: ${data.result}\n\n` +
|
|
95
95
|
`Process time: ${processTime}ms\n` +
|
|
96
96
|
`Recording time: ${recordingTime}ms`,
|
|
97
97
|
)
|
|
@@ -220,7 +220,7 @@ In real world, we recommended to split the asset imports into another platform s
|
|
|
220
220
|
|
|
221
221
|
The example app provides a simple UI for testing the functions.
|
|
222
222
|
|
|
223
|
-
Used Whisper model: `tiny.en` in https://huggingface.co/ggerganov/whisper.cpp
|
|
223
|
+
Used Whisper model: `tiny.en` in https://huggingface.co/ggerganov/whisper.cpp
|
|
224
224
|
Sample file: `jfk.wav` in https://github.com/ggerganov/whisper.cpp/tree/master/samples
|
|
225
225
|
|
|
226
226
|
Please follow the [Development Workflow section of contributing guide](./CONTRIBUTING.md#development-workflow) to run the example app.
|
package/android/build.gradle
CHANGED
|
@@ -36,6 +36,10 @@ def reactNativeArchitectures() {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
android {
|
|
39
|
+
def agpVersion = com.android.Version.ANDROID_GRADLE_PLUGIN_VERSION
|
|
40
|
+
if (agpVersion.tokenize('.')[0].toInteger() >= 7) {
|
|
41
|
+
namespace "com.rnwhisper"
|
|
42
|
+
}
|
|
39
43
|
ndkVersion getExtOrDefault("ndkVersion")
|
|
40
44
|
compileSdkVersion getExtOrIntegerDefault("compileSdkVersion")
|
|
41
45
|
|
|
@@ -2,14 +2,28 @@ cmake_minimum_required(VERSION 3.10)
|
|
|
2
2
|
|
|
3
3
|
project(whisper.rn)
|
|
4
4
|
|
|
5
|
-
set(CMAKE_CXX_STANDARD
|
|
5
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
6
6
|
set(RNWHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)
|
|
7
7
|
|
|
8
8
|
set(
|
|
9
9
|
SOURCE_FILES
|
|
10
10
|
${RNWHISPER_LIB_DIR}/ggml.c
|
|
11
11
|
${RNWHISPER_LIB_DIR}/ggml-alloc.c
|
|
12
|
+
${RNWHISPER_LIB_DIR}/ggml-backend.cpp
|
|
13
|
+
${RNWHISPER_LIB_DIR}/ggml-backend-reg.cpp
|
|
14
|
+
${RNWHISPER_LIB_DIR}/ggml-cpu.c
|
|
15
|
+
${RNWHISPER_LIB_DIR}/ggml-cpu.cpp
|
|
16
|
+
${RNWHISPER_LIB_DIR}/ggml-cpu-aarch64.cpp
|
|
17
|
+
${RNWHISPER_LIB_DIR}/ggml-cpu-quants.c
|
|
18
|
+
${RNWHISPER_LIB_DIR}/ggml-cpu-traits.cpp
|
|
19
|
+
${RNWHISPER_LIB_DIR}/ggml-opt.cpp
|
|
20
|
+
${RNWHISPER_LIB_DIR}/ggml-threading.cpp
|
|
21
|
+
${RNWHISPER_LIB_DIR}/ggml-quants.c
|
|
22
|
+
${RNWHISPER_LIB_DIR}/gguf.cpp
|
|
23
|
+
${RNWHISPER_LIB_DIR}/amx/amx.cpp
|
|
24
|
+
${RNWHISPER_LIB_DIR}/amx/mmq.cpp
|
|
12
25
|
${RNWHISPER_LIB_DIR}/whisper.cpp
|
|
26
|
+
${RNWHISPER_LIB_DIR}/rn-audioutils.cpp
|
|
13
27
|
${RNWHISPER_LIB_DIR}/rn-whisper.cpp
|
|
14
28
|
${CMAKE_SOURCE_DIR}/jni.cpp
|
|
15
29
|
)
|
|
@@ -25,12 +39,18 @@ function(build_library target_name)
|
|
|
25
39
|
|
|
26
40
|
target_link_libraries(${target_name} ${LOG_LIB} android)
|
|
27
41
|
|
|
42
|
+
target_compile_options(${target_name} PRIVATE -DWSP_GGML_USE_CPU -DWSP_GGML_USE_CPU_AARCH64)
|
|
43
|
+
|
|
28
44
|
if (${target_name} STREQUAL "whisper_v8fp16_va")
|
|
29
45
|
target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
|
|
30
46
|
elseif (${target_name} STREQUAL "whisper_vfpv4")
|
|
31
47
|
target_compile_options(${target_name} PRIVATE -mfpu=neon-vfpv4)
|
|
32
48
|
endif ()
|
|
33
49
|
|
|
50
|
+
if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
|
51
|
+
target_compile_options(${target_name} PRIVATE -DRNWHISPER_ANDROID_ENABLE_LOGGING)
|
|
52
|
+
endif ()
|
|
53
|
+
|
|
34
54
|
# NOTE: If you want to debug the native code, you can uncomment if and endif
|
|
35
55
|
# if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
|
36
56
|
|
|
@@ -2,112 +2,29 @@ package com.rnwhisper;
|
|
|
2
2
|
|
|
3
3
|
import android.util.Log;
|
|
4
4
|
|
|
5
|
-
import java.util.ArrayList;
|
|
6
|
-
import java.lang.StringBuilder;
|
|
7
|
-
import java.io.IOException;
|
|
8
|
-
import java.io.FileReader;
|
|
9
5
|
import java.io.ByteArrayOutputStream;
|
|
10
6
|
import java.io.File;
|
|
11
|
-
import java.io.FileOutputStream;
|
|
12
|
-
import java.io.DataOutputStream;
|
|
13
7
|
import java.io.IOException;
|
|
14
8
|
import java.io.InputStream;
|
|
15
9
|
import java.nio.ByteBuffer;
|
|
16
10
|
import java.nio.ByteOrder;
|
|
17
11
|
import java.nio.ShortBuffer;
|
|
12
|
+
import java.util.Base64;
|
|
13
|
+
|
|
14
|
+
import java.util.Arrays;
|
|
18
15
|
|
|
19
16
|
public class AudioUtils {
|
|
20
17
|
private static final String NAME = "RNWhisperAudioUtils";
|
|
21
18
|
|
|
22
|
-
private static
|
|
23
|
-
|
|
24
|
-
private static byte[] shortToByte(short[] shortInts) {
|
|
25
|
-
int j = 0;
|
|
26
|
-
int length = shortInts.length;
|
|
27
|
-
byte[] byteData = new byte[length * 2];
|
|
28
|
-
for (int i = 0; i < length; i++) {
|
|
29
|
-
byteData[j++] = (byte) (shortInts[i] >>> 8);
|
|
30
|
-
byteData[j++] = (byte) (shortInts[i] >>> 0);
|
|
31
|
-
}
|
|
32
|
-
return byteData;
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
public static byte[] concatShortBuffers(ArrayList<short[]> buffers) {
|
|
36
|
-
int totalLength = 0;
|
|
37
|
-
for (int i = 0; i < buffers.size(); i++) {
|
|
38
|
-
totalLength += buffers.get(i).length;
|
|
39
|
-
}
|
|
40
|
-
byte[] result = new byte[totalLength * 2];
|
|
41
|
-
int offset = 0;
|
|
42
|
-
for (int i = 0; i < buffers.size(); i++) {
|
|
43
|
-
byte[] bytes = shortToByte(buffers.get(i));
|
|
44
|
-
System.arraycopy(bytes, 0, result, offset, bytes.length);
|
|
45
|
-
offset += bytes.length;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
return result;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
private static byte[] removeTrailingZeros(byte[] audioData) {
|
|
52
|
-
int i = audioData.length - 1;
|
|
53
|
-
while (i >= 0 && audioData[i] == 0) {
|
|
54
|
-
--i;
|
|
55
|
-
}
|
|
56
|
-
byte[] newData = new byte[i + 1];
|
|
57
|
-
System.arraycopy(audioData, 0, newData, 0, i + 1);
|
|
58
|
-
return newData;
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
public static void saveWavFile(byte[] rawData, String audioOutputFile) throws IOException {
|
|
62
|
-
Log.d(NAME, "call saveWavFile");
|
|
63
|
-
rawData = removeTrailingZeros(rawData);
|
|
64
|
-
DataOutputStream output = null;
|
|
65
|
-
try {
|
|
66
|
-
output = new DataOutputStream(new FileOutputStream(audioOutputFile));
|
|
67
|
-
// WAVE header
|
|
68
|
-
// see http://ccrma.stanford.edu/courses/422/projects/WaveFormat/
|
|
69
|
-
output.writeBytes("RIFF"); // chunk id
|
|
70
|
-
output.writeInt(Integer.reverseBytes(36 + rawData.length)); // chunk size
|
|
71
|
-
output.writeBytes("WAVE"); // format
|
|
72
|
-
output.writeBytes("fmt "); // subchunk 1 id
|
|
73
|
-
output.writeInt(Integer.reverseBytes(16)); // subchunk 1 size
|
|
74
|
-
output.writeShort(Short.reverseBytes((short) 1)); // audio format (1 = PCM)
|
|
75
|
-
output.writeShort(Short.reverseBytes((short) 1)); // number of channels
|
|
76
|
-
output.writeInt(Integer.reverseBytes(SAMPLE_RATE)); // sample rate
|
|
77
|
-
output.writeInt(Integer.reverseBytes(SAMPLE_RATE * 2)); // byte rate
|
|
78
|
-
output.writeShort(Short.reverseBytes((short) 2)); // block align
|
|
79
|
-
output.writeShort(Short.reverseBytes((short) 16)); // bits per sample
|
|
80
|
-
output.writeBytes("data"); // subchunk 2 id
|
|
81
|
-
output.writeInt(Integer.reverseBytes(rawData.length)); // subchunk 2 size
|
|
82
|
-
// Audio data (conversion big endian -> little endian)
|
|
83
|
-
short[] shorts = new short[rawData.length / 2];
|
|
84
|
-
ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
|
|
85
|
-
ByteBuffer bytes = ByteBuffer.allocate(shorts.length * 2);
|
|
86
|
-
for (short s : shorts) {
|
|
87
|
-
bytes.putShort(s);
|
|
88
|
-
}
|
|
89
|
-
Log.d(NAME, "writing audio file: " + audioOutputFile);
|
|
90
|
-
output.write(bytes.array());
|
|
91
|
-
} finally {
|
|
92
|
-
if (output != null) {
|
|
93
|
-
output.close();
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
|
|
99
|
-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
100
|
-
byte[] buffer = new byte[1024];
|
|
101
|
-
int bytesRead;
|
|
102
|
-
while ((bytesRead = inputStream.read(buffer)) != -1) {
|
|
103
|
-
baos.write(buffer, 0, bytesRead);
|
|
104
|
-
}
|
|
105
|
-
ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
|
|
19
|
+
private static float[] bufferToFloatArray(byte[] buffer, Boolean cutHeader) {
|
|
20
|
+
ByteBuffer byteBuffer = ByteBuffer.wrap(buffer);
|
|
106
21
|
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
|
107
|
-
byteBuffer.position(44);
|
|
108
22
|
ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
|
|
109
23
|
short[] shortArray = new short[shortBuffer.limit()];
|
|
110
24
|
shortBuffer.get(shortArray);
|
|
25
|
+
if (cutHeader) {
|
|
26
|
+
shortArray = Arrays.copyOfRange(shortArray, 44, shortArray.length);
|
|
27
|
+
}
|
|
111
28
|
float[] floatArray = new float[shortArray.length];
|
|
112
29
|
for (int i = 0; i < shortArray.length; i++) {
|
|
113
30
|
floatArray[i] = ((float) shortArray[i]) / 32767.0f;
|
|
@@ -116,4 +33,22 @@ public class AudioUtils {
|
|
|
116
33
|
}
|
|
117
34
|
return floatArray;
|
|
118
35
|
}
|
|
119
|
-
|
|
36
|
+
|
|
37
|
+
public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
|
|
38
|
+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
39
|
+
byte[] buffer = new byte[1024];
|
|
40
|
+
int bytesRead;
|
|
41
|
+
while ((bytesRead = inputStream.read(buffer)) != -1) {
|
|
42
|
+
baos.write(buffer, 0, bytesRead);
|
|
43
|
+
}
|
|
44
|
+
return bufferToFloatArray(baos.toByteArray(), true);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
public static float[] decodeWaveData(String dataBase64) throws IOException {
|
|
48
|
+
return bufferToFloatArray(Base64.getDecoder().decode(dataBase64), true);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
public static float[] decodePcmData(String dataBase64) {
|
|
52
|
+
return bufferToFloatArray(Base64.getDecoder().decode(dataBase64), false);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -13,11 +13,13 @@ import com.facebook.react.bridge.ReactMethod;
|
|
|
13
13
|
import com.facebook.react.bridge.LifecycleEventListener;
|
|
14
14
|
import com.facebook.react.bridge.ReadableMap;
|
|
15
15
|
import com.facebook.react.bridge.WritableMap;
|
|
16
|
+
import com.facebook.react.bridge.Arguments;
|
|
16
17
|
|
|
17
18
|
import java.util.HashMap;
|
|
18
19
|
import java.util.Random;
|
|
19
20
|
import java.io.File;
|
|
20
21
|
import java.io.FileInputStream;
|
|
22
|
+
import java.io.InputStream;
|
|
21
23
|
import java.io.PushbackInputStream;
|
|
22
24
|
|
|
23
25
|
public class RNWhisper implements LifecycleEventListener {
|
|
@@ -107,51 +109,27 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
107
109
|
promise.reject(exception);
|
|
108
110
|
return;
|
|
109
111
|
}
|
|
110
|
-
|
|
112
|
+
WritableMap result = Arguments.createMap();
|
|
113
|
+
result.putInt("contextId", id);
|
|
114
|
+
result.putBoolean("gpu", false);
|
|
115
|
+
result.putString("reasonNoGPU", "Currently not supported");
|
|
116
|
+
promise.resolve(result);
|
|
111
117
|
tasks.remove(this);
|
|
112
118
|
}
|
|
113
|
-
}.
|
|
119
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
114
120
|
tasks.put(task, "initContext");
|
|
115
121
|
}
|
|
116
122
|
|
|
117
|
-
|
|
118
|
-
final WhisperContext context = contexts.get((int) id);
|
|
119
|
-
if (context == null) {
|
|
120
|
-
promise.reject("Context not found");
|
|
121
|
-
return;
|
|
122
|
-
}
|
|
123
|
-
if (context.isCapturing()) {
|
|
124
|
-
promise.reject("The context is in realtime transcribe mode");
|
|
125
|
-
return;
|
|
126
|
-
}
|
|
127
|
-
if (context.isTranscribing()) {
|
|
128
|
-
promise.reject("Context is already transcribing");
|
|
129
|
-
return;
|
|
130
|
-
}
|
|
123
|
+
private AsyncTask transcribe(WhisperContext context, double jobId, final float[] audioData, final ReadableMap options, Promise promise) {
|
|
131
124
|
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
|
|
132
125
|
private Exception exception;
|
|
133
126
|
|
|
134
127
|
@Override
|
|
135
128
|
protected WritableMap doInBackground(Void... voids) {
|
|
136
129
|
try {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if (filePath.startsWith("http://") || filePath.startsWith("https://")) {
|
|
140
|
-
waveFilePath = downloader.downloadFile(filePath);
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
int resId = getResourceIdentifier(waveFilePath);
|
|
144
|
-
if (resId > 0) {
|
|
145
|
-
return context.transcribeInputStream(
|
|
146
|
-
(int) jobId,
|
|
147
|
-
reactContext.getResources().openRawResource(resId),
|
|
148
|
-
options
|
|
149
|
-
);
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
return context.transcribeInputStream(
|
|
130
|
+
return context.transcribe(
|
|
153
131
|
(int) jobId,
|
|
154
|
-
|
|
132
|
+
audioData,
|
|
155
133
|
options
|
|
156
134
|
);
|
|
157
135
|
} catch (Exception e) {
|
|
@@ -169,8 +147,67 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
169
147
|
promise.resolve(data);
|
|
170
148
|
tasks.remove(this);
|
|
171
149
|
}
|
|
172
|
-
}.
|
|
173
|
-
|
|
150
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
151
|
+
return task;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
public void transcribeFile(double id, double jobId, String filePathOrBase64, ReadableMap options, Promise promise) {
|
|
155
|
+
final WhisperContext context = contexts.get((int) id);
|
|
156
|
+
if (context == null) {
|
|
157
|
+
promise.reject("Context not found");
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
if (context.isCapturing()) {
|
|
161
|
+
promise.reject("The context is in realtime transcribe mode");
|
|
162
|
+
return;
|
|
163
|
+
}
|
|
164
|
+
if (context.isTranscribing()) {
|
|
165
|
+
promise.reject("Context is already transcribing");
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
String waveFilePath = filePathOrBase64;
|
|
170
|
+
try {
|
|
171
|
+
if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
|
|
172
|
+
waveFilePath = downloader.downloadFile(filePathOrBase64);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
float[] audioData;
|
|
176
|
+
int resId = getResourceIdentifier(waveFilePath);
|
|
177
|
+
if (resId > 0) {
|
|
178
|
+
audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
|
|
179
|
+
} else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
|
|
180
|
+
audioData = AudioUtils.decodeWaveData(filePathOrBase64);
|
|
181
|
+
} else {
|
|
182
|
+
audioData = AudioUtils.decodeWaveFile(new FileInputStream(new File(waveFilePath)));
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
AsyncTask task = transcribe(context, jobId, audioData, options, promise);
|
|
186
|
+
tasks.put(task, "transcribeFile-" + id);
|
|
187
|
+
} catch (Exception e) {
|
|
188
|
+
promise.reject(e);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
public void transcribeData(double id, double jobId, String dataBase64, ReadableMap options, Promise promise) {
|
|
193
|
+
final WhisperContext context = contexts.get((int) id);
|
|
194
|
+
if (context == null) {
|
|
195
|
+
promise.reject("Context not found");
|
|
196
|
+
return;
|
|
197
|
+
}
|
|
198
|
+
if (context.isCapturing()) {
|
|
199
|
+
promise.reject("The context is in realtime transcribe mode");
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
if (context.isTranscribing()) {
|
|
203
|
+
promise.reject("Context is already transcribing");
|
|
204
|
+
return;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
float[] audioData = AudioUtils.decodePcmData(dataBase64);
|
|
208
|
+
AsyncTask task = transcribe(context, jobId, audioData, options, promise);
|
|
209
|
+
|
|
210
|
+
tasks.put(task, "transcribeData-" + id);
|
|
174
211
|
}
|
|
175
212
|
|
|
176
213
|
public void startRealtimeTranscribe(double id, double jobId, ReadableMap options, Promise promise) {
|
|
@@ -206,7 +243,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
206
243
|
context.stopTranscribe((int) jobId);
|
|
207
244
|
AsyncTask completionTask = null;
|
|
208
245
|
for (AsyncTask task : tasks.keySet()) {
|
|
209
|
-
if (tasks.get(task).equals("transcribeFile-" + id)) {
|
|
246
|
+
if (tasks.get(task).equals("transcribeFile-" + id) || tasks.get(task).equals("transcribeData-" + id)) {
|
|
210
247
|
task.get();
|
|
211
248
|
break;
|
|
212
249
|
}
|
|
@@ -226,10 +263,19 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
226
263
|
promise.resolve(null);
|
|
227
264
|
tasks.remove(this);
|
|
228
265
|
}
|
|
229
|
-
}.
|
|
266
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
230
267
|
tasks.put(task, "abortTranscribe-" + id);
|
|
231
268
|
}
|
|
232
269
|
|
|
270
|
+
public void bench(double id, double nThreads, Promise promise) {
|
|
271
|
+
final WhisperContext context = contexts.get((int) id);
|
|
272
|
+
if (context == null) {
|
|
273
|
+
promise.reject("Context not found");
|
|
274
|
+
return;
|
|
275
|
+
}
|
|
276
|
+
promise.resolve(context.bench((int) nThreads));
|
|
277
|
+
}
|
|
278
|
+
|
|
233
279
|
public void releaseContext(double id, Promise promise) {
|
|
234
280
|
final int contextId = (int) id;
|
|
235
281
|
AsyncTask task = new AsyncTask<Void, Void, Void>() {
|
|
@@ -245,7 +291,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
245
291
|
context.stopCurrentTranscribe();
|
|
246
292
|
AsyncTask completionTask = null;
|
|
247
293
|
for (AsyncTask task : tasks.keySet()) {
|
|
248
|
-
if (tasks.get(task).equals("transcribeFile-" + contextId)) {
|
|
294
|
+
if (tasks.get(task).equals("transcribeFile-" + contextId) || tasks.get(task).equals("transcribeData-" + contextId)) {
|
|
249
295
|
task.get();
|
|
250
296
|
break;
|
|
251
297
|
}
|
|
@@ -267,7 +313,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
267
313
|
promise.resolve(null);
|
|
268
314
|
tasks.remove(this);
|
|
269
315
|
}
|
|
270
|
-
}.
|
|
316
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
271
317
|
tasks.put(task, "releaseContext-" + id);
|
|
272
318
|
}
|
|
273
319
|
|
|
@@ -294,7 +340,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
294
340
|
promise.resolve(null);
|
|
295
341
|
tasks.remove(this);
|
|
296
342
|
}
|
|
297
|
-
}.
|
|
343
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
298
344
|
tasks.put(task, "releaseAllContexts");
|
|
299
345
|
}
|
|
300
346
|
|