whisper.rn 0.4.0-rc.0 → 0.4.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/android/build.gradle +4 -0
- package/android/src/main/CMakeLists.txt +14 -0
- package/android/src/main/java/com/rnwhisper/AudioUtils.java +27 -92
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +86 -40
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +85 -131
- package/android/src/main/jni-utils.h +76 -0
- package/android/src/main/jni.cpp +226 -109
- package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +10 -0
- package/cpp/README.md +1 -1
- package/cpp/coreml/whisper-encoder-impl.h +1 -1
- package/cpp/coreml/whisper-encoder.h +4 -0
- package/cpp/coreml/whisper-encoder.mm +5 -3
- package/cpp/ggml-aarch64.c +129 -0
- package/cpp/ggml-aarch64.h +19 -0
- package/cpp/ggml-alloc.c +805 -400
- package/cpp/ggml-alloc.h +60 -10
- package/cpp/ggml-backend-impl.h +216 -0
- package/cpp/ggml-backend-reg.cpp +204 -0
- package/cpp/ggml-backend.cpp +1996 -0
- package/cpp/ggml-backend.cpp.rej +12 -0
- package/cpp/ggml-backend.h +336 -0
- package/cpp/ggml-common.h +1853 -0
- package/cpp/ggml-cpp.h +38 -0
- package/cpp/ggml-cpu-aarch64.c +3560 -0
- package/cpp/ggml-cpu-aarch64.h +30 -0
- package/cpp/ggml-cpu-impl.h +371 -0
- package/cpp/ggml-cpu-quants.c +10822 -0
- package/cpp/ggml-cpu-quants.h +63 -0
- package/cpp/ggml-cpu.c +13970 -0
- package/cpp/ggml-cpu.cpp +663 -0
- package/cpp/ggml-cpu.h +177 -0
- package/cpp/ggml-impl.h +551 -0
- package/cpp/ggml-metal-impl.h +249 -0
- package/cpp/ggml-metal.h +24 -43
- package/cpp/ggml-metal.m +4190 -1075
- package/cpp/ggml-quants.c +5247 -0
- package/cpp/ggml-quants.h +100 -0
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +12 -0
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +5474 -18763
- package/cpp/ggml.h +833 -628
- package/cpp/rn-audioutils.cpp +68 -0
- package/cpp/rn-audioutils.h +14 -0
- package/cpp/rn-whisper-log.h +11 -0
- package/cpp/rn-whisper.cpp +221 -52
- package/cpp/rn-whisper.h +50 -15
- package/cpp/whisper.cpp +2863 -1340
- package/cpp/whisper.h +170 -38
- package/ios/RNWhisper.mm +141 -46
- package/ios/RNWhisperAudioUtils.h +1 -2
- package/ios/RNWhisperAudioUtils.m +18 -67
- package/ios/RNWhisperContext.h +11 -8
- package/ios/RNWhisperContext.mm +197 -144
- package/jest/mock.js +15 -2
- package/lib/commonjs/NativeRNWhisper.js.map +1 -1
- package/lib/commonjs/index.js +78 -28
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/NativeRNWhisper.js.map +1 -1
- package/lib/module/index.js +78 -28
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/NativeRNWhisper.d.ts +14 -4
- package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +39 -5
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +9 -7
- package/src/NativeRNWhisper.ts +21 -4
- package/src/index.ts +102 -42
- package/src/version.json +1 -1
- package/whisper-rn.podspec +11 -18
- package/cpp/ggml-metal.metal +0 -2353
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -4
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -8
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +0 -19
|
@@ -42,7 +42,6 @@ public class WhisperContext {
|
|
|
42
42
|
private AudioRecord recorder = null;
|
|
43
43
|
private int bufferSize;
|
|
44
44
|
private int nSamplesTranscribing = 0;
|
|
45
|
-
private ArrayList<short[]> shortBufferSlices;
|
|
46
45
|
// Remember number of samples in each slice
|
|
47
46
|
private ArrayList<Integer> sliceNSamples;
|
|
48
47
|
// Current buffer slice index
|
|
@@ -54,6 +53,7 @@ public class WhisperContext {
|
|
|
54
53
|
private boolean isCapturing = false;
|
|
55
54
|
private boolean isStoppedByAction = false;
|
|
56
55
|
private boolean isTranscribing = false;
|
|
56
|
+
private boolean isTdrzEnable = false;
|
|
57
57
|
private Thread rootFullHandler = null;
|
|
58
58
|
private Thread fullHandler = null;
|
|
59
59
|
|
|
@@ -66,7 +66,6 @@ public class WhisperContext {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
private void rewind() {
|
|
69
|
-
shortBufferSlices = null;
|
|
70
69
|
sliceNSamples = null;
|
|
71
70
|
sliceIndex = 0;
|
|
72
71
|
transcribeSliceIndex = 0;
|
|
@@ -75,29 +74,19 @@ public class WhisperContext {
|
|
|
75
74
|
isCapturing = false;
|
|
76
75
|
isStoppedByAction = false;
|
|
77
76
|
isTranscribing = false;
|
|
77
|
+
isTdrzEnable = false;
|
|
78
78
|
rootFullHandler = null;
|
|
79
79
|
fullHandler = null;
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
-
private boolean vad(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
for (int i = 0; i < sampleSize; i++) {
|
|
91
|
-
audioData[i] = shortBuffer[i + start] / 32768.0f;
|
|
92
|
-
}
|
|
93
|
-
float vadThold = options.hasKey("vadThold") ? (float) options.getDouble("vadThold") : 0.6f;
|
|
94
|
-
float vadFreqThold = options.hasKey("vadFreqThold") ? (float) options.getDouble("vadFreqThold") : 0.6f;
|
|
95
|
-
isSpeech = vadSimple(audioData, sampleSize, vadThold, vadFreqThold);
|
|
96
|
-
} else {
|
|
97
|
-
isSpeech = false;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
return isSpeech;
|
|
82
|
+
private boolean vad(int sliceIndex, int nSamples, int n) {
|
|
83
|
+
if (isTranscribing) return true;
|
|
84
|
+
return vadSimple(jobId, sliceIndex, nSamples, n);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
private void finishRealtimeTranscribe(WritableMap result) {
|
|
88
|
+
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
89
|
+
finishRealtimeTranscribeJob(jobId, context, sliceNSamples.stream().mapToInt(i -> i).toArray());
|
|
101
90
|
}
|
|
102
91
|
|
|
103
92
|
public int startRealtimeTranscribe(int jobId, ReadableMap options) {
|
|
@@ -119,19 +108,20 @@ public class WhisperContext {
|
|
|
119
108
|
|
|
120
109
|
int realtimeAudioSec = options.hasKey("realtimeAudioSec") ? options.getInt("realtimeAudioSec") : 0;
|
|
121
110
|
final int audioSec = realtimeAudioSec > 0 ? realtimeAudioSec : DEFAULT_MAX_AUDIO_SEC;
|
|
122
|
-
|
|
123
111
|
int realtimeAudioSliceSec = options.hasKey("realtimeAudioSliceSec") ? options.getInt("realtimeAudioSliceSec") : 0;
|
|
124
112
|
final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
|
|
125
|
-
|
|
126
113
|
isUseSlices = audioSliceSec < audioSec;
|
|
127
114
|
|
|
128
|
-
|
|
115
|
+
double realtimeAudioMinSec = options.hasKey("realtimeAudioMinSec") ? options.getDouble("realtimeAudioMinSec") : 0;
|
|
116
|
+
final double audioMinSec = realtimeAudioMinSec > 0.5 && realtimeAudioMinSec <= audioSliceSec ? realtimeAudioMinSec : 1;
|
|
117
|
+
|
|
118
|
+
this.isTdrzEnable = options.hasKey("tdrzEnable") && options.getBoolean("tdrzEnable");
|
|
119
|
+
|
|
120
|
+
createRealtimeTranscribeJob(jobId, context, options);
|
|
129
121
|
|
|
130
|
-
shortBufferSlices = new ArrayList<short[]>();
|
|
131
|
-
shortBufferSlices.add(new short[audioSliceSec * SAMPLE_RATE]);
|
|
132
122
|
sliceNSamples = new ArrayList<Integer>();
|
|
133
123
|
sliceNSamples.add(0);
|
|
134
|
-
|
|
124
|
+
|
|
135
125
|
isCapturing = true;
|
|
136
126
|
recorder.startRecording();
|
|
137
127
|
|
|
@@ -159,49 +149,43 @@ public class WhisperContext {
|
|
|
159
149
|
nSamples == nSamplesTranscribing &&
|
|
160
150
|
sliceIndex == transcribeSliceIndex
|
|
161
151
|
) {
|
|
162
|
-
|
|
152
|
+
finishRealtimeTranscribe(Arguments.createMap());
|
|
163
153
|
} else if (!isTranscribing) {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribeEnd", Arguments.createMap());
|
|
154
|
+
boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
|
|
155
|
+
if (!isSamplesEnough || !vad(sliceIndex, nSamples, 0)) {
|
|
156
|
+
finishRealtimeTranscribe(Arguments.createMap());
|
|
168
157
|
break;
|
|
169
158
|
}
|
|
170
159
|
isTranscribing = true;
|
|
171
|
-
fullTranscribeSamples(
|
|
160
|
+
fullTranscribeSamples(true);
|
|
172
161
|
}
|
|
173
162
|
break;
|
|
174
163
|
}
|
|
175
164
|
|
|
176
165
|
// Append to buffer
|
|
177
|
-
short[] shortBuffer = shortBufferSlices.get(sliceIndex);
|
|
178
166
|
if (nSamples + n > audioSliceSec * SAMPLE_RATE) {
|
|
179
167
|
Log.d(NAME, "next slice");
|
|
180
168
|
|
|
181
169
|
sliceIndex++;
|
|
182
170
|
nSamples = 0;
|
|
183
|
-
shortBuffer = new short[audioSliceSec * SAMPLE_RATE];
|
|
184
|
-
shortBufferSlices.add(shortBuffer);
|
|
185
171
|
sliceNSamples.add(0);
|
|
186
172
|
}
|
|
173
|
+
putPcmData(jobId, buffer, sliceIndex, nSamples, n);
|
|
187
174
|
|
|
188
|
-
|
|
189
|
-
shortBuffer[nSamples + i] = buffer[i];
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
boolean isSpeech = vad(options, shortBuffer, nSamples, n);
|
|
175
|
+
boolean isSpeech = vad(sliceIndex, nSamples, n);
|
|
193
176
|
|
|
194
177
|
nSamples += n;
|
|
195
178
|
sliceNSamples.set(sliceIndex, nSamples);
|
|
196
179
|
|
|
197
|
-
|
|
180
|
+
boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
|
|
181
|
+
if (!isSamplesEnough || !isSpeech) continue;
|
|
198
182
|
|
|
199
183
|
if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
|
|
200
184
|
isTranscribing = true;
|
|
201
185
|
fullHandler = new Thread(new Runnable() {
|
|
202
186
|
@Override
|
|
203
187
|
public void run() {
|
|
204
|
-
fullTranscribeSamples(
|
|
188
|
+
fullTranscribeSamples(false);
|
|
205
189
|
}
|
|
206
190
|
});
|
|
207
191
|
fullHandler.start();
|
|
@@ -210,11 +194,9 @@ public class WhisperContext {
|
|
|
210
194
|
Log.e(NAME, "Error transcribing realtime: " + e.getMessage());
|
|
211
195
|
}
|
|
212
196
|
}
|
|
213
|
-
|
|
214
|
-
Log.d(NAME, "Begin saving wav file to " + audioOutputPath);
|
|
215
|
-
AudioUtils.saveWavFile(AudioUtils.concatShortBuffers(shortBufferSlices), audioOutputPath);
|
|
197
|
+
|
|
216
198
|
if (!isTranscribing) {
|
|
217
|
-
|
|
199
|
+
finishRealtimeTranscribe(Arguments.createMap());
|
|
218
200
|
}
|
|
219
201
|
if (fullHandler != null) {
|
|
220
202
|
fullHandler.join(); // Wait for full transcribe to finish
|
|
@@ -232,26 +214,16 @@ public class WhisperContext {
|
|
|
232
214
|
return state;
|
|
233
215
|
}
|
|
234
216
|
|
|
235
|
-
private void fullTranscribeSamples(
|
|
217
|
+
private void fullTranscribeSamples(boolean skipCapturingCheck) {
|
|
236
218
|
int nSamplesOfIndex = sliceNSamples.get(transcribeSliceIndex);
|
|
237
219
|
|
|
238
220
|
if (!isCapturing && !skipCapturingCheck) return;
|
|
239
221
|
|
|
240
|
-
short[] shortBuffer = shortBufferSlices.get(transcribeSliceIndex);
|
|
241
|
-
int nSamples = sliceNSamples.get(transcribeSliceIndex);
|
|
242
|
-
|
|
243
222
|
nSamplesTranscribing = nSamplesOfIndex;
|
|
244
|
-
|
|
245
|
-
// convert I16 to F32
|
|
246
|
-
float[] nSamplesBuffer32 = new float[nSamplesTranscribing];
|
|
247
|
-
for (int i = 0; i < nSamplesTranscribing; i++) {
|
|
248
|
-
nSamplesBuffer32[i] = shortBuffer[i] / 32768.0f;
|
|
249
|
-
}
|
|
250
|
-
|
|
251
223
|
Log.d(NAME, "Start transcribing realtime: " + nSamplesTranscribing);
|
|
252
224
|
|
|
253
225
|
int timeStart = (int) System.currentTimeMillis();
|
|
254
|
-
int code =
|
|
226
|
+
int code = fullWithJob(jobId, context, transcribeSliceIndex, nSamplesTranscribing);
|
|
255
227
|
int timeEnd = (int) System.currentTimeMillis();
|
|
256
228
|
int timeRecording = (int) (nSamplesTranscribing / SAMPLE_RATE * 1000);
|
|
257
229
|
|
|
@@ -264,7 +236,7 @@ public class WhisperContext {
|
|
|
264
236
|
|
|
265
237
|
if (code == 0) {
|
|
266
238
|
payload.putMap("data", getTextSegments(0, getTextSegmentCount(context)));
|
|
267
|
-
} else {
|
|
239
|
+
} else if (code != -999) { // Not aborted
|
|
268
240
|
payload.putString("error", "Transcribe failed with code " + code);
|
|
269
241
|
}
|
|
270
242
|
|
|
@@ -283,12 +255,12 @@ public class WhisperContext {
|
|
|
283
255
|
nSamplesTranscribing = 0;
|
|
284
256
|
}
|
|
285
257
|
|
|
286
|
-
boolean continueNeeded = !isCapturing && nSamplesTranscribing != nSamplesOfIndex;
|
|
258
|
+
boolean continueNeeded = !isCapturing && nSamplesTranscribing != nSamplesOfIndex && code != -999;
|
|
287
259
|
|
|
288
260
|
if (isStopped && !continueNeeded) {
|
|
289
261
|
payload.putBoolean("isCapturing", false);
|
|
290
262
|
payload.putBoolean("isStoppedByAction", isStoppedByAction);
|
|
291
|
-
|
|
263
|
+
finishRealtimeTranscribe(payload);
|
|
292
264
|
} else if (code == 0) {
|
|
293
265
|
payload.putBoolean("isCapturing", true);
|
|
294
266
|
emitTranscribeEvent("@RNWhisper_onRealtimeTranscribe", payload);
|
|
@@ -299,7 +271,7 @@ public class WhisperContext {
|
|
|
299
271
|
|
|
300
272
|
if (continueNeeded) {
|
|
301
273
|
// If no more capturing, continue transcribing until all slices are transcribed
|
|
302
|
-
fullTranscribeSamples(
|
|
274
|
+
fullTranscribeSamples(true);
|
|
303
275
|
} else if (isStopped) {
|
|
304
276
|
// No next, cleanup
|
|
305
277
|
rewind();
|
|
@@ -360,71 +332,39 @@ public class WhisperContext {
|
|
|
360
332
|
}
|
|
361
333
|
}
|
|
362
334
|
|
|
363
|
-
public WritableMap
|
|
335
|
+
public WritableMap transcribe(int jobId, float[] audioData, ReadableMap options) throws IOException, Exception {
|
|
364
336
|
if (isCapturing || isTranscribing) {
|
|
365
337
|
throw new Exception("Context is already in capturing or transcribing");
|
|
366
338
|
}
|
|
367
339
|
rewind();
|
|
368
|
-
|
|
369
340
|
this.jobId = jobId;
|
|
341
|
+
this.isTdrzEnable = options.hasKey("tdrzEnable") && options.getBoolean("tdrzEnable");
|
|
342
|
+
|
|
370
343
|
isTranscribing = true;
|
|
371
|
-
float[] audioData = AudioUtils.decodeWaveFile(inputStream);
|
|
372
|
-
int code = full(jobId, options, audioData, audioData.length);
|
|
373
|
-
isTranscribing = false;
|
|
374
|
-
this.jobId = -1;
|
|
375
|
-
if (code != 0) {
|
|
376
|
-
throw new Exception("Failed to transcribe the file. Code: " + code);
|
|
377
|
-
}
|
|
378
|
-
WritableMap result = getTextSegments(0, getTextSegmentCount(context));
|
|
379
|
-
result.putBoolean("isAborted", isStoppedByAction);
|
|
380
|
-
return result;
|
|
381
|
-
}
|
|
382
344
|
|
|
383
|
-
private int full(int jobId, ReadableMap options, float[] audioData, int audioDataLen) {
|
|
384
345
|
boolean hasProgressCallback = options.hasKey("onProgress") && options.getBoolean("onProgress");
|
|
385
346
|
boolean hasNewSegmentsCallback = options.hasKey("onNewSegments") && options.getBoolean("onNewSegments");
|
|
386
|
-
|
|
347
|
+
int code = fullWithNewJob(
|
|
387
348
|
jobId,
|
|
388
349
|
context,
|
|
389
350
|
// float[] audio_data,
|
|
390
351
|
audioData,
|
|
391
352
|
// jint audio_data_len,
|
|
392
|
-
|
|
393
|
-
//
|
|
394
|
-
options
|
|
395
|
-
// jint max_context,
|
|
396
|
-
options.hasKey("maxContext") ? options.getInt("maxContext") : -1,
|
|
397
|
-
|
|
398
|
-
// jint word_thold,
|
|
399
|
-
options.hasKey("wordThold") ? options.getInt("wordThold") : -1,
|
|
400
|
-
// jint max_len,
|
|
401
|
-
options.hasKey("maxLen") ? options.getInt("maxLen") : -1,
|
|
402
|
-
// jboolean token_timestamps,
|
|
403
|
-
options.hasKey("tokenTimestamps") ? options.getBoolean("tokenTimestamps") : false,
|
|
404
|
-
|
|
405
|
-
// jint offset,
|
|
406
|
-
options.hasKey("offset") ? options.getInt("offset") : -1,
|
|
407
|
-
// jint duration,
|
|
408
|
-
options.hasKey("duration") ? options.getInt("duration") : -1,
|
|
409
|
-
// jfloat temperature,
|
|
410
|
-
options.hasKey("temperature") ? (float) options.getDouble("temperature") : -1.0f,
|
|
411
|
-
// jfloat temperature_inc,
|
|
412
|
-
options.hasKey("temperatureInc") ? (float) options.getDouble("temperatureInc") : -1.0f,
|
|
413
|
-
// jint beam_size,
|
|
414
|
-
options.hasKey("beamSize") ? options.getInt("beamSize") : -1,
|
|
415
|
-
// jint best_of,
|
|
416
|
-
options.hasKey("bestOf") ? options.getInt("bestOf") : -1,
|
|
417
|
-
// jboolean speed_up,
|
|
418
|
-
options.hasKey("speedUp") ? options.getBoolean("speedUp") : false,
|
|
419
|
-
// jboolean translate,
|
|
420
|
-
options.hasKey("translate") ? options.getBoolean("translate") : false,
|
|
421
|
-
// jstring language,
|
|
422
|
-
options.hasKey("language") ? options.getString("language") : "auto",
|
|
423
|
-
// jstring prompt
|
|
424
|
-
options.hasKey("prompt") ? options.getString("prompt") : null,
|
|
353
|
+
audioData.length,
|
|
354
|
+
// ReadableMap options,
|
|
355
|
+
options,
|
|
425
356
|
// Callback callback
|
|
426
357
|
hasProgressCallback || hasNewSegmentsCallback ? new Callback(this, hasProgressCallback, hasNewSegmentsCallback) : null
|
|
427
358
|
);
|
|
359
|
+
|
|
360
|
+
isTranscribing = false;
|
|
361
|
+
this.jobId = -1;
|
|
362
|
+
if (code != 0 && code != 999) {
|
|
363
|
+
throw new Exception("Failed to transcribe the file. Code: " + code);
|
|
364
|
+
}
|
|
365
|
+
WritableMap result = getTextSegments(0, getTextSegmentCount(context));
|
|
366
|
+
result.putBoolean("isAborted", isStoppedByAction);
|
|
367
|
+
return result;
|
|
428
368
|
}
|
|
429
369
|
|
|
430
370
|
private WritableMap getTextSegments(int start, int count) {
|
|
@@ -432,8 +372,15 @@ public class WhisperContext {
|
|
|
432
372
|
|
|
433
373
|
WritableMap data = Arguments.createMap();
|
|
434
374
|
WritableArray segments = Arguments.createArray();
|
|
375
|
+
|
|
435
376
|
for (int i = 0; i < count; i++) {
|
|
436
377
|
String text = getTextSegment(context, i);
|
|
378
|
+
|
|
379
|
+
// If tdrzEnable is enabled and speaker turn is detected
|
|
380
|
+
if (this.isTdrzEnable && getTextSegmentSpeakerTurnNext(context, i)) {
|
|
381
|
+
text += " [SPEAKER_TURN]";
|
|
382
|
+
}
|
|
383
|
+
|
|
437
384
|
builder.append(text);
|
|
438
385
|
|
|
439
386
|
WritableMap segment = Arguments.createMap();
|
|
@@ -475,6 +422,10 @@ public class WhisperContext {
|
|
|
475
422
|
stopTranscribe(this.jobId);
|
|
476
423
|
}
|
|
477
424
|
|
|
425
|
+
public String bench(int n_threads) {
|
|
426
|
+
return bench(context, n_threads);
|
|
427
|
+
}
|
|
428
|
+
|
|
478
429
|
public void release() {
|
|
479
430
|
stopCurrentTranscribe();
|
|
480
431
|
freeContext(context);
|
|
@@ -543,31 +494,18 @@ public class WhisperContext {
|
|
|
543
494
|
}
|
|
544
495
|
}
|
|
545
496
|
|
|
546
|
-
|
|
497
|
+
// JNI methods
|
|
547
498
|
protected static native long initContext(String modelPath);
|
|
548
499
|
protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
|
|
549
500
|
protected static native long initContextWithInputStream(PushbackInputStream inputStream);
|
|
550
|
-
protected static native
|
|
551
|
-
|
|
501
|
+
protected static native void freeContext(long contextPtr);
|
|
502
|
+
|
|
503
|
+
protected static native int fullWithNewJob(
|
|
552
504
|
int job_id,
|
|
553
505
|
long context,
|
|
554
506
|
float[] audio_data,
|
|
555
507
|
int audio_data_len,
|
|
556
|
-
|
|
557
|
-
int max_context,
|
|
558
|
-
int word_thold,
|
|
559
|
-
int max_len,
|
|
560
|
-
boolean token_timestamps,
|
|
561
|
-
int offset,
|
|
562
|
-
int duration,
|
|
563
|
-
float temperature,
|
|
564
|
-
float temperature_inc,
|
|
565
|
-
int beam_size,
|
|
566
|
-
int best_of,
|
|
567
|
-
boolean speed_up,
|
|
568
|
-
boolean translate,
|
|
569
|
-
String language,
|
|
570
|
-
String prompt,
|
|
508
|
+
ReadableMap options,
|
|
571
509
|
Callback Callback
|
|
572
510
|
);
|
|
573
511
|
protected static native void abortTranscribe(int jobId);
|
|
@@ -576,5 +514,21 @@ public class WhisperContext {
|
|
|
576
514
|
protected static native String getTextSegment(long context, int index);
|
|
577
515
|
protected static native int getTextSegmentT0(long context, int index);
|
|
578
516
|
protected static native int getTextSegmentT1(long context, int index);
|
|
579
|
-
protected static native
|
|
580
|
-
|
|
517
|
+
protected static native boolean getTextSegmentSpeakerTurnNext(long context, int index);
|
|
518
|
+
|
|
519
|
+
protected static native void createRealtimeTranscribeJob(
|
|
520
|
+
int job_id,
|
|
521
|
+
long context,
|
|
522
|
+
ReadableMap options
|
|
523
|
+
);
|
|
524
|
+
protected static native void finishRealtimeTranscribeJob(int job_id, long context, int[] sliceNSamples);
|
|
525
|
+
protected static native boolean vadSimple(int job_id, int slice_index, int n_samples, int n);
|
|
526
|
+
protected static native void putPcmData(int job_id, short[] buffer, int slice_index, int n_samples, int n);
|
|
527
|
+
protected static native int fullWithJob(
|
|
528
|
+
int job_id,
|
|
529
|
+
long context,
|
|
530
|
+
int slice_index,
|
|
531
|
+
int n_samples
|
|
532
|
+
);
|
|
533
|
+
protected static native String bench(long context, int n_threads);
|
|
534
|
+
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#include <jni.h>
|
|
2
|
+
|
|
3
|
+
// ReadableMap utils
|
|
4
|
+
|
|
5
|
+
namespace readablemap {
|
|
6
|
+
|
|
7
|
+
bool hasKey(JNIEnv *env, jobject readableMap, const char *key) {
|
|
8
|
+
jclass mapClass = env->GetObjectClass(readableMap);
|
|
9
|
+
jmethodID hasKeyMethod = env->GetMethodID(mapClass, "hasKey", "(Ljava/lang/String;)Z");
|
|
10
|
+
jstring jKey = env->NewStringUTF(key);
|
|
11
|
+
jboolean result = env->CallBooleanMethod(readableMap, hasKeyMethod, jKey);
|
|
12
|
+
env->DeleteLocalRef(jKey);
|
|
13
|
+
return result;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
int getInt(JNIEnv *env, jobject readableMap, const char *key, jint defaultValue) {
|
|
17
|
+
if (!hasKey(env, readableMap, key)) {
|
|
18
|
+
return defaultValue;
|
|
19
|
+
}
|
|
20
|
+
jclass mapClass = env->GetObjectClass(readableMap);
|
|
21
|
+
jmethodID getIntMethod = env->GetMethodID(mapClass, "getInt", "(Ljava/lang/String;)I");
|
|
22
|
+
jstring jKey = env->NewStringUTF(key);
|
|
23
|
+
jint result = env->CallIntMethod(readableMap, getIntMethod, jKey);
|
|
24
|
+
env->DeleteLocalRef(jKey);
|
|
25
|
+
return result;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
bool getBool(JNIEnv *env, jobject readableMap, const char *key, jboolean defaultValue) {
|
|
29
|
+
if (!hasKey(env, readableMap, key)) {
|
|
30
|
+
return defaultValue;
|
|
31
|
+
}
|
|
32
|
+
jclass mapClass = env->GetObjectClass(readableMap);
|
|
33
|
+
jmethodID getBoolMethod = env->GetMethodID(mapClass, "getBoolean", "(Ljava/lang/String;)Z");
|
|
34
|
+
jstring jKey = env->NewStringUTF(key);
|
|
35
|
+
jboolean result = env->CallBooleanMethod(readableMap, getBoolMethod, jKey);
|
|
36
|
+
env->DeleteLocalRef(jKey);
|
|
37
|
+
return result;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
long getLong(JNIEnv *env, jobject readableMap, const char *key, jlong defaultValue) {
|
|
41
|
+
if (!hasKey(env, readableMap, key)) {
|
|
42
|
+
return defaultValue;
|
|
43
|
+
}
|
|
44
|
+
jclass mapClass = env->GetObjectClass(readableMap);
|
|
45
|
+
jmethodID getLongMethod = env->GetMethodID(mapClass, "getLong", "(Ljava/lang/String;)J");
|
|
46
|
+
jstring jKey = env->NewStringUTF(key);
|
|
47
|
+
jlong result = env->CallLongMethod(readableMap, getLongMethod, jKey);
|
|
48
|
+
env->DeleteLocalRef(jKey);
|
|
49
|
+
return result;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
float getFloat(JNIEnv *env, jobject readableMap, const char *key, jfloat defaultValue) {
|
|
53
|
+
if (!hasKey(env, readableMap, key)) {
|
|
54
|
+
return defaultValue;
|
|
55
|
+
}
|
|
56
|
+
jclass mapClass = env->GetObjectClass(readableMap);
|
|
57
|
+
jmethodID getFloatMethod = env->GetMethodID(mapClass, "getDouble", "(Ljava/lang/String;)D");
|
|
58
|
+
jstring jKey = env->NewStringUTF(key);
|
|
59
|
+
jfloat result = env->CallDoubleMethod(readableMap, getFloatMethod, jKey);
|
|
60
|
+
env->DeleteLocalRef(jKey);
|
|
61
|
+
return result;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
jstring getString(JNIEnv *env, jobject readableMap, const char *key, jstring defaultValue) {
|
|
65
|
+
if (!hasKey(env, readableMap, key)) {
|
|
66
|
+
return defaultValue;
|
|
67
|
+
}
|
|
68
|
+
jclass mapClass = env->GetObjectClass(readableMap);
|
|
69
|
+
jmethodID getStringMethod = env->GetMethodID(mapClass, "getString", "(Ljava/lang/String;)Ljava/lang/String;");
|
|
70
|
+
jstring jKey = env->NewStringUTF(key);
|
|
71
|
+
jstring result = (jstring) env->CallObjectMethod(readableMap, getStringMethod, jKey);
|
|
72
|
+
env->DeleteLocalRef(jKey);
|
|
73
|
+
return result;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
}
|