whisper.rn 0.4.0-rc.5 → 0.4.0-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +7 -2
- package/android/src/main/jni.cpp +3 -2
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-metal-whisper.metal +1497 -169
- package/cpp/ggml-metal.m +530 -53
- package/cpp/ggml-quants.c +2 -2
- package/cpp/ggml.c +264 -99
- package/cpp/ggml.h +21 -7
- package/cpp/rn-whisper.cpp +2 -0
- package/cpp/rn-whisper.h +3 -2
- package/ios/RNWhisperContext.mm +8 -5
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/index.d.ts +5 -0
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +5 -0
- package/src/version.json +1 -1
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -4
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -8
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +0 -19
|
@@ -110,6 +110,9 @@ public class WhisperContext {
|
|
|
110
110
|
final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
|
|
111
111
|
isUseSlices = audioSliceSec < audioSec;
|
|
112
112
|
|
|
113
|
+
double realtimeAudioMinSec = options.hasKey("realtimeAudioMinSec") ? options.getDouble("realtimeAudioMinSec") : 0;
|
|
114
|
+
final double audioMinSec = realtimeAudioMinSec > 0.5 && realtimeAudioMinSec <= audioSliceSec ? realtimeAudioMinSec : 1;
|
|
115
|
+
|
|
113
116
|
createRealtimeTranscribeJob(jobId, context, options);
|
|
114
117
|
|
|
115
118
|
sliceNSamples = new ArrayList<Integer>();
|
|
@@ -144,7 +147,8 @@ public class WhisperContext {
|
|
|
144
147
|
) {
|
|
145
148
|
finishRealtimeTranscribe(Arguments.createMap());
|
|
146
149
|
} else if (!isTranscribing) {
|
|
147
|
-
|
|
150
|
+
boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
|
|
151
|
+
if (!isSamplesEnough || !vad(sliceIndex, nSamples, 0)) {
|
|
148
152
|
finishRealtimeTranscribe(Arguments.createMap());
|
|
149
153
|
break;
|
|
150
154
|
}
|
|
@@ -169,7 +173,8 @@ public class WhisperContext {
|
|
|
169
173
|
nSamples += n;
|
|
170
174
|
sliceNSamples.set(sliceIndex, nSamples);
|
|
171
175
|
|
|
172
|
-
|
|
176
|
+
boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
|
|
177
|
+
if (!isSamplesEnough || !isSpeech) continue;
|
|
173
178
|
|
|
174
179
|
if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
|
|
175
180
|
isTranscribing = true;
|
package/android/src/main/jni.cpp
CHANGED
|
@@ -199,7 +199,7 @@ struct whisper_full_params createFullParams(JNIEnv *env, jobject options) {
|
|
|
199
199
|
params.print_progress = false;
|
|
200
200
|
params.print_timestamps = false;
|
|
201
201
|
params.print_special = false;
|
|
202
|
-
|
|
202
|
+
|
|
203
203
|
int max_threads = std::thread::hardware_concurrency();
|
|
204
204
|
// Use 2 threads by default on 4-core devices, 4 threads on more cores
|
|
205
205
|
int default_n_threads = max_threads == 4 ? 2 : min(4, max_threads);
|
|
@@ -307,7 +307,7 @@ Java_com_rnwhisper_WhisperContext_fullWithNewJob(
|
|
|
307
307
|
// whisper_print_timings(context);
|
|
308
308
|
}
|
|
309
309
|
env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
|
|
310
|
-
|
|
310
|
+
|
|
311
311
|
if (job->is_aborted()) code = -999;
|
|
312
312
|
rnwhisper::job_remove(job_id);
|
|
313
313
|
return code;
|
|
@@ -339,6 +339,7 @@ Java_com_rnwhisper_WhisperContext_createRealtimeTranscribeJob(
|
|
|
339
339
|
vad,
|
|
340
340
|
readablemap::getInt(env, options, "realtimeAudioSec", 0),
|
|
341
341
|
readablemap::getInt(env, options, "realtimeAudioSliceSec", 0),
|
|
342
|
+
readablemap::getFloat(env, options, "realtimeAudioMinSec", 0),
|
|
342
343
|
audio_output_path_str
|
|
343
344
|
);
|
|
344
345
|
}
|
package/cpp/ggml-alloc.h
CHANGED
|
@@ -43,7 +43,7 @@ WSP_GGML_API size_t wsp_ggml_allocr_alloc_graph(wsp_ggml_allocr_t alloc, struct
|
|
|
43
43
|
// ggml-backend v2 API
|
|
44
44
|
//
|
|
45
45
|
|
|
46
|
-
//
|
|
46
|
+
// Separate tensor and graph allocator objects
|
|
47
47
|
// This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
|
|
48
48
|
// The original API is kept as a wrapper around the new API
|
|
49
49
|
|