whisper.rn 0.4.0-rc.5 → 0.4.0-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/java/com/rnwhisper/RNWhisper.java +5 -5
- package/android/src/main/java/com/rnwhisper/WhisperContext.java +7 -2
- package/android/src/main/jni.cpp +3 -2
- package/cpp/ggml-alloc.h +1 -1
- package/cpp/ggml-metal-whisper.metal +1497 -169
- package/cpp/ggml-metal.m +530 -53
- package/cpp/ggml-quants.c +2 -2
- package/cpp/ggml.c +264 -99
- package/cpp/ggml.h +21 -7
- package/cpp/rn-whisper.cpp +3 -0
- package/cpp/rn-whisper.h +3 -2
- package/ios/RNWhisperContext.mm +10 -6
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/version.json +1 -1
- package/lib/module/index.js.map +1 -1
- package/lib/module/version.json +1 -1
- package/lib/typescript/index.d.ts +5 -0
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +5 -0
- package/src/version.json +1 -1
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -4
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -8
- package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +0 -19
|
@@ -115,7 +115,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
115
115
|
promise.resolve(result);
|
|
116
116
|
tasks.remove(this);
|
|
117
117
|
}
|
|
118
|
-
}.
|
|
118
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
119
119
|
tasks.put(task, "initContext");
|
|
120
120
|
}
|
|
121
121
|
|
|
@@ -174,7 +174,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
174
174
|
promise.resolve(data);
|
|
175
175
|
tasks.remove(this);
|
|
176
176
|
}
|
|
177
|
-
}.
|
|
177
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
178
178
|
tasks.put(task, "transcribeFile-" + id);
|
|
179
179
|
}
|
|
180
180
|
|
|
@@ -231,7 +231,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
231
231
|
promise.resolve(null);
|
|
232
232
|
tasks.remove(this);
|
|
233
233
|
}
|
|
234
|
-
}.
|
|
234
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
235
235
|
tasks.put(task, "abortTranscribe-" + id);
|
|
236
236
|
}
|
|
237
237
|
|
|
@@ -272,7 +272,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
272
272
|
promise.resolve(null);
|
|
273
273
|
tasks.remove(this);
|
|
274
274
|
}
|
|
275
|
-
}.
|
|
275
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
276
276
|
tasks.put(task, "releaseContext-" + id);
|
|
277
277
|
}
|
|
278
278
|
|
|
@@ -299,7 +299,7 @@ public class RNWhisper implements LifecycleEventListener {
|
|
|
299
299
|
promise.resolve(null);
|
|
300
300
|
tasks.remove(this);
|
|
301
301
|
}
|
|
302
|
-
}.
|
|
302
|
+
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
|
|
303
303
|
tasks.put(task, "releaseAllContexts");
|
|
304
304
|
}
|
|
305
305
|
|
|
@@ -110,6 +110,9 @@ public class WhisperContext {
|
|
|
110
110
|
final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
|
|
111
111
|
isUseSlices = audioSliceSec < audioSec;
|
|
112
112
|
|
|
113
|
+
double realtimeAudioMinSec = options.hasKey("realtimeAudioMinSec") ? options.getDouble("realtimeAudioMinSec") : 0;
|
|
114
|
+
final double audioMinSec = realtimeAudioMinSec > 0.5 && realtimeAudioMinSec <= audioSliceSec ? realtimeAudioMinSec : 1;
|
|
115
|
+
|
|
113
116
|
createRealtimeTranscribeJob(jobId, context, options);
|
|
114
117
|
|
|
115
118
|
sliceNSamples = new ArrayList<Integer>();
|
|
@@ -144,7 +147,8 @@ public class WhisperContext {
|
|
|
144
147
|
) {
|
|
145
148
|
finishRealtimeTranscribe(Arguments.createMap());
|
|
146
149
|
} else if (!isTranscribing) {
|
|
147
|
-
|
|
150
|
+
boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
|
|
151
|
+
if (!isSamplesEnough || !vad(sliceIndex, nSamples, 0)) {
|
|
148
152
|
finishRealtimeTranscribe(Arguments.createMap());
|
|
149
153
|
break;
|
|
150
154
|
}
|
|
@@ -169,7 +173,8 @@ public class WhisperContext {
|
|
|
169
173
|
nSamples += n;
|
|
170
174
|
sliceNSamples.set(sliceIndex, nSamples);
|
|
171
175
|
|
|
172
|
-
|
|
176
|
+
boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
|
|
177
|
+
if (!isSamplesEnough || !isSpeech) continue;
|
|
173
178
|
|
|
174
179
|
if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
|
|
175
180
|
isTranscribing = true;
|
package/android/src/main/jni.cpp
CHANGED
|
@@ -199,7 +199,7 @@ struct whisper_full_params createFullParams(JNIEnv *env, jobject options) {
|
|
|
199
199
|
params.print_progress = false;
|
|
200
200
|
params.print_timestamps = false;
|
|
201
201
|
params.print_special = false;
|
|
202
|
-
|
|
202
|
+
|
|
203
203
|
int max_threads = std::thread::hardware_concurrency();
|
|
204
204
|
// Use 2 threads by default on 4-core devices, 4 threads on more cores
|
|
205
205
|
int default_n_threads = max_threads == 4 ? 2 : min(4, max_threads);
|
|
@@ -307,7 +307,7 @@ Java_com_rnwhisper_WhisperContext_fullWithNewJob(
|
|
|
307
307
|
// whisper_print_timings(context);
|
|
308
308
|
}
|
|
309
309
|
env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
|
|
310
|
-
|
|
310
|
+
|
|
311
311
|
if (job->is_aborted()) code = -999;
|
|
312
312
|
rnwhisper::job_remove(job_id);
|
|
313
313
|
return code;
|
|
@@ -339,6 +339,7 @@ Java_com_rnwhisper_WhisperContext_createRealtimeTranscribeJob(
|
|
|
339
339
|
vad,
|
|
340
340
|
readablemap::getInt(env, options, "realtimeAudioSec", 0),
|
|
341
341
|
readablemap::getInt(env, options, "realtimeAudioSliceSec", 0),
|
|
342
|
+
readablemap::getFloat(env, options, "realtimeAudioMinSec", 0),
|
|
342
343
|
audio_output_path_str
|
|
343
344
|
);
|
|
344
345
|
}
|
package/cpp/ggml-alloc.h
CHANGED
|
@@ -43,7 +43,7 @@ WSP_GGML_API size_t wsp_ggml_allocr_alloc_graph(wsp_ggml_allocr_t alloc, struct
|
|
|
43
43
|
// ggml-backend v2 API
|
|
44
44
|
//
|
|
45
45
|
|
|
46
|
-
//
|
|
46
|
+
// Separate tensor and graph allocator objects
|
|
47
47
|
// This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
|
|
48
48
|
// The original API is kept as a wrapper around the new API
|
|
49
49
|
|