whisper.rn 0.4.0-rc.5 → 0.4.0-rc.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -115,7 +115,7 @@ public class RNWhisper implements LifecycleEventListener {
115
115
  promise.resolve(result);
116
116
  tasks.remove(this);
117
117
  }
118
- }.execute();
118
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
119
119
  tasks.put(task, "initContext");
120
120
  }
121
121
 
@@ -174,7 +174,7 @@ public class RNWhisper implements LifecycleEventListener {
174
174
  promise.resolve(data);
175
175
  tasks.remove(this);
176
176
  }
177
- }.execute();
177
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
178
178
  tasks.put(task, "transcribeFile-" + id);
179
179
  }
180
180
 
@@ -231,7 +231,7 @@ public class RNWhisper implements LifecycleEventListener {
231
231
  promise.resolve(null);
232
232
  tasks.remove(this);
233
233
  }
234
- }.execute();
234
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
235
235
  tasks.put(task, "abortTranscribe-" + id);
236
236
  }
237
237
 
@@ -272,7 +272,7 @@ public class RNWhisper implements LifecycleEventListener {
272
272
  promise.resolve(null);
273
273
  tasks.remove(this);
274
274
  }
275
- }.execute();
275
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
276
276
  tasks.put(task, "releaseContext-" + id);
277
277
  }
278
278
 
@@ -299,7 +299,7 @@ public class RNWhisper implements LifecycleEventListener {
299
299
  promise.resolve(null);
300
300
  tasks.remove(this);
301
301
  }
302
- }.execute();
302
+ }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
303
303
  tasks.put(task, "releaseAllContexts");
304
304
  }
305
305
 
@@ -110,6 +110,9 @@ public class WhisperContext {
110
110
  final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
111
111
  isUseSlices = audioSliceSec < audioSec;
112
112
 
113
+ double realtimeAudioMinSec = options.hasKey("realtimeAudioMinSec") ? options.getDouble("realtimeAudioMinSec") : 0;
114
+ final double audioMinSec = realtimeAudioMinSec > 0.5 && realtimeAudioMinSec <= audioSliceSec ? realtimeAudioMinSec : 1;
115
+
113
116
  createRealtimeTranscribeJob(jobId, context, options);
114
117
 
115
118
  sliceNSamples = new ArrayList<Integer>();
@@ -144,7 +147,8 @@ public class WhisperContext {
144
147
  ) {
145
148
  finishRealtimeTranscribe(Arguments.createMap());
146
149
  } else if (!isTranscribing) {
147
- if (!vad(sliceIndex, nSamples, 0)) {
150
+ boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
151
+ if (!isSamplesEnough || !vad(sliceIndex, nSamples, 0)) {
148
152
  finishRealtimeTranscribe(Arguments.createMap());
149
153
  break;
150
154
  }
@@ -169,7 +173,8 @@ public class WhisperContext {
169
173
  nSamples += n;
170
174
  sliceNSamples.set(sliceIndex, nSamples);
171
175
 
172
- if (!isSpeech) continue;
176
+ boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
177
+ if (!isSamplesEnough || !isSpeech) continue;
173
178
 
174
179
  if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
175
180
  isTranscribing = true;
@@ -199,7 +199,7 @@ struct whisper_full_params createFullParams(JNIEnv *env, jobject options) {
199
199
  params.print_progress = false;
200
200
  params.print_timestamps = false;
201
201
  params.print_special = false;
202
-
202
+
203
203
  int max_threads = std::thread::hardware_concurrency();
204
204
  // Use 2 threads by default on 4-core devices, 4 threads on more cores
205
205
  int default_n_threads = max_threads == 4 ? 2 : min(4, max_threads);
@@ -307,7 +307,7 @@ Java_com_rnwhisper_WhisperContext_fullWithNewJob(
307
307
  // whisper_print_timings(context);
308
308
  }
309
309
  env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
310
-
310
+
311
311
  if (job->is_aborted()) code = -999;
312
312
  rnwhisper::job_remove(job_id);
313
313
  return code;
@@ -339,6 +339,7 @@ Java_com_rnwhisper_WhisperContext_createRealtimeTranscribeJob(
339
339
  vad,
340
340
  readablemap::getInt(env, options, "realtimeAudioSec", 0),
341
341
  readablemap::getInt(env, options, "realtimeAudioSliceSec", 0),
342
+ readablemap::getFloat(env, options, "realtimeAudioMinSec", 0),
342
343
  audio_output_path_str
343
344
  );
344
345
  }
package/cpp/ggml-alloc.h CHANGED
@@ -43,7 +43,7 @@ WSP_GGML_API size_t wsp_ggml_allocr_alloc_graph(wsp_ggml_allocr_t alloc, struct
43
43
  // ggml-backend v2 API
44
44
  //
45
45
 
46
- // Seperate tensor and graph allocator objects
46
+ // Separate tensor and graph allocator objects
47
47
  // This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
48
48
  // The original API is kept as a wrapper around the new API
49
49