npm - whisper.rn - Versions diffs - 0.4.0-rc.5 → 0.4.0-rc.6 - Mend

whisper.rn 0.4.0-rc.5 → 0.4.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/android/src/main/java/com/rnwhisper/WhisperContext.java +7 -2
package/android/src/main/jni.cpp +3 -2
package/cpp/ggml-alloc.h +1 -1
package/cpp/ggml-metal-whisper.metal +1497 -169
package/cpp/ggml-metal.m +530 -53
package/cpp/ggml-quants.c +2 -2
package/cpp/ggml.c +264 -99
package/cpp/ggml.h +21 -7
package/cpp/rn-whisper.cpp +2 -0
package/cpp/rn-whisper.h +3 -2
package/ios/RNWhisperContext.mm +8 -5
package/lib/commonjs/index.js.map +1 -1
package/lib/commonjs/version.json +1 -1
package/lib/module/index.js.map +1 -1
package/lib/module/version.json +1 -1
package/lib/typescript/index.d.ts +5 -0
package/lib/typescript/index.d.ts.map +1 -1
package/package.json +1 -1
package/src/index.ts +5 -0
package/src/version.json +1 -1
package/ios/RNWhisper.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -4
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +0 -8
package/ios/RNWhisper.xcodeproj/project.xcworkspace/xcuserdata/jhen.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
package/ios/RNWhisper.xcodeproj/xcuserdata/jhen.xcuserdatad/xcschemes/xcschememanagement.plist +0 -19

package/android/src/main/java/com/rnwhisper/WhisperContext.java CHANGED

@@ -110,6 +110,9 @@ public class WhisperContext {
     final int audioSliceSec = realtimeAudioSliceSec > 0 && realtimeAudioSliceSec < audioSec ? realtimeAudioSliceSec : audioSec;
     isUseSlices = audioSliceSec < audioSec;
+    double realtimeAudioMinSec = options.hasKey("realtimeAudioMinSec") ? options.getDouble("realtimeAudioMinSec") : 0;
+    final double audioMinSec = realtimeAudioMinSec > 0.5 && realtimeAudioMinSec <= audioSliceSec ? realtimeAudioMinSec : 1;
     createRealtimeTranscribeJob(jobId, context, options);
     sliceNSamples = new ArrayList<Integer>();
@@ -144,7 +147,8 @@ public class WhisperContext {
                 ) {
                   finishRealtimeTranscribe(Arguments.createMap());
                 } else if (!isTranscribing) {
-                  if (!vad(sliceIndex, nSamples, 0)) {
+                  boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
+                  if (!isSamplesEnough || !vad(sliceIndex, nSamples, 0)) {
                     finishRealtimeTranscribe(Arguments.createMap());
                     break;
                   }
@@ -169,7 +173,8 @@ public class WhisperContext {
               nSamples += n;
               sliceNSamples.set(sliceIndex, nSamples);
-              if (!isSpeech) continue;
+              boolean isSamplesEnough = nSamples / SAMPLE_RATE >= audioMinSec;
+              if (!isSamplesEnough || !isSpeech) continue;
               if (!isTranscribing && nSamples > SAMPLE_RATE / 2) {
                 isTranscribing = true;

package/android/src/main/jni.cpp CHANGED

@@ -199,7 +199,7 @@ struct whisper_full_params createFullParams(JNIEnv *env, jobject options) {
     params.print_progress = false;
     params.print_timestamps = false;
     params.print_special = false;
     int max_threads = std::thread::hardware_concurrency();
     // Use 2 threads by default on 4-core devices, 4 threads on more cores
     int default_n_threads = max_threads == 4 ? 2 : min(4, max_threads);
@@ -307,7 +307,7 @@ Java_com_rnwhisper_WhisperContext_fullWithNewJob(
         // whisper_print_timings(context);
     }
     env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
     if (job->is_aborted()) code = -999;
     rnwhisper::job_remove(job_id);
     return code;
@@ -339,6 +339,7 @@ Java_com_rnwhisper_WhisperContext_createRealtimeTranscribeJob(
         vad,
         readablemap::getInt(env, options, "realtimeAudioSec", 0),
         readablemap::getInt(env, options, "realtimeAudioSliceSec", 0),
+        readablemap::getFloat(env, options, "realtimeAudioMinSec", 0),
         audio_output_path_str
     );
 }

package/cpp/ggml-alloc.h CHANGED

@@ -43,7 +43,7 @@ WSP_GGML_API size_t wsp_ggml_allocr_alloc_graph(wsp_ggml_allocr_t alloc, struct
 // ggml-backend v2 API
 //
-// Seperate tensor and graph allocator objects
+// Separate tensor and graph allocator objects
 // This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
 // The original API is kept as a wrapper around the new API