whisper.rn 0.4.0 → 0.4.2

Files changed (31)
  1. package/README.md +69 -0
  2. package/android/src/main/java/com/rnwhisper/RNWhisper.java +212 -0
  3. package/android/src/main/java/com/rnwhisper/WhisperContext.java +34 -4
  4. package/android/src/main/java/com/rnwhisper/WhisperVadContext.java +101 -0
  5. package/android/src/main/jni.cpp +196 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnwhisper.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnwhisper_v8fp16_va_2.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/librnwhisper_vfpv4.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnwhisper.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/librnwhisper_x86_64.so +0 -0
  12. package/android/src/newarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  13. package/android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java +26 -0
  14. package/ios/RNWhisper.mm +147 -0
  15. package/ios/RNWhisperContext.mm +18 -24
  16. package/ios/RNWhisperVadContext.h +29 -0
  17. package/ios/RNWhisperVadContext.mm +148 -0
  18. package/jest/mock.js +19 -0
  19. package/lib/commonjs/NativeRNWhisper.js.map +1 -1
  20. package/lib/commonjs/index.js +111 -1
  21. package/lib/commonjs/index.js.map +1 -1
  22. package/lib/module/NativeRNWhisper.js.map +1 -1
  23. package/lib/module/index.js +112 -0
  24. package/lib/module/index.js.map +1 -1
  25. package/lib/typescript/NativeRNWhisper.d.ts +35 -0
  26. package/lib/typescript/NativeRNWhisper.d.ts.map +1 -1
  27. package/lib/typescript/index.d.ts +39 -3
  28. package/lib/typescript/index.d.ts.map +1 -1
  29. package/package.json +1 -1
  30. package/src/NativeRNWhisper.ts +48 -0
  31. package/src/index.ts +132 -1
package/README.md CHANGED
@@ -103,6 +103,75 @@ subscribe(evt => {
  })
  ```

+ ## Voice Activity Detection (VAD)
+
+ Voice Activity Detection lets you detect speech segments in audio data using the Silero VAD model.
+
+ #### Initialize VAD Context
+
+ ```typescript
+ import { initWhisperVad } from 'whisper.rn'
+
+ const vadContext = await initWhisperVad({
+   filePath: require('./assets/ggml-silero-v5.1.2.bin'), // VAD model file
+   useGpu: true, // Use GPU acceleration (iOS only)
+   nThreads: 4, // Number of threads for processing
+ })
+ ```
+
+ #### Detect Speech Segments
+
+ ##### From Audio Files
+
+ ```typescript
+ // Detect speech in an audio file (supports the same formats as transcribe)
+ const segments = await vadContext.detectSpeech(require('./assets/audio.wav'), {
+   threshold: 0.5, // Speech probability threshold (0.0-1.0)
+   minSpeechDurationMs: 250, // Minimum speech duration in ms
+   minSilenceDurationMs: 100, // Minimum silence duration in ms
+   maxSpeechDurationS: 30, // Maximum speech duration in seconds
+   speechPadMs: 30, // Padding around speech segments in ms
+   samplesOverlap: 0.1, // Overlap between analysis windows
+ })
+
+ // Also supports:
+ // - File paths: vadContext.detectSpeech('path/to/audio.wav', options)
+ // - HTTP URLs: vadContext.detectSpeech('https://example.com/audio.wav', options)
+ // - Base64 WAV: vadContext.detectSpeech('data:audio/wav;base64,...', options)
+ // - Assets: vadContext.detectSpeech(require('./assets/audio.wav'), options)
+ ```
+
+ ##### From Raw Audio Data
+
+ ```typescript
+ // Detect speech in base64-encoded float32 PCM data
+ const segments = await vadContext.detectSpeechData(base64AudioData, {
+   threshold: 0.5,
+   minSpeechDurationMs: 250,
+   minSilenceDurationMs: 100,
+   maxSpeechDurationS: 30,
+   speechPadMs: 30,
+   samplesOverlap: 0.1,
+ })
+ ```
+
+ #### Process Results
+
+ ```typescript
+ segments.forEach((segment, index) => {
+   console.log(`Segment ${index + 1}: ${segment.t0.toFixed(2)}s - ${segment.t1.toFixed(2)}s`)
+   console.log(`Duration: ${(segment.t1 - segment.t0).toFixed(2)}s`)
+ })
+ ```
+
+ #### Release VAD Context
+
+ ```typescript
+ await vadContext.release()
+ // Or release all VAD contexts
+ await releaseAllWhisperVad()
+ ```
+
  On iOS, you may need to change the Audio Session so it can be used alongside other audio playback, or to optimize recording quality. AudioSession utilities are provided for this:

  Option 1 - Use options in transcribeRealtime:
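A typical use of the new API is gating transcription on detected speech. Below is a minimal sketch that combines the VAD API above with whisper.rn's existing `initWhisper`/`transcribe` API; the model and audio paths are placeholders:

```typescript
import { initWhisper, initWhisperVad } from 'whisper.rn'

// Placeholder model/audio assets — substitute your own.
const whisperContext = await initWhisper({
  filePath: require('./assets/ggml-base.bin'),
})
const vadContext = await initWhisperVad({
  filePath: require('./assets/ggml-silero-v5.1.2.bin'),
  nThreads: 4,
})

// Run the cheap VAD pass first; only transcribe when speech is present.
const segments = await vadContext.detectSpeech(require('./assets/audio.wav'), {
  threshold: 0.5,
})
if (segments.length > 0) {
  const { promise } = whisperContext.transcribe(require('./assets/audio.wav'), {
    language: 'en',
  })
  const { result } = await promise
  console.log(result)
}

await vadContext.release()
await whisperContext.release()
```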
package/android/src/main/java/com/rnwhisper/RNWhisper.java CHANGED
@@ -13,6 +13,7 @@ import com.facebook.react.bridge.ReactMethod;
  import com.facebook.react.bridge.LifecycleEventListener;
  import com.facebook.react.bridge.ReadableMap;
  import com.facebook.react.bridge.WritableMap;
+ import com.facebook.react.bridge.WritableArray;
  import com.facebook.react.bridge.Arguments;

  import java.util.HashMap;
@@ -47,6 +48,7 @@ public class RNWhisper implements LifecycleEventListener {
    private HashMap<AsyncTask, String> tasks = new HashMap<>();

    private HashMap<Integer, WhisperContext> contexts = new HashMap<>();
+   private HashMap<Integer, WhisperVadContext> vadContexts = new HashMap<>();

    private int getResourceIdentifier(String filePath) {
      int identifier = reactContext.getResources().getIdentifier(
@@ -344,6 +346,212 @@ public class RNWhisper implements LifecycleEventListener {
      tasks.put(task, "releaseAllContexts");
    }

+   public void initVadContext(final ReadableMap options, final Promise promise) {
+     AsyncTask task = new AsyncTask<Void, Void, Integer>() {
+       private Exception exception;
+
+       @Override
+       protected Integer doInBackground(Void... voids) {
+         try {
+           String modelPath = options.getString("filePath");
+           boolean isBundleAsset = options.getBoolean("isBundleAsset");
+
+           String modelFilePath = modelPath;
+           if (!isBundleAsset && (modelPath.startsWith("http://") || modelPath.startsWith("https://"))) {
+             modelFilePath = downloader.downloadFile(modelPath);
+           }
+
+           long vadContext;
+           int resId = getResourceIdentifier(modelFilePath);
+           if (resId > 0) {
+             vadContext = WhisperContext.initVadContextWithInputStream(
+               new PushbackInputStream(reactContext.getResources().openRawResource(resId))
+             );
+           } else if (isBundleAsset) {
+             vadContext = WhisperContext.initVadContextWithAsset(reactContext.getAssets(), modelFilePath);
+           } else {
+             vadContext = WhisperContext.initVadContext(modelFilePath);
+           }
+           if (vadContext == 0) {
+             throw new Exception("Failed to initialize VAD context");
+           }
+           int id = Math.abs(new Random().nextInt());
+           WhisperVadContext whisperVadContext = new WhisperVadContext(id, reactContext, vadContext);
+           vadContexts.put(id, whisperVadContext);
+           return id;
+         } catch (Exception e) {
+           exception = e;
+           return null;
+         }
+       }
+
+       @Override
+       protected void onPostExecute(Integer id) {
+         if (exception != null) {
+           promise.reject(exception);
+           return;
+         }
+         WritableMap result = Arguments.createMap();
+         result.putInt("contextId", id);
+         result.putBoolean("gpu", false);
+         result.putString("reasonNoGPU", "Currently not supported");
+         promise.resolve(result);
+         tasks.remove(this);
+       }
+     }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+     tasks.put(task, "initVadContext");
+   }
+
+   public void vadDetectSpeech(double id, String audioDataBase64, ReadableMap options, Promise promise) {
+     final WhisperVadContext vadContext = vadContexts.get((int) id);
+     if (vadContext == null) {
+       promise.reject("VAD context not found");
+       return;
+     }
+
+     AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+       private Exception exception;
+
+       @Override
+       protected WritableArray doInBackground(Void... voids) {
+         try {
+           float[] audioData = AudioUtils.decodePcmData(audioDataBase64);
+           return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
+         } catch (Exception e) {
+           exception = e;
+           return null;
+         }
+       }
+
+       @Override
+       protected void onPostExecute(WritableArray segments) {
+         if (exception != null) {
+           promise.reject(exception);
+           return;
+         }
+         promise.resolve(segments);
+         tasks.remove(this);
+       }
+     }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+     tasks.put(task, "vadDetectSpeech-" + id);
+   }
+
+   public void vadDetectSpeechFile(double id, String filePathOrBase64, ReadableMap options, Promise promise) {
+     final WhisperVadContext vadContext = vadContexts.get((int) id);
+     if (vadContext == null) {
+       promise.reject("VAD context not found");
+       return;
+     }
+
+     AsyncTask task = new AsyncTask<Void, Void, WritableArray>() {
+       private Exception exception;
+
+       @Override
+       protected WritableArray doInBackground(Void... voids) {
+         try {
+           // Handle file processing like transcribeFile does
+           String filePath = filePathOrBase64;
+           if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
+             filePath = downloader.downloadFile(filePathOrBase64);
+           }
+
+           float[] audioData;
+           int resId = getResourceIdentifier(filePath);
+           if (resId > 0) {
+             audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
+           } else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
+             audioData = AudioUtils.decodeWaveData(filePathOrBase64);
+           } else {
+             audioData = AudioUtils.decodeWaveFile(new java.io.FileInputStream(new java.io.File(filePath)));
+           }
+
+           if (audioData == null) {
+             throw new Exception("Failed to load audio file: " + filePathOrBase64);
+           }
+
+           return vadContext.detectSpeechWithAudioData(audioData, audioData.length, options);
+         } catch (Exception e) {
+           exception = e;
+           return null;
+         }
+       }
+
+       @Override
+       protected void onPostExecute(WritableArray segments) {
+         if (exception != null) {
+           promise.reject(exception);
+           return;
+         }
+         promise.resolve(segments);
+         tasks.remove(this);
+       }
+     }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+     tasks.put(task, "vadDetectSpeechFile-" + id);
+   }
+
+   public void releaseVadContext(double id, Promise promise) {
+     final int contextId = (int) id;
+     AsyncTask task = new AsyncTask<Void, Void, Void>() {
+       private Exception exception;
+
+       @Override
+       protected Void doInBackground(Void... voids) {
+         try {
+           WhisperVadContext vadContext = vadContexts.get(contextId);
+           if (vadContext == null) {
+             throw new Exception("VAD context " + id + " not found");
+           }
+           vadContext.release();
+           vadContexts.remove(contextId);
+         } catch (Exception e) {
+           exception = e;
+         }
+         return null;
+       }
+
+       @Override
+       protected void onPostExecute(Void result) {
+         if (exception != null) {
+           promise.reject(exception);
+           return;
+         }
+         promise.resolve(null);
+         tasks.remove(this);
+       }
+     }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+     tasks.put(task, "releaseVadContext-" + id);
+   }
+
+   public void releaseAllVadContexts(Promise promise) {
+     AsyncTask task = new AsyncTask<Void, Void, Void>() {
+       private Exception exception;
+
+       @Override
+       protected Void doInBackground(Void... voids) {
+         try {
+           for (WhisperVadContext vadContext : vadContexts.values()) {
+             vadContext.release();
+           }
+           vadContexts.clear();
+         } catch (Exception e) {
+           exception = e;
+         }
+         return null;
+       }
+
+       @Override
+       protected void onPostExecute(Void result) {
+         if (exception != null) {
+           promise.reject(exception);
+           return;
+         }
+         promise.resolve(null);
+         tasks.remove(this);
+       }
+     }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+     tasks.put(task, "releaseAllVadContexts");
+   }
+
    @Override
    public void onHostResume() {
    }
@@ -367,8 +575,12 @@ public class RNWhisper implements LifecycleEventListener {
      for (WhisperContext context : contexts.values()) {
        context.release();
      }
+     for (WhisperVadContext vadContext : vadContexts.values()) {
+       vadContext.release();
+     }
      WhisperContext.abortAllTranscribe(); // graceful abort
      contexts.clear();
+     vadContexts.clear();
      downloader.clearCache();
    }
  }
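For reference, the Java methods above imply the following bridge surface in TypeScript. This shape is inferred from the signatures in this diff, not copied from the package's published type definitions:

```typescript
type VadSegment = { t0: number; t1: number }

// Inferred native-module surface for the new VAD methods (illustrative only).
interface RNWhisperVadMethods {
  // Resolves with the new context id; gpu is always false on Android for now.
  initVadContext(options: {
    filePath: string
    isBundleAsset: boolean
  }): Promise<{ contextId: number; gpu: boolean; reasonNoGPU: string }>
  // Base64-encoded float32 PCM input.
  vadDetectSpeech(
    contextId: number,
    audioDataBase64: string,
    options: object,
  ): Promise<VadSegment[]>
  // File path, http(s) URL, bundle resource, or data:audio/wav;base64 input.
  vadDetectSpeechFile(
    contextId: number,
    filePathOrBase64: string,
    options: object,
  ): Promise<VadSegment[]>
  releaseVadContext(contextId: number): Promise<void>
  releaseAllVadContexts(): Promise<void>
}
```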
package/android/src/main/java/com/rnwhisper/WhisperContext.java CHANGED
@@ -460,19 +460,19 @@ public class WhisperContext {
      }
    }

-   private static boolean isArm64V8a() {
+   public static boolean isArm64V8a() {
      return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
    }

-   private static boolean isArmeabiV7a() {
+   public static boolean isArmeabiV7a() {
      return Build.SUPPORTED_ABIS[0].equals("armeabi-v7a");
    }

-   private static boolean isX86_64() {
+   public static boolean isX86_64() {
      return Build.SUPPORTED_ABIS[0].equals("x86_64");
    }

-   private static String getCpuFeatures() {
+   public static String getCpuFeatures() {
      File file = new File("/proc/cpuinfo");
      StringBuilder stringBuilder = new StringBuilder();
      try {
@@ -492,6 +492,10 @@ public class WhisperContext {
      }
    }

+   public static String getLoadedLibrary() {
+     return loadedLibrary;
+   }
+
    // JNI methods
    protected static native long initContext(String modelPath);
    protected static native long initContextWithAsset(AssetManager assetManager, String modelPath);
@@ -529,4 +533,30 @@ public class WhisperContext {
      int n_samples
    );
    protected static native String bench(long context, int n_threads);
+
+   // VAD JNI methods
+   protected static native long initVadContext(String modelPath);
+   protected static native long initVadContextWithAsset(AssetManager assetManager, String modelPath);
+   protected static native long initVadContextWithInputStream(PushbackInputStream inputStream);
+   protected static native void freeVadContext(long vadContextPtr);
+   protected static native boolean vadDetectSpeech(long vadContextPtr, float[] audioData, int nSamples);
+   protected static native long vadGetSegmentsFromProbs(long vadContextPtr, float threshold,
+       int minSpeechDurationMs, int minSilenceDurationMs,
+       float maxSpeechDurationS, int speechPadMs,
+       float samplesOverlap);
+   protected static native int vadGetNSegments(long segmentsPtr);
+   protected static native float vadGetSegmentT0(long segmentsPtr, int index);
+   protected static native float vadGetSegmentT1(long segmentsPtr, int index);
+   protected static native void vadFreeSegments(long segmentsPtr);
+
+   // Audio file loading utility for VAD
+   public static float[] loadAudioFileAsFloat32(String filePath) {
+     try {
+       java.io.FileInputStream fis = new java.io.FileInputStream(new java.io.File(filePath));
+       return AudioUtils.decodeWaveFile(fis);
+     } catch (Exception e) {
+       Log.e(NAME, "Failed to load audio file: " + filePath, e);
+       return null;
+     }
+   }
  }
package/android/src/main/java/com/rnwhisper/WhisperVadContext.java ADDED
@@ -0,0 +1,101 @@
+ package com.rnwhisper;
+
+ import com.facebook.react.bridge.Arguments;
+ import com.facebook.react.bridge.WritableArray;
+ import com.facebook.react.bridge.WritableMap;
+ import com.facebook.react.bridge.ReadableMap;
+ import com.facebook.react.bridge.ReactApplicationContext;
+
+ import android.util.Log;
+ import android.content.res.AssetManager;
+ import android.util.Base64;
+
+ import java.io.PushbackInputStream;
+
+ public class WhisperVadContext {
+   public static final String NAME = "RNWhisperVadContext";
+
+   private int id;
+   private ReactApplicationContext reactContext;
+   private long vadContext;
+
+   public WhisperVadContext(int id, ReactApplicationContext reactContext, long vadContext) {
+     this.id = id;
+     this.vadContext = vadContext;
+     this.reactContext = reactContext;
+   }
+
+   public WritableArray detectSpeechWithAudioData(float[] audioData, int numSamples, ReadableMap options) throws Exception {
+     if (vadContext == 0) {
+       throw new Exception("VAD context is null");
+     }
+
+     return processVadDetection(audioData, numSamples, options);
+   }
+
+   private int getResourceIdentifier(String filePath) {
+     int identifier = reactContext.getResources().getIdentifier(
+       filePath,
+       "drawable",
+       reactContext.getPackageName()
+     );
+     if (identifier == 0) {
+       identifier = reactContext.getResources().getIdentifier(
+         filePath,
+         "raw",
+         reactContext.getPackageName()
+       );
+     }
+     return identifier;
+   }
+
+   private WritableArray processVadDetection(float[] audioData, int numSamples, ReadableMap options) throws Exception {
+     // Run VAD detection using WhisperContext static methods
+     boolean speechDetected = WhisperContext.vadDetectSpeech(vadContext, audioData, numSamples);
+     if (!speechDetected) {
+       return Arguments.createArray();
+     }
+
+     // Set VAD parameters from options
+     float threshold = options.hasKey("threshold") ? (float) options.getDouble("threshold") : 0.5f;
+     int minSpeechDurationMs = options.hasKey("minSpeechDurationMs") ? options.getInt("minSpeechDurationMs") : 250;
+     int minSilenceDurationMs = options.hasKey("minSilenceDurationMs") ? options.getInt("minSilenceDurationMs") : 100;
+     float maxSpeechDurationS = options.hasKey("maxSpeechDurationS") ? (float) options.getDouble("maxSpeechDurationS") : 30.0f;
+     int speechPadMs = options.hasKey("speechPadMs") ? options.getInt("speechPadMs") : 30;
+     float samplesOverlap = options.hasKey("samplesOverlap") ? (float) options.getDouble("samplesOverlap") : 0.1f;
+
+     // Get segments from VAD using WhisperContext static methods
+     long segments = WhisperContext.vadGetSegmentsFromProbs(vadContext, threshold, minSpeechDurationMs,
+         minSilenceDurationMs, maxSpeechDurationS,
+         speechPadMs, samplesOverlap);
+     if (segments == 0) {
+       return Arguments.createArray();
+     }
+
+     // Convert segments to WritableArray using WhisperContext static methods
+     WritableArray result = Arguments.createArray();
+     int nSegments = WhisperContext.vadGetNSegments(segments);
+
+     for (int i = 0; i < nSegments; i++) {
+       float t0 = WhisperContext.vadGetSegmentT0(segments, i);
+       float t1 = WhisperContext.vadGetSegmentT1(segments, i);
+
+       WritableMap segment = Arguments.createMap();
+       segment.putDouble("t0", t0);
+       segment.putDouble("t1", t1);
+       result.pushMap(segment);
+     }
+
+     // Clean up using WhisperContext static methods
+     WhisperContext.vadFreeSegments(segments);
+
+     return result;
+   }
+
+   public void release() {
+     if (vadContext != 0) {
+       WhisperContext.freeVadContext(vadContext);
+       vadContext = 0;
+     }
+   }
+ }
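Each detected segment crosses the bridge as a `{ t0, t1 }` map. Below is a small sketch for working with those values, assuming `t0`/`t1` are seconds (as the README's `.toFixed(2)}s` examples format them) and 16 kHz mono PCM input (whisper.cpp's expected format); the helper names are illustrative:

```typescript
const SAMPLE_RATE = 16000 // whisper.cpp expects 16 kHz mono PCM

type VadSegment = { t0: number; t1: number }

// Map one segment's timestamps to sample offsets in the source buffer.
function segmentToSampleRange(segment: VadSegment) {
  return {
    start: Math.floor(segment.t0 * SAMPLE_RATE),
    end: Math.ceil(segment.t1 * SAMPLE_RATE),
  }
}

// Total detected speech, e.g. to decide whether transcription is worthwhile.
function totalSpeechSeconds(segments: VadSegment[]): number {
  return segments.reduce((sum, s) => sum + (s.t1 - s.t0), 0)
}
```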
package/android/src/main/jni.cpp CHANGED
@@ -148,6 +148,47 @@ static struct whisper_context *whisper_init_from_asset(
      return whisper_init_with_params(&loader, cparams);
  }

+ // VAD context initialization functions
+ static struct whisper_vad_context *whisper_vad_init_from_input_stream(
+     JNIEnv *env,
+     jobject input_stream, // PushbackInputStream
+     struct whisper_vad_context_params vad_params
+ ) {
+     input_stream_context *context = new input_stream_context;
+     context->env = env;
+     context->input_stream = env->NewGlobalRef(input_stream);
+
+     whisper_model_loader loader = {
+         .context = context,
+         .read = &input_stream_read,
+         .eof = &input_stream_is_eof,
+         .close = &input_stream_close
+     };
+     return whisper_vad_init_with_params(&loader, vad_params);
+ }
+
+ static struct whisper_vad_context *whisper_vad_init_from_asset(
+     JNIEnv *env,
+     jobject assetManager,
+     const char *asset_path,
+     struct whisper_vad_context_params vad_params
+ ) {
+     LOGI("Loading VAD model from asset '%s'\n", asset_path);
+     AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager);
+     AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING);
+     if (!asset) {
+         LOGW("Failed to open VAD asset '%s'\n", asset_path);
+         return NULL;
+     }
+     whisper_model_loader loader = {
+         .context = asset,
+         .read = &asset_read,
+         .eof = &asset_is_eof,
+         .close = &asset_close
+     };
+     return whisper_vad_init_with_params(&loader, vad_params);
+ }
+
  extern "C" {

  JNIEXPORT jlong JNICALL
@@ -530,4 +571,159 @@ Java_com_rnwhisper_WhisperContext_bench(
      return env->NewStringUTF(result.c_str());
  }

+ // VAD Context JNI implementations
+ JNIEXPORT jlong JNICALL
+ Java_com_rnwhisper_WhisperContext_initVadContext(
+     JNIEnv *env,
+     jobject thiz,
+     jstring model_path_str
+ ) {
+     UNUSED(thiz);
+     struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
+
+     struct whisper_vad_context *vad_context = nullptr;
+     const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
+     vad_context = whisper_vad_init_from_file_with_params(model_path_chars, vad_params);
+     env->ReleaseStringUTFChars(model_path_str, model_path_chars);
+     return reinterpret_cast<jlong>(vad_context);
+ }
+
+ JNIEXPORT jlong JNICALL
+ Java_com_rnwhisper_WhisperContext_initVadContextWithAsset(
+     JNIEnv *env,
+     jobject thiz,
+     jobject asset_manager,
+     jstring model_path_str
+ ) {
+     UNUSED(thiz);
+     struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
+
+     struct whisper_vad_context *vad_context = nullptr;
+     const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
+     vad_context = whisper_vad_init_from_asset(env, asset_manager, model_path_chars, vad_params);
+     env->ReleaseStringUTFChars(model_path_str, model_path_chars);
+     return reinterpret_cast<jlong>(vad_context);
+ }
+
+ JNIEXPORT jlong JNICALL
+ Java_com_rnwhisper_WhisperContext_initVadContextWithInputStream(
+     JNIEnv *env,
+     jobject thiz,
+     jobject input_stream
+ ) {
+     UNUSED(thiz);
+     struct whisper_vad_context_params vad_params = whisper_vad_default_context_params();
+
+     struct whisper_vad_context *vad_context = nullptr;
+     vad_context = whisper_vad_init_from_input_stream(env, input_stream, vad_params);
+     return reinterpret_cast<jlong>(vad_context);
+ }
+
+ JNIEXPORT void JNICALL
+ Java_com_rnwhisper_WhisperContext_freeVadContext(
+     JNIEnv *env,
+     jobject thiz,
+     jlong vad_context_ptr
+ ) {
+     UNUSED(env);
+     UNUSED(thiz);
+     struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
+     whisper_vad_free(vad_context);
+ }
+
+ JNIEXPORT jboolean JNICALL
+ Java_com_rnwhisper_WhisperContext_vadDetectSpeech(
+     JNIEnv *env,
+     jobject thiz,
+     jlong vad_context_ptr,
+     jfloatArray audio_data,
+     jint n_samples
+ ) {
+     UNUSED(thiz);
+     struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
+
+     jfloat *audio_data_arr = env->GetFloatArrayElements(audio_data, nullptr);
+     bool result = whisper_vad_detect_speech(vad_context, audio_data_arr, n_samples);
+     env->ReleaseFloatArrayElements(audio_data, audio_data_arr, JNI_ABORT);
+
+     return result;
+ }
+
+ JNIEXPORT jlong JNICALL
+ Java_com_rnwhisper_WhisperContext_vadGetSegmentsFromProbs(
+     JNIEnv *env,
+     jobject thiz,
+     jlong vad_context_ptr,
+     jfloat threshold,
+     jint min_speech_duration_ms,
+     jint min_silence_duration_ms,
+     jfloat max_speech_duration_s,
+     jint speech_pad_ms,
+     jfloat samples_overlap
+ ) {
+     UNUSED(thiz);
+     struct whisper_vad_context *vad_context = reinterpret_cast<struct whisper_vad_context *>(vad_context_ptr);
+
+     struct whisper_vad_params vad_params = whisper_vad_default_params();
+     vad_params.threshold = threshold;
+     vad_params.min_speech_duration_ms = min_speech_duration_ms;
+     vad_params.min_silence_duration_ms = min_silence_duration_ms;
+     vad_params.max_speech_duration_s = max_speech_duration_s;
+     vad_params.speech_pad_ms = speech_pad_ms;
+     vad_params.samples_overlap = samples_overlap;
+
+     struct whisper_vad_segments *segments = whisper_vad_segments_from_probs(vad_context, vad_params);
+     return reinterpret_cast<jlong>(segments);
+ }
+
+ JNIEXPORT jint JNICALL
+ Java_com_rnwhisper_WhisperContext_vadGetNSegments(
+     JNIEnv *env,
+     jobject thiz,
+     jlong segments_ptr
+ ) {
+     UNUSED(env);
+     UNUSED(thiz);
+     struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+     return whisper_vad_segments_n_segments(segments);
+ }
+
+ JNIEXPORT jfloat JNICALL
+ Java_com_rnwhisper_WhisperContext_vadGetSegmentT0(
+     JNIEnv *env,
+     jobject thiz,
+     jlong segments_ptr,
+     jint index
+ ) {
+     UNUSED(env);
+     UNUSED(thiz);
+     struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+     return whisper_vad_segments_get_segment_t0(segments, index);
+ }
+
+ JNIEXPORT jfloat JNICALL
+ Java_com_rnwhisper_WhisperContext_vadGetSegmentT1(
+     JNIEnv *env,
+     jobject thiz,
+     jlong segments_ptr,
+     jint index
+ ) {
+     UNUSED(env);
+     UNUSED(thiz);
+     struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+     return whisper_vad_segments_get_segment_t1(segments, index);
+ }
+
+ JNIEXPORT void JNICALL
+ Java_com_rnwhisper_WhisperContext_vadFreeSegments(
+     JNIEnv *env,
+     jobject thiz,
+     jlong segments_ptr
+ ) {
+     UNUSED(env);
+     UNUSED(thiz);
+     struct whisper_vad_segments *segments = reinterpret_cast<struct whisper_vad_segments *>(segments_ptr);
+     whisper_vad_free_segments(segments);
+ }
+
  } // extern "C"
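When an option is omitted, the Android side falls back to the defaults hard-coded in `WhisperVadContext.processVadDetection` above, and this JNI layer otherwise starts from `whisper_vad_default_params()`. The sketch below mirrors those defaults in application code to keep the configuration explicit; `vadContext` and `base64AudioData` are assumed from the README examples:

```typescript
// Fallback values used by the Android implementation when an option is omitted.
const DEFAULT_VAD_OPTIONS = {
  threshold: 0.5, // speech probability threshold (0.0-1.0)
  minSpeechDurationMs: 250,
  minSilenceDurationMs: 100,
  maxSpeechDurationS: 30,
  speechPadMs: 30,
  samplesOverlap: 0.1,
}

// Passing them explicitly is equivalent to omitting them on Android.
const segments = await vadContext.detectSpeechData(base64AudioData, DEFAULT_VAD_OPTIONS)
```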