whisper.rn 0.5.0-rc.3 → 0.5.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -295,6 +295,38 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
295
295
  return info;
296
296
  }
297
297
 
298
+ // Helper function to extract VAD parameters from options
299
+ whisper_vad_params extractVadParams(Runtime& runtime, const Object& optionsObj) {
300
+ whisper_vad_params vadParams = whisper_vad_default_params();
301
+
302
+ try {
303
+ auto propNames = optionsObj.getPropertyNames(runtime);
304
+ for (size_t i = 0; i < propNames.size(runtime); i++) {
305
+ auto propNameValue = propNames.getValueAtIndex(runtime, i);
306
+ std::string propName = propNameValue.getString(runtime).utf8(runtime);
307
+ Value propValue = optionsObj.getProperty(runtime, propNameValue.getString(runtime));
308
+
309
+ if (propName == "threshold" && propValue.isNumber()) {
310
+ vadParams.threshold = (float)propValue.getNumber();
311
+ } else if (propName == "minSpeechDurationMs" && propValue.isNumber()) {
312
+ vadParams.min_speech_duration_ms = (int)propValue.getNumber();
313
+ } else if (propName == "minSilenceDurationMs" && propValue.isNumber()) {
314
+ vadParams.min_silence_duration_ms = (int)propValue.getNumber();
315
+ } else if (propName == "maxSpeechDurationS" && propValue.isNumber()) {
316
+ vadParams.max_speech_duration_s = (float)propValue.getNumber();
317
+ } else if (propName == "speechPadMs" && propValue.isNumber()) {
318
+ vadParams.speech_pad_ms = (int)propValue.getNumber();
319
+ } else if (propName == "samplesOverlap" && propValue.isNumber()) {
320
+ vadParams.samples_overlap = (float)propValue.getNumber();
321
+ }
322
+ }
323
+ } catch (...) {
324
+ // Ignore parameter extraction errors
325
+ }
326
+
327
+ return vadParams;
328
+ }
329
+
298
330
  // Helper function to create segments array
299
331
  Array createSegmentsArray(Runtime& runtime, struct whisper_context* ctx, int offset) {
300
332
  int n_segments = whisper_full_n_segments(ctx);
@@ -355,10 +387,13 @@ Value createPromiseTask(
355
387
 
356
388
  whisper_full_params params = {};
357
389
  CallbackInfo callbackInfo = {};
390
+ whisper_vad_params vadParams = {};
358
391
  if (functionName == "whisperTranscribeData") {
359
392
  params = createFullParamsFromJSI(runtime, optionsObj);
360
393
  // Extract data from optionsObj before lambda capture
361
394
  callbackInfo = extractCallbacks(runtime, optionsObj);
395
+ } else if (functionName == "whisperVadDetectSpeech") {
396
+ vadParams = extractVadParams(runtime, optionsObj);
362
397
  }
363
398
 
364
399
  // Create promise
@@ -368,7 +403,7 @@ Value createPromiseTask(
368
403
  runtime,
369
404
  PropNameID::forAscii(runtime, ""),
370
405
  2, // resolve, reject
371
- [contextId, audioResult, params, callbackInfo, task, callInvoker, functionName](Runtime& runtime, const Value& thisValue, const Value* arguments, size_t count) -> Value {
406
+ [contextId, audioResult, params, callbackInfo, vadParams, task, callInvoker, functionName](Runtime& runtime, const Value& thisValue, const Value* arguments, size_t count) -> Value {
372
407
  if (count != 2) {
373
408
  throw JSError(runtime, "Promise executor expects 2 arguments (resolve, reject)");
374
409
  }
@@ -379,10 +414,10 @@ Value createPromiseTask(
379
414
 
380
415
  // Execute task in ThreadPool
381
416
  auto future = getWhisperThreadPool().enqueue([
382
- contextId, audioResult, params, callbackInfo, task, resolvePtr, rejectPtr, callInvoker, safeRuntime, functionName]() {
417
+ contextId, audioResult, params, callbackInfo, vadParams, task, resolvePtr, rejectPtr, callInvoker, safeRuntime, functionName]() {
383
418
 
384
419
  try {
385
- task(contextId, audioResult, params, callbackInfo, resolvePtr, rejectPtr, callInvoker, safeRuntime);
420
+ task(contextId, audioResult, params, callbackInfo, vadParams, resolvePtr, rejectPtr, callInvoker, safeRuntime);
386
421
  } catch (...) {
387
422
  callInvoker->invokeAsync([rejectPtr, safeRuntime, functionName]() {
388
423
  auto& runtime = *safeRuntime;
@@ -413,7 +448,7 @@ void installJSIBindings(
413
448
  try {
414
449
  return createPromiseTask<whisper_context>(
415
450
  runtime, "whisperTranscribeData", callInvoker, arguments, count,
416
- [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo,
451
+ [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo, const whisper_vad_params& vadParams,
417
452
  std::shared_ptr<Function> resolvePtr, std::shared_ptr<Function> rejectPtr,
418
453
  std::shared_ptr<facebook::react::CallInvoker> callInvoker,
419
454
  std::shared_ptr<Runtime> safeRuntime) {
@@ -566,7 +601,7 @@ void installJSIBindings(
566
601
  try {
567
602
  return createPromiseTask<whisper_vad_context>(
568
603
  runtime, "whisperVadDetectSpeech", callInvoker, arguments, count,
569
- [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo,
604
+ [](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo, const whisper_vad_params& vadParams,
570
605
  std::shared_ptr<Function> resolvePtr, std::shared_ptr<Function> rejectPtr,
571
606
  std::shared_ptr<facebook::react::CallInvoker> callInvoker,
572
607
  std::shared_ptr<Runtime> safeRuntime) {
@@ -600,7 +635,8 @@ void installJSIBindings(
600
635
  bool isSpeech = whisper_vad_detect_speech(vadContext, audioResult.data.data(), audioResult.count);
601
636
  logInfo("VAD detection result: %s", isSpeech ? "speech" : "no speech");
602
637
 
603
- struct whisper_vad_params vad_params = whisper_vad_default_params();
638
+ struct whisper_vad_params vad_params = vadParams;
639
+
604
640
  struct whisper_vad_segments* segments = nullptr;
605
641
  if (isSpeech) {
606
642
  segments = whisper_vad_segments_from_probs(vadContext, vad_params);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "whisper.rn",
3
- "version": "0.5.0-rc.3",
3
+ "version": "0.5.0-rc.4",
4
4
  "description": "React Native binding of whisper.cpp",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",