whisper.rn 0.5.0-rc.3 → 0.5.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/jsi/RNWhisperJSI.cpp +42 -6
- package/package.json +1 -1
package/cpp/jsi/RNWhisperJSI.cpp
CHANGED
|
@@ -295,6 +295,38 @@ CallbackInfo extractCallbacks(Runtime& runtime, const Object& optionsObj) {
|
|
|
295
295
|
return info;
|
|
296
296
|
}
|
|
297
297
|
|
|
298
|
+
// Helper function to extract VAD parameters from options
|
|
299
|
+
whisper_vad_params extractVadParams(Runtime& runtime, const Object& optionsObj) {
|
|
300
|
+
whisper_vad_params vadParams = whisper_vad_default_params();
|
|
301
|
+
|
|
302
|
+
try {
|
|
303
|
+
auto propNames = optionsObj.getPropertyNames(runtime);
|
|
304
|
+
for (size_t i = 0; i < propNames.size(runtime); i++) {
|
|
305
|
+
auto propNameValue = propNames.getValueAtIndex(runtime, i);
|
|
306
|
+
std::string propName = propNameValue.getString(runtime).utf8(runtime);
|
|
307
|
+
Value propValue = optionsObj.getProperty(runtime, propNameValue.getString(runtime));
|
|
308
|
+
|
|
309
|
+
if (propName == "threshold" && propValue.isNumber()) {
|
|
310
|
+
vadParams.threshold = (float)propValue.getNumber();
|
|
311
|
+
} else if (propName == "minSpeechDurationMs" && propValue.isNumber()) {
|
|
312
|
+
vadParams.min_speech_duration_ms = (int)propValue.getNumber();
|
|
313
|
+
} else if (propName == "minSilenceDurationMs" && propValue.isNumber()) {
|
|
314
|
+
vadParams.min_silence_duration_ms = (int)propValue.getNumber();
|
|
315
|
+
} else if (propName == "maxSpeechDurationS" && propValue.isNumber()) {
|
|
316
|
+
vadParams.max_speech_duration_s = (float)propValue.getNumber();
|
|
317
|
+
} else if (propName == "speechPadMs" && propValue.isNumber()) {
|
|
318
|
+
vadParams.speech_pad_ms = (int)propValue.getNumber();
|
|
319
|
+
} else if (propName == "samplesOverlap" && propValue.isNumber()) {
|
|
320
|
+
vadParams.samples_overlap = (float)propValue.getNumber();
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
} catch (...) {
|
|
324
|
+
// Ignore parameter extraction errors
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
return vadParams;
|
|
328
|
+
}
|
|
329
|
+
|
|
298
330
|
// Helper function to create segments array
|
|
299
331
|
Array createSegmentsArray(Runtime& runtime, struct whisper_context* ctx, int offset) {
|
|
300
332
|
int n_segments = whisper_full_n_segments(ctx);
|
|
@@ -355,10 +387,13 @@ Value createPromiseTask(
|
|
|
355
387
|
|
|
356
388
|
whisper_full_params params = {};
|
|
357
389
|
CallbackInfo callbackInfo = {};
|
|
390
|
+
whisper_vad_params vadParams = {};
|
|
358
391
|
if (functionName == "whisperTranscribeData") {
|
|
359
392
|
params = createFullParamsFromJSI(runtime, optionsObj);
|
|
360
393
|
// Extract data from optionsObj before lambda capture
|
|
361
394
|
callbackInfo = extractCallbacks(runtime, optionsObj);
|
|
395
|
+
} else if (functionName == "whisperVadDetectSpeech") {
|
|
396
|
+
vadParams = extractVadParams(runtime, optionsObj);
|
|
362
397
|
}
|
|
363
398
|
|
|
364
399
|
// Create promise
|
|
@@ -368,7 +403,7 @@ Value createPromiseTask(
|
|
|
368
403
|
runtime,
|
|
369
404
|
PropNameID::forAscii(runtime, ""),
|
|
370
405
|
2, // resolve, reject
|
|
371
|
-
[contextId, audioResult, params, callbackInfo, task, callInvoker, functionName](Runtime& runtime, const Value& thisValue, const Value* arguments, size_t count) -> Value {
|
|
406
|
+
[contextId, audioResult, params, callbackInfo, vadParams, task, callInvoker, functionName](Runtime& runtime, const Value& thisValue, const Value* arguments, size_t count) -> Value {
|
|
372
407
|
if (count != 2) {
|
|
373
408
|
throw JSError(runtime, "Promise executor expects 2 arguments (resolve, reject)");
|
|
374
409
|
}
|
|
@@ -379,10 +414,10 @@ Value createPromiseTask(
|
|
|
379
414
|
|
|
380
415
|
// Execute task in ThreadPool
|
|
381
416
|
auto future = getWhisperThreadPool().enqueue([
|
|
382
|
-
contextId, audioResult, params, callbackInfo, task, resolvePtr, rejectPtr, callInvoker, safeRuntime, functionName]() {
|
|
417
|
+
contextId, audioResult, params, callbackInfo, vadParams, task, resolvePtr, rejectPtr, callInvoker, safeRuntime, functionName]() {
|
|
383
418
|
|
|
384
419
|
try {
|
|
385
|
-
task(contextId, audioResult, params, callbackInfo, resolvePtr, rejectPtr, callInvoker, safeRuntime);
|
|
420
|
+
task(contextId, audioResult, params, callbackInfo, vadParams, resolvePtr, rejectPtr, callInvoker, safeRuntime);
|
|
386
421
|
} catch (...) {
|
|
387
422
|
callInvoker->invokeAsync([rejectPtr, safeRuntime, functionName]() {
|
|
388
423
|
auto& runtime = *safeRuntime;
|
|
@@ -413,7 +448,7 @@ void installJSIBindings(
|
|
|
413
448
|
try {
|
|
414
449
|
return createPromiseTask<whisper_context>(
|
|
415
450
|
runtime, "whisperTranscribeData", callInvoker, arguments, count,
|
|
416
|
-
[](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo,
|
|
451
|
+
[](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo, const whisper_vad_params& vadParams,
|
|
417
452
|
std::shared_ptr<Function> resolvePtr, std::shared_ptr<Function> rejectPtr,
|
|
418
453
|
std::shared_ptr<facebook::react::CallInvoker> callInvoker,
|
|
419
454
|
std::shared_ptr<Runtime> safeRuntime) {
|
|
@@ -566,7 +601,7 @@ void installJSIBindings(
|
|
|
566
601
|
try {
|
|
567
602
|
return createPromiseTask<whisper_vad_context>(
|
|
568
603
|
runtime, "whisperVadDetectSpeech", callInvoker, arguments, count,
|
|
569
|
-
[](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo,
|
|
604
|
+
[](int contextId, const AudioData& audioResult, const whisper_full_params& params, const CallbackInfo& callbackInfo, const whisper_vad_params& vadParams,
|
|
570
605
|
std::shared_ptr<Function> resolvePtr, std::shared_ptr<Function> rejectPtr,
|
|
571
606
|
std::shared_ptr<facebook::react::CallInvoker> callInvoker,
|
|
572
607
|
std::shared_ptr<Runtime> safeRuntime) {
|
|
@@ -600,7 +635,8 @@ void installJSIBindings(
|
|
|
600
635
|
bool isSpeech = whisper_vad_detect_speech(vadContext, audioResult.data.data(), audioResult.count);
|
|
601
636
|
logInfo("VAD detection result: %s", isSpeech ? "speech" : "no speech");
|
|
602
637
|
|
|
603
|
-
struct whisper_vad_params vad_params =
|
|
638
|
+
struct whisper_vad_params vad_params = vadParams;
|
|
639
|
+
|
|
604
640
|
struct whisper_vad_segments* segments = nullptr;
|
|
605
641
|
if (isSpeech) {
|
|
606
642
|
segments = whisper_vad_segments_from_probs(vadContext, vad_params);
|