@gmessier/nitro-speech 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.md +16 -4
  2. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AudioPermissionRequester.kt +12 -6
  3. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +57 -39
  4. package/ios/Engines/AnalyzerEngine.swift +11 -7
  5. package/ios/Engines/RecognizerEngine.swift +140 -82
  6. package/ios/Engines/SFSpeechEngine.swift +7 -3
  7. package/ios/HybridRecognizer.swift +18 -5
  8. package/ios/Shared/Permissions.swift +12 -47
  9. package/lib/Recognizer/methods.d.ts +10 -10
  10. package/lib/Recognizer/methods.js +2 -2
  11. package/lib/Recognizer/types.d.ts +2 -1
  12. package/lib/index.d.ts +1 -1
  13. package/lib/specs/Recognizer.nitro.d.ts +2 -1
  14. package/lib/specs/SpeechRecognitionConfig.d.ts +2 -0
  15. package/lib/specs/SpeechRecognitionPrewarm.d.ts +10 -0
  16. package/lib/specs/SpeechRecognitionPrewarm.js +1 -0
  17. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +7 -3
  18. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +1 -1
  19. package/nitrogen/generated/android/c++/JSpeechRecognitionPrewarm.hpp +57 -0
  20. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +1 -1
  21. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionPrewarm.kt +51 -0
  22. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +18 -0
  23. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +3 -0
  24. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +5 -2
  25. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +1 -1
  26. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +2 -2
  27. package/nitrogen/generated/ios/swift/SpeechRecognitionPrewarm.swift +42 -0
  28. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +4 -1
  29. package/nitrogen/generated/shared/c++/SpeechRecognitionPrewarm.hpp +83 -0
  30. package/package.json +1 -1
  31. package/src/Recognizer/methods.ts +38 -31
  32. package/src/Recognizer/types.ts +2 -0
  33. package/src/index.ts +1 -0
  34. package/src/specs/Recognizer.nitro.ts +5 -1
  35. package/src/specs/SpeechRecognitionConfig.ts +2 -0
  36. package/src/specs/SpeechRecognitionPrewarm.ts +10 -0
package/README.md CHANGED
@@ -126,7 +126,7 @@ Both permissions are required for speech recognition to work on iOS.
126
126
  | **Reset Auto-finish Time** | Resets the Timer to the threshold | ✅ | ✅ |
127
127
  | **Voice input volume** | `useVoiceInputVolume`, `getVoiceInputVolume()`, `onVolumeChange` | ✅ | ✅ |
128
128
  | **Reset Auto-finish Sensitivity** | The voice detector sensitivity to reset the Auto-finish time | ✅ | ✅ |
129
- | **Prewarm** | Prepares resources, downloads assets, confirms locale availability | ✅ | ✅ |
129
+ | **Prewarm** | Prepares resources, downloads assets, confirms locale availability, requests permissions | ✅ | ✅ |
130
130
  | **Update config** | Static method `updateConfig` allows updating the config on the fly | ✅ | ✅ |
131
131
  | **Is Active** | Static method `getIsActive()` | ✅ | ✅ |
132
132
  | **Haptic feedback** | Haptic feedback on recording start/stop | ✅ | ✅ |
@@ -230,6 +230,17 @@ function MyComponent() {
230
230
  )>
231
231
  <Text>Update Timer to 12s, 500ms interval, 0.65 sensitivity, with reset</Text>
232
232
  </TouchableOpacity>
233
+ <TouchableOpacity
234
+ onPress={() => {
235
+ scheduleOnRuntime(workletRuntime, () => {
236
+ RecognizerRef.prewarm({
237
+ iosPreset: 'speed',
238
+ }, { requestPermission: true });
239
+ });
240
+ }}
241
+ >
242
+ <Text>Prewarm from worklet with permission request (default behavior)</Text>
243
+ </TouchableOpacity>
233
244
  </View>
234
245
  );
235
246
  }
@@ -262,7 +273,7 @@ If you need to call recognizer methods from other components without prop drilli
262
273
  ```typescript
263
274
  import { RecognizerRef } from '@gmessier/nitro-speech';
264
275
 
265
- RecognizerRef.prewarm({ locale: 'en-US' });
276
+ RecognizerRef.prewarm({ locale: 'en-US' }, { requestPermission: true });
266
277
  RecognizerRef.startListening({ locale: 'en-US' });
267
278
  RecognizerRef.addAutoFinishTime(5000);
268
279
  RecognizerRef.resetAutoFinishTime();
@@ -407,7 +418,8 @@ SpeechRecognizer.onVolumeChange = (volume) => {
407
418
  SpeechRecognizer.prewarm({
408
419
  locale: 'en-US',
409
420
  // ... your config to prepare
410
- });
421
+ }, { requestPermission: true });
422
+ );
411
423
  // OR `await` if you want to react to the success
412
424
  await SpeechRecognizer.prewarm({
413
425
  locale: 'en-US',
@@ -418,7 +430,7 @@ scheduleOnRuntime(workletRuntime, () => {
418
430
  SpeechRecognizer.prewarm({
419
431
  locale: 'en-US',
420
432
  // ... your config to prepare
421
- });
433
+ }, { requestPermission: false });
422
434
  });
423
435
 
424
436
  // Start listening
@@ -6,6 +6,8 @@ import android.content.pm.PackageManager
6
6
  import androidx.activity.ComponentActivity
7
7
  import androidx.activity.result.contract.ActivityResultContracts
8
8
  import androidx.core.content.ContextCompat
9
+ import kotlinx.coroutines.suspendCancellableCoroutine
10
+ import kotlin.coroutines.resume
9
11
 
10
12
  class AudioPermissionRequester (
11
13
  private val activity: Activity
@@ -16,12 +18,13 @@ class AudioPermissionRequester (
16
18
  private var callback: ((Boolean) -> Unit)? = null
17
19
 
18
20
  private val launcher = componentActivity.activityResultRegistry.register(
19
- "record_audio_key", ActivityResultContracts.RequestPermission()
21
+ "record_audio_key",
22
+ ActivityResultContracts.RequestPermission()
20
23
  ) { granted ->
21
24
  callback?.invoke(granted)
22
25
  }
23
26
 
24
- fun checkAndRequest(onResult: (Boolean) -> Unit) {
27
+ suspend fun checkAndRequest(): Boolean {
25
28
  val audioGranted =
26
29
  ContextCompat.checkSelfPermission(
27
30
  activity,
@@ -29,11 +32,14 @@ class AudioPermissionRequester (
29
32
  ) == PackageManager.PERMISSION_GRANTED
30
33
 
31
34
  if (audioGranted) {
32
- onResult(true)
33
- return
35
+ return true
34
36
  }
35
37
 
36
- callback = onResult
37
- launcher.launch(recordAudioPermission)
38
+ return suspendCancellableCoroutine { cont ->
39
+ callback = { granted ->
40
+ if (cont.isActive) cont.resume(granted)
41
+ }
42
+ launcher.launch(recordAudioPermission)
43
+ }
38
44
  }
39
45
  }
@@ -14,6 +14,7 @@ import com.margelo.nitro.core.Promise
14
14
  import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
15
15
  import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
16
16
  import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
17
+ import com.margelo.nitro.nitrospeech.SpeechRecognitionPrewarm
17
18
  import com.margelo.nitro.nitrospeech.VolumeChangeEvent
18
19
 
19
20
  @DoNotStrip
@@ -43,52 +44,27 @@ class HybridRecognizer: HybridRecognizerSpec() {
43
44
 
44
45
  @DoNotStrip
45
46
  @Keep
46
- override fun prewarm(defaultParams: SpeechRecognitionConfig?): Promise<Unit> {
47
- // no-op
47
+ override fun prewarm(
48
+ defaultParams: SpeechRecognitionConfig?,
49
+ options: SpeechRecognitionPrewarm?
50
+ ): Promise<Unit> {
51
+ logger.log("prewarm called")
48
52
  // nothing to prewarm
49
- return Promise()
53
+ // only request permissions
54
+ return Promise.async {
55
+ // Enabled by default for user
56
+ if (options?.requestPermission != false) {
57
+ preparePermissions(null, isPrewarm = true)
58
+ }
59
+ }
50
60
  }
51
61
 
52
62
  @DoNotStrip
53
63
  @Keep
54
64
  override fun startListening(params: SpeechRecognitionConfig?) {
55
65
  logger.log("startListening: $params")
56
- if (isActive) {
57
- onFinishRecognition(
58
- null,
59
- "Error at startListening: Previous SpeechRecognizer is still active",
60
- false
61
- )
62
- return
63
- }
64
-
65
- val context = NitroModules.applicationContext
66
- if (context == null) {
67
- onFinishRecognition(
68
- null,
69
- "Error at startListening: Context not available",
70
- true
71
- )
72
- return
73
- }
74
- val activity = context.currentActivity
75
- if (activity == null) {
76
- onFinishRecognition(
77
- null,
78
- "Error at startListening: Activity not found",
79
- true
80
- )
81
- return
82
- }
83
-
84
- val permissionRequester = AudioPermissionRequester(activity)
85
- permissionRequester.checkAndRequest { granted ->
86
- if (!granted) {
87
- onPermissionDenied?.invoke()
88
- return@checkAndRequest
89
- }
90
- config = params
91
- start(context)
66
+ Promise.async {
67
+ preparePermissions(params, isPrewarm = false)
92
68
  }
93
69
  }
94
70
 
@@ -196,6 +172,48 @@ class HybridRecognizer: HybridRecognizerSpec() {
196
172
  stopListening()
197
173
  }
198
174
 
175
+ private suspend fun preparePermissions(params: SpeechRecognitionConfig?, isPrewarm: Boolean) {
176
+ if (isActive) {
177
+ onFinishRecognition(
178
+ null,
179
+ "Error: SpeechRecognizer is already active",
180
+ false
181
+ )
182
+ return
183
+ }
184
+
185
+ val context = NitroModules.applicationContext
186
+ if (context == null) {
187
+ onFinishRecognition(
188
+ null,
189
+ "Error: Context not available",
190
+ true
191
+ )
192
+ return
193
+ }
194
+ val activity = context.currentActivity
195
+ if (activity == null) {
196
+ onFinishRecognition(
197
+ null,
198
+ "Error: Activity not found",
199
+ true
200
+ )
201
+ return
202
+ }
203
+
204
+ val permissionRequester = AudioPermissionRequester(activity)
205
+ val granted = permissionRequester.checkAndRequest()
206
+ if (!granted) {
207
+ onPermissionDenied?.invoke()
208
+ return
209
+ }
210
+ if (isPrewarm) {
211
+ return
212
+ }
213
+ config = params
214
+ start(context)
215
+ }
216
+
199
217
  private fun start(context: Context) {
200
218
  mainHandler.post {
201
219
  try {
@@ -46,17 +46,21 @@ final class AnalyzerEngine: RecognizerEngine {
46
46
  }
47
47
  }
48
48
 
49
- override func prewarm(for type: FailureType) async {
50
- await super.prewarm(for: type)
49
+ override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
50
+ await super.prewarm(for: type, options)
51
51
  do {
52
52
  // Create transcriber and install assets
53
53
  try await transcriber.create(config: self.recognizerDelegate?.config)
54
54
  }
55
55
  catch {
56
+ let failureType: FailureType = switch type {
57
+ case .prewarm: .prewarm
58
+ case .start: .start
59
+ }
56
60
  self.reportFailure(
57
61
  from: "prewarm.assets",
58
62
  message: "Failed to create transcriber",
59
- type: type
63
+ type: failureType
60
64
  )
61
65
  }
62
66
  }
@@ -93,7 +97,7 @@ final class AnalyzerEngine: RecognizerEngine {
93
97
  self?.outputContinuation?.yield(buffer)
94
98
  }
95
99
  )
96
- guard let hardwareFormat else { return }
100
+ guard let hardwareFormat = recognizerDelegate?.hardwareFormat else { return }
97
101
  let stream = AsyncStream(
98
102
  AVAudioPCMBuffer.self,
99
103
  bufferingPolicy: .unbounded
@@ -225,14 +229,14 @@ final class AnalyzerEngine: RecognizerEngine {
225
229
  if !disableRepeatingFilter {
226
230
  newBatch = Utils.repeatingFilter(newBatch)
227
231
  }
228
- Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
232
+ // Log.log("[1] lastBatch: \(self.resultBatches.last ?? "") | newBatch: \(newBatch)")
229
233
  if self.resultBatches.isEmpty {
230
234
  self.resultBatches.append(newBatch)
231
235
  } else if CMTimeGetSeconds(rangeStart) == self.lastBatchStartTime || isFinal {
232
- Log.log("[2] replace, isFinal: \(isFinal)")
236
+ // Log.log("[2] replace, isFinal: \(isFinal)")
233
237
  self.resultBatches[self.resultBatches.count - 1] = newBatch
234
238
  } else {
235
- Log.log("[2] add new batch")
239
+ // Log.log("[2] add new batch")
236
240
  self.resultBatches.append(newBatch)
237
241
  }
238
242
  self.lastBatchStartTime = CMTimeGetSeconds(rangeStart)
@@ -12,10 +12,14 @@ enum FailureType {
12
12
  case onSession
13
13
  }
14
14
 
15
+ enum PrewarmType {
16
+ case start
17
+ case prewarm
18
+ }
19
+
15
20
  class RecognizerEngine {
16
21
  var isActive = false
17
22
  var isStopping = false
18
- var hardwareFormat: AVAudioFormat?
19
23
  weak var recognizerDelegate: RecognizerDelegate?
20
24
 
21
25
  private let audioLevelTracker = AudioLevelTracker()
@@ -33,21 +37,48 @@ class RecognizerEngine {
33
37
 
34
38
  // MARK: - Recognizer Methods
35
39
 
36
- func prewarm(for: FailureType) async {
37
- self.prepareAudioEngine()
40
+ func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
41
+ // Prepare audioEngine
42
+ audioEngine = AVAudioEngine()
43
+ lg.log("[prewarm.audioEngine]")
44
+
45
+ guard let recognizerDelegate else { return }
46
+
47
+ // Everything is set, return early
48
+ if type == .prewarm, recognizerDelegate.hardwareFormat != nil {
49
+ lg.log("[prewarm.return]: Everything set")
50
+ return
51
+ }
52
+
53
+ // User explicitly asked for prewarm without requesting permissions, return early
54
+ if type == .prewarm, options?.requestPermission == false {
55
+ lg.log("[prewarm.return]: requestPermission: false")
56
+ return
57
+ }
58
+
59
+ if type == .prewarm {
60
+ // options.requestPermission is true by default
61
+ // Start Permission sequence
62
+ let granted = await requestPermissions()
63
+ if granted {
64
+ self.prewarmAudioSession(for: type)
65
+ }
66
+ } else {
67
+ self.prewarmAudioSession(for: type)
68
+ }
69
+
38
70
  // for SpeechTranscriber: .isAvailable and async assets
39
71
  // for Dictation: only async assets
40
72
  // for legacy SF: only sync .isAvailable
41
73
  }
42
74
 
43
- func start() {
44
- guard let recognizerDelegate, !isActive else { return }
75
+ func start() async {
76
+ guard !isActive else { return }
45
77
 
46
- Permissions(
47
- onGranted: self.startSession,
48
- onDenied: recognizerDelegate.permissionDenied,
49
- onError: recognizerDelegate.error
50
- ).requestAuthorization()
78
+ let granted = await requestPermissions()
79
+ if granted {
80
+ await startSession()
81
+ }
51
82
  }
52
83
 
53
84
  func stop() {
@@ -56,6 +87,55 @@ class RecognizerEngine {
56
87
  HapticImpact.trigger(with: self.recognizerDelegate?.config?.stopHapticFeedbackStyle)
57
88
  }
58
89
 
90
+ func updateSession(
91
+ newConfig: MutableSpeechRecognitionConfig? = nil,
92
+ addMsToTimer: Double? = nil,
93
+ resetTimer: Bool? = nil
94
+ ) {
95
+ guard let recognizerDelegate, isActive, !isStopping else { return }
96
+ let currentConfig = recognizerDelegate.config
97
+ // Update AutoFinish time
98
+ if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
99
+ newAutoFinish != currentConfig?.autoFinishRecognitionMs {
100
+ autoStopper?.updateThreshold(
101
+ newAutoFinish,
102
+ from: "updateSession"
103
+ )
104
+ }
105
+ // Update AutoFinish progress interval
106
+ if let newInterval = newConfig?.autoFinishProgressIntervalMs,
107
+ newInterval != currentConfig?.autoFinishProgressIntervalMs {
108
+ autoStopper?.updateProgressInterval(
109
+ newInterval,
110
+ from: "updateSession"
111
+ )
112
+ }
113
+
114
+ if let addMsToTimer {
115
+ // Add time to the timer once
116
+ autoStopper?.addMsOnce(
117
+ addMsToTimer,
118
+ from: "updateSession"
119
+ )
120
+ } else if resetTimer == true {
121
+ // Reset to current baseline threshold.
122
+ autoStopper?.resetTimer(from: "updateSession")
123
+ }
124
+ // Only update new non-nil values in the config
125
+ recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
126
+ }
127
+
128
+ func getVoiceInputVolume() -> VolumeChangeEvent? {
129
+ guard let currentSample = audioLevelTracker.currentSample else { return nil }
130
+ return VolumeChangeEvent(
131
+ smoothedVolume: currentSample.smoothed,
132
+ rawVolume: currentSample.raw,
133
+ db: currentSample.db
134
+ )
135
+ }
136
+
137
+ // MARK: Helpers
138
+
59
139
  func startSession() async {
60
140
  lg.log("[startSession.startSession]")
61
141
  // Init everything
@@ -66,15 +146,13 @@ class RecognizerEngine {
66
146
  lg.log("[startSession.initAutoStop]")
67
147
  startAppStateObserver()
68
148
  lg.log("[startSession.startAppStateObserver]")
69
- startAudioSession()
70
- lg.log("[startSession.startAudioSession]")
71
149
  }
72
150
 
73
151
  func startAudioEngine(
74
152
  onBuffer: @escaping (AVAudioPCMBuffer) -> Void
75
153
  ) {
76
154
  lg.log("[startAudioEngine]")
77
- guard let audioEngine, let hardwareFormat else { return }
155
+ guard let audioEngine, let hardwareFormat = self.recognizerDelegate?.hardwareFormat else { return }
78
156
  audioEngine.inputNode.installTap(
79
157
  onBus: 0,
80
158
  bufferSize: 1024,
@@ -124,53 +202,6 @@ class RecognizerEngine {
124
202
  recognizerDelegate.readyForSpeech()
125
203
  recognizerDelegate.result(batches: [])
126
204
  }
127
-
128
- func updateSession(
129
- newConfig: MutableSpeechRecognitionConfig? = nil,
130
- addMsToTimer: Double? = nil,
131
- resetTimer: Bool? = nil
132
- ) {
133
- guard let recognizerDelegate, isActive, !isStopping else { return }
134
- let currentConfig = recognizerDelegate.config
135
- // Update AutoFinish time
136
- if let newAutoFinish = newConfig?.autoFinishRecognitionMs,
137
- newAutoFinish != currentConfig?.autoFinishRecognitionMs {
138
- autoStopper?.updateThreshold(
139
- newAutoFinish,
140
- from: "updateSession"
141
- )
142
- }
143
- // Update AutoFinish progress interval
144
- if let newInterval = newConfig?.autoFinishProgressIntervalMs,
145
- newInterval != currentConfig?.autoFinishProgressIntervalMs {
146
- autoStopper?.updateProgressInterval(
147
- newInterval,
148
- from: "updateSession"
149
- )
150
- }
151
-
152
- if let addMsToTimer {
153
- // Add time to the timer once
154
- autoStopper?.addMsOnce(
155
- addMsToTimer,
156
- from: "updateSession"
157
- )
158
- } else if resetTimer == true {
159
- // Reset to current baseline threshold.
160
- autoStopper?.resetTimer(from: "updateSession")
161
- }
162
- // Only update new non-nil values in the config
163
- recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
164
- }
165
-
166
- func getVoiceInputVolume() -> VolumeChangeEvent? {
167
- guard let currentSample = audioLevelTracker.currentSample else { return nil }
168
- return VolumeChangeEvent(
169
- smoothedVolume: currentSample.smoothed,
170
- rawVolume: currentSample.raw,
171
- db: currentSample.db
172
- )
173
- }
174
205
 
175
206
  func cleanup(from: String) {
176
207
  lg.log("[cleanup]: \(from)")
@@ -226,29 +257,32 @@ class RecognizerEngine {
226
257
  }
227
258
  }
228
259
 
229
- // MARK: - AudioEngine heavy prepare
260
+ // MARK: Permissions
230
261
 
231
- private func prepareAudioEngine() {
232
- lg.log("[prewarm.start]")
233
- audioEngine = AVAudioEngine()
234
- guard let audioEngine else {
235
- self.reportFailure(
236
- from: "Audio Engine",
237
- message: "Audio Engine failed to initiate",
238
- // RecognizerEngine-agnostic Error
239
- type: .system
240
- )
241
- return
262
+ private func requestPermissions() async -> Bool {
263
+ guard let recognizerDelegate else { return false }
264
+ let authStatus = await Permissions.requestAuthorization()
265
+ if authStatus == .denied || authStatus == .restricted {
266
+ recognizerDelegate.permissionDenied()
267
+ return false
242
268
  }
243
- lg.log("[prewarm.audioEngine]")
244
- // heavy first hardwareFormat retrieval
245
- if hardwareFormat == nil {
246
- hardwareFormat = audioEngine.inputNode.outputFormat(forBus: 0)
247
- lg.log("[prewarm.hardwareFormat]")
269
+
270
+ if authStatus != .authorized {
271
+ // .notDetermined or unknown issue
272
+ recognizerDelegate.error(message: "Speech recognition permission is not determined")
273
+ return false
274
+ }
275
+
276
+ if !(await Permissions.requestMicrophonePermission()) {
277
+ recognizerDelegate.permissionDenied()
278
+ return false
248
279
  }
280
+
281
+ // Granted
282
+ return true
249
283
  }
250
284
 
251
- // MARK: - AutoStopper
285
+ // MARK: Auto Stopper
252
286
 
253
287
  private func initAutoStop() {
254
288
  let config = self.recognizerDelegate?.config
@@ -271,7 +305,7 @@ class RecognizerEngine {
271
305
  autoStopper = nil
272
306
  }
273
307
 
274
- // MARK: - App State Observer
308
+ // MARK: App State Observer
275
309
 
276
310
  private func startAppStateObserver() {
277
311
  appStateObserver = AppStateObserver { [weak self] in
@@ -285,12 +319,37 @@ class RecognizerEngine {
285
319
  appStateObserver = nil
286
320
  }
287
321
 
288
- // MARK: - Audio Session
322
+ // MARK: Audio Session
323
+
324
+ private func prewarmAudioSession(for type: PrewarmType) {
325
+ guard let audioEngine else {
326
+ self.reportFailure(
327
+ from: "Audio Engine",
328
+ message: "Audio Engine failed to initiate",
329
+ // RecognizerEngine-agnostic Error
330
+ type: .system
331
+ )
332
+ return
333
+ }
334
+ startAudioSession()
335
+ lg.log("[prewarmAudioSession.audioSession]")
336
+ // heavy first hardwareFormat retrieval
337
+ if let recognizerDelegate, recognizerDelegate.hardwareFormat == nil {
338
+ let format = audioEngine.inputNode.outputFormat(forBus: 0)
339
+ recognizerDelegate.setHardwareFormat(format: format)
340
+ lg.log("[prewarmAudioSession.hardwareFormat]")
341
+ }
342
+
343
+ if type == .prewarm {
344
+ stopAudioSession()
345
+ lg.log("[prewarmAudioSession.stopAudioSession]")
346
+ }
347
+ }
289
348
 
290
349
  private func startAudioSession() {
291
350
  do {
292
351
  let audioSession = AVAudioSession.sharedInstance()
293
- try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
352
+ try audioSession.setCategory(.playAndRecord, mode: .default, options: .duckOthers)
294
353
  // Required for haptic feedback
295
354
  try audioSession.setAllowHapticsAndSystemSoundsDuringRecording(true)
296
355
  try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
@@ -305,7 +364,6 @@ class RecognizerEngine {
305
364
  }
306
365
  private func stopAudioSession() {
307
366
  do {
308
- // TODO: check unduck
309
367
  try AVAudioSession.sharedInstance().setActive(false)
310
368
  } catch {
311
369
  // Just log and no-op - not critical
@@ -15,18 +15,22 @@ final class SFSpeechEngine: RecognizerEngine {
15
15
  recognitionTask?.finish()
16
16
  }
17
17
 
18
- override func prewarm(for type: FailureType) async {
18
+ override func prewarm(for type: PrewarmType, _ options: SpeechRecognitionPrewarm? = nil) async {
19
19
  speechRecognizer = SFSpeechRecognizer(
20
20
  locale: Locale(identifier: self.recognizerDelegate?.config?.locale ?? "en-US")
21
21
  )
22
22
  if speechRecognizer?.isAvailable != true {
23
+ let failureType: FailureType = switch type {
24
+ case .prewarm: .prewarm
25
+ case .start: .start
26
+ }
23
27
  self.reportFailure(
24
28
  from: "prewarm",
25
29
  message: "SFSpeechRecognizer is not available",
26
- type: type
30
+ type: failureType
27
31
  )
28
32
  }
29
- await super.prewarm(for: type)
33
+ await super.prewarm(for: type, options)
30
34
  }
31
35
 
32
36
  override func startSession() async {
@@ -1,8 +1,11 @@
1
1
  import Foundation
2
2
  import NitroModules
3
+ import AVFoundation
3
4
 
4
5
  class HybridRecognizer: HybridRecognizerSpec {
6
+ var prewarmOptions: SpeechRecognitionPrewarm?
5
7
  var config: SpeechRecognitionConfig?
8
+ var hardwareFormat: AVAudioFormat?
6
9
 
7
10
  var onReadyForSpeech: (() -> Void)?
8
11
  var onRecordingStopped: (() -> Void)?
@@ -27,12 +30,18 @@ class HybridRecognizer: HybridRecognizerSpec {
27
30
  private let lg = Lg(prefix: "HybridRecognizer")
28
31
 
29
32
  @discardableResult
30
- func prewarm(defaultParams: SpeechRecognitionConfig?) -> Promise<Void> {
33
+ func prewarm(
34
+ defaultParams: SpeechRecognitionConfig?,
35
+ options: SpeechRecognitionPrewarm?
36
+ ) -> Promise<Void> {
37
+ prewarmOptions = options
31
38
  return Promise.async(.userInitiated) { [weak self] in
39
+ // Ignore when standalone prewarm triggered for active session
40
+ guard self?.engine?.isActive != true else { return }
32
41
  // Ensure correct engine is selected based on params and ios version
33
42
  await self?.ensureEngine(params: defaultParams)
34
43
  // try to preload assets and check if speech engine is available on OS level
35
- await self?.engine?.prewarm(for: .prewarm)
44
+ await self?.engine?.prewarm(for: .prewarm, options)
36
45
  }
37
46
  }
38
47
 
@@ -40,7 +49,7 @@ class HybridRecognizer: HybridRecognizerSpec {
40
49
  Task {
41
50
  // Ensure correct engine is selected based on params and ios version
42
51
  await ensureEngine(params: params)
43
- engine?.start()
52
+ await engine?.start()
44
53
  }
45
54
  }
46
55
 
@@ -111,6 +120,8 @@ class HybridRecognizer: HybridRecognizerSpec {
111
120
 
112
121
  protocol RecognizerDelegate: AnyObject {
113
122
  var config: SpeechRecognitionConfig? { get }
123
+ var hardwareFormat: AVAudioFormat? { get }
124
+ func setHardwareFormat(format: AVAudioFormat)
114
125
  func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?)
115
126
  func reselectEngine(forPrewarm: Bool)
116
127
  func readyForSpeech()
@@ -123,6 +134,9 @@ protocol RecognizerDelegate: AnyObject {
123
134
  }
124
135
 
125
136
  extension HybridRecognizer: RecognizerDelegate {
137
+ func setHardwareFormat(format: AVAudioFormat) {
138
+ hardwareFormat = format
139
+ }
126
140
  func softlyUpdateConfig(newConfig: MutableSpeechRecognitionConfig?) {
127
141
  if let newConfig {
128
142
  config = SpeechRecognitionConfig(
@@ -193,7 +207,6 @@ extension HybridRecognizer: RecognizerDelegate {
193
207
  }
194
208
 
195
209
  func volumeChange(event: VolumeChangeEvent) {
196
- // self.lg.log("[onVolumeChange] \(event.rawVolume)")
197
210
  if onVolumeChange != nil {
198
211
  onVolumeChangeFallback = onVolumeChange
199
212
  }
@@ -212,7 +225,7 @@ extension HybridRecognizer: RecognizerDelegate {
212
225
  engine = nil
213
226
  // Try to prewarm with another candidate
214
227
  if forPrewarm {
215
- self.prewarm(defaultParams: config)
228
+ self.prewarm(defaultParams: config, options: prewarmOptions)
216
229
  } else {
217
230
  // Try to start with another candidate
218
231
  self.startListening(params: config)