@gmessier/nitro-speech 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +56 -16
  2. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +7 -7
  3. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +29 -14
  4. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/Logger.kt +16 -0
  5. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +11 -12
  6. package/ios/Audio/AudioLevelTracker.swift +16 -22
  7. package/ios/Engines/RecognizerEngine.swift +16 -13
  8. package/ios/HybridRecognizer.swift +8 -0
  9. package/ios/Shared/AutoStopper.swift +1 -1
  10. package/lib/Recognizer/RecognizerRef.d.ts +2 -0
  11. package/lib/Recognizer/RecognizerRef.js +5 -1
  12. package/lib/Recognizer/SpeechRecognizer.d.ts +2 -1
  13. package/lib/Recognizer/SpeechRecognizer.js +2 -1
  14. package/lib/Recognizer/methods.d.ts +5 -3
  15. package/lib/Recognizer/methods.js +8 -0
  16. package/lib/Recognizer/types.d.ts +3 -3
  17. package/lib/Recognizer/useRecognizer.js +10 -9
  18. package/lib/Recognizer/useRecognizerIsActive.d.ts +25 -0
  19. package/lib/Recognizer/useRecognizerIsActive.js +40 -0
  20. package/lib/Recognizer/useVoiceInputVolume.d.ts +1 -1
  21. package/lib/index.d.ts +7 -6
  22. package/lib/index.js +7 -6
  23. package/lib/specs/Recognizer.nitro.d.ts +26 -11
  24. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +5 -0
  25. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +1 -0
  26. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +4 -0
  27. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +9 -0
  28. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +8 -0
  29. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +1 -0
  30. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +12 -0
  31. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +1 -0
  32. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +1 -0
  33. package/package.json +1 -1
  34. package/src/Recognizer/RecognizerRef.ts +6 -0
  35. package/src/Recognizer/SpeechRecognizer.ts +2 -1
  36. package/src/Recognizer/methods.ts +16 -3
  37. package/src/Recognizer/types.ts +7 -1
  38. package/src/Recognizer/useRecognizer.ts +11 -7
  39. package/src/Recognizer/useRecognizerIsActive.ts +49 -0
  40. package/src/Recognizer/useVoiceInputVolume.ts +1 -1
  41. package/src/index.ts +20 -6
  42. package/src/specs/Recognizer.nitro.ts +27 -11
package/README.md CHANGED
@@ -13,7 +13,7 @@
13
13
 
14
14
  #### Key Features:
15
15
 
16
- - ⚡ Built on Nitro Modules for zero-overhead native bridging
16
+ - ⚡ Built with Nitro Modules for low-overhead native binding
17
17
  - 🌎 Supports 60+ languages
18
18
  - 🍎 The only library that uses new `SpeechAnalyzer` with `SpeechTranscriber` or `DictationTranscriber` API for iOS 26+ (with fallback to legacy `SFSpeechRecognition` for older versions)
19
19
  - ⏱️ Timer for silence
@@ -21,16 +21,16 @@
21
21
  - Callback `onAutoFinishProgress` fires periodically with interval
22
22
  - Configurable interval `autoFinishProgressIntervalMs` value (default: 1 sec)
23
23
  - Method `updateConfig` with `autoFinishRecognitionMs` and `autoFinishProgressIntervalMs`
24
- allows to change value on the fly
24
+ allows changing the value on the fly
25
25
  - Method `resetAutoFinishTime` resets the Timer to the threshold
26
26
  - Method `addAutoFinishTime` adds ms once without changing threshold
27
27
  - Configurable volume-based sensitivity `resetAutoFinishVoiceSensitivity` for the timer from 0 to 1
28
28
  - 🎤 Rich user voice input management
29
- - Hook `useVoiceInputVolume()` for `raw` or `smoothed` normalized
30
- volume level from 0 to 1 -> easy to use for UI animations;
29
+ - Hook `useVoiceInputVolume()` for `raw` or `smoothed` normalized volume level from 0 to 1 -> easy to use for UI animations;
31
30
  And `db` as human-friendly value
32
31
  - Flexible callback `onVolumeChange` for custom behavior
33
- - 🧩 Lifecycle methods: `prewarm` | `updateConfig` | `getIsActive` | `getSupportedLocalesIOS`
32
+ - Static method `getVoiceInputVolume()`
33
+ - 🧩 Lifecycle methods: `prewarm` | `updateConfig` | `getIsActive`
34
34
  - 👆 Configurable Haptic Feedback on start and finish
35
35
  - 🎚️ Speech-quality configurations:
36
36
  - Result is grouped by speech segments into Batches.
@@ -53,6 +53,7 @@
53
53
  - [Cross-component control: RecognizerRef](#cross-component-control-recognizerref)
54
54
  - [Multithreading (react-native-worklets)](#multithreading-react-native-worklets)
55
55
  - [Voice input volume](#voice-input-volume)
56
+ - [useRecognizerIsActive](#userecognizerisactive)
56
57
  - [Unsafe: SpeechRecognizer](#unsafe-speechrecognizer)
57
58
  - [Requirements](#requirements)
58
59
  - [Compatibility](#compatibility)
@@ -123,11 +124,11 @@ Both permissions are required for speech recognition to work on iOS.
123
124
  | **Auto-finish progress** | Callback `onAutoFinishProgress` with countdown until auto-stop | ✅ | ✅ |
124
125
  | **Add Auto-finish Time** | Adds time to the auto finish timer once without changing the timer threshold | ✅ | ✅ |
125
126
  | **Reset Auto-finish Time** | Resets the Timer to the threshold | ✅ | ✅ |
126
- | **Voice input volume** | Hook `useVoiceInputVolume` and `onVolumeChange` callback | ✅ | ✅ |
127
+ | **Voice input volume** | `useVoiceInputVolume`, `getVoiceInputVolume()`, `onVolumeChange` | ✅ | ✅ |
127
128
  | **Reset Auto-finish Sensitivity** | The voice detector sensitivity to reset the Auto-finish time | ✅ | ✅ |
128
129
  | **Prewarm** | Prepares resources, downloads assets, confirms locale availability | ✅ | ✅ |
129
- | **Update config** | Static method `updateConfig` allows update config on the fly | ✅ | ✅ |
130
- | **isActive** | Static method `getIsActive()` | ✅ | ✅ |
130
+ | **Update config** | Static method `updateConfig` allows updating the config on the fly | ✅ | ✅ |
131
+ | **Is Active** | Static method `getIsActive()` | ✅ | ✅ |
131
132
  | **Haptic feedback** | Haptic feedback on recording start/stop | ✅ | ✅ |
132
133
  | **Permission handling** | Dedicated `onPermissionDenied` callback | ✅ | ✅ |
133
134
  | **Background handling** | Stop when app loses focus/goes to background | ✅ | ✅ |
@@ -154,7 +155,8 @@ Because of that, treat it as a **single session owner** setup hook: use it once
154
155
  import { useRecognizer } from '@gmessier/nitro-speech';
155
156
 
156
157
  function MyComponent() {
157
- const {
158
+ const {
159
+ prewarm,
158
160
  startListening,
159
161
  stopListening,
160
162
  resetAutoFinishTime,
@@ -162,6 +164,7 @@ function MyComponent() {
162
164
  updateConfig,
163
165
  getSupportedLocalesIOS,
164
166
  getIsActive,
167
+ getVoiceInputVolume,
165
168
  } = useRecognizer({
166
169
  onReadyForSpeech: () => {
167
170
  console.log('Listening...');
@@ -259,6 +262,7 @@ If you need to call recognizer methods from other components without prop drilli
259
262
  ```typescript
260
263
  import { RecognizerRef } from '@gmessier/nitro-speech';
261
264
 
265
+ RecognizerRef.prewarm({ locale: 'en-US' });
262
266
  RecognizerRef.startListening({ locale: 'en-US' });
263
267
  RecognizerRef.addAutoFinishTime(5000);
264
268
  RecognizerRef.resetAutoFinishTime();
@@ -271,6 +275,7 @@ RecognizerRef.updateConfig(
271
275
  true
272
276
  );
273
277
  RecognizerRef.getIsActive();
278
+ RecognizerRef.getVoiceInputVolume();
274
279
  RecognizerRef.stopListening();
275
280
  // iOS only
276
281
  RecognizerRef.getSupportedLocalesIOS();
@@ -321,8 +326,6 @@ function VoiceMeter() {
321
326
  As a better alternative, you can control the volume via a SharedValue and apply it only on the UI thread with Reanimated.
322
327
  This way you will avoid re-renders, since the volume will be stored on the UI thread.
323
328
 
324
- Warning: this approach will disable the built-in `useVoiceInputVolume` hook.
325
-
326
329
  ```typescript
327
330
  function VoiceMeter() {
328
331
  const sharedVolume = useSharedValue(0)
@@ -341,17 +344,35 @@ function VoiceMeter() {
341
344
  }
342
345
  ```
343
346
 
347
+ ### useRecognizerIsActive
348
+
349
+ ```typescript
350
+ import { useRecognizerIsActive } from '@gmessier/nitro-speech';
351
+
352
+ function MyComponent() {
353
+ const isActive = useRecognizerIsActive();
354
+ return <Text>{isActive ? 'Listening...' : 'Not listening'}</Text>;
355
+ }
356
+ ```
344
357
 
345
358
  ### Unsafe: SpeechRecognizer
346
359
 
347
360
  `SpeechRecognizer` is the hybrid object. It gives direct access to callbacks and control methods, but it is unsafe to orchestrate the full session directly from it.
348
361
 
362
+ **Warning**: Since it reflects the original hybrid object, its API may change in the future.
363
+
349
364
  ```typescript
350
- import { SpeechRecognizer, speechRecognizerVolumeChangeHandler } from '@gmessier/nitro-speech';
365
+ import {
366
+ SpeechRecognizer,
367
+ speechRecognizerVolumeChangeHandler,
368
+ speechRecognizerActiveStateHandler,
369
+ } from '@gmessier/nitro-speech';
351
370
 
352
371
  // Set up callbacks
353
372
  SpeechRecognizer.onReadyForSpeech = () => {
354
373
  console.log('Listening...');
374
+ // Add speechRecognizerActiveStateHandler to enable useRecognizerIsActive hook manually
375
+ speechRecognizerActiveStateHandler(true);
355
376
  };
356
377
 
357
378
  SpeechRecognizer.onResult = (textBatches) => {
@@ -360,6 +381,8 @@ SpeechRecognizer.onResult = (textBatches) => {
360
381
 
361
382
  SpeechRecognizer.onRecordingStopped = () => {
362
383
  console.log('Stopped');
384
+ // Add speechRecognizerActiveStateHandler to enable useRecognizerIsActive hook manually
385
+ speechRecognizerActiveStateHandler(false);
363
386
  };
364
387
 
365
388
  SpeechRecognizer.onAutoFinishProgress = (timeLeftMs) => {
@@ -376,10 +399,27 @@ SpeechRecognizer.onPermissionDenied = () => {
376
399
 
377
400
  SpeechRecognizer.onVolumeChange = (volume) => {
378
401
  console.log('new volume: ', volume);
402
+ // Add speechRecognizerVolumeChangeHandler to enable useVoiceInputVolume hook manually
403
+ speechRecognizerVolumeChangeHandler(volume);
379
404
  };
380
- // OR use speechRecognizerVolumeChangeHandler to enable useVoiceInputVolume hook manually
381
- SpeechRecognizer.onVolumeChange = speechRecognizerVolumeChangeHandler
382
405
 
406
+ // Prepares resources, downloads assets, confirms locale availability
407
+ SpeechRecognizer.prewarm({
408
+ locale: 'en-US',
409
+ // ... your config to prepare
410
+ });
411
+ // OR `await` if you want to react to the success
412
+ await SpeechRecognizer.prewarm({
413
+ locale: 'en-US',
414
+ // ... your config to prepare
415
+ });
416
+ // OR from worklet (only sync)
417
+ scheduleOnRuntime(workletRuntime, () => {
418
+ SpeechRecognizer.prewarm({
419
+ locale: 'en-US',
420
+ // ... your config to prepare
421
+ });
422
+ });
383
423
 
384
424
  // Start listening
385
425
  SpeechRecognizer.startListening({
@@ -415,7 +455,7 @@ The `SpeechRecognizer.dispose()` method is **NOT SAFE** and should rarely be use
415
455
 
416
456
  ## Compatibility
417
457
 
418
- Latest versions of `@gmessier/nitro-speech` requires [react-native-nitro-modules 0.35.0 or higher](https://github.com/mrousavy/nitro/releases/tag/v0.35.0).
458
+ Latest versions of `@gmessier/nitro-speech` require [react-native-nitro-modules 0.35.0 or higher](https://github.com/mrousavy/nitro/releases/tag/v0.35.0).
419
459
 
420
460
 
421
461
  | Compatibility | Supported versions |
@@ -427,7 +467,7 @@ Latest versions of `@gmessier/nitro-speech` requires [react-native-nitro-modules
427
467
 
428
468
  ### Android Gradle sync issues
429
469
 
430
- If you're having issues with Android Gradle sync, try running the prebuild for the library, that causes the issue:
470
+ If you're having issues with Android Gradle sync, try running the prebuild for the library that causes the issue:
431
471
 
432
472
  e.g. failed in `react-native-nitro-modules`:
433
473
 
@@ -2,7 +2,6 @@ package com.margelo.nitro.nitrospeech.recognizer
2
2
 
3
3
  import android.os.Handler
4
4
  import android.os.Looper
5
- import android.util.Log
6
5
  import kotlin.math.max
7
6
 
8
7
  class AutoStopper(
@@ -12,12 +11,13 @@ class AutoStopper(
12
11
  val onTimeout: () -> Unit,
13
12
  ) {
14
13
  companion object {
15
- private const val TAG = "HybridRecognizer"
16
14
  private const val DEFAULT_SILENCE_THRESHOLD_MS = 8000.0
17
15
  private const val DEFAULT_PROGRESS_INTERVAL_MS = 1000.0
18
16
  private const val MIN_PROGRESS_INTERVAL_MS = 50.0
19
17
  }
20
18
 
19
+ private val logger = Logger(disable = false)
20
+
21
21
  private var silenceThresholdMs: Double = clampMs(silenceThresholdMs ?: DEFAULT_SILENCE_THRESHOLD_MS)
22
22
  private var progressIntervalMs: Double = clampMs(progressIntervalMs ?: DEFAULT_PROGRESS_INTERVAL_MS)
23
23
 
@@ -31,7 +31,7 @@ class AutoStopper(
31
31
  private val tickRunnable = Runnable { tick() }
32
32
 
33
33
  fun resetTimer() {
34
- Log.d(TAG, "resetTimer | isStopped: $isStopped | ms: ${System.currentTimeMillis()}")
34
+ logger.log("resetTimer | isStopped: $isStopped | ms: ${System.currentTimeMillis()}")
35
35
  handler.removeCallbacks(tickRunnable)
36
36
  isTimerScheduled = false
37
37
  if (isStopped) return
@@ -55,7 +55,7 @@ class AutoStopper(
55
55
 
56
56
  fun addMsOnce(extraMs: Double) {
57
57
  if (isStopped || !extraMs.isFinite()) return
58
- Log.d(TAG, "addMsOnce | extraMs: $extraMs")
58
+ logger.log("addMsOnce | extraMs: $extraMs")
59
59
  timeLeftMs += extraMs
60
60
  didTimeout = false
61
61
  if (timeLeftMs > 0 && isTimerScheduled) {
@@ -65,7 +65,7 @@ class AutoStopper(
65
65
 
66
66
  fun updateProgressInterval(newIntervalMs: Double) {
67
67
  if (isStopped) return
68
- Log.d(TAG, "updateProgressInterval | newIntervalMs: $newIntervalMs")
68
+ logger.log("updateProgressInterval | newIntervalMs: $newIntervalMs")
69
69
  progressIntervalMs = clampMs(newIntervalMs)
70
70
  if (isTimerScheduled) {
71
71
  scheduleNextTickLocked()
@@ -83,7 +83,7 @@ class AutoStopper(
83
83
  if (isStopped || didTimeout) return
84
84
  timeLeftMs -= progressIntervalMs
85
85
  if (timeLeftMs > 0) {
86
- Log.d(TAG, "onProgress | timeLeftMs: $timeLeftMs")
86
+ logger.log("onProgress | timeLeftMs: $timeLeftMs")
87
87
  onProgress(timeLeftMs)
88
88
  scheduleNextTickLocked()
89
89
  return
@@ -92,7 +92,7 @@ class AutoStopper(
92
92
  didTimeout = true
93
93
  handler.removeCallbacks(tickRunnable)
94
94
  isTimerScheduled = false
95
- Log.d(TAG, "onTimeout | ms: ${System.currentTimeMillis()}")
95
+ logger.log("onTimeout | ms: ${System.currentTimeMillis()}")
96
96
  onTimeout()
97
97
  }
98
98
 
@@ -7,7 +7,6 @@ import android.os.Handler
7
7
  import android.os.Looper
8
8
  import android.speech.RecognizerIntent
9
9
  import android.speech.SpeechRecognizer
10
- import android.util.Log
11
10
  import androidx.annotation.Keep
12
11
  import com.facebook.proguard.annotations.DoNotStrip
13
12
  import com.margelo.nitro.NitroModules
@@ -21,12 +20,14 @@ import com.margelo.nitro.nitrospeech.VolumeChangeEvent
21
20
  @Keep
22
21
  class HybridRecognizer: HybridRecognizerSpec() {
23
22
  companion object {
24
- private const val TAG = "HybridRecognizer"
25
23
  private const val POST_RECOGNITION_DELAY = 250L
26
24
  }
27
25
 
26
+ private val logger = Logger(disable = false)
27
+
28
28
  private var isActive: Boolean = false
29
29
  private var config: SpeechRecognitionConfig? = null
30
+ private var volumeChangeEvent: VolumeChangeEvent = VolumeChangeEvent(0.0,0.0,null)
30
31
  private var autoStopper: AutoStopper? = null
31
32
  private var speechRecognizer: SpeechRecognizer? = null
32
33
  private val mainHandler = Handler(Looper.getMainLooper())
@@ -51,7 +52,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
51
52
  @DoNotStrip
52
53
  @Keep
53
54
  override fun startListening(params: SpeechRecognitionConfig?) {
54
- Log.d(TAG, "startListening: $params")
55
+ logger.log("startListening: $params")
55
56
  if (isActive) {
56
57
  onFinishRecognition(
57
58
  null,
@@ -94,7 +95,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
94
95
  @DoNotStrip
95
96
  @Keep
96
97
  override fun stopListening() {
97
- Log.d(TAG, "stopListening called")
98
+ logger.log("stopListening called")
98
99
  if (!isActive) return
99
100
  onFinishRecognition(null, null, true)
100
101
  mainHandler.postDelayed({
@@ -117,7 +118,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
117
118
  @DoNotStrip
118
119
  @Keep
119
120
  override fun addAutoFinishTime(additionalTimeMs: Double?) {
120
- Log.d(TAG, "addAutoFinishTime")
121
+ logger.log("addAutoFinishTime")
121
122
  if (!isActive) return
122
123
 
123
124
  if (additionalTimeMs != null) {
@@ -134,7 +135,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
134
135
  newConfig: MutableSpeechRecognitionConfig?,
135
136
  resetAutoFinishTime: Boolean?
136
137
  ) {
137
- Log.d(TAG, "updateConfig $newConfig",)
138
+ logger.log("updateConfig $newConfig",)
138
139
  if (!isActive) return
139
140
 
140
141
  val newTimeMs = if (newConfig?.autoFinishRecognitionMs != null) newConfig.autoFinishRecognitionMs else config?.autoFinishRecognitionMs
@@ -177,6 +178,12 @@ class HybridRecognizer: HybridRecognizerSpec() {
177
178
  return isActive
178
179
  }
179
180
 
181
+ @DoNotStrip
182
+ @Keep
183
+ override fun getVoiceInputVolume(): VolumeChangeEvent {
184
+ return volumeChangeEvent
185
+ }
186
+
180
187
  @DoNotStrip
181
188
  @Keep
182
189
  override fun getSupportedLocalesIOS(): Array<String> {
@@ -204,12 +211,14 @@ class HybridRecognizer: HybridRecognizerSpec() {
204
211
  }
205
212
  )
206
213
  val recognitionListenerSession = RecognitionListenerSession(
207
- autoStopper,
208
- config,
209
- onVolumeChange
210
- ) { result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean ->
211
- onFinishRecognition(result, errorMessage, recordingStopped)
212
- }
214
+ autoStopper,
215
+ config,
216
+ fireVolumeChangeEvent = { event -> fireVolumeChangeEvent(event) },
217
+ onFinishRecognition = { result, errorMessage, recordingStopped ->
218
+ onFinishRecognition(result, errorMessage, recordingStopped)
219
+ }
220
+ )
221
+
213
222
  speechRecognizer?.setRecognitionListener(recognitionListenerSession.createRecognitionListener())
214
223
 
215
224
  val languageModel = if (config?.androidUseWebSearchModel == true) RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH else RecognizerIntent.LANGUAGE_MODEL_FREE_FORM
@@ -262,7 +271,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
262
271
 
263
272
  private fun cleanup() {
264
273
  try {
265
- Log.d(TAG, "cleanup called")
274
+ logger.log("cleanup called")
266
275
  autoStopper?.stop()
267
276
  autoStopper = null
268
277
  speechRecognizer?.stopListening()
@@ -270,7 +279,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
270
279
  speechRecognizer = null
271
280
  isActive = false
272
281
  // Reset voice meter in JS consumers after stop/error cleanup.
273
- onVolumeChange?.invoke(VolumeChangeEvent(0.0,0.0,null))
282
+ fireVolumeChangeEvent(VolumeChangeEvent(0.0,0.0,null))
274
283
  } catch (e: Exception) {
275
284
  onFinishRecognition(
276
285
  null,
@@ -291,4 +300,10 @@ class HybridRecognizer: HybridRecognizerSpec() {
291
300
  onResult?.invoke(result.toTypedArray())
292
301
  }
293
302
  }
303
+
304
+ private fun fireVolumeChangeEvent(event: VolumeChangeEvent) {
305
+ logger.log("fireVolumeChangeEvent ${event}")
306
+ volumeChangeEvent = event
307
+ onVolumeChange?.invoke(event)
308
+ }
294
309
  }
@@ -0,0 +1,16 @@
1
+ package com.margelo.nitro.nitrospeech.recognizer
2
+
3
+ import android.util.Log
4
+
5
+ class Logger (
6
+ private val disable: Boolean
7
+ ) {
8
+ private val isLogging = false
9
+ companion object {
10
+ private const val TAG = "HybridRecognizer"
11
+ }
12
+ fun log(message: String) {
13
+ if (disable || !isLogging) return
14
+ Log.d(TAG, message)
15
+ }
16
+ }
@@ -3,7 +3,6 @@ package com.margelo.nitro.nitrospeech.recognizer
3
3
  import android.os.Bundle
4
4
  import android.speech.RecognitionListener
5
5
  import android.speech.SpeechRecognizer
6
- import android.util.Log
7
6
  import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
8
7
  import com.margelo.nitro.nitrospeech.VolumeChangeEvent
9
8
  import kotlin.math.max
@@ -12,11 +11,11 @@ import kotlin.math.roundToInt
12
11
  class RecognitionListenerSession (
13
12
  private val autoStopper: AutoStopper?,
14
13
  private val config: SpeechRecognitionConfig?,
15
- private val onVolumeChange: ((event: VolumeChangeEvent) -> Unit)?,
14
+ private val fireVolumeChangeEvent: (event: VolumeChangeEvent) -> Unit,
16
15
  private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
17
16
  ) {
17
+ private val logger = Logger(disable = false)
18
18
  companion object {
19
- private const val TAG = "HybridRecognizer"
20
19
  private const val SPEECH_LEVEL_THRESHOLD = 0.35
21
20
  private const val FLOOR_RISE_ALPHA = 0.01f
22
21
  private const val FLOOR_FALL_ALPHA = 0.20f
@@ -40,11 +39,11 @@ class RecognitionListenerSession (
40
39
  override fun onBeginningOfSpeech() {}
41
40
  override fun onRmsChanged(rmsdB: Float) {
42
41
  val volumeEvent = getVolume(rmsdB)
43
- onVolumeChange?.invoke(volumeEvent)
42
+ fireVolumeChangeEvent(volumeEvent)
44
43
  val threshold =
45
44
  config?.resetAutoFinishVoiceSensitivity?.coerceIn(0.0, 1.0)
46
45
  ?: SPEECH_LEVEL_THRESHOLD.toDouble()
47
- Log.d(TAG, "onRmsChanged: ${volumeEvent}")
46
+ // logger.log("onRmsChanged: ${volumeEvent}")
48
47
  if (volumeEvent.rawVolume > threshold) {
49
48
  autoStopper?.resetTimer()
50
49
  }
@@ -75,7 +74,7 @@ class RecognitionListenerSession (
75
74
  }
76
75
 
77
76
  override fun onResults(results: Bundle?) {
78
- Log.d(TAG, "onResults: $resultBatches")
77
+ logger.log("onResults: $resultBatches")
79
78
  onFinishRecognition(resultBatches, null, true)
80
79
  autoStopper?.stop()
81
80
  autoStopper?.onTimeout()
@@ -85,26 +84,26 @@ class RecognitionListenerSession (
85
84
  val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
86
85
 
87
86
  if (matches.isNullOrEmpty() || matches[0] == "") {
88
- Log.d(TAG, "onPartialResults[0], skip, NO RECOGNIZE")
87
+ logger.log("onPartialResults[0], skip, NO RECOGNIZE")
89
88
  return
90
89
  }
91
90
 
92
91
  autoStopper?.resetTimer()
93
- Log.d(TAG, "onPartialResults[0], add ${matches[0]}")
92
+ logger.log("onPartialResults[0], add ${matches[0]}")
94
93
  var currentBatches = resultBatches
95
94
  if (currentBatches.isNullOrEmpty()) {
96
- Log.d(TAG, "onPartialResults[1], NO BATCHES YET | add first")
95
+ logger.log("onPartialResults[1], NO BATCHES YET | add first")
97
96
  currentBatches = arrayListOf(matches[0])
98
97
  } else {
99
- Log.d(TAG, "onPartialResults[1], current batches $currentBatches")
98
+ logger.log("onPartialResults[1], current batches $currentBatches")
100
99
  val prevBatchLength = currentBatches[currentBatches.lastIndex].length
101
100
  val match = if (config?.disableRepeatingFilter == true) matches[0] else repeatingFilter(matches[0])
102
101
  val matchLength = match.length
103
102
  if (config?.androidDisableBatchHandling == true || matchLength + 3 < prevBatchLength) {
104
- Log.d(TAG, "onPartialResults[2], append new batch")
103
+ logger.log("onPartialResults[2], append new batch")
105
104
  currentBatches.add(match)
106
105
  } else {
107
- Log.d(TAG, "onPartialResults[2], update batch, replace #${currentBatches.lastIndex}")
106
+ logger.log("onPartialResults[2], update batch, replace #${currentBatches.lastIndex}")
108
107
  currentBatches[currentBatches.lastIndex] = match
109
108
  }
110
109
  }
@@ -16,33 +16,18 @@ final class AudioLevelTracker {
16
16
  private static let meterRelease: Float = 0.08
17
17
  private static let defaultAutoStopResetThreshold: Double = 0.4
18
18
 
19
- private var autoStopResetThreshold: Double
20
19
  private var smoothedLevel: Float = 0
20
+
21
+ var currentSample: AudioLevelSample?
21
22
 
22
- init(resetAutoFinishVoiceSensitivity: Double?) {
23
- if let resetAutoFinishVoiceSensitivity {
24
- // Clamp value between 0 and 1
25
- self.autoStopResetThreshold = max(0, min(1, resetAutoFinishVoiceSensitivity))
26
- } else {
27
- self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
28
- }
29
- }
30
-
31
- func updateResetAutoFinishVoiceSensitivity(newValue: Double?) {
32
- if let newValue {
33
- // Clamp value between 0 and 1
34
- self.autoStopResetThreshold = max(0, min(1, newValue))
35
- } else {
36
- self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
37
- }
38
- }
23
+ private let lg = Lg(prefix: "RecognizerEngine")
39
24
 
40
25
  func reset() {
41
26
  smoothedLevel = 0
42
- self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
27
+ currentSample = nil
43
28
  }
44
29
 
45
- func process(_ buffer: AVAudioPCMBuffer) -> AudioLevelSample? {
30
+ func process(_ buffer: AVAudioPCMBuffer,_ autoStopResetThreshold: Double? = nil) -> AudioLevelSample? {
46
31
  guard let samples = buffer.floatChannelData?[0] else { return nil }
47
32
 
48
33
  let frameCount = Int(buffer.frameLength)
@@ -56,11 +41,20 @@ final class AudioLevelTracker {
56
41
  let coeff = normalized > smoothedLevel ? Self.meterAttack : Self.meterRelease
57
42
  smoothedLevel += coeff * (normalized - smoothedLevel)
58
43
 
59
- return AudioLevelSample(
44
+ var threshold = Self.defaultAutoStopResetThreshold
45
+ if let autoStopResetThreshold {
46
+ threshold = max(0, min(1, autoStopResetThreshold))
47
+ }
48
+
49
+ currentSample = AudioLevelSample(
60
50
  smoothed: Double(smoothedLevel * 1_000_000).rounded() / 1_000_000,
61
51
  raw: Double(normalized * 1_000_000).rounded() / 1_000_000,
62
52
  db: Double(db * 1_000).rounded() / 1_000,
63
- resetTimer: Double(normalized) >= self.autoStopResetThreshold
53
+ resetTimer: Double(normalized) >= threshold
64
54
  )
55
+
56
+ lg.log("[AudioLevelTracker.process] autoStopResetThreshold: \(threshold)")
57
+
58
+ return currentSample
65
59
  }
66
60
  }
@@ -18,7 +18,7 @@ class RecognizerEngine {
18
18
  var hardwareFormat: AVAudioFormat?
19
19
  weak var recognizerDelegate: RecognizerDelegate?
20
20
 
21
- private let audioLevelTracker: AudioLevelTracker
21
+ private let audioLevelTracker = AudioLevelTracker()
22
22
  private var appStateObserver: AppStateObserver?
23
23
  private var audioEngine: AVAudioEngine?
24
24
  private var autoStopper: AutoStopper?
@@ -29,9 +29,6 @@ class RecognizerEngine {
29
29
  init(locale: Locale, delegate: RecognizerDelegate) {
30
30
  self.locale = locale
31
31
  self.recognizerDelegate = delegate
32
- self.audioLevelTracker = AudioLevelTracker(
33
- resetAutoFinishVoiceSensitivity: delegate.config?.resetAutoFinishVoiceSensitivity
34
- )
35
32
  }
36
33
 
37
34
  // MARK: - Recognizer Methods
@@ -84,7 +81,10 @@ class RecognizerEngine {
84
81
  format: hardwareFormat
85
82
  ) { [weak self] buffer, _ in
86
83
  guard let self, let recognizerDelegate = self.recognizerDelegate else { return }
87
- if let sample = self.audioLevelTracker.process(buffer) {
84
+ if let sample = self.audioLevelTracker.process(
85
+ buffer,
86
+ recognizerDelegate.config?.resetAutoFinishVoiceSensitivity
87
+ ) {
88
88
  // Send buffer volume data
89
89
  recognizerDelegate.volumeChange(
90
90
  event:
@@ -148,13 +148,7 @@ class RecognizerEngine {
148
148
  from: "updateSession"
149
149
  )
150
150
  }
151
- // Update AutoFinish reset voice sensitivity interval
152
- if let newSensitivity = newConfig?.resetAutoFinishVoiceSensitivity,
153
- newSensitivity != currentConfig?.resetAutoFinishVoiceSensitivity {
154
- audioLevelTracker.updateResetAutoFinishVoiceSensitivity(
155
- newValue: newSensitivity
156
- )
157
- }
151
+
158
152
  if let addMsToTimer {
159
153
  // Add time to the timer once
160
154
  autoStopper?.addMsOnce(
@@ -168,7 +162,16 @@ class RecognizerEngine {
168
162
  // Only update new non-nil values in the config
169
163
  recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
170
164
  }
171
-
165
+
166
+ func getVoiceInputVolume() -> VolumeChangeEvent? {
167
+ guard let currentSample = audioLevelTracker.currentSample else { return nil }
168
+ return VolumeChangeEvent(
169
+ smoothedVolume: currentSample.smoothed,
170
+ rawVolume: currentSample.raw,
171
+ db: currentSample.db
172
+ )
173
+ }
174
+
172
175
  func cleanup(from: String) {
173
176
  lg.log("[cleanup]: \(from)")
174
177
  let wasActive = isActive
@@ -68,6 +68,14 @@ class HybridRecognizer: HybridRecognizerSpec {
68
68
  func getIsActive() -> Bool {
69
69
  engine?.isActive ?? false
70
70
  }
71
+
72
+ func getVoiceInputVolume() -> VolumeChangeEvent {
73
+ return engine?.getVoiceInputVolume() ?? VolumeChangeEvent(
74
+ smoothedVolume: 0,
75
+ rawVolume: 0,
76
+ db: nil
77
+ )
78
+ }
71
79
 
72
80
  func getSupportedLocalesIOS() -> [String] {
73
81
  return self.coordinator.getSupportedLocales()
@@ -5,7 +5,7 @@ final class AutoStopper {
5
5
  private static let defaultProgressIntervalMs = 1000.0
6
6
  private static let minProgressIntervalMs = 50.0
7
7
 
8
- private let lg = Lg(prefix: "AutoStopper", disable: true)
8
+ private let lg = Lg(prefix: "AutoStopper", disable: false)
9
9
 
10
10
  private let queue = DispatchQueue(label: "com.margelo.nitrospeech.autostopper")
11
11
 
@@ -1,5 +1,7 @@
1
1
  import type { RecognizerMethods } from './types';
2
2
  /**
3
3
  * Safe cross-component reference to the Speech Recognizer methods.
4
+ *
5
+ * All methods support worklets and UI thread calls
4
6
  */
5
7
  export declare const RecognizerRef: RecognizerMethods;
@@ -1,13 +1,17 @@
1
- import { recognizerAddAutoFinishTime, recognizerGetSupportedLocalesIOS, recognizerGetIsActive, recognizerResetAutoFinishTime, recognizerStartListening, recognizerStopListening, recognizerUpdateConfig, } from './methods';
1
+ import { recognizerAddAutoFinishTime, recognizerGetSupportedLocalesIOS, recognizerGetIsActive, recognizerResetAutoFinishTime, recognizerStartListening, recognizerStopListening, recognizerUpdateConfig, recognizerGetVoiceInputVolume, recognizerPrewarm, } from './methods';
2
2
  /**
3
3
  * Safe cross-component reference to the Speech Recognizer methods.
4
+ *
5
+ * All methods support worklets and UI thread calls
4
6
  */
5
7
  export const RecognizerRef = {
8
+ prewarm: recognizerPrewarm,
6
9
  startListening: recognizerStartListening,
7
10
  stopListening: recognizerStopListening,
8
11
  resetAutoFinishTime: recognizerResetAutoFinishTime,
9
12
  addAutoFinishTime: recognizerAddAutoFinishTime,
10
13
  updateConfig: recognizerUpdateConfig,
11
14
  getIsActive: recognizerGetIsActive,
15
+ getVoiceInputVolume: recognizerGetVoiceInputVolume,
12
16
  getSupportedLocalesIOS: recognizerGetSupportedLocalesIOS,
13
17
  };
@@ -3,6 +3,7 @@
3
3
  *
4
4
  * Direct access to all the Speech Recognizer methods and callbacks.
5
5
  *
6
- * @note unsafe, might lead to race conditions
6
+ * @note Unsafe, might lead to race conditions
7
+ * @warning Since it reflects the original hybrid object, its API may change in the future.
7
8
  */
8
9
  export declare const SpeechRecognizer: import("./types").RecognizerSpec;
@@ -4,6 +4,7 @@ import { NitroSpeech } from '../NitroSpeech';
4
4
  *
5
5
  * Direct access to all the Speech Recognizer methods and callbacks.
6
6
  *
7
- * @note unsafe, might lead to race conditions
7
+ * @note Unsafe, might lead to race conditions
8
+ * @warning Since it reflects the original hybrid object, its API may change in the future.
8
9
  */
9
10
  export const SpeechRecognizer = NitroSpeech.recognizer;