@gmessier/nitro-speech 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +64 -11
  2. package/android/build.gradle +2 -0
  3. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HapticImpact.kt +11 -1
  4. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +12 -6
  5. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +73 -7
  6. package/ios/AnylyzerTranscriber.swift +331 -0
  7. package/ios/AutoStopper.swift +9 -10
  8. package/ios/BufferUtil.swift +80 -0
  9. package/ios/HapticImpact.swift +12 -3
  10. package/ios/HybridNitroSpeech.swift +10 -1
  11. package/ios/HybridRecognizer.swift +139 -167
  12. package/ios/LegacySpeechRecognizer.swift +161 -0
  13. package/lib/commonjs/index.js +54 -5
  14. package/lib/commonjs/index.js.map +1 -1
  15. package/lib/module/index.js +52 -3
  16. package/lib/module/index.js.map +1 -1
  17. package/lib/tsconfig.tsbuildinfo +1 -1
  18. package/lib/typescript/index.d.ts +25 -8
  19. package/lib/typescript/index.d.ts.map +1 -1
  20. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +24 -12
  21. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +1 -1
  22. package/nitrogen/generated/android/c++/JHapticFeedbackStyle.hpp +3 -0
  23. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +22 -0
  24. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +3 -0
  25. package/nitrogen/generated/android/c++/JSpeechToTextParams.hpp +4 -4
  26. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HapticFeedbackStyle.kt +2 -1
  27. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -0
  28. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +3 -3
  29. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +24 -0
  30. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +15 -0
  31. package/nitrogen/generated/ios/swift/HapticFeedbackStyle.swift +4 -0
  32. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +2 -0
  33. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +44 -0
  34. package/nitrogen/generated/ios/swift/SpeechToTextParams.swift +6 -6
  35. package/nitrogen/generated/shared/c++/HapticFeedbackStyle.hpp +4 -0
  36. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +3 -0
  37. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +3 -0
  38. package/nitrogen/generated/shared/c++/SpeechToTextParams.hpp +5 -5
  39. package/package.json +7 -7
  40. package/src/index.ts +59 -2
  41. package/src/specs/NitroSpeech.nitro.ts +25 -12
package/README.md CHANGED
@@ -16,11 +16,13 @@ React Native Real-Time Speech Recognition Library, powered by [Nitro Modules](ht
16
16
  #### Key Features:
17
17
 
18
18
  - Built on Nitro Modules for low-overhead native bridging
19
+ - Uses the newest, advanced Speech-to-Text API on iOS 26+ (with a fallback to the legacy SpeechRecognition API on older versions)
19
20
  - Configurable Timer for silence (default: 8 sec)
20
21
  - Callback `onAutoFinishProgress` for progress bars, etc...
21
22
  - Method `addAutoFinishTime` for single timer update
22
23
  - Method `updateAutoFinishTime` for constant timer update
23
- - Optional Haptic Feedback on start and finish
24
+ - Configurable Haptic Feedback on start and finish
25
+ - Flexible `useVoiceInputVolume` hook to display user input volume in UI animations
24
26
  - Speech-quality configurations:
25
27
  - Result is grouped by speech segments into Batches.
26
28
  - Param `disableRepeatingFilter` for consecutive duplicate-word filtering.
@@ -38,6 +40,7 @@ React Native Real-Time Speech Recognition Library, powered by [Nitro Modules](ht
38
40
  - [Recommended: useRecognizer Hook](#recommended-userecognizer-hook)
39
41
  - [With React Navigation (important)](#with-react-navigation-important)
40
42
  - [Cross-component control: RecognizerRef](#cross-component-control-recognizerref)
43
+ - [Voice input volume](#voice-input-volume)
41
44
  - [Unsafe: RecognizerSession](#unsafe-recognizersession)
42
45
  - [API Reference](#api-reference)
43
46
  - [Requirements](#requirements)
@@ -107,6 +110,7 @@ Both permissions are required for speech recognition to work on iOS.
107
110
  | **Haptic feedback** | Optional haptics on recording start/stop | ✅ | ✅ |
108
111
  | **Background handling** | Auto-stop when app loses focus/goes to background | ✅ | Not Safe *(TODO)* |
109
112
  | **Permission handling** | Dedicated `onPermissionDenied` callback | ✅ | ✅ |
113
+ | **Voice input volume** | Normalized voice input level for UI meters (`useVoiceInputVolume`) | ✅ | ✅ |
110
114
  | **Repeating word filter** | Removes consecutive duplicate words from artifacts | ✅ | ✅ |
111
115
  | **Locale support** | Configure speech recognizer for different languages | ✅ | ✅ |
112
116
  | **Contextual strings** | Domain-specific vocabulary for improved accuracy | ✅ | ✅ |
@@ -166,7 +170,7 @@ function MyComponent() {
166
170
  // iOS specific
167
171
  iosAddPunctuation: true,
168
172
  // Android specific
169
- androidMaskOffensiveWords: false,
173
+ maskOffensiveWords: false,
170
174
  androidFormattingPreferQuality: false,
171
175
  androidUseWebSearchModel: false,
172
176
  androidDisableBatchHandling: false,
@@ -218,17 +222,58 @@ import { RecognizerRef } from '@gmessier/nitro-speech';
218
222
  RecognizerRef.startListening({ locale: 'en-US' });
219
223
  RecognizerRef.addAutoFinishTime(5000);
220
224
  RecognizerRef.updateAutoFinishTime(10000, true);
225
+ RecognizerRef.getIsActive();
221
226
  RecognizerRef.stopListening();
222
227
  ```
223
228
 
224
229
  `RecognizerRef` exposes only method handlers and is safe for cross-component method access.
225
230
 
231
+ ### Voice input volume
232
+
233
+ #### useVoiceInputVolume
234
+
235
+ By default, you can use `useVoiceInputVolume` to read the normalized voice input level (`0..1`) for UI meters.
236
+ ⚠️ **Technical limitation**: this approach re-renders the component frequently.
237
+
238
+ ```typescript
239
+ import { useVoiceInputVolume } from '@gmessier/nitro-speech';
240
+
241
+ function VoiceMeter() {
242
+ const volume = useVoiceInputVolume();
243
+ return <Text>{volume.toFixed(2)}</Text>;
244
+ }
245
+ ```
246
+
247
+ #### Reanimated: useSharedValue, worklets, UI thread
248
+
249
+ As a better alternative, you can store the volume in a SharedValue and apply it only on the UI thread with Reanimated.
250
+ This way you avoid re-renders, since the volume is stored on the UI thread.
251
+
252
+ ```typescript
253
+ function VoiceMeter() {
254
+ const sharedVolume = useSharedValue(0)
255
+ const {
256
+ // ...
257
+ } = useRecognizer(
258
+ {
259
+ // ...
260
+ onVolumeChange: (normVolume) => {
261
+ "worklet";
262
+ sharedVolume.value = normVolume
263
+ },
264
+ // ...
265
+ }
266
+ );
267
+ }
268
+ ```
269
+
270
+
226
271
  ### Unsafe: RecognizerSession
227
272
 
228
273
  `RecognizerSession` is the hybrid object. It gives direct access to callbacks and control methods, but it is unsafe to orchestrate the full session directly from it.
229
274
 
230
275
  ```typescript
231
- import { RecognizerSession } from '@gmessier/nitro-speech';
276
+ import { RecognizerSession, unsafe_onVolumeChange } from '@gmessier/nitro-speech';
232
277
 
233
278
  // Set up callbacks
234
279
  RecognizerSession.onReadyForSpeech = () => {
@@ -255,6 +300,13 @@ RecognizerSession.onPermissionDenied = () => {
255
300
  console.log('Permission denied');
256
301
  };
257
302
 
303
+ RecognizerSession.onVolumeChange = (volume) => {
304
+ console.log('new volume: ', volume);
305
+ };
306
+ // OR use unsafe_onVolumeChange to enable useVoiceInputVolume hook manually
307
+ RecognizerSession.onVolumeChange = unsafe_onVolumeChange
308
+
309
+
258
310
  // Start listening
259
311
  RecognizerSession.startListening({
260
312
  locale: 'en-US',
@@ -305,6 +357,7 @@ The `RecognizerSession.dispose()` method is **NOT SAFE** and should rarely be us
305
357
  - `stopListening()` - Stop speech recognition
306
358
  - `addAutoFinishTime(additionalTimeMs?: number)` - Add time to the auto-finish timer (or reset to original if no parameter)
307
359
  - `updateAutoFinishTime(newTimeMs: number, withRefresh?: boolean)` - Update the auto-finish timer
360
+ - `getIsActive()` - Returns `true` if speech recognition is active
308
361
 
309
362
  ### `RecognizerRef`
310
363
 
@@ -312,6 +365,11 @@ The `RecognizerSession.dispose()` method is **NOT SAFE** and should rarely be us
312
365
  - `stopListening()`
313
366
  - `addAutoFinishTime(additionalTimeMs?: number)`
314
367
  - `updateAutoFinishTime(newTimeMs: number, withRefresh?: boolean)`
368
+ - `getIsActive()`
369
+
370
+ ### `useVoiceInputVolume`
371
+
372
+ - `useVoiceInputVolume(): number`
315
373
 
316
374
  ### `RecognizerSession`
317
375
 
@@ -328,8 +386,9 @@ Configuration object for speech recognition.
328
386
  - `autoFinishRecognitionMs?: number` - Auto-stop timeout in milliseconds (default: `8000`)
329
387
  - `contextualStrings?: string[]` - Array of domain-specific words for better recognition
330
388
  - `disableRepeatingFilter?: boolean` - Disable filter that removes consecutive duplicate words (default: `false`)
331
- - `startHapticFeedbackStyle?: 'light' | 'medium' | 'heavy'` - Haptic feedback style when microphone starts recording (default: `null` / disabled)
332
- - `stopHapticFeedbackStyle?: 'light' | 'medium' | 'heavy'` - Haptic feedback style when microphone stops recording (default: `null` / disabled)
389
+ - `startHapticFeedbackStyle?: 'light' | 'medium' | 'heavy' | 'none'` - Haptic feedback style when microphone starts recording (default: `"medium"`)
390
+ - `stopHapticFeedbackStyle?: 'light' | 'medium' | 'heavy' | 'none'` - Haptic feedback style when microphone stops recording (default: `"medium"`)
391
+ - `maskOffensiveWords?: boolean` - Mask offensive words with asterisks. (Android 13+, iOS 26+, default: `false`. iOS <26: always `false`)
333
392
 
334
393
  #### iOS-Specific Parameters
335
394
 
@@ -337,7 +396,6 @@ Configuration object for speech recognition.
337
396
 
338
397
  #### Android-Specific Parameters
339
398
 
340
- - `androidMaskOffensiveWords?: boolean` - Mask offensive words (Android 13+, default: `false`)
341
399
  - `androidFormattingPreferQuality?: boolean` - Prefer quality over latency (Android 13+, default: `false`)
342
400
  - `androidUseWebSearchModel?: boolean` - Use web search language model instead of free-form (default: `false`)
343
401
  - `androidDisableBatchHandling?: boolean` - Disable default batch handling (may add many empty batches, default: `false`)
@@ -361,8 +419,3 @@ cd android && ./gradlew :react-native-nitro-modules:preBuild
361
419
  ## License
362
420
 
363
421
  MIT
364
-
365
- ## TODO
366
-
367
- - [ ] (Android) Timer till the auto finish is called
368
- - [ ] (Android) Cleanup when app loses the focus
@@ -62,6 +62,8 @@ android {
62
62
  }
63
63
  }
64
64
  }
65
+
66
+ consumerProguardFiles 'proguard-rules.pro'
65
67
  }
66
68
 
67
69
  externalNativeBuild {
@@ -8,7 +8,7 @@ import android.os.VibratorManager
8
8
  import com.margelo.nitro.nitrospeech.HapticFeedbackStyle
9
9
 
10
10
  class HapticImpact(
11
- private val style: HapticFeedbackStyle = HapticFeedbackStyle.MEDIUM,
11
+ private val style: HapticFeedbackStyle?
12
12
  ) {
13
13
  private data class LegacyOneShot(
14
14
  val durationMs: Long,
@@ -16,6 +16,10 @@ class HapticImpact(
16
16
  )
17
17
 
18
18
  fun trigger(context: Context) {
19
+ if (style == HapticFeedbackStyle.NONE) {
20
+ return
21
+ }
22
+
19
23
  val vibrator = getVibrator(context) ?: return
20
24
  if (!vibrator.hasVibrator()) return
21
25
 
@@ -25,7 +29,10 @@ class HapticImpact(
25
29
  HapticFeedbackStyle.LIGHT -> VibrationEffect.EFFECT_TICK
26
30
  HapticFeedbackStyle.MEDIUM -> VibrationEffect.EFFECT_CLICK
27
31
  HapticFeedbackStyle.HEAVY -> VibrationEffect.EFFECT_HEAVY_CLICK
32
+ null -> VibrationEffect.EFFECT_CLICK
33
+ else -> null
28
34
  }
35
+ if (effect == null) { return }
29
36
  vibrator.vibrate(VibrationEffect.createPredefined(effect))
30
37
  return
31
38
  }
@@ -34,7 +41,10 @@ class HapticImpact(
34
41
  HapticFeedbackStyle.LIGHT -> LegacyOneShot(durationMs = 12L, amplitude = 50)
35
42
  HapticFeedbackStyle.MEDIUM -> LegacyOneShot(durationMs = 18L, amplitude = 100)
36
43
  HapticFeedbackStyle.HEAVY -> LegacyOneShot(durationMs = 28L, amplitude = 180)
44
+ null -> LegacyOneShot(durationMs = 18L, amplitude = 100)
45
+ else -> null
37
46
  }
47
+ if (legacyOneShot == null) { return }
38
48
  if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
39
49
  vibrator.vibrate(
40
50
  VibrationEffect.createOneShot(
@@ -33,6 +33,11 @@ class HybridRecognizer: HybridRecognizerSpec() {
33
33
  override var onAutoFinishProgress: ((timeLeftMs: Double) -> Unit)? = null
34
34
  override var onError: ((error: String) -> Unit)? = null
35
35
  override var onPermissionDenied: (() -> Unit)? = null
36
+ override var onVolumeChange: ((normVolume: Double) -> Unit)? = null
37
+
38
+ override fun getIsActive(): Boolean {
39
+ return isActive
40
+ }
36
41
 
37
42
  @DoNotStrip
38
43
  @Keep
@@ -86,7 +91,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
86
91
  mainHandler.postDelayed({
87
92
  val context = NitroModules.applicationContext
88
93
  val hapticImpact = config?.stopHapticFeedbackStyle
89
- if (hapticImpact != null && context != null) {
94
+ if (context != null) {
90
95
  HapticImpact(hapticImpact).trigger(context)
91
96
  }
92
97
  cleanup()
@@ -129,6 +134,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
129
134
  val recognitionListenerSession = RecognitionListenerSession(
130
135
  autoStopper,
131
136
  config,
137
+ onVolumeChange
132
138
  ) { result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean ->
133
139
  onFinishRecognition(result, errorMessage, recordingStopped)
134
140
  }
@@ -140,10 +146,10 @@ class HybridRecognizer: HybridRecognizerSpec() {
140
146
  intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, languageModel)
141
147
  intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, config?.locale ?: "en-US")
142
148
  intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
143
- // set many secs to avoid cutting early
149
+ // Set a lot of time to avoid cutting early
144
150
  intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 300000)
145
151
 
146
- if (config?.androidMaskOffensiveWords != true && Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
152
+ if (config?.maskOffensiveWords != true && Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
147
153
  intent.putExtra(RecognizerIntent.EXTRA_MASK_OFFENSIVE_WORDS, false)
148
154
  }
149
155
 
@@ -163,10 +169,8 @@ class HybridRecognizer: HybridRecognizerSpec() {
163
169
  isActive = true
164
170
 
165
171
  val hapticImpact = config?.startHapticFeedbackStyle
166
- if (hapticImpact != null) {
167
- HapticImpact(hapticImpact).trigger(context)
168
- }
169
172
 
173
+ HapticImpact(hapticImpact).trigger(context)
170
174
  mainHandler.postDelayed({
171
175
  if (isActive) {
172
176
  onReadyForSpeech?.invoke()
@@ -192,6 +196,8 @@ class HybridRecognizer: HybridRecognizerSpec() {
192
196
  speechRecognizer?.destroy()
193
197
  speechRecognizer = null
194
198
  isActive = false
199
+ // Reset voice meter in JS consumers after stop/error cleanup.
200
+ onVolumeChange?.invoke(0.0)
195
201
  } catch (e: Exception) {
196
202
  onFinishRecognition(
197
203
  null,
@@ -5,17 +5,32 @@ import android.speech.RecognitionListener
5
5
  import android.speech.SpeechRecognizer
6
6
  import android.util.Log
7
7
  import com.margelo.nitro.nitrospeech.SpeechToTextParams
8
+ import kotlin.math.max
9
+ import kotlin.math.roundToInt
8
10
 
9
11
  class RecognitionListenerSession (
10
12
  private val autoStopper: AutoStopper?,
11
13
  private val config: SpeechToTextParams?,
14
+ private val onVolumeChange: ((normVolume: Double) -> Unit)?,
12
15
  private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
13
16
  ) {
14
17
  companion object {
15
18
  private const val TAG = "HybridRecognizer"
19
+ private const val SPEECH_LEVEL_THRESHOLD = 0.08f
20
+ private const val FLOOR_RISE_ALPHA = 0.01f
21
+ private const val FLOOR_FALL_ALPHA = 0.20f
22
+ private const val PEAK_ATTACK_ALPHA = 0.25f
23
+ private const val PEAK_DECAY_ALPHA = 0.01f
24
+ private const val METER_ATTACK = 0.35f
25
+ private const val METER_RELEASE = 0.08f
26
+ private const val MIN_SPAN_DB = 6f
27
+ private const val PRECISION_SCALE = 1_000_000f
16
28
  }
17
29
 
18
30
  private var resultBatches: ArrayList<String>? = null
31
+ private var noiseFloorDb = Float.NaN
32
+ private var peakDb = Float.NaN
33
+ private var levelSmoothed = 0f
19
34
 
20
35
  fun createRecognitionListener(): RecognitionListener {
21
36
  resultBatches = null
@@ -23,7 +38,11 @@ class RecognitionListenerSession (
23
38
  override fun onReadyForSpeech(params: Bundle?) {}
24
39
  override fun onBeginningOfSpeech() {}
25
40
  override fun onRmsChanged(rmsdB: Float) {
26
- autoStopper?.indicateRecordingActivity()
41
+ val normLevel = normalizeRmsDb(rmsdB)
42
+ onVolumeChange?.invoke(normLevel.toDouble())
43
+ if (normLevel > SPEECH_LEVEL_THRESHOLD) {
44
+ autoStopper?.indicateRecordingActivity()
45
+ }
27
46
  }
28
47
  override fun onBufferReceived(buffer: ByteArray?) {}
29
48
  override fun onEndOfSpeech() {}
@@ -92,15 +111,62 @@ class RecognitionListenerSession (
92
111
  }
93
112
  }
94
113
 
95
- // Filters out 2 or more repeating words in a row, like "and and"
114
+ // Filters out 2 or more consecutive duplicate words, like "and and"
96
115
  private fun repeatingFilter(text: String): String {
97
- val words = text.split(Regex("\\s+")).toMutableList()
98
- var joiner = words[0]
116
+ var words = text.split(Regex("\\s+")).filter { it.isNotBlank() }
117
+ if (words.isEmpty()) {
118
+ return ""
119
+ }
120
+
121
+ val joiner = StringBuilder()
122
+
123
+ // 10 - arbitrary number of last substrings that is still unstable
124
+ // and needs to be filtered. Prev substrings were handled earlier.
125
+ if (words.size >= 10) {
126
+ joiner.append(words.take(words.size - 9).joinToString(" "))
127
+ words = words.takeLast(10)
128
+ } else {
129
+ joiner.append(words.first())
130
+ }
131
+
99
132
  for (i in words.indices) {
100
133
  if (i == 0) continue
101
- if (words[i] == words[i-1]) continue
102
- joiner += " ${words[i]}"
134
+ // Always add number-containing strings.
135
+ if (Regex("\\d+").containsMatchIn(words[i])) {
136
+ joiner.append(" ").append(words[i])
137
+ continue
138
+ }
139
+
140
+ // Skip consecutive duplicate strings.
141
+ if (words[i] == words[i - 1]) continue
142
+ joiner.append(" ").append(words[i])
103
143
  }
104
- return joiner
144
+ return joiner.toString()
145
+ }
146
+
147
+ private fun normalizeRmsDb(rmsdB: Float): Double {
148
+ if (!rmsdB.isFinite()) {
149
+ return 0.0
150
+ }
151
+
152
+ if (noiseFloorDb.isNaN()) {
153
+ noiseFloorDb = rmsdB
154
+ }
155
+ if (peakDb.isNaN()) {
156
+ peakDb = rmsdB + MIN_SPAN_DB
157
+ }
158
+
159
+ val floorAlpha = if (rmsdB < noiseFloorDb) FLOOR_FALL_ALPHA else FLOOR_RISE_ALPHA
160
+ noiseFloorDb += floorAlpha * (rmsdB - noiseFloorDb)
161
+
162
+ val peakAlpha = if (rmsdB > peakDb) PEAK_ATTACK_ALPHA else PEAK_DECAY_ALPHA
163
+ peakDb += peakAlpha * (rmsdB - peakDb)
164
+
165
+ val span = max(peakDb - noiseFloorDb, MIN_SPAN_DB)
166
+ val raw = ((rmsdB - noiseFloorDb) / span).coerceIn(0f, 1f)
167
+ val smoothingCoeff = if (raw > levelSmoothed) METER_ATTACK else METER_RELEASE
168
+ levelSmoothed += smoothingCoeff * (raw - levelSmoothed)
169
+
170
+ return ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
105
171
  }
106
172
  }