@gmessier/nitro-speech 0.1.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +72 -11
  2. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HapticImpact.kt +11 -1
  3. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +12 -6
  4. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +73 -7
  5. package/ios/AnylyzerTranscriber.swift +331 -0
  6. package/ios/AutoStopper.swift +9 -10
  7. package/ios/BufferUtil.swift +80 -0
  8. package/ios/HapticImpact.swift +12 -3
  9. package/ios/HybridNitroSpeech.swift +10 -1
  10. package/ios/HybridRecognizer.swift +139 -167
  11. package/ios/LegacySpeechRecognizer.swift +161 -0
  12. package/lib/commonjs/index.js +54 -5
  13. package/lib/commonjs/index.js.map +1 -1
  14. package/lib/module/index.js +52 -3
  15. package/lib/module/index.js.map +1 -1
  16. package/lib/tsconfig.tsbuildinfo +1 -1
  17. package/lib/typescript/index.d.ts +25 -8
  18. package/lib/typescript/index.d.ts.map +1 -1
  19. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +24 -12
  20. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +1 -1
  21. package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +1 -1
  22. package/nitrogen/generated/android/NitroSpeech+autolinking.gradle +1 -1
  23. package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +32 -22
  24. package/nitrogen/generated/android/NitroSpeechOnLoad.hpp +14 -5
  25. package/nitrogen/generated/android/c++/JFunc_void.hpp +1 -1
  26. package/nitrogen/generated/android/c++/JFunc_void_double.hpp +1 -1
  27. package/nitrogen/generated/android/c++/JFunc_void_std__string.hpp +1 -1
  28. package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +1 -1
  29. package/nitrogen/generated/android/c++/JHapticFeedbackStyle.hpp +7 -5
  30. package/nitrogen/generated/android/c++/JHybridNitroSpeechSpec.cpp +23 -22
  31. package/nitrogen/generated/android/c++/JHybridNitroSpeechSpec.hpp +20 -22
  32. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +58 -35
  33. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +23 -22
  34. package/nitrogen/generated/android/c++/JSpeechToTextParams.hpp +5 -5
  35. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void.kt +1 -1
  36. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_double.kt +1 -1
  37. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_std__string.kt +1 -1
  38. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_std__vector_std__string_.kt +1 -1
  39. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HapticFeedbackStyle.kt +5 -2
  40. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridNitroSpeechSpec.kt +16 -19
  41. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +34 -19
  42. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/NitroSpeechOnLoad.kt +1 -1
  43. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +5 -5
  44. package/nitrogen/generated/ios/NitroSpeech+autolinking.rb +2 -2
  45. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +1 -1
  46. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +34 -10
  47. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +1 -1
  48. package/nitrogen/generated/ios/NitroSpeechAutolinking.mm +1 -1
  49. package/nitrogen/generated/ios/NitroSpeechAutolinking.swift +9 -8
  50. package/nitrogen/generated/ios/c++/HybridNitroSpeechSpecSwift.cpp +1 -1
  51. package/nitrogen/generated/ios/c++/HybridNitroSpeechSpecSwift.hpp +7 -1
  52. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.cpp +1 -1
  53. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +22 -1
  54. package/nitrogen/generated/ios/swift/Func_void.swift +1 -2
  55. package/nitrogen/generated/ios/swift/Func_void_double.swift +1 -2
  56. package/nitrogen/generated/ios/swift/Func_void_std__string.swift +1 -2
  57. package/nitrogen/generated/ios/swift/Func_void_std__vector_std__string_.swift +1 -2
  58. package/nitrogen/generated/ios/swift/HapticFeedbackStyle.swift +5 -1
  59. package/nitrogen/generated/ios/swift/HybridNitroSpeechSpec.swift +3 -4
  60. package/nitrogen/generated/ios/swift/HybridNitroSpeechSpec_cxx.swift +10 -3
  61. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +5 -4
  62. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +61 -3
  63. package/nitrogen/generated/ios/swift/SpeechToTextParams.swift +89 -221
  64. package/nitrogen/generated/shared/c++/HapticFeedbackStyle.hpp +5 -1
  65. package/nitrogen/generated/shared/c++/HybridNitroSpeechSpec.cpp +1 -1
  66. package/nitrogen/generated/shared/c++/HybridNitroSpeechSpec.hpp +1 -1
  67. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
  68. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +4 -1
  69. package/nitrogen/generated/shared/c++/SpeechToTextParams.hpp +45 -37
  70. package/package.json +11 -8
  71. package/src/index.ts +59 -2
  72. package/src/specs/NitroSpeech.nitro.ts +25 -12
package/README.md CHANGED
@@ -13,14 +13,24 @@
13
13
 
14
14
  React Native Real-Time Speech Recognition Library, powered by [Nitro Modules](https://github.com/mrousavy/nitro).
15
15
 
16
+ #### Compatibility:
17
+ ‼️ The newest versions of `@gmessier/nitro-speech` require [react-native-nitro-modules 0.35.0 or higher](https://github.com/mrousavy/nitro/releases/tag/v0.35.0).
18
+
19
+ | Compatibility | Supported versions |
20
+ |---|---|
21
+ | `react-native-nitro-modules <= 0.34.*` | `@gmessier/nitro-speech <= 0.2.*` |
22
+ | `react-native-nitro-modules >= 0.35.*` | `@gmessier/nitro-speech >= 0.3.*` |
23
+
16
24
  #### Key Features:
17
25
 
18
26
  - Built on Nitro Modules for low-overhead native bridging
27
+ - Uses the newest advanced `SpeechAnalyzer` and `SpeechTranscriber` APIs on iOS 26+ (with a fallback to the legacy `SFSpeechRecognition` API on older versions)
19
28
  - Configurable Timer for silence (default: 8 sec)
20
29
  - Callback `onAutoFinishProgress` for progress bars, etc...
21
30
  - Method `addAutoFinishTime` for single timer update
22
31
  - Method `updateAutoFinishTime` for constant timer update
23
- - Optional Haptic Feedback on start and finish
32
+ - Configurable Haptic Feedback on start and finish
33
+ - Flexible `onVolumeChange` callback to display the input volume in the UI, with a built-in `useVoiceInputVolume` hook
24
34
  - Speech-quality configurations:
25
35
  - Result is grouped by speech segments into Batches.
26
36
  - Param `disableRepeatingFilter` for consecutive duplicate-word filtering.
@@ -38,6 +48,7 @@ React Native Real-Time Speech Recognition Library, powered by [Nitro Modules](ht
38
48
  - [Recommended: useRecognizer Hook](#recommended-userecognizer-hook)
39
49
  - [With React Navigation (important)](#with-react-navigation-important)
40
50
  - [Cross-component control: RecognizerRef](#cross-component-control-recognizerref)
51
+ - [Voice input volume](#voice-input-volume)
41
52
  - [Unsafe: RecognizerSession](#unsafe-recognizersession)
42
53
  - [API Reference](#api-reference)
43
54
  - [Requirements](#requirements)
@@ -107,6 +118,7 @@ Both permissions are required for speech recognition to work on iOS.
107
118
  | **Haptic feedback** | Optional haptics on recording start/stop | ✅ | ✅ |
108
119
  | **Background handling** | Auto-stop when app loses focus/goes to background | ✅ | Not Safe *(TODO)* |
109
120
  | **Permission handling** | Dedicated `onPermissionDenied` callback | ✅ | ✅ |
121
+ | **Voice input volume** | Normalized voice input level for UI meters (`useVoiceInputVolume`) | ✅ | ✅ |
110
122
  | **Repeating word filter** | Removes consecutive duplicate words from artifacts | ✅ | ✅ |
111
123
  | **Locale support** | Configure speech recognizer for different languages | ✅ | ✅ |
112
124
  | **Contextual strings** | Domain-specific vocabulary for improved accuracy | ✅ | ✅ |
@@ -166,7 +178,7 @@ function MyComponent() {
166
178
  // iOS specific
167
179
  iosAddPunctuation: true,
168
180
  // Android specific
169
- androidMaskOffensiveWords: false,
181
+ maskOffensiveWords: false,
170
182
  androidFormattingPreferQuality: false,
171
183
  androidUseWebSearchModel: false,
172
184
  androidDisableBatchHandling: false,
@@ -218,17 +230,58 @@ import { RecognizerRef } from '@gmessier/nitro-speech';
218
230
  RecognizerRef.startListening({ locale: 'en-US' });
219
231
  RecognizerRef.addAutoFinishTime(5000);
220
232
  RecognizerRef.updateAutoFinishTime(10000, true);
233
+ RecognizerRef.getIsActive();
221
234
  RecognizerRef.stopListening();
222
235
  ```
223
236
 
224
237
  `RecognizerRef` exposes only method handlers and is safe for cross-component method access.
225
238
 
239
+ ### Voice input volume
240
+
241
+ #### useVoiceInputVolume
242
+
243
+ By default you have access to `useVoiceInputVolume` to read normalized voice input level (`0..1`) for UI meters.
244
+ ⚠️ **Technical limitation**: this approach re-renders the component very frequently.
245
+
246
+ ```typescript
247
+ import { useVoiceInputVolume } from '@gmessier/nitro-speech';
248
+
249
+ function VoiceMeter() {
250
+ const volume = useVoiceInputVolume();
251
+ return <Text>{volume.toFixed(2)}</Text>;
252
+ }
253
+ ```
254
+
255
+ #### Reanimated: useSharedValue, worklets, UI thread
256
+
257
+ As a better alternative, you can store the volume in a Reanimated `SharedValue` and apply it only on the UI thread.
258
+ This way you avoid re-renders, since the volume is stored on the UI thread.
259
+
260
+ ```typescript
261
+ function VoiceMeter() {
262
+ const sharedVolume = useSharedValue(0)
263
+ const {
264
+ // ...
265
+ } = useRecognizer(
266
+ {
267
+ // ...
268
+ onVolumeChange: (normVolume) => {
269
+ "worklet";
270
+ sharedVolume.value = normVolume
271
+ },
272
+ // ...
273
+ }
274
+ );
275
+ }
276
+ ```
277
+
278
+
226
279
  ### Unsafe: RecognizerSession
227
280
 
228
281
  `RecognizerSession` is the hybrid object. It gives direct access to callbacks and control methods, but it is unsafe to orchestrate the full session directly from it.
229
282
 
230
283
  ```typescript
231
- import { RecognizerSession } from '@gmessier/nitro-speech';
284
+ import { RecognizerSession, unsafe_onVolumeChange } from '@gmessier/nitro-speech';
232
285
 
233
286
  // Set up callbacks
234
287
  RecognizerSession.onReadyForSpeech = () => {
@@ -255,6 +308,13 @@ RecognizerSession.onPermissionDenied = () => {
255
308
  console.log('Permission denied');
256
309
  };
257
310
 
311
+ RecognizerSession.onVolumeChange = (volume) => {
312
+ console.log('new volume: ', volume);
313
+ };
314
+ // OR use unsafe_onVolumeChange to enable useVoiceInputVolume hook manually
315
+ RecognizerSession.onVolumeChange = unsafe_onVolumeChange
316
+
317
+
258
318
  // Start listening
259
319
  RecognizerSession.startListening({
260
320
  locale: 'en-US',
@@ -305,6 +365,7 @@ The `RecognizerSession.dispose()` method is **NOT SAFE** and should rarely be us
305
365
  - `stopListening()` - Stop speech recognition
306
366
  - `addAutoFinishTime(additionalTimeMs?: number)` - Add time to the auto-finish timer (or reset to original if no parameter)
307
367
  - `updateAutoFinishTime(newTimeMs: number, withRefresh?: boolean)` - Update the auto-finish timer
368
+ - `getIsActive()` - Returns true if the speech recognition is active
308
369
 
309
370
  ### `RecognizerRef`
310
371
 
@@ -312,6 +373,11 @@ The `RecognizerSession.dispose()` method is **NOT SAFE** and should rarely be us
312
373
  - `stopListening()`
313
374
  - `addAutoFinishTime(additionalTimeMs?: number)`
314
375
  - `updateAutoFinishTime(newTimeMs: number, withRefresh?: boolean)`
376
+ - `getIsActive()`
377
+
378
+ ### `useVoiceInputVolume`
379
+
380
+ - `useVoiceInputVolume(): number`
315
381
 
316
382
  ### `RecognizerSession`
317
383
 
@@ -328,8 +394,9 @@ Configuration object for speech recognition.
328
394
  - `autoFinishRecognitionMs?: number` - Auto-stop timeout in milliseconds (default: `8000`)
329
395
  - `contextualStrings?: string[]` - Array of domain-specific words for better recognition
330
396
  - `disableRepeatingFilter?: boolean` - Disable filter that removes consecutive duplicate words (default: `false`)
331
- - `startHapticFeedbackStyle?: 'light' | 'medium' | 'heavy'` - Haptic feedback style when microphone starts recording (default: `null` / disabled)
332
- - `stopHapticFeedbackStyle?: 'light' | 'medium' | 'heavy'` - Haptic feedback style when microphone stops recording (default: `null` / disabled)
397
+ - `startHapticFeedbackStyle?: 'light' | 'medium' | 'heavy' | 'none'` - Haptic feedback style when microphone starts recording (default: `"medium"`)
398
+ - `stopHapticFeedbackStyle?: 'light' | 'medium' | 'heavy' | 'none'` - Haptic feedback style when microphone stops recording (default: `"medium"`)
399
+ - `maskOffensiveWords?: boolean` - Mask offensive words with asterisks. (Android 13+, iOS 26+, default: `false`. iOS <26: always `false`)
333
400
 
334
401
  #### iOS-Specific Parameters
335
402
 
@@ -337,7 +404,6 @@ Configuration object for speech recognition.
337
404
 
338
405
  #### Android-Specific Parameters
339
406
 
340
- - `androidMaskOffensiveWords?: boolean` - Mask offensive words (Android 13+, default: `false`)
341
407
  - `androidFormattingPreferQuality?: boolean` - Prefer quality over latency (Android 13+, default: `false`)
342
408
  - `androidUseWebSearchModel?: boolean` - Use web search language model instead of free-form (default: `false`)
343
409
  - `androidDisableBatchHandling?: boolean` - Disable default batch handling (may add many empty batches, default: `false`)
@@ -361,8 +427,3 @@ cd android && ./gradlew :react-native-nitro-modules:preBuild
361
427
  ## License
362
428
 
363
429
  MIT
364
-
365
- ## TODO
366
-
367
- - [ ] (Android) Timer till the auto finish is called
368
- - [ ] (Android) Cleanup when app loses the focus
@@ -8,7 +8,7 @@ import android.os.VibratorManager
8
8
  import com.margelo.nitro.nitrospeech.HapticFeedbackStyle
9
9
 
10
10
  class HapticImpact(
11
- private val style: HapticFeedbackStyle = HapticFeedbackStyle.MEDIUM,
11
+ private val style: HapticFeedbackStyle?
12
12
  ) {
13
13
  private data class LegacyOneShot(
14
14
  val durationMs: Long,
@@ -16,6 +16,10 @@ class HapticImpact(
16
16
  )
17
17
 
18
18
  fun trigger(context: Context) {
19
+ if (style == HapticFeedbackStyle.NONE) {
20
+ return
21
+ }
22
+
19
23
  val vibrator = getVibrator(context) ?: return
20
24
  if (!vibrator.hasVibrator()) return
21
25
 
@@ -25,7 +29,10 @@ class HapticImpact(
25
29
  HapticFeedbackStyle.LIGHT -> VibrationEffect.EFFECT_TICK
26
30
  HapticFeedbackStyle.MEDIUM -> VibrationEffect.EFFECT_CLICK
27
31
  HapticFeedbackStyle.HEAVY -> VibrationEffect.EFFECT_HEAVY_CLICK
32
+ null -> VibrationEffect.EFFECT_CLICK
33
+ else -> null
28
34
  }
35
+ if (effect == null) { return }
29
36
  vibrator.vibrate(VibrationEffect.createPredefined(effect))
30
37
  return
31
38
  }
@@ -34,7 +41,10 @@ class HapticImpact(
34
41
  HapticFeedbackStyle.LIGHT -> LegacyOneShot(durationMs = 12L, amplitude = 50)
35
42
  HapticFeedbackStyle.MEDIUM -> LegacyOneShot(durationMs = 18L, amplitude = 100)
36
43
  HapticFeedbackStyle.HEAVY -> LegacyOneShot(durationMs = 28L, amplitude = 180)
44
+ null -> LegacyOneShot(durationMs = 18L, amplitude = 100)
45
+ else -> null
37
46
  }
47
+ if (legacyOneShot == null) { return }
38
48
  if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
39
49
  vibrator.vibrate(
40
50
  VibrationEffect.createOneShot(
@@ -33,6 +33,11 @@ class HybridRecognizer: HybridRecognizerSpec() {
33
33
  override var onAutoFinishProgress: ((timeLeftMs: Double) -> Unit)? = null
34
34
  override var onError: ((error: String) -> Unit)? = null
35
35
  override var onPermissionDenied: (() -> Unit)? = null
36
+ override var onVolumeChange: ((normVolume: Double) -> Unit)? = null
37
+
38
+ override fun getIsActive(): Boolean {
39
+ return isActive
40
+ }
36
41
 
37
42
  @DoNotStrip
38
43
  @Keep
@@ -86,7 +91,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
86
91
  mainHandler.postDelayed({
87
92
  val context = NitroModules.applicationContext
88
93
  val hapticImpact = config?.stopHapticFeedbackStyle
89
- if (hapticImpact != null && context != null) {
94
+ if (context != null) {
90
95
  HapticImpact(hapticImpact).trigger(context)
91
96
  }
92
97
  cleanup()
@@ -129,6 +134,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
129
134
  val recognitionListenerSession = RecognitionListenerSession(
130
135
  autoStopper,
131
136
  config,
137
+ onVolumeChange
132
138
  ) { result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean ->
133
139
  onFinishRecognition(result, errorMessage, recordingStopped)
134
140
  }
@@ -140,10 +146,10 @@ class HybridRecognizer: HybridRecognizerSpec() {
140
146
  intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, languageModel)
141
147
  intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, config?.locale ?: "en-US")
142
148
  intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
143
- // set many secs to avoid cutting early
149
+ // Set a lot of time to avoid cutting early
144
150
  intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 300000)
145
151
 
146
- if (config?.androidMaskOffensiveWords != true && Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
152
+ if (config?.maskOffensiveWords != true && Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
147
153
  intent.putExtra(RecognizerIntent.EXTRA_MASK_OFFENSIVE_WORDS, false)
148
154
  }
149
155
 
@@ -163,10 +169,8 @@ class HybridRecognizer: HybridRecognizerSpec() {
163
169
  isActive = true
164
170
 
165
171
  val hapticImpact = config?.startHapticFeedbackStyle
166
- if (hapticImpact != null) {
167
- HapticImpact(hapticImpact).trigger(context)
168
- }
169
172
 
173
+ HapticImpact(hapticImpact).trigger(context)
170
174
  mainHandler.postDelayed({
171
175
  if (isActive) {
172
176
  onReadyForSpeech?.invoke()
@@ -192,6 +196,8 @@ class HybridRecognizer: HybridRecognizerSpec() {
192
196
  speechRecognizer?.destroy()
193
197
  speechRecognizer = null
194
198
  isActive = false
199
+ // Reset voice meter in JS consumers after stop/error cleanup.
200
+ onVolumeChange?.invoke(0.0)
195
201
  } catch (e: Exception) {
196
202
  onFinishRecognition(
197
203
  null,
@@ -5,17 +5,32 @@ import android.speech.RecognitionListener
5
5
  import android.speech.SpeechRecognizer
6
6
  import android.util.Log
7
7
  import com.margelo.nitro.nitrospeech.SpeechToTextParams
8
+ import kotlin.math.max
9
+ import kotlin.math.roundToInt
8
10
 
9
11
  class RecognitionListenerSession (
10
12
  private val autoStopper: AutoStopper?,
11
13
  private val config: SpeechToTextParams?,
14
+ private val onVolumeChange: ((normVolume: Double) -> Unit)?,
12
15
  private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
13
16
  ) {
14
17
  companion object {
15
18
  private const val TAG = "HybridRecognizer"
19
+ private const val SPEECH_LEVEL_THRESHOLD = 0.08f
20
+ private const val FLOOR_RISE_ALPHA = 0.01f
21
+ private const val FLOOR_FALL_ALPHA = 0.20f
22
+ private const val PEAK_ATTACK_ALPHA = 0.25f
23
+ private const val PEAK_DECAY_ALPHA = 0.01f
24
+ private const val METER_ATTACK = 0.35f
25
+ private const val METER_RELEASE = 0.08f
26
+ private const val MIN_SPAN_DB = 6f
27
+ private const val PRECISION_SCALE = 1_000_000f
16
28
  }
17
29
 
18
30
  private var resultBatches: ArrayList<String>? = null
31
+ private var noiseFloorDb = Float.NaN
32
+ private var peakDb = Float.NaN
33
+ private var levelSmoothed = 0f
19
34
 
20
35
  fun createRecognitionListener(): RecognitionListener {
21
36
  resultBatches = null
@@ -23,7 +38,11 @@ class RecognitionListenerSession (
23
38
  override fun onReadyForSpeech(params: Bundle?) {}
24
39
  override fun onBeginningOfSpeech() {}
25
40
  override fun onRmsChanged(rmsdB: Float) {
26
- autoStopper?.indicateRecordingActivity()
41
+ val normLevel = normalizeRmsDb(rmsdB)
42
+ onVolumeChange?.invoke(normLevel.toDouble())
43
+ if (normLevel > SPEECH_LEVEL_THRESHOLD) {
44
+ autoStopper?.indicateRecordingActivity()
45
+ }
27
46
  }
28
47
  override fun onBufferReceived(buffer: ByteArray?) {}
29
48
  override fun onEndOfSpeech() {}
@@ -92,15 +111,62 @@ class RecognitionListenerSession (
92
111
  }
93
112
  }
94
113
 
95
- // Filters out 2 or more repeating words in a row, like "and and"
114
+ // Filters out 2 or more consecutive duplicate words, like "and and"
96
115
  private fun repeatingFilter(text: String): String {
97
- val words = text.split(Regex("\\s+")).toMutableList()
98
- var joiner = words[0]
116
+ var words = text.split(Regex("\\s+")).filter { it.isNotBlank() }
117
+ if (words.isEmpty()) {
118
+ return ""
119
+ }
120
+
121
+ val joiner = StringBuilder()
122
+
123
+ // 10 - arbitrary number of last substrings that is still unstable
124
+ // and needs to be filtered. Prev substrings were handled earlier.
125
+ if (words.size >= 10) {
126
+ joiner.append(words.take(words.size - 9).joinToString(" "))
127
+ words = words.takeLast(10)
128
+ } else {
129
+ joiner.append(words.first())
130
+ }
131
+
99
132
  for (i in words.indices) {
100
133
  if (i == 0) continue
101
- if (words[i] == words[i-1]) continue
102
- joiner += " ${words[i]}"
134
+ // Always add number-containing strings.
135
+ if (Regex("\\d+").containsMatchIn(words[i])) {
136
+ joiner.append(" ").append(words[i])
137
+ continue
138
+ }
139
+
140
+ // Skip consecutive duplicate strings.
141
+ if (words[i] == words[i - 1]) continue
142
+ joiner.append(" ").append(words[i])
103
143
  }
104
- return joiner
144
+ return joiner.toString()
145
+ }
146
+
147
+ private fun normalizeRmsDb(rmsdB: Float): Double {
148
+ if (!rmsdB.isFinite()) {
149
+ return 0.0
150
+ }
151
+
152
+ if (noiseFloorDb.isNaN()) {
153
+ noiseFloorDb = rmsdB
154
+ }
155
+ if (peakDb.isNaN()) {
156
+ peakDb = rmsdB + MIN_SPAN_DB
157
+ }
158
+
159
+ val floorAlpha = if (rmsdB < noiseFloorDb) FLOOR_FALL_ALPHA else FLOOR_RISE_ALPHA
160
+ noiseFloorDb += floorAlpha * (rmsdB - noiseFloorDb)
161
+
162
+ val peakAlpha = if (rmsdB > peakDb) PEAK_ATTACK_ALPHA else PEAK_DECAY_ALPHA
163
+ peakDb += peakAlpha * (rmsdB - peakDb)
164
+
165
+ val span = max(peakDb - noiseFloorDb, MIN_SPAN_DB)
166
+ val raw = ((rmsdB - noiseFloorDb) / span).coerceIn(0f, 1f)
167
+ val smoothingCoeff = if (raw > levelSmoothed) METER_ATTACK else METER_RELEASE
168
+ levelSmoothed += smoothingCoeff * (raw - levelSmoothed)
169
+
170
+ return ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
105
171
  }
106
172
  }