@gmessier/nitro-speech 0.1.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -11
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HapticImpact.kt +11 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +12 -6
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +73 -7
- package/ios/AnylyzerTranscriber.swift +331 -0
- package/ios/AutoStopper.swift +9 -10
- package/ios/BufferUtil.swift +80 -0
- package/ios/HapticImpact.swift +12 -3
- package/ios/HybridNitroSpeech.swift +10 -1
- package/ios/HybridRecognizer.swift +139 -167
- package/ios/LegacySpeechRecognizer.swift +161 -0
- package/lib/commonjs/index.js +54 -5
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +52 -3
- package/lib/module/index.js.map +1 -1
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/lib/typescript/index.d.ts +25 -8
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +24 -12
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +1 -1
- package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +1 -1
- package/nitrogen/generated/android/NitroSpeech+autolinking.gradle +1 -1
- package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +32 -22
- package/nitrogen/generated/android/NitroSpeechOnLoad.hpp +14 -5
- package/nitrogen/generated/android/c++/JFunc_void.hpp +1 -1
- package/nitrogen/generated/android/c++/JFunc_void_double.hpp +1 -1
- package/nitrogen/generated/android/c++/JFunc_void_std__string.hpp +1 -1
- package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +1 -1
- package/nitrogen/generated/android/c++/JHapticFeedbackStyle.hpp +7 -5
- package/nitrogen/generated/android/c++/JHybridNitroSpeechSpec.cpp +23 -22
- package/nitrogen/generated/android/c++/JHybridNitroSpeechSpec.hpp +20 -22
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +58 -35
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +23 -22
- package/nitrogen/generated/android/c++/JSpeechToTextParams.hpp +5 -5
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_double.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_std__string.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_std__vector_std__string_.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HapticFeedbackStyle.kt +5 -2
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridNitroSpeechSpec.kt +16 -19
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +34 -19
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/NitroSpeechOnLoad.kt +1 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +5 -5
- package/nitrogen/generated/ios/NitroSpeech+autolinking.rb +2 -2
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +1 -1
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +34 -10
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +1 -1
- package/nitrogen/generated/ios/NitroSpeechAutolinking.mm +1 -1
- package/nitrogen/generated/ios/NitroSpeechAutolinking.swift +9 -8
- package/nitrogen/generated/ios/c++/HybridNitroSpeechSpecSwift.cpp +1 -1
- package/nitrogen/generated/ios/c++/HybridNitroSpeechSpecSwift.hpp +7 -1
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.cpp +1 -1
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +22 -1
- package/nitrogen/generated/ios/swift/Func_void.swift +1 -2
- package/nitrogen/generated/ios/swift/Func_void_double.swift +1 -2
- package/nitrogen/generated/ios/swift/Func_void_std__string.swift +1 -2
- package/nitrogen/generated/ios/swift/Func_void_std__vector_std__string_.swift +1 -2
- package/nitrogen/generated/ios/swift/HapticFeedbackStyle.swift +5 -1
- package/nitrogen/generated/ios/swift/HybridNitroSpeechSpec.swift +3 -4
- package/nitrogen/generated/ios/swift/HybridNitroSpeechSpec_cxx.swift +10 -3
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +5 -4
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +61 -3
- package/nitrogen/generated/ios/swift/SpeechToTextParams.swift +89 -221
- package/nitrogen/generated/shared/c++/HapticFeedbackStyle.hpp +5 -1
- package/nitrogen/generated/shared/c++/HybridNitroSpeechSpec.cpp +1 -1
- package/nitrogen/generated/shared/c++/HybridNitroSpeechSpec.hpp +1 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +4 -1
- package/nitrogen/generated/shared/c++/SpeechToTextParams.hpp +45 -37
- package/package.json +11 -8
- package/src/index.ts +59 -2
- package/src/specs/NitroSpeech.nitro.ts +25 -12
package/README.md
CHANGED
|
@@ -13,14 +13,24 @@
|
|
|
13
13
|
|
|
14
14
|
React Native Real-Time Speech Recognition Library, powered by [Nitro Modules](https://github.com/mrousavy/nitro).
|
|
15
15
|
|
|
16
|
+
#### Compatibility:
|
|
17
|
+
‼️ The newest versions of `@gmessier/nitro-speech` require [react-native-nitro-modules 0.35.0 or higher](https://github.com/mrousavy/nitro/releases/tag/v0.35.0).
|
|
18
|
+
|
|
19
|
+
| Compatibility | Supported versions |
|
|
20
|
+
|---|---|
|
|
21
|
+
| `react-native-nitro-modules <= 0.34.*` | `@gmessier/nitro-speech <= 0.2.*` |
|
|
22
|
+
| `react-native-nitro-modules >= 0.35.*` | `@gmessier/nitro-speech >= 0.3.*` |
|
|
23
|
+
|
|
16
24
|
#### Key Features:
|
|
17
25
|
|
|
18
26
|
- Built on Nitro Modules for low-overhead native bridging
|
|
27
|
+
- Uses the newest `SpeechAnalyzer` and `SpeechTranscriber` APIs on iOS 26+ (with a fallback to the legacy `SFSpeechRecognition` on older versions)
|
|
19
28
|
- Configurable Timer for silence (default: 8 sec)
|
|
20
29
|
- Callback `onAutoFinishProgress` for progress bars, etc...
|
|
21
30
|
- Method `addAutoFinishTime` for single timer update
|
|
22
31
|
- Method `updateAutoFinishTime` for constant timer update
|
|
23
|
-
-
|
|
32
|
+
- Configurable Haptic Feedback on start and finish
|
|
33
|
+
- Flexible `onVolumeChange` to display input volume in UI with built-in `useVoiceInputVolume` hook
|
|
24
34
|
- Speech-quality configurations:
|
|
25
35
|
- Result is grouped by speech segments into Batches.
|
|
26
36
|
- Param `disableRepeatingFilter` for consecutive duplicate-word filtering.
|
|
@@ -38,6 +48,7 @@ React Native Real-Time Speech Recognition Library, powered by [Nitro Modules](ht
|
|
|
38
48
|
- [Recommended: useRecognizer Hook](#recommended-userecognizer-hook)
|
|
39
49
|
- [With React Navigation (important)](#with-react-navigation-important)
|
|
40
50
|
- [Cross-component control: RecognizerRef](#cross-component-control-recognizerref)
|
|
51
|
+
- [Voice input volume](#voice-input-volume)
|
|
41
52
|
- [Unsafe: RecognizerSession](#unsafe-recognizersession)
|
|
42
53
|
- [API Reference](#api-reference)
|
|
43
54
|
- [Requirements](#requirements)
|
|
@@ -107,6 +118,7 @@ Both permissions are required for speech recognition to work on iOS.
|
|
|
107
118
|
| **Haptic feedback** | Optional haptics on recording start/stop | ✅ | ✅ |
|
|
108
119
|
| **Background handling** | Auto-stop when app loses focus/goes to background | ✅ | Not Safe *(TODO)* |
|
|
109
120
|
| **Permission handling** | Dedicated `onPermissionDenied` callback | ✅ | ✅ |
|
|
121
|
+
| **Voice input volume** | Normalized voice input level for UI meters (`useVoiceInputVolume`) | ✅ | ✅ |
|
|
110
122
|
| **Repeating word filter** | Removes consecutive duplicate words from artifacts | ✅ | ✅ |
|
|
111
123
|
| **Locale support** | Configure speech recognizer for different languages | ✅ | ✅ |
|
|
112
124
|
| **Contextual strings** | Domain-specific vocabulary for improved accuracy | ✅ | ✅ |
|
|
@@ -166,7 +178,7 @@ function MyComponent() {
|
|
|
166
178
|
// iOS specific
|
|
167
179
|
iosAddPunctuation: true,
|
|
168
180
|
// Android specific
|
|
169
|
-
|
|
181
|
+
maskOffensiveWords: false,
|
|
170
182
|
androidFormattingPreferQuality: false,
|
|
171
183
|
androidUseWebSearchModel: false,
|
|
172
184
|
androidDisableBatchHandling: false,
|
|
@@ -218,17 +230,58 @@ import { RecognizerRef } from '@gmessier/nitro-speech';
|
|
|
218
230
|
RecognizerRef.startListening({ locale: 'en-US' });
|
|
219
231
|
RecognizerRef.addAutoFinishTime(5000);
|
|
220
232
|
RecognizerRef.updateAutoFinishTime(10000, true);
|
|
233
|
+
RecognizerRef.getIsActive();
|
|
221
234
|
RecognizerRef.stopListening();
|
|
222
235
|
```
|
|
223
236
|
|
|
224
237
|
`RecognizerRef` exposes only method handlers and is safe for cross-component method access.
|
|
225
238
|
|
|
239
|
+
### Voice input volume
|
|
240
|
+
|
|
241
|
+
#### useVoiceInputVolume
|
|
242
|
+
|
|
243
|
+
By default you have access to `useVoiceInputVolume` to read the normalized voice input level (`0..1`) for UI meters.
|
|
244
|
+
⚠️ **Technical limitation**: this approach re-renders the component frequently.
|
|
245
|
+
|
|
246
|
+
```typescript
|
|
247
|
+
import { useVoiceInputVolume } from '@gmessier/nitro-speech';
|
|
248
|
+
|
|
249
|
+
function VoiceMeter() {
|
|
250
|
+
const volume = useVoiceInputVolume();
|
|
251
|
+
return <Text>{volume.toFixed(2)}</Text>;
|
|
252
|
+
}
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
#### Reanimated: useSharedValue, worklets, UI thread
|
|
256
|
+
|
|
257
|
+
As a better alternative, you can control the volume via a SharedValue and apply it only on the UI thread with Reanimated.
|
|
258
|
+
This way you will avoid re-renders, since the volume will be stored on the UI thread.
|
|
259
|
+
|
|
260
|
+
```typescript
|
|
261
|
+
function VoiceMeter() {
|
|
262
|
+
const sharedVolume = useSharedValue(0)
|
|
263
|
+
const {
|
|
264
|
+
// ...
|
|
265
|
+
} = useRecognizer(
|
|
266
|
+
{
|
|
267
|
+
// ...
|
|
268
|
+
onVolumeChange: (normVolume) => {
|
|
269
|
+
"worklet";
|
|
270
|
+
sharedVolume.value = normVolume
|
|
271
|
+
},
|
|
272
|
+
// ...
|
|
273
|
+
}
|
|
274
|
+
);
|
|
275
|
+
}
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
|
|
226
279
|
### Unsafe: RecognizerSession
|
|
227
280
|
|
|
228
281
|
`RecognizerSession` is the hybrid object. It gives direct access to callbacks and control methods, but it is unsafe to orchestrate the full session directly from it.
|
|
229
282
|
|
|
230
283
|
```typescript
|
|
231
|
-
import { RecognizerSession } from '@gmessier/nitro-speech';
|
|
284
|
+
import { RecognizerSession, unsafe_onVolumeChange } from '@gmessier/nitro-speech';
|
|
232
285
|
|
|
233
286
|
// Set up callbacks
|
|
234
287
|
RecognizerSession.onReadyForSpeech = () => {
|
|
@@ -255,6 +308,13 @@ RecognizerSession.onPermissionDenied = () => {
|
|
|
255
308
|
console.log('Permission denied');
|
|
256
309
|
};
|
|
257
310
|
|
|
311
|
+
RecognizerSession.onVolumeChange = (volume) => {
|
|
312
|
+
console.log('new volume: ', volume);
|
|
313
|
+
};
|
|
314
|
+
// OR use unsafe_onVolumeChange to enable useVoiceInputVolume hook manually
|
|
315
|
+
RecognizerSession.onVolumeChange = unsafe_onVolumeChange
|
|
316
|
+
|
|
317
|
+
|
|
258
318
|
// Start listening
|
|
259
319
|
RecognizerSession.startListening({
|
|
260
320
|
locale: 'en-US',
|
|
@@ -305,6 +365,7 @@ The `RecognizerSession.dispose()` method is **NOT SAFE** and should rarely be us
|
|
|
305
365
|
- `stopListening()` - Stop speech recognition
|
|
306
366
|
- `addAutoFinishTime(additionalTimeMs?: number)` - Add time to the auto-finish timer (or reset to original if no parameter)
|
|
307
367
|
- `updateAutoFinishTime(newTimeMs: number, withRefresh?: boolean)` - Update the auto-finish timer
|
|
368
|
+
- `getIsActive()` - Returns true if the speech recognition is active
|
|
308
369
|
|
|
309
370
|
### `RecognizerRef`
|
|
310
371
|
|
|
@@ -312,6 +373,11 @@ The `RecognizerSession.dispose()` method is **NOT SAFE** and should rarely be us
|
|
|
312
373
|
- `stopListening()`
|
|
313
374
|
- `addAutoFinishTime(additionalTimeMs?: number)`
|
|
314
375
|
- `updateAutoFinishTime(newTimeMs: number, withRefresh?: boolean)`
|
|
376
|
+
- `getIsActive()`
|
|
377
|
+
|
|
378
|
+
### `useVoiceInputVolume`
|
|
379
|
+
|
|
380
|
+
- `useVoiceInputVolume(): number`
|
|
315
381
|
|
|
316
382
|
### `RecognizerSession`
|
|
317
383
|
|
|
@@ -328,8 +394,9 @@ Configuration object for speech recognition.
|
|
|
328
394
|
- `autoFinishRecognitionMs?: number` - Auto-stop timeout in milliseconds (default: `8000`)
|
|
329
395
|
- `contextualStrings?: string[]` - Array of domain-specific words for better recognition
|
|
330
396
|
- `disableRepeatingFilter?: boolean` - Disable filter that removes consecutive duplicate words (default: `false`)
|
|
331
|
-
- `startHapticFeedbackStyle?: 'light' | 'medium' | 'heavy'` - Haptic feedback style when microphone starts recording (default: `
|
|
332
|
-
- `stopHapticFeedbackStyle?: 'light' | 'medium' | 'heavy'` - Haptic feedback style when microphone stops recording (default: `
|
|
397
|
+
- `startHapticFeedbackStyle?: 'light' | 'medium' | 'heavy' | 'none'` - Haptic feedback style when microphone starts recording (default: `"medium"`)
|
|
398
|
+
- `stopHapticFeedbackStyle?: 'light' | 'medium' | 'heavy' | 'none'` - Haptic feedback style when microphone stops recording (default: `"medium"`)
|
|
399
|
+
- `maskOffensiveWords?: boolean` - Mask offensive words with asterisks. (Android 13+, iOS 26+, default: `false`. iOS <26: always `false`)
|
|
333
400
|
|
|
334
401
|
#### iOS-Specific Parameters
|
|
335
402
|
|
|
@@ -337,7 +404,6 @@ Configuration object for speech recognition.
|
|
|
337
404
|
|
|
338
405
|
#### Android-Specific Parameters
|
|
339
406
|
|
|
340
|
-
- `androidMaskOffensiveWords?: boolean` - Mask offensive words (Android 13+, default: `false`)
|
|
341
407
|
- `androidFormattingPreferQuality?: boolean` - Prefer quality over latency (Android 13+, default: `false`)
|
|
342
408
|
- `androidUseWebSearchModel?: boolean` - Use web search language model instead of free-form (default: `false`)
|
|
343
409
|
- `androidDisableBatchHandling?: boolean` - Disable default batch handling (may add many empty batches, default: `false`)
|
|
@@ -361,8 +427,3 @@ cd android && ./gradlew :react-native-nitro-modules:preBuild
|
|
|
361
427
|
## License
|
|
362
428
|
|
|
363
429
|
MIT
|
|
364
|
-
|
|
365
|
-
## TODO
|
|
366
|
-
|
|
367
|
-
- [ ] (Android) Timer till the auto finish is called
|
|
368
|
-
- [ ] (Android) Cleanup when app loses the focus
|
|
@@ -8,7 +8,7 @@ import android.os.VibratorManager
|
|
|
8
8
|
import com.margelo.nitro.nitrospeech.HapticFeedbackStyle
|
|
9
9
|
|
|
10
10
|
class HapticImpact(
|
|
11
|
-
private val style: HapticFeedbackStyle
|
|
11
|
+
private val style: HapticFeedbackStyle?
|
|
12
12
|
) {
|
|
13
13
|
private data class LegacyOneShot(
|
|
14
14
|
val durationMs: Long,
|
|
@@ -16,6 +16,10 @@ class HapticImpact(
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
fun trigger(context: Context) {
|
|
19
|
+
if (style == HapticFeedbackStyle.NONE) {
|
|
20
|
+
return
|
|
21
|
+
}
|
|
22
|
+
|
|
19
23
|
val vibrator = getVibrator(context) ?: return
|
|
20
24
|
if (!vibrator.hasVibrator()) return
|
|
21
25
|
|
|
@@ -25,7 +29,10 @@ class HapticImpact(
|
|
|
25
29
|
HapticFeedbackStyle.LIGHT -> VibrationEffect.EFFECT_TICK
|
|
26
30
|
HapticFeedbackStyle.MEDIUM -> VibrationEffect.EFFECT_CLICK
|
|
27
31
|
HapticFeedbackStyle.HEAVY -> VibrationEffect.EFFECT_HEAVY_CLICK
|
|
32
|
+
null -> VibrationEffect.EFFECT_CLICK
|
|
33
|
+
else -> null
|
|
28
34
|
}
|
|
35
|
+
if (effect == null) { return }
|
|
29
36
|
vibrator.vibrate(VibrationEffect.createPredefined(effect))
|
|
30
37
|
return
|
|
31
38
|
}
|
|
@@ -34,7 +41,10 @@ class HapticImpact(
|
|
|
34
41
|
HapticFeedbackStyle.LIGHT -> LegacyOneShot(durationMs = 12L, amplitude = 50)
|
|
35
42
|
HapticFeedbackStyle.MEDIUM -> LegacyOneShot(durationMs = 18L, amplitude = 100)
|
|
36
43
|
HapticFeedbackStyle.HEAVY -> LegacyOneShot(durationMs = 28L, amplitude = 180)
|
|
44
|
+
null -> LegacyOneShot(durationMs = 18L, amplitude = 100)
|
|
45
|
+
else -> null
|
|
37
46
|
}
|
|
47
|
+
if (legacyOneShot == null) { return }
|
|
38
48
|
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
|
|
39
49
|
vibrator.vibrate(
|
|
40
50
|
VibrationEffect.createOneShot(
|
|
@@ -33,6 +33,11 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
33
33
|
override var onAutoFinishProgress: ((timeLeftMs: Double) -> Unit)? = null
|
|
34
34
|
override var onError: ((error: String) -> Unit)? = null
|
|
35
35
|
override var onPermissionDenied: (() -> Unit)? = null
|
|
36
|
+
override var onVolumeChange: ((normVolume: Double) -> Unit)? = null
|
|
37
|
+
|
|
38
|
+
override fun getIsActive(): Boolean {
|
|
39
|
+
return isActive
|
|
40
|
+
}
|
|
36
41
|
|
|
37
42
|
@DoNotStrip
|
|
38
43
|
@Keep
|
|
@@ -86,7 +91,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
86
91
|
mainHandler.postDelayed({
|
|
87
92
|
val context = NitroModules.applicationContext
|
|
88
93
|
val hapticImpact = config?.stopHapticFeedbackStyle
|
|
89
|
-
if (
|
|
94
|
+
if (context != null) {
|
|
90
95
|
HapticImpact(hapticImpact).trigger(context)
|
|
91
96
|
}
|
|
92
97
|
cleanup()
|
|
@@ -129,6 +134,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
129
134
|
val recognitionListenerSession = RecognitionListenerSession(
|
|
130
135
|
autoStopper,
|
|
131
136
|
config,
|
|
137
|
+
onVolumeChange
|
|
132
138
|
) { result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean ->
|
|
133
139
|
onFinishRecognition(result, errorMessage, recordingStopped)
|
|
134
140
|
}
|
|
@@ -140,10 +146,10 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
140
146
|
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, languageModel)
|
|
141
147
|
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, config?.locale ?: "en-US")
|
|
142
148
|
intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
|
|
143
|
-
//
|
|
149
|
+
// Set a lot of time to avoid cutting early
|
|
144
150
|
intent.putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 300000)
|
|
145
151
|
|
|
146
|
-
if (config?.
|
|
152
|
+
if (config?.maskOffensiveWords != true && Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
|
|
147
153
|
intent.putExtra(RecognizerIntent.EXTRA_MASK_OFFENSIVE_WORDS, false)
|
|
148
154
|
}
|
|
149
155
|
|
|
@@ -163,10 +169,8 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
163
169
|
isActive = true
|
|
164
170
|
|
|
165
171
|
val hapticImpact = config?.startHapticFeedbackStyle
|
|
166
|
-
if (hapticImpact != null) {
|
|
167
|
-
HapticImpact(hapticImpact).trigger(context)
|
|
168
|
-
}
|
|
169
172
|
|
|
173
|
+
HapticImpact(hapticImpact).trigger(context)
|
|
170
174
|
mainHandler.postDelayed({
|
|
171
175
|
if (isActive) {
|
|
172
176
|
onReadyForSpeech?.invoke()
|
|
@@ -192,6 +196,8 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
192
196
|
speechRecognizer?.destroy()
|
|
193
197
|
speechRecognizer = null
|
|
194
198
|
isActive = false
|
|
199
|
+
// Reset voice meter in JS consumers after stop/error cleanup.
|
|
200
|
+
onVolumeChange?.invoke(0.0)
|
|
195
201
|
} catch (e: Exception) {
|
|
196
202
|
onFinishRecognition(
|
|
197
203
|
null,
|
package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt
CHANGED
|
@@ -5,17 +5,32 @@ import android.speech.RecognitionListener
|
|
|
5
5
|
import android.speech.SpeechRecognizer
|
|
6
6
|
import android.util.Log
|
|
7
7
|
import com.margelo.nitro.nitrospeech.SpeechToTextParams
|
|
8
|
+
import kotlin.math.max
|
|
9
|
+
import kotlin.math.roundToInt
|
|
8
10
|
|
|
9
11
|
class RecognitionListenerSession (
|
|
10
12
|
private val autoStopper: AutoStopper?,
|
|
11
13
|
private val config: SpeechToTextParams?,
|
|
14
|
+
private val onVolumeChange: ((normVolume: Double) -> Unit)?,
|
|
12
15
|
private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
|
|
13
16
|
) {
|
|
14
17
|
companion object {
|
|
15
18
|
private const val TAG = "HybridRecognizer"
|
|
19
|
+
private const val SPEECH_LEVEL_THRESHOLD = 0.08f
|
|
20
|
+
private const val FLOOR_RISE_ALPHA = 0.01f
|
|
21
|
+
private const val FLOOR_FALL_ALPHA = 0.20f
|
|
22
|
+
private const val PEAK_ATTACK_ALPHA = 0.25f
|
|
23
|
+
private const val PEAK_DECAY_ALPHA = 0.01f
|
|
24
|
+
private const val METER_ATTACK = 0.35f
|
|
25
|
+
private const val METER_RELEASE = 0.08f
|
|
26
|
+
private const val MIN_SPAN_DB = 6f
|
|
27
|
+
private const val PRECISION_SCALE = 1_000_000f
|
|
16
28
|
}
|
|
17
29
|
|
|
18
30
|
private var resultBatches: ArrayList<String>? = null
|
|
31
|
+
private var noiseFloorDb = Float.NaN
|
|
32
|
+
private var peakDb = Float.NaN
|
|
33
|
+
private var levelSmoothed = 0f
|
|
19
34
|
|
|
20
35
|
fun createRecognitionListener(): RecognitionListener {
|
|
21
36
|
resultBatches = null
|
|
@@ -23,7 +38,11 @@ class RecognitionListenerSession (
|
|
|
23
38
|
override fun onReadyForSpeech(params: Bundle?) {}
|
|
24
39
|
override fun onBeginningOfSpeech() {}
|
|
25
40
|
override fun onRmsChanged(rmsdB: Float) {
|
|
26
|
-
|
|
41
|
+
val normLevel = normalizeRmsDb(rmsdB)
|
|
42
|
+
onVolumeChange?.invoke(normLevel.toDouble())
|
|
43
|
+
if (normLevel > SPEECH_LEVEL_THRESHOLD) {
|
|
44
|
+
autoStopper?.indicateRecordingActivity()
|
|
45
|
+
}
|
|
27
46
|
}
|
|
28
47
|
override fun onBufferReceived(buffer: ByteArray?) {}
|
|
29
48
|
override fun onEndOfSpeech() {}
|
|
@@ -92,15 +111,62 @@ class RecognitionListenerSession (
|
|
|
92
111
|
}
|
|
93
112
|
}
|
|
94
113
|
|
|
95
|
-
// Filters out 2 or more
|
|
114
|
+
// Filters out 2 or more consecutive duplicate words, like "and and"
|
|
96
115
|
private fun repeatingFilter(text: String): String {
|
|
97
|
-
|
|
98
|
-
|
|
116
|
+
var words = text.split(Regex("\\s+")).filter { it.isNotBlank() }
|
|
117
|
+
if (words.isEmpty()) {
|
|
118
|
+
return ""
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
val joiner = StringBuilder()
|
|
122
|
+
|
|
123
|
+
// 10 - arbitrary number of last substrings that is still unstable
|
|
124
|
+
// and needs to be filtered. Prev substrings were handled earlier.
|
|
125
|
+
if (words.size >= 10) {
|
|
126
|
+
joiner.append(words.take(words.size - 9).joinToString(" "))
|
|
127
|
+
words = words.takeLast(10)
|
|
128
|
+
} else {
|
|
129
|
+
joiner.append(words.first())
|
|
130
|
+
}
|
|
131
|
+
|
|
99
132
|
for (i in words.indices) {
|
|
100
133
|
if (i == 0) continue
|
|
101
|
-
|
|
102
|
-
|
|
134
|
+
// Always add number-containing strings.
|
|
135
|
+
if (Regex("\\d+").containsMatchIn(words[i])) {
|
|
136
|
+
joiner.append(" ").append(words[i])
|
|
137
|
+
continue
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Skip consecutive duplicate strings.
|
|
141
|
+
if (words[i] == words[i - 1]) continue
|
|
142
|
+
joiner.append(" ").append(words[i])
|
|
103
143
|
}
|
|
104
|
-
return joiner
|
|
144
|
+
return joiner.toString()
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
private fun normalizeRmsDb(rmsdB: Float): Double {
|
|
148
|
+
if (!rmsdB.isFinite()) {
|
|
149
|
+
return 0.0
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
if (noiseFloorDb.isNaN()) {
|
|
153
|
+
noiseFloorDb = rmsdB
|
|
154
|
+
}
|
|
155
|
+
if (peakDb.isNaN()) {
|
|
156
|
+
peakDb = rmsdB + MIN_SPAN_DB
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
val floorAlpha = if (rmsdB < noiseFloorDb) FLOOR_FALL_ALPHA else FLOOR_RISE_ALPHA
|
|
160
|
+
noiseFloorDb += floorAlpha * (rmsdB - noiseFloorDb)
|
|
161
|
+
|
|
162
|
+
val peakAlpha = if (rmsdB > peakDb) PEAK_ATTACK_ALPHA else PEAK_DECAY_ALPHA
|
|
163
|
+
peakDb += peakAlpha * (rmsdB - peakDb)
|
|
164
|
+
|
|
165
|
+
val span = max(peakDb - noiseFloorDb, MIN_SPAN_DB)
|
|
166
|
+
val raw = ((rmsdB - noiseFloorDb) / span).coerceIn(0f, 1f)
|
|
167
|
+
val smoothingCoeff = if (raw > levelSmoothed) METER_ATTACK else METER_RELEASE
|
|
168
|
+
levelSmoothed += smoothingCoeff * (raw - levelSmoothed)
|
|
169
|
+
|
|
170
|
+
return ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
|
|
105
171
|
}
|
|
106
172
|
}
|