@gmessier/nitro-speech 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -16
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +7 -7
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +29 -14
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/Logger.kt +16 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +11 -12
- package/ios/Audio/AudioLevelTracker.swift +16 -22
- package/ios/Engines/RecognizerEngine.swift +16 -13
- package/ios/HybridRecognizer.swift +8 -0
- package/ios/Shared/AutoStopper.swift +1 -1
- package/lib/Recognizer/RecognizerRef.d.ts +2 -0
- package/lib/Recognizer/RecognizerRef.js +5 -1
- package/lib/Recognizer/SpeechRecognizer.d.ts +2 -1
- package/lib/Recognizer/SpeechRecognizer.js +2 -1
- package/lib/Recognizer/methods.d.ts +5 -3
- package/lib/Recognizer/methods.js +8 -0
- package/lib/Recognizer/types.d.ts +3 -3
- package/lib/Recognizer/useRecognizer.js +10 -9
- package/lib/Recognizer/useRecognizerIsActive.d.ts +25 -0
- package/lib/Recognizer/useRecognizerIsActive.js +40 -0
- package/lib/Recognizer/useVoiceInputVolume.d.ts +1 -1
- package/lib/index.d.ts +7 -6
- package/lib/index.js +7 -6
- package/lib/specs/Recognizer.nitro.d.ts +26 -11
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +5 -0
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +1 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +4 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +9 -0
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +8 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +1 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +12 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +1 -0
- package/package.json +1 -1
- package/src/Recognizer/RecognizerRef.ts +6 -0
- package/src/Recognizer/SpeechRecognizer.ts +2 -1
- package/src/Recognizer/methods.ts +16 -3
- package/src/Recognizer/types.ts +7 -1
- package/src/Recognizer/useRecognizer.ts +11 -7
- package/src/Recognizer/useRecognizerIsActive.ts +49 -0
- package/src/Recognizer/useVoiceInputVolume.ts +1 -1
- package/src/index.ts +20 -6
- package/src/specs/Recognizer.nitro.ts +27 -11
package/README.md
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
#### Key Features:
|
|
15
15
|
|
|
16
|
-
- ⚡ Built
|
|
16
|
+
- ⚡ Built with Nitro Modules for low-overhead native binding
|
|
17
17
|
- 🌎 Supports 60+ languages
|
|
18
18
|
- 🍎 The only library that uses new `SpeechAnalyzer` with `SpeechTranscriber` or `DictationTranscriber` API for iOS 26+ (with fallback to legacy `SFSpeechRecognition` for older versions)
|
|
19
19
|
- ⏱️ Timer for silence
|
|
@@ -21,16 +21,16 @@
|
|
|
21
21
|
- Callback `onAutoFinishProgress` fires periodically with interval
|
|
22
22
|
- Configurable interval `autoFinishProgressIntervalMs` value (default: 1 sec)
|
|
23
23
|
- Method `updateConfig` with `autoFinishRecognitionMs` and `autoFinishProgressIntervalMs`
|
|
24
|
-
allows
|
|
24
|
+
allows changing the value on the fly
|
|
25
25
|
- Method `resetAutoFinishTime` resets the Timer to the threshold
|
|
26
26
|
- Method `addAutoFinishTime` adds ms once without changing threshold
|
|
27
27
|
- Configurable volume-based sensitivity `resetAutoFinishVoiceSensitivity` for the timer from 0 to 1
|
|
28
28
|
- 🎤 Rich user voice input management
|
|
29
|
-
- Hook `useVoiceInputVolume()` for `raw` or `smoothed` normalized
|
|
30
|
-
volume level from 0 to 1 -> easy to use for UI animations;
|
|
29
|
+
- Hook `useVoiceInputVolume()` for `raw` or `smoothed` normalized volume level from 0 to 1 -> easy to use for UI animations;
|
|
31
30
|
And `db` as human-friendly value
|
|
32
31
|
- Flexible callback `onVolumeChange` for custom behavior
|
|
33
|
-
-
|
|
32
|
+
- Static method `getVoiceInputVolume()`
|
|
33
|
+
- 🧩 Lifecycle methods: `prewarm` | `updateConfig` | `getIsActive`
|
|
34
34
|
- 👆 Configurable Haptic Feedback on start and finish
|
|
35
35
|
- 🎚️ Speech-quality configurations:
|
|
36
36
|
- Result is grouped by speech segments into Batches.
|
|
@@ -53,6 +53,7 @@
|
|
|
53
53
|
- [Cross-component control: RecognizerRef](#cross-component-control-recognizerref)
|
|
54
54
|
- [Multithreading (react-native-worklets)](#multithreading-react-native-worklets)
|
|
55
55
|
- [Voice input volume](#voice-input-volume)
|
|
56
|
+
- [useRecognizerIsActive](#userecognizerisactive)
|
|
56
57
|
- [Unsafe: SpeechRecognizer](#unsafe-speechrecognizer)
|
|
57
58
|
- [Requirements](#requirements)
|
|
58
59
|
- [Compatibility](#compatibility)
|
|
@@ -123,11 +124,11 @@ Both permissions are required for speech recognition to work on iOS.
|
|
|
123
124
|
| **Auto-finish progress** | Callback `onAutoFinishProgress` with countdown until auto-stop | ✅ | ✅ |
|
|
124
125
|
| **Add Auto-finish Time** | Adds time to the auto finish timer once without changing the timer threshold | ✅ | ✅ |
|
|
125
126
|
| **Reset Auto-finish Time** | Resets the Timer to the threshold | ✅ | ✅ |
|
|
126
|
-
| **Voice input volume** |
|
|
127
|
+
| **Voice input volume** | `useVoiceInputVolume`, `getVoiceInputVolume()`, `onVolumeChange` | ✅ | ✅ |
|
|
127
128
|
| **Reset Auto-finish Sensitivity** | The voice detector sensitivity to reset the Auto-finish time | ✅ | ✅ |
|
|
128
129
|
| **Prewarm** | Prepares resources, downloads assets, confirms locale availability | ✅ | ✅ |
|
|
129
|
-
| **Update config** | Static method `updateConfig` allows
|
|
130
|
-
| **
|
|
130
|
+
| **Update config** | Static method `updateConfig` allows updating the config on the fly | ✅ | ✅ |
|
|
131
|
+
| **Is Active** | Static method `getIsActive()` | ✅ | ✅ |
|
|
131
132
|
| **Haptic feedback** | Haptic feedback on recording start/stop | ✅ | ✅ |
|
|
132
133
|
| **Permission handling** | Dedicated `onPermissionDenied` callback | ✅ | ✅ |
|
|
133
134
|
| **Background handling** | Stop when app loses focus/goes to background | ✅ | ✅ |
|
|
@@ -154,7 +155,8 @@ Because of that, treat it as a **single session owner** setup hook: use it once
|
|
|
154
155
|
import { useRecognizer } from '@gmessier/nitro-speech';
|
|
155
156
|
|
|
156
157
|
function MyComponent() {
|
|
157
|
-
const {
|
|
158
|
+
const {
|
|
159
|
+
prewarm,
|
|
158
160
|
startListening,
|
|
159
161
|
stopListening,
|
|
160
162
|
resetAutoFinishTime,
|
|
@@ -162,6 +164,7 @@ function MyComponent() {
|
|
|
162
164
|
updateConfig,
|
|
163
165
|
getSupportedLocalesIOS,
|
|
164
166
|
getIsActive,
|
|
167
|
+
getVoiceInputVolume,
|
|
165
168
|
} = useRecognizer({
|
|
166
169
|
onReadyForSpeech: () => {
|
|
167
170
|
console.log('Listening...');
|
|
@@ -259,6 +262,7 @@ If you need to call recognizer methods from other components without prop drilli
|
|
|
259
262
|
```typescript
|
|
260
263
|
import { RecognizerRef } from '@gmessier/nitro-speech';
|
|
261
264
|
|
|
265
|
+
RecognizerRef.prewarm({ locale: 'en-US' });
|
|
262
266
|
RecognizerRef.startListening({ locale: 'en-US' });
|
|
263
267
|
RecognizerRef.addAutoFinishTime(5000);
|
|
264
268
|
RecognizerRef.resetAutoFinishTime();
|
|
@@ -271,6 +275,7 @@ RecognizerRef.updateConfig(
|
|
|
271
275
|
true
|
|
272
276
|
);
|
|
273
277
|
RecognizerRef.getIsActive();
|
|
278
|
+
RecognizerRef.getVoiceInputVolume();
|
|
274
279
|
RecognizerRef.stopListening();
|
|
275
280
|
// iOS only
|
|
276
281
|
RecognizerRef.getSupportedLocalesIOS();
|
|
@@ -321,8 +326,6 @@ function VoiceMeter() {
|
|
|
321
326
|
As a better alternative, you can control the volume via a SharedValue and apply it only on the UI thread with Reanimated.
|
|
322
327
|
This way you avoid re-renders, since the volume is stored on the UI thread.
|
|
323
328
|
|
|
324
|
-
Warning: this approach will disable the built-in `useVoiceInputVolume` hook.
|
|
325
|
-
|
|
326
329
|
```typescript
|
|
327
330
|
function VoiceMeter() {
|
|
328
331
|
const sharedVolume = useSharedValue(0)
|
|
@@ -341,17 +344,35 @@ function VoiceMeter() {
|
|
|
341
344
|
}
|
|
342
345
|
```
|
|
343
346
|
|
|
347
|
+
### useRecognizerIsActive
|
|
348
|
+
|
|
349
|
+
```typescript
|
|
350
|
+
import { useRecognizerIsActive } from '@gmessier/nitro-speech';
|
|
351
|
+
|
|
352
|
+
function MyComponent() {
|
|
353
|
+
const isActive = useRecognizerIsActive();
|
|
354
|
+
return <Text>{isActive ? 'Listening...' : 'Not listening'}</Text>;
|
|
355
|
+
}
|
|
356
|
+
```
|
|
344
357
|
|
|
345
358
|
### Unsafe: SpeechRecognizer
|
|
346
359
|
|
|
347
360
|
`SpeechRecognizer` is the hybrid object. It gives direct access to callbacks and control methods, but it is unsafe to orchestrate the full session directly from it.
|
|
348
361
|
|
|
362
|
+
**Warning**: Since it reflects the original hybrid object, its API may change in the future.
|
|
363
|
+
|
|
349
364
|
```typescript
|
|
350
|
-
import {
|
|
365
|
+
import {
|
|
366
|
+
SpeechRecognizer,
|
|
367
|
+
speechRecognizerVolumeChangeHandler,
|
|
368
|
+
speechRecognizerActiveStateHandler,
|
|
369
|
+
} from '@gmessier/nitro-speech';
|
|
351
370
|
|
|
352
371
|
// Set up callbacks
|
|
353
372
|
SpeechRecognizer.onReadyForSpeech = () => {
|
|
354
373
|
console.log('Listening...');
|
|
374
|
+
// Add speechRecognizerActiveStateHandler to enable useRecognizerIsActive hook manually
|
|
375
|
+
speechRecognizerActiveStateHandler(true);
|
|
355
376
|
};
|
|
356
377
|
|
|
357
378
|
SpeechRecognizer.onResult = (textBatches) => {
|
|
@@ -360,6 +381,8 @@ SpeechRecognizer.onResult = (textBatches) => {
|
|
|
360
381
|
|
|
361
382
|
SpeechRecognizer.onRecordingStopped = () => {
|
|
362
383
|
console.log('Stopped');
|
|
384
|
+
// Add speechRecognizerActiveStateHandler to enable useRecognizerIsActive hook manually
|
|
385
|
+
speechRecognizerActiveStateHandler(false);
|
|
363
386
|
};
|
|
364
387
|
|
|
365
388
|
SpeechRecognizer.onAutoFinishProgress = (timeLeftMs) => {
|
|
@@ -376,10 +399,27 @@ SpeechRecognizer.onPermissionDenied = () => {
|
|
|
376
399
|
|
|
377
400
|
SpeechRecognizer.onVolumeChange = (volume) => {
|
|
378
401
|
console.log('new volume: ', volume);
|
|
402
|
+
// Add speechRecognizerVolumeChangeHandler to enable useVoiceInputVolume hook manually
|
|
403
|
+
speechRecognizerVolumeChangeHandler(volume);
|
|
379
404
|
};
|
|
380
|
-
// OR use speechRecognizerVolumeChangeHandler to enable useVoiceInputVolume hook manually
|
|
381
|
-
SpeechRecognizer.onVolumeChange = speechRecognizerVolumeChangeHandler
|
|
382
405
|
|
|
406
|
+
// Prepare resources, download assets, and confirm locale availability
|
|
407
|
+
SpeechRecognizer.prewarm({
|
|
408
|
+
locale: 'en-US',
|
|
409
|
+
// ... your config to prepare
|
|
410
|
+
});
|
|
411
|
+
// OR `await` if you want to react to the success
|
|
412
|
+
await SpeechRecognizer.prewarm({
|
|
413
|
+
locale: 'en-US',
|
|
414
|
+
// ... your config to prepare
|
|
415
|
+
});
|
|
416
|
+
// OR from worklet (only sync)
|
|
417
|
+
scheduleOnRuntime(workletRuntime, () => {
|
|
418
|
+
SpeechRecognizer.prewarm({
|
|
419
|
+
locale: 'en-US',
|
|
420
|
+
// ... your config to prepare
|
|
421
|
+
});
|
|
422
|
+
});
|
|
383
423
|
|
|
384
424
|
// Start listening
|
|
385
425
|
SpeechRecognizer.startListening({
|
|
@@ -415,7 +455,7 @@ The `SpeechRecognizer.dispose()` method is **NOT SAFE** and should rarely be use
|
|
|
415
455
|
|
|
416
456
|
## Compatibility
|
|
417
457
|
|
|
418
|
-
Latest versions of `@gmessier/nitro-speech`
|
|
458
|
+
Latest versions of `@gmessier/nitro-speech` require [react-native-nitro-modules 0.35.0 or higher](https://github.com/mrousavy/nitro/releases/tag/v0.35.0).
|
|
419
459
|
|
|
420
460
|
|
|
421
461
|
| Compatibility | Supported versions |
|
|
@@ -427,7 +467,7 @@ Latest versions of `@gmessier/nitro-speech` requires [react-native-nitro-modules
|
|
|
427
467
|
|
|
428
468
|
### Android Gradle sync issues
|
|
429
469
|
|
|
430
|
-
If you're having issues with Android Gradle sync, try running the prebuild for the library
|
|
470
|
+
If you're having issues with Android Gradle sync, try running the prebuild for the library that causes the issue:
|
|
431
471
|
|
|
432
472
|
e.g. failed in `react-native-nitro-modules`:
|
|
433
473
|
|
|
@@ -2,7 +2,6 @@ package com.margelo.nitro.nitrospeech.recognizer
|
|
|
2
2
|
|
|
3
3
|
import android.os.Handler
|
|
4
4
|
import android.os.Looper
|
|
5
|
-
import android.util.Log
|
|
6
5
|
import kotlin.math.max
|
|
7
6
|
|
|
8
7
|
class AutoStopper(
|
|
@@ -12,12 +11,13 @@ class AutoStopper(
|
|
|
12
11
|
val onTimeout: () -> Unit,
|
|
13
12
|
) {
|
|
14
13
|
companion object {
|
|
15
|
-
private const val TAG = "HybridRecognizer"
|
|
16
14
|
private const val DEFAULT_SILENCE_THRESHOLD_MS = 8000.0
|
|
17
15
|
private const val DEFAULT_PROGRESS_INTERVAL_MS = 1000.0
|
|
18
16
|
private const val MIN_PROGRESS_INTERVAL_MS = 50.0
|
|
19
17
|
}
|
|
20
18
|
|
|
19
|
+
private val logger = Logger(disable = false)
|
|
20
|
+
|
|
21
21
|
private var silenceThresholdMs: Double = clampMs(silenceThresholdMs ?: DEFAULT_SILENCE_THRESHOLD_MS)
|
|
22
22
|
private var progressIntervalMs: Double = clampMs(progressIntervalMs ?: DEFAULT_PROGRESS_INTERVAL_MS)
|
|
23
23
|
|
|
@@ -31,7 +31,7 @@ class AutoStopper(
|
|
|
31
31
|
private val tickRunnable = Runnable { tick() }
|
|
32
32
|
|
|
33
33
|
fun resetTimer() {
|
|
34
|
-
|
|
34
|
+
logger.log("resetTimer | isStopped: $isStopped | ms: ${System.currentTimeMillis()}")
|
|
35
35
|
handler.removeCallbacks(tickRunnable)
|
|
36
36
|
isTimerScheduled = false
|
|
37
37
|
if (isStopped) return
|
|
@@ -55,7 +55,7 @@ class AutoStopper(
|
|
|
55
55
|
|
|
56
56
|
fun addMsOnce(extraMs: Double) {
|
|
57
57
|
if (isStopped || !extraMs.isFinite()) return
|
|
58
|
-
|
|
58
|
+
logger.log("addMsOnce | extraMs: $extraMs")
|
|
59
59
|
timeLeftMs += extraMs
|
|
60
60
|
didTimeout = false
|
|
61
61
|
if (timeLeftMs > 0 && isTimerScheduled) {
|
|
@@ -65,7 +65,7 @@ class AutoStopper(
|
|
|
65
65
|
|
|
66
66
|
fun updateProgressInterval(newIntervalMs: Double) {
|
|
67
67
|
if (isStopped) return
|
|
68
|
-
|
|
68
|
+
logger.log("updateProgressInterval | newIntervalMs: $newIntervalMs")
|
|
69
69
|
progressIntervalMs = clampMs(newIntervalMs)
|
|
70
70
|
if (isTimerScheduled) {
|
|
71
71
|
scheduleNextTickLocked()
|
|
@@ -83,7 +83,7 @@ class AutoStopper(
|
|
|
83
83
|
if (isStopped || didTimeout) return
|
|
84
84
|
timeLeftMs -= progressIntervalMs
|
|
85
85
|
if (timeLeftMs > 0) {
|
|
86
|
-
|
|
86
|
+
logger.log("onProgress | timeLeftMs: $timeLeftMs")
|
|
87
87
|
onProgress(timeLeftMs)
|
|
88
88
|
scheduleNextTickLocked()
|
|
89
89
|
return
|
|
@@ -92,7 +92,7 @@ class AutoStopper(
|
|
|
92
92
|
didTimeout = true
|
|
93
93
|
handler.removeCallbacks(tickRunnable)
|
|
94
94
|
isTimerScheduled = false
|
|
95
|
-
|
|
95
|
+
logger.log("onTimeout | ms: ${System.currentTimeMillis()}")
|
|
96
96
|
onTimeout()
|
|
97
97
|
}
|
|
98
98
|
|
|
@@ -7,7 +7,6 @@ import android.os.Handler
|
|
|
7
7
|
import android.os.Looper
|
|
8
8
|
import android.speech.RecognizerIntent
|
|
9
9
|
import android.speech.SpeechRecognizer
|
|
10
|
-
import android.util.Log
|
|
11
10
|
import androidx.annotation.Keep
|
|
12
11
|
import com.facebook.proguard.annotations.DoNotStrip
|
|
13
12
|
import com.margelo.nitro.NitroModules
|
|
@@ -21,12 +20,14 @@ import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
|
21
20
|
@Keep
|
|
22
21
|
class HybridRecognizer: HybridRecognizerSpec() {
|
|
23
22
|
companion object {
|
|
24
|
-
private const val TAG = "HybridRecognizer"
|
|
25
23
|
private const val POST_RECOGNITION_DELAY = 250L
|
|
26
24
|
}
|
|
27
25
|
|
|
26
|
+
private val logger = Logger(disable = false)
|
|
27
|
+
|
|
28
28
|
private var isActive: Boolean = false
|
|
29
29
|
private var config: SpeechRecognitionConfig? = null
|
|
30
|
+
private var volumeChangeEvent: VolumeChangeEvent = VolumeChangeEvent(0.0,0.0,null)
|
|
30
31
|
private var autoStopper: AutoStopper? = null
|
|
31
32
|
private var speechRecognizer: SpeechRecognizer? = null
|
|
32
33
|
private val mainHandler = Handler(Looper.getMainLooper())
|
|
@@ -51,7 +52,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
51
52
|
@DoNotStrip
|
|
52
53
|
@Keep
|
|
53
54
|
override fun startListening(params: SpeechRecognitionConfig?) {
|
|
54
|
-
|
|
55
|
+
logger.log("startListening: $params")
|
|
55
56
|
if (isActive) {
|
|
56
57
|
onFinishRecognition(
|
|
57
58
|
null,
|
|
@@ -94,7 +95,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
94
95
|
@DoNotStrip
|
|
95
96
|
@Keep
|
|
96
97
|
override fun stopListening() {
|
|
97
|
-
|
|
98
|
+
logger.log("stopListening called")
|
|
98
99
|
if (!isActive) return
|
|
99
100
|
onFinishRecognition(null, null, true)
|
|
100
101
|
mainHandler.postDelayed({
|
|
@@ -117,7 +118,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
117
118
|
@DoNotStrip
|
|
118
119
|
@Keep
|
|
119
120
|
override fun addAutoFinishTime(additionalTimeMs: Double?) {
|
|
120
|
-
|
|
121
|
+
logger.log("addAutoFinishTime")
|
|
121
122
|
if (!isActive) return
|
|
122
123
|
|
|
123
124
|
if (additionalTimeMs != null) {
|
|
@@ -134,7 +135,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
134
135
|
newConfig: MutableSpeechRecognitionConfig?,
|
|
135
136
|
resetAutoFinishTime: Boolean?
|
|
136
137
|
) {
|
|
137
|
-
|
|
138
|
+
logger.log("updateConfig $newConfig",)
|
|
138
139
|
if (!isActive) return
|
|
139
140
|
|
|
140
141
|
val newTimeMs = if (newConfig?.autoFinishRecognitionMs != null) newConfig.autoFinishRecognitionMs else config?.autoFinishRecognitionMs
|
|
@@ -177,6 +178,12 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
177
178
|
return isActive
|
|
178
179
|
}
|
|
179
180
|
|
|
181
|
+
@DoNotStrip
|
|
182
|
+
@Keep
|
|
183
|
+
override fun getVoiceInputVolume(): VolumeChangeEvent {
|
|
184
|
+
return volumeChangeEvent
|
|
185
|
+
}
|
|
186
|
+
|
|
180
187
|
@DoNotStrip
|
|
181
188
|
@Keep
|
|
182
189
|
override fun getSupportedLocalesIOS(): Array<String> {
|
|
@@ -204,12 +211,14 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
204
211
|
}
|
|
205
212
|
)
|
|
206
213
|
val recognitionListenerSession = RecognitionListenerSession(
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
214
|
+
autoStopper,
|
|
215
|
+
config,
|
|
216
|
+
fireVolumeChangeEvent = { event -> fireVolumeChangeEvent(event) },
|
|
217
|
+
onFinishRecognition = { result, errorMessage, recordingStopped ->
|
|
218
|
+
onFinishRecognition(result, errorMessage, recordingStopped)
|
|
219
|
+
}
|
|
220
|
+
)
|
|
221
|
+
|
|
213
222
|
speechRecognizer?.setRecognitionListener(recognitionListenerSession.createRecognitionListener())
|
|
214
223
|
|
|
215
224
|
val languageModel = if (config?.androidUseWebSearchModel == true) RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH else RecognizerIntent.LANGUAGE_MODEL_FREE_FORM
|
|
@@ -262,7 +271,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
262
271
|
|
|
263
272
|
private fun cleanup() {
|
|
264
273
|
try {
|
|
265
|
-
|
|
274
|
+
logger.log("cleanup called")
|
|
266
275
|
autoStopper?.stop()
|
|
267
276
|
autoStopper = null
|
|
268
277
|
speechRecognizer?.stopListening()
|
|
@@ -270,7 +279,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
270
279
|
speechRecognizer = null
|
|
271
280
|
isActive = false
|
|
272
281
|
// Reset voice meter in JS consumers after stop/error cleanup.
|
|
273
|
-
|
|
282
|
+
fireVolumeChangeEvent(VolumeChangeEvent(0.0,0.0,null))
|
|
274
283
|
} catch (e: Exception) {
|
|
275
284
|
onFinishRecognition(
|
|
276
285
|
null,
|
|
@@ -291,4 +300,10 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
291
300
|
onResult?.invoke(result.toTypedArray())
|
|
292
301
|
}
|
|
293
302
|
}
|
|
303
|
+
|
|
304
|
+
private fun fireVolumeChangeEvent(event: VolumeChangeEvent) {
|
|
305
|
+
logger.log("fireVolumeChangeEvent ${event}")
|
|
306
|
+
volumeChangeEvent = event
|
|
307
|
+
onVolumeChange?.invoke(event)
|
|
308
|
+
}
|
|
294
309
|
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
package com.margelo.nitro.nitrospeech.recognizer
|
|
2
|
+
|
|
3
|
+
import android.util.Log
|
|
4
|
+
|
|
5
|
+
class Logger (
|
|
6
|
+
private val disable: Boolean
|
|
7
|
+
) {
|
|
8
|
+
private val isLogging = false
|
|
9
|
+
companion object {
|
|
10
|
+
private const val TAG = "HybridRecognizer"
|
|
11
|
+
}
|
|
12
|
+
fun log(message: String) {
|
|
13
|
+
if (disable || !isLogging) return
|
|
14
|
+
Log.d(TAG, message)
|
|
15
|
+
}
|
|
16
|
+
}
|
package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt
CHANGED
|
@@ -3,7 +3,6 @@ package com.margelo.nitro.nitrospeech.recognizer
|
|
|
3
3
|
import android.os.Bundle
|
|
4
4
|
import android.speech.RecognitionListener
|
|
5
5
|
import android.speech.SpeechRecognizer
|
|
6
|
-
import android.util.Log
|
|
7
6
|
import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
|
|
8
7
|
import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
9
8
|
import kotlin.math.max
|
|
@@ -12,11 +11,11 @@ import kotlin.math.roundToInt
|
|
|
12
11
|
class RecognitionListenerSession (
|
|
13
12
|
private val autoStopper: AutoStopper?,
|
|
14
13
|
private val config: SpeechRecognitionConfig?,
|
|
15
|
-
private val
|
|
14
|
+
private val fireVolumeChangeEvent: (event: VolumeChangeEvent) -> Unit,
|
|
16
15
|
private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
|
|
17
16
|
) {
|
|
17
|
+
private val logger = Logger(disable = false)
|
|
18
18
|
companion object {
|
|
19
|
-
private const val TAG = "HybridRecognizer"
|
|
20
19
|
private const val SPEECH_LEVEL_THRESHOLD = 0.35
|
|
21
20
|
private const val FLOOR_RISE_ALPHA = 0.01f
|
|
22
21
|
private const val FLOOR_FALL_ALPHA = 0.20f
|
|
@@ -40,11 +39,11 @@ class RecognitionListenerSession (
|
|
|
40
39
|
override fun onBeginningOfSpeech() {}
|
|
41
40
|
override fun onRmsChanged(rmsdB: Float) {
|
|
42
41
|
val volumeEvent = getVolume(rmsdB)
|
|
43
|
-
|
|
42
|
+
fireVolumeChangeEvent(volumeEvent)
|
|
44
43
|
val threshold =
|
|
45
44
|
config?.resetAutoFinishVoiceSensitivity?.coerceIn(0.0, 1.0)
|
|
46
45
|
?: SPEECH_LEVEL_THRESHOLD.toDouble()
|
|
47
|
-
|
|
46
|
+
// logger.log("onRmsChanged: ${volumeEvent}")
|
|
48
47
|
if (volumeEvent.rawVolume > threshold) {
|
|
49
48
|
autoStopper?.resetTimer()
|
|
50
49
|
}
|
|
@@ -75,7 +74,7 @@ class RecognitionListenerSession (
|
|
|
75
74
|
}
|
|
76
75
|
|
|
77
76
|
override fun onResults(results: Bundle?) {
|
|
78
|
-
|
|
77
|
+
logger.log("onResults: $resultBatches")
|
|
79
78
|
onFinishRecognition(resultBatches, null, true)
|
|
80
79
|
autoStopper?.stop()
|
|
81
80
|
autoStopper?.onTimeout()
|
|
@@ -85,26 +84,26 @@ class RecognitionListenerSession (
|
|
|
85
84
|
val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
|
|
86
85
|
|
|
87
86
|
if (matches.isNullOrEmpty() || matches[0] == "") {
|
|
88
|
-
|
|
87
|
+
logger.log("onPartialResults[0], skip, NO RECOGNIZE")
|
|
89
88
|
return
|
|
90
89
|
}
|
|
91
90
|
|
|
92
91
|
autoStopper?.resetTimer()
|
|
93
|
-
|
|
92
|
+
logger.log("onPartialResults[0], add ${matches[0]}")
|
|
94
93
|
var currentBatches = resultBatches
|
|
95
94
|
if (currentBatches.isNullOrEmpty()) {
|
|
96
|
-
|
|
95
|
+
logger.log("onPartialResults[1], NO BATCHES YET | add first")
|
|
97
96
|
currentBatches = arrayListOf(matches[0])
|
|
98
97
|
} else {
|
|
99
|
-
|
|
98
|
+
logger.log("onPartialResults[1], current batches $currentBatches")
|
|
100
99
|
val prevBatchLength = currentBatches[currentBatches.lastIndex].length
|
|
101
100
|
val match = if (config?.disableRepeatingFilter == true) matches[0] else repeatingFilter(matches[0])
|
|
102
101
|
val matchLength = match.length
|
|
103
102
|
if (config?.androidDisableBatchHandling == true || matchLength + 3 < prevBatchLength) {
|
|
104
|
-
|
|
103
|
+
logger.log("onPartialResults[2], append new batch")
|
|
105
104
|
currentBatches.add(match)
|
|
106
105
|
} else {
|
|
107
|
-
|
|
106
|
+
logger.log("onPartialResults[2], update batch, replace #${currentBatches.lastIndex}")
|
|
108
107
|
currentBatches[currentBatches.lastIndex] = match
|
|
109
108
|
}
|
|
110
109
|
}
|
|
@@ -16,33 +16,18 @@ final class AudioLevelTracker {
|
|
|
16
16
|
private static let meterRelease: Float = 0.08
|
|
17
17
|
private static let defaultAutoStopResetThreshold: Double = 0.4
|
|
18
18
|
|
|
19
|
-
private var autoStopResetThreshold: Double
|
|
20
19
|
private var smoothedLevel: Float = 0
|
|
20
|
+
|
|
21
|
+
var currentSample: AudioLevelSample?
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
if let resetAutoFinishVoiceSensitivity {
|
|
24
|
-
// Clamp value between 0 and 1
|
|
25
|
-
self.autoStopResetThreshold = max(0, min(1, resetAutoFinishVoiceSensitivity))
|
|
26
|
-
} else {
|
|
27
|
-
self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
func updateResetAutoFinishVoiceSensitivity(newValue: Double?) {
|
|
32
|
-
if let newValue {
|
|
33
|
-
// Clamp value between 0 and 1
|
|
34
|
-
self.autoStopResetThreshold = max(0, min(1, newValue))
|
|
35
|
-
} else {
|
|
36
|
-
self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
|
|
37
|
-
}
|
|
38
|
-
}
|
|
23
|
+
private let lg = Lg(prefix: "RecognizerEngine")
|
|
39
24
|
|
|
40
25
|
func reset() {
|
|
41
26
|
smoothedLevel = 0
|
|
42
|
-
|
|
27
|
+
currentSample = nil
|
|
43
28
|
}
|
|
44
29
|
|
|
45
|
-
func process(_ buffer: AVAudioPCMBuffer) -> AudioLevelSample? {
|
|
30
|
+
func process(_ buffer: AVAudioPCMBuffer,_ autoStopResetThreshold: Double? = nil) -> AudioLevelSample? {
|
|
46
31
|
guard let samples = buffer.floatChannelData?[0] else { return nil }
|
|
47
32
|
|
|
48
33
|
let frameCount = Int(buffer.frameLength)
|
|
@@ -56,11 +41,20 @@ final class AudioLevelTracker {
|
|
|
56
41
|
let coeff = normalized > smoothedLevel ? Self.meterAttack : Self.meterRelease
|
|
57
42
|
smoothedLevel += coeff * (normalized - smoothedLevel)
|
|
58
43
|
|
|
59
|
-
|
|
44
|
+
var threshold = Self.defaultAutoStopResetThreshold
|
|
45
|
+
if let autoStopResetThreshold {
|
|
46
|
+
threshold = max(0, min(1, autoStopResetThreshold))
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
currentSample = AudioLevelSample(
|
|
60
50
|
smoothed: Double(smoothedLevel * 1_000_000).rounded() / 1_000_000,
|
|
61
51
|
raw: Double(normalized * 1_000_000).rounded() / 1_000_000,
|
|
62
52
|
db: Double(db * 1_000).rounded() / 1_000,
|
|
63
|
-
resetTimer: Double(normalized) >=
|
|
53
|
+
resetTimer: Double(normalized) >= threshold
|
|
64
54
|
)
|
|
55
|
+
|
|
56
|
+
lg.log("[AudioLevelTracker.process] autoStopResetThreshold: \(threshold)")
|
|
57
|
+
|
|
58
|
+
return currentSample
|
|
65
59
|
}
|
|
66
60
|
}
|
|
@@ -18,7 +18,7 @@ class RecognizerEngine {
|
|
|
18
18
|
var hardwareFormat: AVAudioFormat?
|
|
19
19
|
weak var recognizerDelegate: RecognizerDelegate?
|
|
20
20
|
|
|
21
|
-
private let audioLevelTracker
|
|
21
|
+
private let audioLevelTracker = AudioLevelTracker()
|
|
22
22
|
private var appStateObserver: AppStateObserver?
|
|
23
23
|
private var audioEngine: AVAudioEngine?
|
|
24
24
|
private var autoStopper: AutoStopper?
|
|
@@ -29,9 +29,6 @@ class RecognizerEngine {
|
|
|
29
29
|
init(locale: Locale, delegate: RecognizerDelegate) {
|
|
30
30
|
self.locale = locale
|
|
31
31
|
self.recognizerDelegate = delegate
|
|
32
|
-
self.audioLevelTracker = AudioLevelTracker(
|
|
33
|
-
resetAutoFinishVoiceSensitivity: delegate.config?.resetAutoFinishVoiceSensitivity
|
|
34
|
-
)
|
|
35
32
|
}
|
|
36
33
|
|
|
37
34
|
// MARK: - Recognizer Methods
|
|
@@ -84,7 +81,10 @@ class RecognizerEngine {
|
|
|
84
81
|
format: hardwareFormat
|
|
85
82
|
) { [weak self] buffer, _ in
|
|
86
83
|
guard let self, let recognizerDelegate = self.recognizerDelegate else { return }
|
|
87
|
-
if let sample = self.audioLevelTracker.process(
|
|
84
|
+
if let sample = self.audioLevelTracker.process(
|
|
85
|
+
buffer,
|
|
86
|
+
recognizerDelegate.config?.resetAutoFinishVoiceSensitivity
|
|
87
|
+
) {
|
|
88
88
|
// Send buffer volume data
|
|
89
89
|
recognizerDelegate.volumeChange(
|
|
90
90
|
event:
|
|
@@ -148,13 +148,7 @@ class RecognizerEngine {
|
|
|
148
148
|
from: "updateSession"
|
|
149
149
|
)
|
|
150
150
|
}
|
|
151
|
-
|
|
152
|
-
if let newSensitivity = newConfig?.resetAutoFinishVoiceSensitivity,
|
|
153
|
-
newSensitivity != currentConfig?.resetAutoFinishVoiceSensitivity {
|
|
154
|
-
audioLevelTracker.updateResetAutoFinishVoiceSensitivity(
|
|
155
|
-
newValue: newSensitivity
|
|
156
|
-
)
|
|
157
|
-
}
|
|
151
|
+
|
|
158
152
|
if let addMsToTimer {
|
|
159
153
|
// Add time to the timer once
|
|
160
154
|
autoStopper?.addMsOnce(
|
|
@@ -168,7 +162,16 @@ class RecognizerEngine {
|
|
|
168
162
|
// Only update new non-nil values in the config
|
|
169
163
|
recognizerDelegate.softlyUpdateConfig(newConfig: newConfig)
|
|
170
164
|
}
|
|
171
|
-
|
|
165
|
+
|
|
166
|
+
func getVoiceInputVolume() -> VolumeChangeEvent? {
|
|
167
|
+
guard let currentSample = audioLevelTracker.currentSample else { return nil }
|
|
168
|
+
return VolumeChangeEvent(
|
|
169
|
+
smoothedVolume: currentSample.smoothed,
|
|
170
|
+
rawVolume: currentSample.raw,
|
|
171
|
+
db: currentSample.db
|
|
172
|
+
)
|
|
173
|
+
}
|
|
174
|
+
|
|
172
175
|
func cleanup(from: String) {
|
|
173
176
|
lg.log("[cleanup]: \(from)")
|
|
174
177
|
let wasActive = isActive
|
|
@@ -68,6 +68,14 @@ class HybridRecognizer: HybridRecognizerSpec {
|
|
|
68
68
|
func getIsActive() -> Bool {
|
|
69
69
|
engine?.isActive ?? false
|
|
70
70
|
}
|
|
71
|
+
|
|
72
|
+
func getVoiceInputVolume() -> VolumeChangeEvent {
|
|
73
|
+
return engine?.getVoiceInputVolume() ?? VolumeChangeEvent(
|
|
74
|
+
smoothedVolume: 0,
|
|
75
|
+
rawVolume: 0,
|
|
76
|
+
db: nil
|
|
77
|
+
)
|
|
78
|
+
}
|
|
71
79
|
|
|
72
80
|
func getSupportedLocalesIOS() -> [String] {
|
|
73
81
|
return self.coordinator.getSupportedLocales()
|
|
@@ -5,7 +5,7 @@ final class AutoStopper {
|
|
|
5
5
|
private static let defaultProgressIntervalMs = 1000.0
|
|
6
6
|
private static let minProgressIntervalMs = 50.0
|
|
7
7
|
|
|
8
|
-
private let lg = Lg(prefix: "AutoStopper", disable:
|
|
8
|
+
private let lg = Lg(prefix: "AutoStopper", disable: false)
|
|
9
9
|
|
|
10
10
|
private let queue = DispatchQueue(label: "com.margelo.nitrospeech.autostopper")
|
|
11
11
|
|
|
@@ -1,13 +1,17 @@
|
|
|
1
|
-
import { recognizerAddAutoFinishTime, recognizerGetSupportedLocalesIOS, recognizerGetIsActive, recognizerResetAutoFinishTime, recognizerStartListening, recognizerStopListening, recognizerUpdateConfig, } from './methods';
|
|
1
|
+
import { recognizerAddAutoFinishTime, recognizerGetSupportedLocalesIOS, recognizerGetIsActive, recognizerResetAutoFinishTime, recognizerStartListening, recognizerStopListening, recognizerUpdateConfig, recognizerGetVoiceInputVolume, recognizerPrewarm, } from './methods';
|
|
2
2
|
/**
|
|
3
3
|
* Safe cross-component reference to the Speech Recognizer methods.
|
|
4
|
+
*
|
|
5
|
+
* All methods support worklets and UI thread calls
|
|
4
6
|
*/
|
|
5
7
|
export const RecognizerRef = {
|
|
8
|
+
prewarm: recognizerPrewarm,
|
|
6
9
|
startListening: recognizerStartListening,
|
|
7
10
|
stopListening: recognizerStopListening,
|
|
8
11
|
resetAutoFinishTime: recognizerResetAutoFinishTime,
|
|
9
12
|
addAutoFinishTime: recognizerAddAutoFinishTime,
|
|
10
13
|
updateConfig: recognizerUpdateConfig,
|
|
11
14
|
getIsActive: recognizerGetIsActive,
|
|
15
|
+
getVoiceInputVolume: recognizerGetVoiceInputVolume,
|
|
12
16
|
getSupportedLocalesIOS: recognizerGetSupportedLocalesIOS,
|
|
13
17
|
};
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Direct access to all Speech Recognizer methods and callbacks.
|
|
5
5
|
*
|
|
6
|
-
* @note
|
|
6
|
+
* @note Unsafe, might lead to race conditions
|
|
7
|
+
* @warning Since it reflects the original hybrid object, its API may change in the future.
|
|
7
8
|
*/
|
|
8
9
|
export declare const SpeechRecognizer: import("./types").RecognizerSpec;
|
|
@@ -4,6 +4,7 @@ import { NitroSpeech } from '../NitroSpeech';
|
|
|
4
4
|
*
|
|
5
5
|
* Direct access to all Speech Recognizer methods and callbacks.
|
|
6
6
|
*
|
|
7
|
-
* @note
|
|
7
|
+
* @note Unsafe, might lead to race conditions
|
|
8
|
+
* @warning Since it reflects the original hybrid object, its API may change in the future.
|
|
8
9
|
*/
|
|
9
10
|
export const SpeechRecognizer = NitroSpeech.recognizer;
|