@gmessier/nitro-speech 0.3.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +176 -148
- package/android/build.gradle +0 -1
- package/android/src/main/cpp/cpp-adapter.cpp +5 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +82 -18
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +118 -30
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/Logger.kt +16 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +35 -24
- package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
- package/ios/Audio/AudioLevelTracker.swift +60 -0
- package/ios/Coordinator.swift +105 -0
- package/ios/Engines/AnalyzerEngine.swift +241 -0
- package/ios/Engines/DictationRuntime.swift +67 -0
- package/ios/Engines/RecognizerEngine.swift +315 -0
- package/ios/Engines/SFSpeechEngine.swift +119 -0
- package/ios/Engines/SpeechRuntime.swift +58 -0
- package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
- package/ios/HybridNitroSpeech.swift +1 -10
- package/ios/HybridRecognizer.swift +142 -191
- package/ios/LocaleManager.swift +73 -0
- package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
- package/ios/Shared/AutoStopper.swift +147 -0
- package/ios/Shared/HapticImpact.swift +24 -0
- package/ios/Shared/Log.swift +41 -0
- package/ios/Shared/Permissions.swift +59 -0
- package/ios/Shared/Utils.swift +58 -0
- package/lib/NitroSpeech.d.ts +2 -0
- package/lib/NitroSpeech.js +2 -0
- package/lib/Recognizer/RecognizerRef.d.ts +7 -0
- package/lib/Recognizer/RecognizerRef.js +16 -0
- package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
- package/lib/Recognizer/SpeechRecognizer.js +9 -0
- package/lib/Recognizer/methods.d.ts +9 -0
- package/lib/Recognizer/methods.js +33 -0
- package/lib/Recognizer/types.d.ts +6 -0
- package/lib/Recognizer/types.js +1 -0
- package/lib/Recognizer/useRecognizer.d.ts +16 -0
- package/lib/Recognizer/useRecognizer.js +71 -0
- package/lib/Recognizer/useRecognizerIsActive.d.ts +25 -0
- package/lib/Recognizer/useRecognizerIsActive.js +40 -0
- package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
- package/lib/Recognizer/useVoiceInputVolume.js +52 -0
- package/lib/index.d.ts +7 -0
- package/lib/index.js +7 -0
- package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
- package/lib/specs/NitroSpeech.nitro.js +1 -0
- package/lib/specs/Recognizer.nitro.d.ts +97 -0
- package/lib/specs/Recognizer.nitro.js +1 -0
- package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
- package/lib/specs/SpeechRecognitionConfig.js +1 -0
- package/lib/specs/VolumeChangeEvent.d.ts +31 -0
- package/lib/specs/VolumeChangeEvent.js +1 -0
- package/nitro.json +0 -4
- package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
- package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +4 -2
- package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
- package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +73 -19
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +8 -4
- package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
- package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
- package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
- package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +22 -5
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +211 -69
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +49 -9
- package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +7 -3
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +78 -18
- package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
- package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
- package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
- package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +5 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +18 -7
- package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
- package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
- package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
- package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
- package/package.json +15 -16
- package/src/NitroSpeech.ts +5 -0
- package/src/Recognizer/RecognizerRef.ts +27 -0
- package/src/Recognizer/SpeechRecognizer.ts +10 -0
- package/src/Recognizer/methods.ts +45 -0
- package/src/Recognizer/types.ts +34 -0
- package/src/Recognizer/useRecognizer.ts +87 -0
- package/src/Recognizer/useRecognizerIsActive.ts +49 -0
- package/src/Recognizer/useVoiceInputVolume.ts +65 -0
- package/src/index.ts +13 -182
- package/src/specs/NitroSpeech.nitro.ts +2 -163
- package/src/specs/Recognizer.nitro.ts +113 -0
- package/src/specs/SpeechRecognitionConfig.ts +167 -0
- package/src/specs/VolumeChangeEvent.ts +31 -0
- package/android/proguard-rules.pro +0 -1
- package/ios/AnylyzerTranscriber.swift +0 -331
- package/ios/AutoStopper.swift +0 -69
- package/ios/HapticImpact.swift +0 -32
- package/ios/LegacySpeechRecognizer.swift +0 -161
- package/lib/commonjs/index.js +0 -145
- package/lib/commonjs/index.js.map +0 -1
- package/lib/commonjs/package.json +0 -1
- package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
- package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/module/index.js +0 -138
- package/lib/module/index.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/specs/NitroSpeech.nitro.js +0 -4
- package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/tsconfig.tsbuildinfo +0 -1
- package/lib/typescript/index.d.ts +0 -50
- package/lib/typescript/index.d.ts.map +0 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
|
@@ -2,38 +2,102 @@ package com.margelo.nitro.nitrospeech.recognizer
|
|
|
2
2
|
|
|
3
3
|
import android.os.Handler
|
|
4
4
|
import android.os.Looper
|
|
5
|
-
import
|
|
5
|
+
import kotlin.math.max
|
|
6
6
|
|
|
7
|
-
class AutoStopper
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
/**
 * Silence countdown that reports progress and fires a timeout callback.
 *
 * The countdown starts from `silenceThresholdMs` every time [resetTimer] is called and is driven
 * by a [Handler] bound to the main looper, so [onProgress] and [onTimeout] are invoked from
 * callbacks posted to that looper.
 *
 * @param silenceThresholdMs length of the silence window in milliseconds; `null` selects the
 *   default of 8000 ms. Values are clamped by [clampMs].
 * @param progressIntervalMs spacing of progress callbacks in milliseconds; `null` selects the
 *   default of 1000 ms. Values are clamped by [clampMs].
 * @param onProgress receives the remaining time in milliseconds while the countdown is running.
 * @param onTimeout invoked once the remaining time reaches zero. It is public because other
 *   classes in this package invoke it directly after stopping the countdown.
 */
class AutoStopper(
    silenceThresholdMs: Double?,
    progressIntervalMs: Double?,
    private val onProgress: (Double) -> Unit,
    val onTimeout: () -> Unit,
) {
    companion object {
        private const val DEFAULT_SILENCE_THRESHOLD_MS = 8000.0
        private const val DEFAULT_PROGRESS_INTERVAL_MS = 1000.0
        private const val MIN_PROGRESS_INTERVAL_MS = 50.0
    }

    private val logger = Logger(disable = false)

    private var silenceThresholdMs: Double = clampMs(silenceThresholdMs ?: DEFAULT_SILENCE_THRESHOLD_MS)
    private var progressIntervalMs: Double = clampMs(progressIntervalMs ?: DEFAULT_PROGRESS_INTERVAL_MS)

    // Remaining time of the current countdown.
    private var timeLeftMs: Double = this.silenceThresholdMs

    // Length of the tick that is currently posted; tick() subtracts exactly this amount so the
    // bookkeeping matches the delay that was really requested from the handler.
    private var pendingStepMs: Double = 0.0

    private var isStopped = false
    private var didTimeout = false
    private var isTimerScheduled = false

    private val handler = Handler(Looper.getMainLooper())

    private val tickRunnable = Runnable { tick() }

    /**
     * Restarts the countdown from the current silence threshold.
     *
     * Any pending tick is cancelled first. After [stop] this only cancels and returns.
     */
    fun resetTimer() {
        logger.log("resetTimer | isStopped: $isStopped | ms: ${System.currentTimeMillis()}")
        handler.removeCallbacks(tickRunnable)
        isTimerScheduled = false
        if (isStopped) return
        didTimeout = false
        timeLeftMs = silenceThresholdMs
        if (timeLeftMs > 0) {
            onProgress(timeLeftMs)
        }
        scheduleNextTickLocked()
    }

    /** Permanently stops the countdown; later resets and updates become no-ops. */
    fun stop() {
        isStopped = true
        handler.removeCallbacks(tickRunnable)
        isTimerScheduled = false
    }

    /**
     * Stores a new silence threshold. The running countdown is not touched; the new value is
     * picked up by the next [resetTimer].
     */
    fun updateSilenceThreshold(newThresholdMs: Double) {
        silenceThresholdMs = clampMs(newThresholdMs)
    }

    /**
     * Adds [extraMs] to the running countdown without changing the stored threshold.
     * Non-finite values are ignored.
     */
    fun addMsOnce(extraMs: Double) {
        if (isStopped || !extraMs.isFinite()) return
        logger.log("addMsOnce | extraMs: $extraMs")
        timeLeftMs += extraMs
        didTimeout = false
        if (timeLeftMs > 0 && isTimerScheduled) {
            onProgress(timeLeftMs)
        }
    }

    /**
     * Changes the spacing of progress callbacks and, if a tick is pending, re-posts it with the
     * new spacing.
     */
    fun updateProgressInterval(newIntervalMs: Double) {
        if (isStopped) return
        logger.log("updateProgressInterval | newIntervalMs: $newIntervalMs")
        progressIntervalMs = clampMs(newIntervalMs)
        if (isTimerScheduled) {
            // NOTE(review): time already elapsed in the cancelled tick is not accounted for.
            scheduleNextTickLocked()
        }
    }

    /**
     * Posts the next tick.
     *
     * The delay is capped at the remaining time so the timeout fires when the silence window
     * ends, even when the threshold is not a multiple of the progress interval. Previously the
     * final tick always waited a full interval, which delayed the timeout by up to one interval.
     */
    private fun scheduleNextTickLocked() {
        handler.removeCallbacks(tickRunnable)
        pendingStepMs = progressIntervalMs.coerceAtMost(timeLeftMs).coerceAtLeast(0.0)
        handler.postDelayed(tickRunnable, pendingStepMs.toLong())
        isTimerScheduled = true
    }

    /** Consumes the step that just elapsed, then reports progress or fires the timeout. */
    private fun tick() {
        if (isStopped || didTimeout) return
        timeLeftMs -= pendingStepMs
        if (timeLeftMs > 0) {
            logger.log("onProgress | timeLeftMs: $timeLeftMs")
            onProgress(timeLeftMs)
            scheduleNextTickLocked()
            return
        }
        timeLeftMs = 0.0
        didTimeout = true
        handler.removeCallbacks(tickRunnable)
        isTimerScheduled = false
        logger.log("onTimeout | ms: ${System.currentTimeMillis()}")
        onTimeout()
    }

    /** Returns [value] raised to at least 50 ms; non-finite input also yields 50 ms. */
    private fun clampMs(value: Double): Double {
        if (!value.isFinite()) return MIN_PROGRESS_INTERVAL_MS
        return max(MIN_PROGRESS_INTERVAL_MS, value)
    }
}
|
|
@@ -7,21 +7,27 @@ import android.os.Handler
|
|
|
7
7
|
import android.os.Looper
|
|
8
8
|
import android.speech.RecognizerIntent
|
|
9
9
|
import android.speech.SpeechRecognizer
|
|
10
|
-
import android.util.Log
|
|
11
10
|
import androidx.annotation.Keep
|
|
12
11
|
import com.facebook.proguard.annotations.DoNotStrip
|
|
13
12
|
import com.margelo.nitro.NitroModules
|
|
13
|
+
import com.margelo.nitro.core.Promise
|
|
14
|
+
import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
|
|
14
15
|
import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
|
|
15
|
-
import com.margelo.nitro.nitrospeech.
|
|
16
|
+
import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
|
|
17
|
+
import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
16
18
|
|
|
19
|
+
@DoNotStrip
|
|
20
|
+
@Keep
|
|
17
21
|
class HybridRecognizer: HybridRecognizerSpec() {
|
|
18
22
|
companion object {
|
|
19
|
-
private const val TAG = "HybridRecognizer"
|
|
20
23
|
private const val POST_RECOGNITION_DELAY = 250L
|
|
21
24
|
}
|
|
22
25
|
|
|
26
|
+
private val logger = Logger(disable = false)
|
|
27
|
+
|
|
23
28
|
private var isActive: Boolean = false
|
|
24
|
-
private var config:
|
|
29
|
+
private var config: SpeechRecognitionConfig? = null
|
|
30
|
+
private var volumeChangeEvent: VolumeChangeEvent = VolumeChangeEvent(0.0,0.0,null)
|
|
25
31
|
private var autoStopper: AutoStopper? = null
|
|
26
32
|
private var speechRecognizer: SpeechRecognizer? = null
|
|
27
33
|
private val mainHandler = Handler(Looper.getMainLooper())
|
|
@@ -33,16 +39,20 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
33
39
|
override var onAutoFinishProgress: ((timeLeftMs: Double) -> Unit)? = null
|
|
34
40
|
override var onError: ((error: String) -> Unit)? = null
|
|
35
41
|
override var onPermissionDenied: (() -> Unit)? = null
|
|
36
|
-
override var onVolumeChange: ((
|
|
42
|
+
override var onVolumeChange: ((event: VolumeChangeEvent) -> Unit)? = null
|
|
37
43
|
|
|
38
|
-
|
|
39
|
-
|
|
44
|
+
@DoNotStrip
@Keep
/**
 * No-op on Android: there is nothing to prewarm.
 *
 * The returned promise is resolved before it is handed back. A bare `Promise()` is never
 * resolved or rejected here, so a caller awaiting it would wait forever.
 *
 * @param defaultParams ignored on Android.
 * @return an already-resolved promise.
 */
override fun prewarm(defaultParams: SpeechRecognitionConfig?): Promise<Unit> {
    return Promise<Unit>().apply { resolve(Unit) }
}
|
|
41
51
|
|
|
42
52
|
@DoNotStrip
|
|
43
53
|
@Keep
|
|
44
|
-
override fun startListening(params:
|
|
45
|
-
|
|
54
|
+
override fun startListening(params: SpeechRecognitionConfig?) {
|
|
55
|
+
logger.log("startListening: $params")
|
|
46
56
|
if (isActive) {
|
|
47
57
|
onFinishRecognition(
|
|
48
58
|
null,
|
|
@@ -85,7 +95,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
85
95
|
@DoNotStrip
|
|
86
96
|
@Keep
|
|
87
97
|
override fun stopListening() {
|
|
88
|
-
|
|
98
|
+
logger.log("stopListening called")
|
|
89
99
|
if (!isActive) return
|
|
90
100
|
onFinishRecognition(null, null, true)
|
|
91
101
|
mainHandler.postDelayed({
|
|
@@ -98,25 +108,90 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
98
108
|
}, POST_RECOGNITION_DELAY)
|
|
99
109
|
}
|
|
100
110
|
|
|
111
|
+
@DoNotStrip
@Keep
/** Restarts the auto-finish countdown; does nothing while no session is active. */
override fun resetAutoFinishTime() {
    if (isActive) {
        autoStopper?.resetTimer()
    }
}
|
|
117
|
+
|
|
101
118
|
@DoNotStrip
@Keep
/**
 * Extends the running auto-finish countdown.
 *
 * @param additionalTimeMs milliseconds to add once; when `null` the countdown is restarted
 *   from its baseline instead. Ignored while no session is active.
 */
override fun addAutoFinishTime(additionalTimeMs: Double?) {
    logger.log("addAutoFinishTime")
    if (!isActive) return

    val stopper = autoStopper ?: return
    when (additionalTimeMs) {
        // No amount given: fall back to a plain restart of the countdown.
        null -> stopper.resetTimer()
        else -> stopper.addMsOnce(additionalTimeMs)
    }
}
|
|
108
131
|
|
|
109
132
|
@DoNotStrip
@Keep
/**
 * Applies the mutable part of the configuration to the active session.
 *
 * Fields left `null` in [newConfig] keep their current value. Changed auto-finish values are
 * forwarded to the auto stopper, and the stored config is rebuilt with the merged values.
 * Fields not listed as mutable are copied from the current config.
 *
 * @param newConfig partial configuration; `null` leaves the stored config untouched.
 * @param resetAutoFinishTime when `true`, the countdown is restarted after the new values
 *   were forwarded.
 */
override fun updateConfig(
    newConfig: MutableSpeechRecognitionConfig?,
    resetAutoFinishTime: Boolean?
) {
    logger.log("updateConfig $newConfig")
    if (!isActive) return

    val current = config
    val mergedFinishMs = newConfig?.autoFinishRecognitionMs ?: current?.autoFinishRecognitionMs
    val mergedIntervalMs = newConfig?.autoFinishProgressIntervalMs ?: current?.autoFinishProgressIntervalMs

    // Forward only values that actually differ from the stored config.
    if (mergedFinishMs != null && mergedFinishMs != current?.autoFinishRecognitionMs) {
        autoStopper?.updateSilenceThreshold(mergedFinishMs)
    }
    if (mergedIntervalMs != null && mergedIntervalMs != current?.autoFinishProgressIntervalMs) {
        autoStopper?.updateProgressInterval(mergedIntervalMs)
    }
    if (resetAutoFinishTime == true) {
        autoStopper?.resetTimer()
    }

    if (newConfig == null) return

    config = SpeechRecognitionConfig(
        locale = current?.locale,
        contextualStrings = current?.contextualStrings,
        maskOffensiveWords = current?.maskOffensiveWords,
        autoFinishRecognitionMs = mergedFinishMs,
        autoFinishProgressIntervalMs = mergedIntervalMs,
        resetAutoFinishVoiceSensitivity = newConfig.resetAutoFinishVoiceSensitivity ?: current?.resetAutoFinishVoiceSensitivity,
        disableRepeatingFilter = newConfig.disableRepeatingFilter ?: current?.disableRepeatingFilter,
        startHapticFeedbackStyle = newConfig.startHapticFeedbackStyle ?: current?.startHapticFeedbackStyle,
        stopHapticFeedbackStyle = newConfig.stopHapticFeedbackStyle ?: current?.stopHapticFeedbackStyle,
        androidFormattingPreferQuality = current?.androidFormattingPreferQuality,
        androidUseWebSearchModel = current?.androidUseWebSearchModel,
        androidDisableBatchHandling = current?.androidDisableBatchHandling,
        iosAddPunctuation = current?.iosAddPunctuation,
        iosPreset = current?.iosPreset,
        iosAtypicalSpeech = current?.iosAtypicalSpeech
    )
}
|
|
174
|
+
|
|
175
|
+
@DoNotStrip
@Keep
/** Reports whether a recognition session is currently active. */
override fun getIsActive(): Boolean = isActive
|
|
119
180
|
|
|
181
|
+
@DoNotStrip
@Keep
/** Returns the most recently stored volume event. */
override fun getVoiceInputVolume(): VolumeChangeEvent = volumeChangeEvent
|
|
186
|
+
|
|
187
|
+
@DoNotStrip
@Keep
/** iOS-only query; the Android implementation always returns an empty array. */
override fun getSupportedLocalesIOS(): Array<String> = emptyArray()
|
|
192
|
+
|
|
193
|
+
@DoNotStrip
@Keep
/** Releases the recognizer by delegating to [stopListening]. */
override fun dispose() = stopListening()
|
|
@@ -125,19 +200,25 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
125
200
|
mainHandler.post {
|
|
126
201
|
try {
|
|
127
202
|
speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
|
|
128
|
-
val silenceThreshold = config?.autoFinishRecognitionMs?.toLong() ?: 8000
|
|
129
203
|
autoStopper = AutoStopper(
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
204
|
+
silenceThresholdMs = config?.autoFinishRecognitionMs,
|
|
205
|
+
progressIntervalMs = config?.autoFinishProgressIntervalMs,
|
|
206
|
+
onProgress = { timeLeftMs ->
|
|
207
|
+
onAutoFinishProgress?.invoke(timeLeftMs)
|
|
208
|
+
},
|
|
209
|
+
onTimeout = {
|
|
210
|
+
stopListening()
|
|
211
|
+
}
|
|
212
|
+
)
|
|
134
213
|
val recognitionListenerSession = RecognitionListenerSession(
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
214
|
+
autoStopper,
|
|
215
|
+
config,
|
|
216
|
+
fireVolumeChangeEvent = { event -> fireVolumeChangeEvent(event) },
|
|
217
|
+
onFinishRecognition = { result, errorMessage, recordingStopped ->
|
|
218
|
+
onFinishRecognition(result, errorMessage, recordingStopped)
|
|
219
|
+
}
|
|
220
|
+
)
|
|
221
|
+
|
|
141
222
|
speechRecognizer?.setRecognitionListener(recognitionListenerSession.createRecognitionListener())
|
|
142
223
|
|
|
143
224
|
val languageModel = if (config?.androidUseWebSearchModel == true) RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH else RecognizerIntent.LANGUAGE_MODEL_FREE_FORM
|
|
@@ -175,6 +256,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
175
256
|
if (isActive) {
|
|
176
257
|
onReadyForSpeech?.invoke()
|
|
177
258
|
onFinishRecognition(arrayListOf(), null, false)
|
|
259
|
+
autoStopper?.resetTimer()
|
|
178
260
|
}
|
|
179
261
|
}, 500)
|
|
180
262
|
} catch (e: Exception) {
|
|
@@ -189,7 +271,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
189
271
|
|
|
190
272
|
private fun cleanup() {
|
|
191
273
|
try {
|
|
192
|
-
|
|
274
|
+
logger.log("cleanup called")
|
|
193
275
|
autoStopper?.stop()
|
|
194
276
|
autoStopper = null
|
|
195
277
|
speechRecognizer?.stopListening()
|
|
@@ -197,7 +279,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
197
279
|
speechRecognizer = null
|
|
198
280
|
isActive = false
|
|
199
281
|
// Reset voice meter in JS consumers after stop/error cleanup.
|
|
200
|
-
|
|
282
|
+
fireVolumeChangeEvent(VolumeChangeEvent(0.0,0.0,null))
|
|
201
283
|
} catch (e: Exception) {
|
|
202
284
|
onFinishRecognition(
|
|
203
285
|
null,
|
|
@@ -218,4 +300,10 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
218
300
|
onResult?.invoke(result.toTypedArray())
|
|
219
301
|
}
|
|
220
302
|
}
|
|
303
|
+
|
|
304
|
+
/**
 * Stores [event] as the latest volume reading, then forwards it to the `onVolumeChange`
 * listener when one is set.
 */
private fun fireVolumeChangeEvent(event: VolumeChangeEvent) {
    logger.log("fireVolumeChangeEvent ${event}")
    volumeChangeEvent = event
    val listener = onVolumeChange
    if (listener != null) {
        listener(event)
    }
}
|
|
221
309
|
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
package com.margelo.nitro.nitrospeech.recognizer
|
|
2
|
+
|
|
3
|
+
import android.util.Log
|
|
4
|
+
|
|
5
|
+
/**
 * Thin wrapper around `Log.d` with a fixed tag.
 *
 * A message is written only when the instance was not created with `disable = true` AND the
 * internal `isLogging` switch is on. That switch is hard-coded to `false`, so as written no
 * message is emitted.
 *
 * @param disable turns this particular logger instance off.
 */
class Logger(private val disable: Boolean) {
    companion object {
        private const val TAG = "HybridRecognizer"
    }

    // Master switch for debug output; flip to true locally to see the logs.
    private val isLogging = false

    /** Writes [message] at debug level when logging is enabled. */
    fun log(message: String) {
        val enabled = !disable && isLogging
        if (enabled) {
            Log.d(TAG, message)
        }
    }
}
|
package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt
CHANGED
|
@@ -3,20 +3,20 @@ package com.margelo.nitro.nitrospeech.recognizer
|
|
|
3
3
|
import android.os.Bundle
|
|
4
4
|
import android.speech.RecognitionListener
|
|
5
5
|
import android.speech.SpeechRecognizer
|
|
6
|
-
import
|
|
7
|
-
import com.margelo.nitro.nitrospeech.
|
|
6
|
+
import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
|
|
7
|
+
import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
8
8
|
import kotlin.math.max
|
|
9
9
|
import kotlin.math.roundToInt
|
|
10
10
|
|
|
11
11
|
class RecognitionListenerSession (
|
|
12
12
|
private val autoStopper: AutoStopper?,
|
|
13
|
-
private val config:
|
|
14
|
-
private val
|
|
13
|
+
private val config: SpeechRecognitionConfig?,
|
|
14
|
+
private val fireVolumeChangeEvent: (event: VolumeChangeEvent) -> Unit,
|
|
15
15
|
private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
|
|
16
16
|
) {
|
|
17
|
+
private val logger = Logger(disable = false)
|
|
17
18
|
companion object {
|
|
18
|
-
private const val
|
|
19
|
-
private const val SPEECH_LEVEL_THRESHOLD = 0.08f
|
|
19
|
+
private const val SPEECH_LEVEL_THRESHOLD = 0.35
|
|
20
20
|
private const val FLOOR_RISE_ALPHA = 0.01f
|
|
21
21
|
private const val FLOOR_FALL_ALPHA = 0.20f
|
|
22
22
|
private const val PEAK_ATTACK_ALPHA = 0.25f
|
|
@@ -38,10 +38,14 @@ class RecognitionListenerSession (
|
|
|
38
38
|
override fun onReadyForSpeech(params: Bundle?) {}
|
|
39
39
|
override fun onBeginningOfSpeech() {}
|
|
40
40
|
/**
 * Publishes the volume derived from [rmsdB] and restarts the auto-finish countdown when the
 * raw level is above the configured sensitivity.
 */
override fun onRmsChanged(rmsdB: Float) {
    val event = getVolume(rmsdB)
    fireVolumeChangeEvent(event)

    // The configured sensitivity is limited to 0..1; without one the built-in default applies.
    val configured = config?.resetAutoFinishVoiceSensitivity?.coerceIn(0.0, 1.0)
    val threshold = configured ?: SPEECH_LEVEL_THRESHOLD.toDouble()
    // logger.log("onRmsChanged: ${volumeEvent}")

    val isLoudEnough = event.rawVolume > threshold
    if (isLoudEnough) {
        autoStopper?.resetTimer()
    }
}
|
|
47
51
|
override fun onBufferReceived(buffer: ByteArray?) {}
|
|
@@ -66,40 +70,40 @@ class RecognitionListenerSession (
|
|
|
66
70
|
true
|
|
67
71
|
)
|
|
68
72
|
autoStopper?.stop()
|
|
69
|
-
autoStopper?.
|
|
73
|
+
autoStopper?.onTimeout()
|
|
70
74
|
}
|
|
71
75
|
|
|
72
76
|
/**
 * Delivers the accumulated batches as the final result, then stops the auto stopper and
 * invokes its timeout callback.
 */
override fun onResults(results: Bundle?) {
    logger.log("onResults: $resultBatches")
    onFinishRecognition(resultBatches, null, true)
    autoStopper?.let { stopper ->
        stopper.stop()
        stopper.onTimeout()
    }
}
|
|
78
82
|
|
|
79
83
|
override fun onPartialResults(partialResults: Bundle?) {
|
|
80
|
-
autoStopper?.indicateRecordingActivity()
|
|
81
84
|
val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
|
|
82
85
|
|
|
83
86
|
if (matches.isNullOrEmpty() || matches[0] == "") {
|
|
84
|
-
|
|
87
|
+
logger.log("onPartialResults[0], skip, NO RECOGNIZE")
|
|
85
88
|
return
|
|
86
89
|
}
|
|
87
90
|
|
|
88
|
-
|
|
91
|
+
autoStopper?.resetTimer()
|
|
92
|
+
logger.log("onPartialResults[0], add ${matches[0]}")
|
|
89
93
|
var currentBatches = resultBatches
|
|
90
94
|
if (currentBatches.isNullOrEmpty()) {
|
|
91
|
-
|
|
95
|
+
logger.log("onPartialResults[1], NO BATCHES YET | add first")
|
|
92
96
|
currentBatches = arrayListOf(matches[0])
|
|
93
97
|
} else {
|
|
94
|
-
|
|
98
|
+
logger.log("onPartialResults[1], current batches $currentBatches")
|
|
95
99
|
val prevBatchLength = currentBatches[currentBatches.lastIndex].length
|
|
96
100
|
val match = if (config?.disableRepeatingFilter == true) matches[0] else repeatingFilter(matches[0])
|
|
97
101
|
val matchLength = match.length
|
|
98
102
|
if (config?.androidDisableBatchHandling == true || matchLength + 3 < prevBatchLength) {
|
|
99
|
-
|
|
103
|
+
logger.log("onPartialResults[2], append new batch")
|
|
100
104
|
currentBatches.add(match)
|
|
101
105
|
} else {
|
|
102
|
-
|
|
106
|
+
logger.log("onPartialResults[2], update batch, replace #${currentBatches.lastIndex}")
|
|
103
107
|
currentBatches[currentBatches.lastIndex] = match
|
|
104
108
|
}
|
|
105
109
|
}
|
|
@@ -144,9 +148,9 @@ class RecognitionListenerSession (
|
|
|
144
148
|
return joiner.toString()
|
|
145
149
|
}
|
|
146
150
|
|
|
147
|
-
private fun
|
|
151
|
+
private fun getVolume(rmsdB: Float): VolumeChangeEvent {
|
|
148
152
|
if (!rmsdB.isFinite()) {
|
|
149
|
-
return 0.0
|
|
153
|
+
return VolumeChangeEvent(0.0,0.0,null)
|
|
150
154
|
}
|
|
151
155
|
|
|
152
156
|
if (noiseFloorDb.isNaN()) {
|
|
@@ -166,7 +170,14 @@ class RecognitionListenerSession (
|
|
|
166
170
|
val raw = ((rmsdB - noiseFloorDb) / span).coerceIn(0f, 1f)
|
|
167
171
|
val smoothingCoeff = if (raw > levelSmoothed) METER_ATTACK else METER_RELEASE
|
|
168
172
|
levelSmoothed += smoothingCoeff * (raw - levelSmoothed)
|
|
169
|
-
|
|
170
|
-
|
|
173
|
+
val roundedSmoothed = ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
|
|
174
|
+
val roundedRaw = ((raw * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
|
|
175
|
+
val db = (rmsdB * 1000).roundToInt() / 1000.0
|
|
176
|
+
|
|
177
|
+
return VolumeChangeEvent(
|
|
178
|
+
smoothedVolume = roundedSmoothed,
|
|
179
|
+
rawVolume = roundedRaw,
|
|
180
|
+
db = db
|
|
181
|
+
)
|
|
171
182
|
}
|
|
172
183
|
}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import Foundation
|
|
2
2
|
import AVFoundation
|
|
3
|
-
import Accelerate
|
|
4
3
|
|
|
5
4
|
private final class SendablePCMBufferBox: @unchecked Sendable {
|
|
6
5
|
let buffer: AVAudioPCMBuffer
|
|
@@ -10,42 +9,12 @@ private final class SendablePCMBufferBox: @unchecked Sendable {
|
|
|
10
9
|
}
|
|
11
10
|
}
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
private static let meterMaxDb: Float = -10 // loud speech ceiling
|
|
16
|
-
private static let meterAttack: Float = 0.35 // rise speed
|
|
17
|
-
private static let meterRelease: Float = 0.08 // fall speed
|
|
18
|
-
|
|
19
|
-
func calcRmsVolume(
|
|
20
|
-
levelSmoothed: Float,
|
|
21
|
-
buffer: AVAudioPCMBuffer
|
|
22
|
-
) -> (Float, Float)? {
|
|
23
|
-
guard let samples = buffer.floatChannelData?[0] else { return nil }
|
|
24
|
-
|
|
25
|
-
let frameL = Int(buffer.frameLength)
|
|
26
|
-
var rms: Float = 0
|
|
27
|
-
|
|
28
|
-
vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameL))
|
|
29
|
-
|
|
30
|
-
// 2) RMS -> dBFS
|
|
31
|
-
let db = 20 * log10(rms + 0.00001)
|
|
32
|
-
|
|
33
|
-
// 3) Normalize dB to 0...1
|
|
34
|
-
let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
|
|
35
|
-
let normalized = max(0, min(1, raw))
|
|
36
|
-
|
|
37
|
-
// 4) Smooth (fast attack, slow release)
|
|
38
|
-
let coeff = normalized > levelSmoothed ? Self.meterAttack : Self.meterRelease
|
|
39
|
-
let nextLevelSmoothed = levelSmoothed + coeff * (normalized - levelSmoothed)
|
|
40
|
-
|
|
41
|
-
return (rms, nextLevelSmoothed)
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
func convertBuffer(
|
|
12
|
+
enum AudioBufferConverter {
|
|
13
|
+
static func convertBuffer(
|
|
45
14
|
converter: AVAudioConverter,
|
|
46
15
|
audioFormat: AVAudioFormat,
|
|
47
16
|
pcmBuffer: AVAudioPCMBuffer
|
|
48
|
-
) throws -> AVAudioPCMBuffer?
|
|
17
|
+
) throws -> AVAudioPCMBuffer? {
|
|
49
18
|
let resampledCapacity = AVAudioFrameCount(
|
|
50
19
|
(Double(pcmBuffer.frameLength) * (audioFormat.sampleRate / pcmBuffer.format.sampleRate)).rounded(.up)
|
|
51
20
|
)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import AVFoundation
|
|
3
|
+
import Accelerate
|
|
4
|
+
|
|
5
|
+
/// One input-level measurement.
struct AudioLevelSample {
    /// Smoothed level.
    let smoothed: Double
    /// Unsmoothed level.
    let raw: Double
    /// Level in decibels.
    let db: Double
    /// Whether this sample should restart the auto-stop timer.
    let resetTimer: Bool
}
|
|
11
|
+
|
|
12
|
+
/// Turns PCM buffers into a normalized input level with fast-attack / slow-release smoothing.
///
/// Levels are mapped from the -70 dB ... -10 dB range onto 0...1. The tracker keeps the
/// smoothed level between calls; call `reset()` to start over.
final class AudioLevelTracker {
    private static let meterMinDb: Float = -70
    private static let meterMaxDb: Float = -10
    private static let meterAttack: Float = 0.35
    private static let meterRelease: Float = 0.08
    private static let defaultAutoStopResetThreshold: Double = 0.4

    private var smoothedLevel: Float = 0

    /// The sample produced by the most recent successful `process` call, or `nil` after `reset()`.
    var currentSample: AudioLevelSample?

    private let lg = Lg(prefix: "RecognizerEngine")

    /// Clears the smoothing state and the last sample.
    func reset() {
        smoothedLevel = 0
        currentSample = nil
    }

    /// Measures the level of `buffer` (first channel only) and updates `currentSample`.
    ///
    /// - Parameters:
    ///   - buffer: Audio to measure; must expose float channel data.
    ///   - autoStopResetThreshold: Normalized level (clamped to 0...1) at or above which the
    ///     sample asks for an auto-stop timer reset. Defaults to 0.4 when `nil`.
    /// - Returns: The new sample, or `nil` when the buffer has no float data, contains no
    ///   frames, or yields a non-finite RMS. In those cases the tracker state is left untouched.
    func process(_ buffer: AVAudioPCMBuffer,_ autoStopResetThreshold: Double? = nil) -> AudioLevelSample? {
        guard let samples = buffer.floatChannelData?[0] else { return nil }

        // An empty buffer has no defined RMS; skip it instead of feeding NaN into the meter.
        let frameCount = Int(buffer.frameLength)
        guard frameCount > 0 else { return nil }

        var rms: Float = 0
        vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameCount))

        // A NaN RMS is not caught by the max/min clamp below and would be reported as full
        // scale, resetting the auto-stop timer on garbage input.
        guard rms.isFinite else { return nil }

        // The small offset keeps log10 away from zero for silent buffers.
        let db = 20 * log10(rms + 0.00001)
        let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
        let normalized = max(0, min(1, raw))

        // Rise quickly, fall slowly.
        let coeff = normalized > smoothedLevel ? Self.meterAttack : Self.meterRelease
        smoothedLevel += coeff * (normalized - smoothedLevel)

        var threshold = Self.defaultAutoStopResetThreshold
        if let autoStopResetThreshold {
            threshold = max(0, min(1, autoStopResetThreshold))
        }

        currentSample = AudioLevelSample(
            smoothed: Double(smoothedLevel * 1_000_000).rounded() / 1_000_000,
            raw: Double(normalized * 1_000_000).rounded() / 1_000_000,
            db: Double(db * 1_000).rounded() / 1_000,
            resetTimer: Double(normalized) >= threshold
        )

        lg.log("[AudioLevelTracker.process] autoStopResetThreshold: \(threshold)")

        return currentSample
    }
}
|