@gmessier/nitro-speech 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +165 -148
- package/android/build.gradle +0 -1
- package/android/src/main/cpp/cpp-adapter.cpp +5 -1
- package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +80 -16
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +93 -20
- package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +27 -15
- package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
- package/ios/Audio/AudioLevelTracker.swift +66 -0
- package/ios/Coordinator.swift +105 -0
- package/ios/Engines/AnalyzerEngine.swift +241 -0
- package/ios/Engines/DictationRuntime.swift +67 -0
- package/ios/Engines/RecognizerEngine.swift +312 -0
- package/ios/Engines/SFSpeechEngine.swift +119 -0
- package/ios/Engines/SpeechRuntime.swift +58 -0
- package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
- package/ios/HybridNitroSpeech.swift +1 -10
- package/ios/HybridRecognizer.swift +135 -192
- package/ios/LocaleManager.swift +73 -0
- package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
- package/ios/Shared/AutoStopper.swift +147 -0
- package/ios/Shared/HapticImpact.swift +24 -0
- package/ios/Shared/Log.swift +41 -0
- package/ios/Shared/Permissions.swift +59 -0
- package/ios/Shared/Utils.swift +58 -0
- package/lib/NitroSpeech.d.ts +2 -0
- package/lib/NitroSpeech.js +2 -0
- package/lib/Recognizer/RecognizerRef.d.ts +5 -0
- package/lib/Recognizer/RecognizerRef.js +13 -0
- package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
- package/lib/Recognizer/SpeechRecognizer.js +9 -0
- package/lib/Recognizer/methods.d.ts +8 -0
- package/lib/Recognizer/methods.js +29 -0
- package/lib/Recognizer/types.d.ts +6 -0
- package/lib/Recognizer/types.js +1 -0
- package/lib/Recognizer/useRecognizer.d.ts +16 -0
- package/lib/Recognizer/useRecognizer.js +71 -0
- package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
- package/lib/Recognizer/useVoiceInputVolume.js +52 -0
- package/lib/index.d.ts +6 -0
- package/lib/index.js +6 -0
- package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
- package/lib/specs/NitroSpeech.nitro.js +1 -0
- package/lib/specs/Recognizer.nitro.d.ts +95 -0
- package/lib/specs/Recognizer.nitro.js +1 -0
- package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
- package/lib/specs/SpeechRecognitionConfig.js +1 -0
- package/lib/specs/VolumeChangeEvent.d.ts +31 -0
- package/lib/specs/VolumeChangeEvent.js +1 -0
- package/nitro.json +0 -4
- package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
- package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +4 -2
- package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
- package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +68 -19
- package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +7 -4
- package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
- package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
- package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
- package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -5
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +203 -70
- package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
- package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +41 -9
- package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
- package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +6 -3
- package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +66 -18
- package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
- package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
- package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
- package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
- package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +17 -7
- package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
- package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
- package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
- package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
- package/package.json +15 -16
- package/src/NitroSpeech.ts +5 -0
- package/src/Recognizer/RecognizerRef.ts +23 -0
- package/src/Recognizer/SpeechRecognizer.ts +10 -0
- package/src/Recognizer/methods.ts +40 -0
- package/src/Recognizer/types.ts +33 -0
- package/src/Recognizer/useRecognizer.ts +85 -0
- package/src/Recognizer/useVoiceInputVolume.ts +65 -0
- package/src/index.ts +6 -182
- package/src/specs/NitroSpeech.nitro.ts +2 -163
- package/src/specs/Recognizer.nitro.ts +110 -0
- package/src/specs/SpeechRecognitionConfig.ts +167 -0
- package/src/specs/VolumeChangeEvent.ts +31 -0
- package/android/proguard-rules.pro +0 -1
- package/ios/AnylyzerTranscriber.swift +0 -331
- package/ios/AutoStopper.swift +0 -69
- package/ios/HapticImpact.swift +0 -32
- package/ios/LegacySpeechRecognizer.swift +0 -161
- package/lib/commonjs/index.js +0 -145
- package/lib/commonjs/index.js.map +0 -1
- package/lib/commonjs/package.json +0 -1
- package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
- package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/module/index.js +0 -138
- package/lib/module/index.js.map +0 -1
- package/lib/module/package.json +0 -1
- package/lib/module/specs/NitroSpeech.nitro.js +0 -4
- package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
- package/lib/tsconfig.tsbuildinfo +0 -1
- package/lib/typescript/index.d.ts +0 -50
- package/lib/typescript/index.d.ts.map +0 -1
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
- package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
|
@@ -11,9 +11,14 @@ import android.util.Log
|
|
|
11
11
|
import androidx.annotation.Keep
|
|
12
12
|
import com.facebook.proguard.annotations.DoNotStrip
|
|
13
13
|
import com.margelo.nitro.NitroModules
|
|
14
|
+
import com.margelo.nitro.core.Promise
|
|
15
|
+
import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
|
|
14
16
|
import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
|
|
15
|
-
import com.margelo.nitro.nitrospeech.
|
|
17
|
+
import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
|
|
18
|
+
import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
16
19
|
|
|
20
|
+
@DoNotStrip
|
|
21
|
+
@Keep
|
|
17
22
|
class HybridRecognizer: HybridRecognizerSpec() {
|
|
18
23
|
companion object {
|
|
19
24
|
private const val TAG = "HybridRecognizer"
|
|
@@ -21,7 +26,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
21
26
|
}
|
|
22
27
|
|
|
23
28
|
private var isActive: Boolean = false
|
|
24
|
-
private var config:
|
|
29
|
+
private var config: SpeechRecognitionConfig? = null
|
|
25
30
|
private var autoStopper: AutoStopper? = null
|
|
26
31
|
private var speechRecognizer: SpeechRecognizer? = null
|
|
27
32
|
private val mainHandler = Handler(Looper.getMainLooper())
|
|
@@ -33,15 +38,19 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
33
38
|
override var onAutoFinishProgress: ((timeLeftMs: Double) -> Unit)? = null
|
|
34
39
|
override var onError: ((error: String) -> Unit)? = null
|
|
35
40
|
override var onPermissionDenied: (() -> Unit)? = null
|
|
36
|
-
override var onVolumeChange: ((
|
|
41
|
+
override var onVolumeChange: ((event: VolumeChangeEvent) -> Unit)? = null
|
|
37
42
|
|
|
38
|
-
|
|
39
|
-
|
|
43
|
+
@DoNotStrip
|
|
44
|
+
@Keep
|
|
45
|
+
/**
 * Prewarm hook required by the recognizer spec.
 *
 * Android has nothing to prepare ahead of time, so this completes immediately.
 *
 * @param defaultParams Ignored on Android.
 * @return A promise that is already resolved with [Unit].
 */
override fun prewarm(defaultParams: SpeechRecognitionConfig?): Promise<Unit> {
    // A bare `Promise()` is never resolved or rejected in this method, so a
    // caller awaiting it would wait indefinitely. Hand back a settled promise.
    return Promise.resolved(Unit)
}
|
|
41
50
|
|
|
42
51
|
@DoNotStrip
|
|
43
52
|
@Keep
|
|
44
|
-
override fun startListening(params:
|
|
53
|
+
override fun startListening(params: SpeechRecognitionConfig?) {
|
|
45
54
|
Log.d(TAG, "startListening: $params")
|
|
46
55
|
if (isActive) {
|
|
47
56
|
onFinishRecognition(
|
|
@@ -98,25 +107,84 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
98
107
|
}, POST_RECOGNITION_DELAY)
|
|
99
108
|
}
|
|
100
109
|
|
|
110
|
+
@DoNotStrip
|
|
111
|
+
@Keep
|
|
112
|
+
/**
 * Restarts the auto-finish countdown of the current session.
 *
 * Does nothing while no recognition session is active or when no
 * auto-stopper has been created yet.
 */
override fun resetAutoFinishTime() {
    if (isActive) {
        autoStopper?.resetTimer()
    }
}
|
|
116
|
+
|
|
101
117
|
@DoNotStrip
|
|
102
118
|
@Keep
|
|
103
119
|
override fun addAutoFinishTime(additionalTimeMs: Double?) {
|
|
104
|
-
Log.d(TAG, "
|
|
120
|
+
Log.d(TAG, "addAutoFinishTime")
|
|
105
121
|
if (!isActive) return
|
|
106
|
-
|
|
122
|
+
|
|
123
|
+
if (additionalTimeMs != null) {
|
|
124
|
+
autoStopper?.addMsOnce(additionalTimeMs)
|
|
125
|
+
} else {
|
|
126
|
+
// Reset timer to original baseline.
|
|
127
|
+
autoStopper?.resetTimer()
|
|
128
|
+
}
|
|
107
129
|
}
|
|
108
130
|
|
|
109
131
|
@DoNotStrip
|
|
110
132
|
@Keep
|
|
111
|
-
override fun
|
|
112
|
-
|
|
133
|
+
override fun updateConfig(
|
|
134
|
+
newConfig: MutableSpeechRecognitionConfig?,
|
|
135
|
+
resetAutoFinishTime: Boolean?
|
|
136
|
+
) {
|
|
137
|
+
Log.d(TAG, "updateConfig $newConfig",)
|
|
113
138
|
if (!isActive) return
|
|
114
|
-
|
|
115
|
-
if (
|
|
116
|
-
|
|
139
|
+
|
|
140
|
+
val newTimeMs = if (newConfig?.autoFinishRecognitionMs != null) newConfig.autoFinishRecognitionMs else config?.autoFinishRecognitionMs
|
|
141
|
+
if (newTimeMs != null && newTimeMs != config?.autoFinishRecognitionMs) {
|
|
142
|
+
autoStopper?.updateSilenceThreshold(newTimeMs)
|
|
117
143
|
}
|
|
144
|
+
val newInterval = if (newConfig?.autoFinishProgressIntervalMs != null) newConfig.autoFinishProgressIntervalMs else config?.autoFinishProgressIntervalMs
|
|
145
|
+
if (newInterval != null && newInterval != config?.autoFinishProgressIntervalMs) {
|
|
146
|
+
autoStopper?.updateProgressInterval(newInterval)
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
if (resetAutoFinishTime == true) {
|
|
150
|
+
autoStopper?.resetTimer()
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
if (newConfig != null) {
|
|
154
|
+
config = SpeechRecognitionConfig(
|
|
155
|
+
locale = config?.locale,
|
|
156
|
+
contextualStrings = config?.contextualStrings,
|
|
157
|
+
maskOffensiveWords = config?.maskOffensiveWords,
|
|
158
|
+
autoFinishRecognitionMs = newConfig.autoFinishRecognitionMs ?: config?.autoFinishRecognitionMs,
|
|
159
|
+
autoFinishProgressIntervalMs = newConfig.autoFinishProgressIntervalMs ?: config?.autoFinishProgressIntervalMs,
|
|
160
|
+
resetAutoFinishVoiceSensitivity = newConfig.resetAutoFinishVoiceSensitivity ?: config?.resetAutoFinishVoiceSensitivity,
|
|
161
|
+
disableRepeatingFilter = newConfig.disableRepeatingFilter ?: config?.disableRepeatingFilter,
|
|
162
|
+
startHapticFeedbackStyle = newConfig.startHapticFeedbackStyle ?: config?.startHapticFeedbackStyle,
|
|
163
|
+
stopHapticFeedbackStyle = newConfig.stopHapticFeedbackStyle ?: config?.stopHapticFeedbackStyle,
|
|
164
|
+
androidFormattingPreferQuality = config?.androidFormattingPreferQuality,
|
|
165
|
+
androidUseWebSearchModel = config?.androidUseWebSearchModel,
|
|
166
|
+
androidDisableBatchHandling = config?.androidDisableBatchHandling,
|
|
167
|
+
iosAddPunctuation = config?.iosAddPunctuation,
|
|
168
|
+
iosPreset = config?.iosPreset,
|
|
169
|
+
iosAtypicalSpeech = config?.iosAtypicalSpeech
|
|
170
|
+
)
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
@DoNotStrip
|
|
175
|
+
@Keep
|
|
176
|
+
/** Reports the current value of the recognizer's `isActive` flag. */
override fun getIsActive(): Boolean = isActive
|
|
119
179
|
|
|
180
|
+
@DoNotStrip
|
|
181
|
+
@Keep
|
|
182
|
+
/**
 * iOS-only query; the Android implementation always answers with an empty array.
 */
override fun getSupportedLocalesIOS(): Array<String> = emptyArray()
|
|
185
|
+
|
|
186
|
+
@DoNotStrip
|
|
187
|
+
@Keep
|
|
120
188
|
/**
 * Disposal hook: delegates to [stopListening] so that disposing the object
 * goes through the same stop path as an explicit stop request.
 */
override fun dispose() {
    this.stopListening()
}
|
|
@@ -125,12 +193,16 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
125
193
|
mainHandler.post {
|
|
126
194
|
try {
|
|
127
195
|
speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
|
|
128
|
-
val silenceThreshold = config?.autoFinishRecognitionMs?.toLong() ?: 8000
|
|
129
196
|
autoStopper = AutoStopper(
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
197
|
+
silenceThresholdMs = config?.autoFinishRecognitionMs,
|
|
198
|
+
progressIntervalMs = config?.autoFinishProgressIntervalMs,
|
|
199
|
+
onProgress = { timeLeftMs ->
|
|
200
|
+
onAutoFinishProgress?.invoke(timeLeftMs)
|
|
201
|
+
},
|
|
202
|
+
onTimeout = {
|
|
203
|
+
stopListening()
|
|
204
|
+
}
|
|
205
|
+
)
|
|
134
206
|
val recognitionListenerSession = RecognitionListenerSession(
|
|
135
207
|
autoStopper,
|
|
136
208
|
config,
|
|
@@ -175,6 +247,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
175
247
|
if (isActive) {
|
|
176
248
|
onReadyForSpeech?.invoke()
|
|
177
249
|
onFinishRecognition(arrayListOf(), null, false)
|
|
250
|
+
autoStopper?.resetTimer()
|
|
178
251
|
}
|
|
179
252
|
}, 500)
|
|
180
253
|
} catch (e: Exception) {
|
|
@@ -189,7 +262,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
189
262
|
|
|
190
263
|
private fun cleanup() {
|
|
191
264
|
try {
|
|
192
|
-
Log.d(TAG, "
|
|
265
|
+
Log.d(TAG, "cleanup called")
|
|
193
266
|
autoStopper?.stop()
|
|
194
267
|
autoStopper = null
|
|
195
268
|
speechRecognizer?.stopListening()
|
|
@@ -197,7 +270,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
|
|
|
197
270
|
speechRecognizer = null
|
|
198
271
|
isActive = false
|
|
199
272
|
// Reset voice meter in JS consumers after stop/error cleanup.
|
|
200
|
-
onVolumeChange?.invoke(0.0)
|
|
273
|
+
onVolumeChange?.invoke(VolumeChangeEvent(0.0,0.0,null))
|
|
201
274
|
} catch (e: Exception) {
|
|
202
275
|
onFinishRecognition(
|
|
203
276
|
null,
|
package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt
CHANGED
|
@@ -4,19 +4,20 @@ import android.os.Bundle
|
|
|
4
4
|
import android.speech.RecognitionListener
|
|
5
5
|
import android.speech.SpeechRecognizer
|
|
6
6
|
import android.util.Log
|
|
7
|
-
import com.margelo.nitro.nitrospeech.
|
|
7
|
+
import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
|
|
8
|
+
import com.margelo.nitro.nitrospeech.VolumeChangeEvent
|
|
8
9
|
import kotlin.math.max
|
|
9
10
|
import kotlin.math.roundToInt
|
|
10
11
|
|
|
11
12
|
class RecognitionListenerSession (
|
|
12
13
|
private val autoStopper: AutoStopper?,
|
|
13
|
-
private val config:
|
|
14
|
-
private val onVolumeChange: ((
|
|
14
|
+
private val config: SpeechRecognitionConfig?,
|
|
15
|
+
private val onVolumeChange: ((event: VolumeChangeEvent) -> Unit)?,
|
|
15
16
|
private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
|
|
16
17
|
) {
|
|
17
18
|
companion object {
|
|
18
19
|
private const val TAG = "HybridRecognizer"
|
|
19
|
-
private const val SPEECH_LEVEL_THRESHOLD = 0.
|
|
20
|
+
private const val SPEECH_LEVEL_THRESHOLD = 0.35
|
|
20
21
|
private const val FLOOR_RISE_ALPHA = 0.01f
|
|
21
22
|
private const val FLOOR_FALL_ALPHA = 0.20f
|
|
22
23
|
private const val PEAK_ATTACK_ALPHA = 0.25f
|
|
@@ -38,10 +39,14 @@ class RecognitionListenerSession (
|
|
|
38
39
|
override fun onReadyForSpeech(params: Bundle?) {}
|
|
39
40
|
override fun onBeginningOfSpeech() {}
|
|
40
41
|
override fun onRmsChanged(rmsdB: Float) {
|
|
41
|
-
val
|
|
42
|
-
onVolumeChange?.invoke(
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
val volumeEvent = getVolume(rmsdB)
|
|
43
|
+
onVolumeChange?.invoke(volumeEvent)
|
|
44
|
+
val threshold =
|
|
45
|
+
config?.resetAutoFinishVoiceSensitivity?.coerceIn(0.0, 1.0)
|
|
46
|
+
?: SPEECH_LEVEL_THRESHOLD.toDouble()
|
|
47
|
+
Log.d(TAG, "onRmsChanged: ${volumeEvent}")
|
|
48
|
+
if (volumeEvent.rawVolume > threshold) {
|
|
49
|
+
autoStopper?.resetTimer()
|
|
45
50
|
}
|
|
46
51
|
}
|
|
47
52
|
override fun onBufferReceived(buffer: ByteArray?) {}
|
|
@@ -66,18 +71,17 @@ class RecognitionListenerSession (
|
|
|
66
71
|
true
|
|
67
72
|
)
|
|
68
73
|
autoStopper?.stop()
|
|
69
|
-
autoStopper?.
|
|
74
|
+
autoStopper?.onTimeout()
|
|
70
75
|
}
|
|
71
76
|
|
|
72
77
|
override fun onResults(results: Bundle?) {
|
|
73
78
|
Log.d(TAG, "onResults: $resultBatches")
|
|
74
79
|
onFinishRecognition(resultBatches, null, true)
|
|
75
80
|
autoStopper?.stop()
|
|
76
|
-
autoStopper?.
|
|
81
|
+
autoStopper?.onTimeout()
|
|
77
82
|
}
|
|
78
83
|
|
|
79
84
|
override fun onPartialResults(partialResults: Bundle?) {
|
|
80
|
-
autoStopper?.indicateRecordingActivity()
|
|
81
85
|
val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
|
|
82
86
|
|
|
83
87
|
if (matches.isNullOrEmpty() || matches[0] == "") {
|
|
@@ -85,6 +89,7 @@ class RecognitionListenerSession (
|
|
|
85
89
|
return
|
|
86
90
|
}
|
|
87
91
|
|
|
92
|
+
autoStopper?.resetTimer()
|
|
88
93
|
Log.d(TAG, "onPartialResults[0], add ${matches[0]}")
|
|
89
94
|
var currentBatches = resultBatches
|
|
90
95
|
if (currentBatches.isNullOrEmpty()) {
|
|
@@ -144,9 +149,9 @@ class RecognitionListenerSession (
|
|
|
144
149
|
return joiner.toString()
|
|
145
150
|
}
|
|
146
151
|
|
|
147
|
-
private fun
|
|
152
|
+
private fun getVolume(rmsdB: Float): VolumeChangeEvent {
|
|
148
153
|
if (!rmsdB.isFinite()) {
|
|
149
|
-
return 0.0
|
|
154
|
+
return VolumeChangeEvent(0.0,0.0,null)
|
|
150
155
|
}
|
|
151
156
|
|
|
152
157
|
if (noiseFloorDb.isNaN()) {
|
|
@@ -166,7 +171,14 @@ class RecognitionListenerSession (
|
|
|
166
171
|
val raw = ((rmsdB - noiseFloorDb) / span).coerceIn(0f, 1f)
|
|
167
172
|
val smoothingCoeff = if (raw > levelSmoothed) METER_ATTACK else METER_RELEASE
|
|
168
173
|
levelSmoothed += smoothingCoeff * (raw - levelSmoothed)
|
|
169
|
-
|
|
170
|
-
|
|
174
|
+
val roundedSmoothed = ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
|
|
175
|
+
val roundedRaw = ((raw * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
|
|
176
|
+
val db = (rmsdB * 1000).roundToInt() / 1000.0
|
|
177
|
+
|
|
178
|
+
return VolumeChangeEvent(
|
|
179
|
+
smoothedVolume = roundedSmoothed,
|
|
180
|
+
rawVolume = roundedRaw,
|
|
181
|
+
db = db
|
|
182
|
+
)
|
|
171
183
|
}
|
|
172
184
|
}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import Foundation
|
|
2
2
|
import AVFoundation
|
|
3
|
-
import Accelerate
|
|
4
3
|
|
|
5
4
|
private final class SendablePCMBufferBox: @unchecked Sendable {
|
|
6
5
|
let buffer: AVAudioPCMBuffer
|
|
@@ -10,42 +9,12 @@ private final class SendablePCMBufferBox: @unchecked Sendable {
|
|
|
10
9
|
}
|
|
11
10
|
}
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
private static let meterMaxDb: Float = -10 // loud speech ceiling
|
|
16
|
-
private static let meterAttack: Float = 0.35 // rise speed
|
|
17
|
-
private static let meterRelease: Float = 0.08 // fall speed
|
|
18
|
-
|
|
19
|
-
func calcRmsVolume(
|
|
20
|
-
levelSmoothed: Float,
|
|
21
|
-
buffer: AVAudioPCMBuffer
|
|
22
|
-
) -> (Float, Float)? {
|
|
23
|
-
guard let samples = buffer.floatChannelData?[0] else { return nil }
|
|
24
|
-
|
|
25
|
-
let frameL = Int(buffer.frameLength)
|
|
26
|
-
var rms: Float = 0
|
|
27
|
-
|
|
28
|
-
vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameL))
|
|
29
|
-
|
|
30
|
-
// 2) RMS -> dBFS
|
|
31
|
-
let db = 20 * log10(rms + 0.00001)
|
|
32
|
-
|
|
33
|
-
// 3) Normalize dB to 0...1
|
|
34
|
-
let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
|
|
35
|
-
let normalized = max(0, min(1, raw))
|
|
36
|
-
|
|
37
|
-
// 4) Smooth (fast attack, slow release)
|
|
38
|
-
let coeff = normalized > levelSmoothed ? Self.meterAttack : Self.meterRelease
|
|
39
|
-
let nextLevelSmoothed = levelSmoothed + coeff * (normalized - levelSmoothed)
|
|
40
|
-
|
|
41
|
-
return (rms, nextLevelSmoothed)
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
func convertBuffer(
|
|
12
|
+
enum AudioBufferConverter {
|
|
13
|
+
static func convertBuffer(
|
|
45
14
|
converter: AVAudioConverter,
|
|
46
15
|
audioFormat: AVAudioFormat,
|
|
47
16
|
pcmBuffer: AVAudioPCMBuffer
|
|
48
|
-
) throws -> AVAudioPCMBuffer?
|
|
17
|
+
) throws -> AVAudioPCMBuffer? {
|
|
49
18
|
let resampledCapacity = AVAudioFrameCount(
|
|
50
19
|
(Double(pcmBuffer.frameLength) * (audioFormat.sampleRate / pcmBuffer.format.sampleRate)).rounded(.up)
|
|
51
20
|
)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import AVFoundation
|
|
3
|
+
import Accelerate
|
|
4
|
+
|
|
5
|
+
/// One level reading produced by `AudioLevelTracker.process(_:)`.
struct AudioLevelSample {
    /// Attack/release-smoothed level in `0...1`, rounded to six decimals.
    let smoothed: Double

    /// Unsmoothed normalized level in `0...1`, rounded to six decimals.
    let raw: Double

    /// Buffer level in decibels, rounded to three decimals.
    let db: Double

    /// `true` when the unsmoothed level reached the tracker's auto-stop reset threshold.
    let resetTimer: Bool
}
|
|
11
|
+
|
|
12
|
+
/// Turns PCM buffers into normalized level readings and decides whether a
/// buffer is loud enough to reset the auto-stop timer.
///
/// The tracker keeps the previous smoothed level between calls, so one
/// instance should be fed the buffers of a single stream in order.
final class AudioLevelTracker {
    /// Decibel value mapped to a normalized level of 0.
    private static let meterMinDb: Float = -70
    /// Decibel value mapped to a normalized level of 1.
    private static let meterMaxDb: Float = -10
    /// Smoothing coefficient used while the level is rising.
    private static let meterAttack: Float = 0.35
    /// Smoothing coefficient used while the level is falling.
    private static let meterRelease: Float = 0.08
    /// Threshold used when the caller supplies no sensitivity.
    private static let defaultAutoStopResetThreshold: Double = 0.4

    private var autoStopResetThreshold: Double
    private var smoothedLevel: Float = 0

    /// - Parameter resetAutoFinishVoiceSensitivity: Normalized level at or above
    ///   which a buffer requests a timer reset. Clamped to `0...1`; `nil` selects
    ///   the default threshold.
    init(resetAutoFinishVoiceSensitivity: Double?) {
        autoStopResetThreshold = Self.resolveThreshold(resetAutoFinishVoiceSensitivity)
    }

    /// Replaces the timer-reset threshold.
    ///
    /// - Parameter newValue: New threshold, clamped to `0...1`; `nil` restores
    ///   the default threshold.
    func updateResetAutoFinishVoiceSensitivity(newValue: Double?) {
        autoStopResetThreshold = Self.resolveThreshold(newValue)
    }

    /// Clears the smoothed level and restores the default threshold.
    func reset() {
        smoothedLevel = 0
        autoStopResetThreshold = Self.defaultAutoStopResetThreshold
    }

    /// Measures one buffer and advances the smoothed level.
    ///
    /// - Parameter buffer: Buffer to measure; only the first channel is read.
    /// - Returns: The reading for this buffer, or `nil` when the buffer exposes
    ///   no float channel data or contains no frames.
    func process(_ buffer: AVAudioPCMBuffer) -> AudioLevelSample? {
        guard let samples = buffer.floatChannelData?[0] else { return nil }

        // An empty buffer has no samples to average, so there is no meaningful
        // RMS to report; skip it instead of deriving a level from it.
        let frameCount = Int(buffer.frameLength)
        guard frameCount > 0 else { return nil }

        var rms: Float = 0
        vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameCount))

        // The small offset keeps log10 finite when the buffer is pure silence.
        let db = 20 * log10(rms + 0.00001)
        let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
        let normalized = max(0, min(1, raw))

        // Rise quickly, fall slowly.
        let coeff = normalized > smoothedLevel ? Self.meterAttack : Self.meterRelease
        smoothedLevel += coeff * (normalized - smoothedLevel)

        return AudioLevelSample(
            smoothed: Double(smoothedLevel * 1_000_000).rounded() / 1_000_000,
            raw: Double(normalized * 1_000_000).rounded() / 1_000_000,
            db: Double(db * 1_000).rounded() / 1_000,
            resetTimer: Double(normalized) >= autoStopResetThreshold
        )
    }

    /// Maps an optional caller-supplied sensitivity to the effective threshold:
    /// the value clamped to `0...1`, or the default when `nil`.
    private static func resolveThreshold(_ sensitivity: Double?) -> Double {
        guard let sensitivity else { return defaultAutoStopResetThreshold }
        return max(0, min(1, sensitivity))
    }
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import NitroModules
|
|
3
|
+
import Speech
|
|
4
|
+
|
|
5
|
+
/// Recognition backends the coordinator can choose between.
///
/// - `speechTranscriber` / `dictationTranscriber`: served by `AnalyzerEngine`,
///   which the coordinator only creates on iOS 26.0 or later.
/// - `sfSpeech`: served by `SFSpeechEngine`.
enum RecognizerBackend {
    case speechTranscriber, dictationTranscriber, sfSpeech
}
|
|
10
|
+
|
|
11
|
+
/// Chooses which recognition backend to use for the delegate's configuration
/// and hands out the matching engine.
///
/// `initialize()` builds an ordered list of candidate backends; `getEngine()`
/// creates an engine for the first candidate, and `reportEngineFailure()`
/// drops that candidate so the next call falls back to the following one.
final class Coordinator {
    weak var recognizerDelegate: RecognizerDelegate?
    private var localeManager: LocaleManager?
    private var candidates: [RecognizerBackend] = []
    private var localeTask: Task<Void, Never>?

    /// Starts creating the `LocaleManager` in the background so it may already
    /// be available by the time `initialize()` runs.
    init() {
        localeTask = Task { [weak self] in
            let manager = await LocaleManager()
            // Cancellation is cooperative: this task can still reach this point
            // after `initialize()` cancelled it and installed its own manager.
            // Never overwrite a manager that is already in place, and do not
            // keep the coordinator alive just to finish this warm-up.
            guard !Task.isCancelled, let self, self.localeManager == nil else { return }
            self.localeManager = manager
        }
    }

    /// Resolves the locale for the delegate's current config and rebuilds the
    /// ordered candidate list.
    ///
    /// Before iOS 26.0 the only possible candidate is `.sfSpeech`. On iOS 26.0
    /// and later the two transcriber backends come first (dictation first when
    /// the config asks for the short-form preset, disables punctuation, or
    /// enables atypical speech), followed by `.sfSpeech` as the last resort.
    /// A backend is only listed when the locale manager has a locale for it.
    func initialize() async {
        let params = recognizerDelegate?.config
        Log.log("[Coordinator] LocaleManager - init (\(String(describing: params?.locale)))")

        if localeManager == nil {
            // The background warm-up has not delivered yet; stop waiting for it
            // and create the manager here.
            localeTask?.cancel()
            localeTask = nil
            localeManager = await LocaleManager()
        }
        guard let localeManager else { return }

        await localeManager.ensureLocale(localeString: params?.locale)
        candidates = []

        guard #available(iOS 26.0, *) else {
            if localeManager.SFLocale != nil {
                candidates = [.sfSpeech]
            }
            return
        }

        let prefersDictation = params?.iosPreset == IosPreset.shortform
            || params?.iosAddPunctuation == false
            || params?.iosAtypicalSpeech == true
        let transcriberOrder: [RecognizerBackend] = prefersDictation
            ? [.dictationTranscriber, .speechTranscriber]
            : [.speechTranscriber, .dictationTranscriber]

        for backend in transcriberOrder {
            switch backend {
            case .speechTranscriber:
                if localeManager.speechLocale != nil {
                    candidates.append(backend)
                }
            case .dictationTranscriber:
                if localeManager.dictationLocale != nil {
                    candidates.append(backend)
                }
            case .sfSpeech:
                // Appended separately below so it always comes last.
                break
            }
        }

        // Add SF Engine at the end
        if localeManager.SFLocale != nil {
            candidates.append(.sfSpeech)
        }
        Log.log("[Coordinator] candidates: \(candidates)")
    }

    /// Creates an engine for the first remaining candidate backend.
    ///
    /// - Returns: The engine, or `nil` when there is no delegate, no locale
    ///   manager, no candidate left, the candidate's locale is missing, or the
    ///   candidate needs iOS 26.0 and the OS is older.
    func getEngine() -> RecognizerEngine? {
        Log.log("[Coordinator] getEngine")
        guard let recognizerDelegate else { return nil }
        guard let localeManager else { return nil }
        guard let backend = candidates.first else { return nil }
        Log.log("[Coordinator] backend: \(backend)")

        switch backend {
        case .sfSpeech:
            if let locale = localeManager.SFLocale {
                Log.log("[Coordinator] SFSpeechEngine Activated")
                return SFSpeechEngine(locale: locale, delegate: recognizerDelegate)
            }
        case .speechTranscriber:
            if #available(iOS 26.0, *), let locale = localeManager.speechLocale {
                Log.log("[Coordinator] SpeechTranscriber Activated")
                return AnalyzerEngine(
                    backend: .speechTranscriber,
                    locale: locale,
                    delegate: recognizerDelegate
                )
            }
        case .dictationTranscriber:
            if #available(iOS 26.0, *), let locale = localeManager.dictationLocale {
                Log.log("[Coordinator] DictationTranscriber Activated")
                return AnalyzerEngine(
                    backend: .dictationTranscriber,
                    locale: locale,
                    delegate: recognizerDelegate
                )
            }
        }
        return nil
    }

    /// Discards the first candidate so the next `getEngine()` call tries the
    /// following backend. Does nothing when no candidates remain.
    func reportEngineFailure() {
        guard !candidates.isEmpty else { return }
        candidates.removeFirst()
    }

    /// Locales reported by the locale manager, or an empty list while the
    /// manager has not been created yet.
    func getSupportedLocales() -> [String] {
        return localeManager?.supportedLocales ?? []
    }
}
|