@gmessier/nitro-speech 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/LICENSE +21 -0
  2. package/README.md +176 -148
  3. package/android/build.gradle +0 -1
  4. package/android/src/main/cpp/cpp-adapter.cpp +5 -1
  5. package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
  6. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +82 -18
  7. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +118 -30
  8. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/Logger.kt +16 -0
  9. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +35 -24
  10. package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
  11. package/ios/Audio/AudioLevelTracker.swift +60 -0
  12. package/ios/Coordinator.swift +105 -0
  13. package/ios/Engines/AnalyzerEngine.swift +241 -0
  14. package/ios/Engines/DictationRuntime.swift +67 -0
  15. package/ios/Engines/RecognizerEngine.swift +315 -0
  16. package/ios/Engines/SFSpeechEngine.swift +119 -0
  17. package/ios/Engines/SpeechRuntime.swift +58 -0
  18. package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
  19. package/ios/HybridNitroSpeech.swift +1 -10
  20. package/ios/HybridRecognizer.swift +142 -191
  21. package/ios/LocaleManager.swift +73 -0
  22. package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
  23. package/ios/Shared/AutoStopper.swift +147 -0
  24. package/ios/Shared/HapticImpact.swift +24 -0
  25. package/ios/Shared/Log.swift +41 -0
  26. package/ios/Shared/Permissions.swift +59 -0
  27. package/ios/Shared/Utils.swift +58 -0
  28. package/lib/NitroSpeech.d.ts +2 -0
  29. package/lib/NitroSpeech.js +2 -0
  30. package/lib/Recognizer/RecognizerRef.d.ts +7 -0
  31. package/lib/Recognizer/RecognizerRef.js +16 -0
  32. package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
  33. package/lib/Recognizer/SpeechRecognizer.js +9 -0
  34. package/lib/Recognizer/methods.d.ts +9 -0
  35. package/lib/Recognizer/methods.js +33 -0
  36. package/lib/Recognizer/types.d.ts +6 -0
  37. package/lib/Recognizer/types.js +1 -0
  38. package/lib/Recognizer/useRecognizer.d.ts +16 -0
  39. package/lib/Recognizer/useRecognizer.js +71 -0
  40. package/lib/Recognizer/useRecognizerIsActive.d.ts +25 -0
  41. package/lib/Recognizer/useRecognizerIsActive.js +40 -0
  42. package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
  43. package/lib/Recognizer/useVoiceInputVolume.js +52 -0
  44. package/lib/index.d.ts +7 -0
  45. package/lib/index.js +7 -0
  46. package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
  47. package/lib/specs/NitroSpeech.nitro.js +1 -0
  48. package/lib/specs/Recognizer.nitro.d.ts +97 -0
  49. package/lib/specs/Recognizer.nitro.js +1 -0
  50. package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
  51. package/lib/specs/SpeechRecognitionConfig.js +1 -0
  52. package/lib/specs/VolumeChangeEvent.d.ts +31 -0
  53. package/lib/specs/VolumeChangeEvent.js +1 -0
  54. package/nitro.json +0 -4
  55. package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
  56. package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +4 -2
  57. package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
  58. package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
  59. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +73 -19
  60. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +8 -4
  61. package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
  62. package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
  63. package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
  64. package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
  65. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
  66. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +22 -5
  67. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
  68. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
  69. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
  70. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
  71. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
  72. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +211 -69
  73. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
  74. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +49 -9
  75. package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
  76. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  77. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +7 -3
  78. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +78 -18
  79. package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
  80. package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
  81. package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
  82. package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
  83. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +5 -1
  84. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +18 -7
  85. package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
  86. package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
  87. package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
  88. package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
  89. package/package.json +15 -16
  90. package/src/NitroSpeech.ts +5 -0
  91. package/src/Recognizer/RecognizerRef.ts +27 -0
  92. package/src/Recognizer/SpeechRecognizer.ts +10 -0
  93. package/src/Recognizer/methods.ts +45 -0
  94. package/src/Recognizer/types.ts +34 -0
  95. package/src/Recognizer/useRecognizer.ts +87 -0
  96. package/src/Recognizer/useRecognizerIsActive.ts +49 -0
  97. package/src/Recognizer/useVoiceInputVolume.ts +65 -0
  98. package/src/index.ts +13 -182
  99. package/src/specs/NitroSpeech.nitro.ts +2 -163
  100. package/src/specs/Recognizer.nitro.ts +113 -0
  101. package/src/specs/SpeechRecognitionConfig.ts +167 -0
  102. package/src/specs/VolumeChangeEvent.ts +31 -0
  103. package/android/proguard-rules.pro +0 -1
  104. package/ios/AnylyzerTranscriber.swift +0 -331
  105. package/ios/AutoStopper.swift +0 -69
  106. package/ios/HapticImpact.swift +0 -32
  107. package/ios/LegacySpeechRecognizer.swift +0 -161
  108. package/lib/commonjs/index.js +0 -145
  109. package/lib/commonjs/index.js.map +0 -1
  110. package/lib/commonjs/package.json +0 -1
  111. package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
  112. package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
  113. package/lib/module/index.js +0 -138
  114. package/lib/module/index.js.map +0 -1
  115. package/lib/module/package.json +0 -1
  116. package/lib/module/specs/NitroSpeech.nitro.js +0 -4
  117. package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
  118. package/lib/tsconfig.tsbuildinfo +0 -1
  119. package/lib/typescript/index.d.ts +0 -50
  120. package/lib/typescript/index.d.ts.map +0 -1
  121. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
  122. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
  123. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
@@ -2,38 +2,102 @@ package com.margelo.nitro.nitrospeech.recognizer
2
2
 
3
3
  import android.os.Handler
4
4
  import android.os.Looper
5
- import android.util.Log
5
+ import kotlin.math.max
6
6
 
7
- class AutoStopper (
8
- private var silenceThreshold: Long,
9
- val forceStopRecording: () -> Unit,
7
+ class AutoStopper(
8
+ silenceThresholdMs: Double?,
9
+ progressIntervalMs: Double?,
10
+ private val onProgress: (Double) -> Unit,
11
+ val onTimeout: () -> Unit,
10
12
  ) {
11
13
  companion object {
12
- private const val TAG = "HybridRecognizer"
14
+ private const val DEFAULT_SILENCE_THRESHOLD_MS = 8000.0
15
+ private const val DEFAULT_PROGRESS_INTERVAL_MS = 1000.0
16
+ private const val MIN_PROGRESS_INTERVAL_MS = 50.0
13
17
  }
14
18
 
19
+ private val logger = Logger(disable = false)
20
+
21
+ private var silenceThresholdMs: Double = clampMs(silenceThresholdMs ?: DEFAULT_SILENCE_THRESHOLD_MS)
22
+ private var progressIntervalMs: Double = clampMs(progressIntervalMs ?: DEFAULT_PROGRESS_INTERVAL_MS)
23
+
24
+ private var timeLeftMs: Double = this.silenceThresholdMs
15
25
  private var isStopped = false
26
+ private var didTimeout = false
27
+ private var isTimerScheduled = false
28
+
16
29
  private val handler = Handler(Looper.getMainLooper())
17
30
 
18
- private val autoStopRecording = Runnable {
19
- if (isStopped) return@Runnable
20
- Log.d(TAG, "forceStopRecording, ms: ${System.currentTimeMillis()}")
21
- forceStopRecording()
22
- }
31
+ private val tickRunnable = Runnable { tick() }
23
32
 
24
- fun indicateRecordingActivity() {
25
- Log.d(TAG, "indicateRecordingActivity | isStopped: $isStopped | ms: ${System.currentTimeMillis()}")
26
- handler.removeCallbacks(autoStopRecording)
33
+ fun resetTimer() {
34
+ logger.log("resetTimer | isStopped: $isStopped | ms: ${System.currentTimeMillis()}")
35
+ handler.removeCallbacks(tickRunnable)
36
+ isTimerScheduled = false
27
37
  if (isStopped) return
28
- handler.postDelayed(autoStopRecording, silenceThreshold)
38
+ didTimeout = false
39
+ timeLeftMs = silenceThresholdMs
40
+ if (timeLeftMs > 0) {
41
+ onProgress(timeLeftMs)
42
+ }
43
+ scheduleNextTickLocked()
29
44
  }
30
45
 
31
46
  fun stop() {
32
47
  isStopped = true
33
- handler.removeCallbacks(autoStopRecording)
48
+ handler.removeCallbacks(tickRunnable)
49
+ isTimerScheduled = false
50
+ }
51
+
52
+ fun updateSilenceThreshold(newThresholdMs: Double) {
53
+ silenceThresholdMs = clampMs(newThresholdMs)
54
+ }
55
+
56
+ fun addMsOnce(extraMs: Double) {
57
+ if (isStopped || !extraMs.isFinite()) return
58
+ logger.log("addMsOnce | extraMs: $extraMs")
59
+ timeLeftMs += extraMs
60
+ didTimeout = false
61
+ if (timeLeftMs > 0 && isTimerScheduled) {
62
+ onProgress(timeLeftMs)
63
+ }
64
+ }
65
+
66
+ fun updateProgressInterval(newIntervalMs: Double) {
67
+ if (isStopped) return
68
+ logger.log("updateProgressInterval | newIntervalMs: $newIntervalMs")
69
+ progressIntervalMs = clampMs(newIntervalMs)
70
+ if (isTimerScheduled) {
71
+ scheduleNextTickLocked()
72
+ }
73
+ }
74
+
75
+ private fun scheduleNextTickLocked() {
76
+ handler.removeCallbacks(tickRunnable)
77
+ val delayMs = progressIntervalMs.toLong().coerceAtLeast(MIN_PROGRESS_INTERVAL_MS.toLong())
78
+ handler.postDelayed(tickRunnable, delayMs)
79
+ isTimerScheduled = true
80
+ }
81
+
82
+ private fun tick() {
83
+ if (isStopped || didTimeout) return
84
+ timeLeftMs -= progressIntervalMs
85
+ if (timeLeftMs > 0) {
86
+ logger.log("onProgress | timeLeftMs: $timeLeftMs")
87
+ onProgress(timeLeftMs)
88
+ scheduleNextTickLocked()
89
+ return
90
+ }
91
+ timeLeftMs = 0.0
92
+ didTimeout = true
93
+ handler.removeCallbacks(tickRunnable)
94
+ isTimerScheduled = false
95
+ logger.log("onTimeout | ms: ${System.currentTimeMillis()}")
96
+ onTimeout()
34
97
  }
35
98
 
36
- fun updateSilenceThreshold(newThreshold: Long) {
37
- silenceThreshold = newThreshold
99
+ private fun clampMs(value: Double): Double {
100
+ if (!value.isFinite()) return MIN_PROGRESS_INTERVAL_MS
101
+ return max(MIN_PROGRESS_INTERVAL_MS, value)
38
102
  }
39
- }
103
+ }
@@ -7,21 +7,27 @@ import android.os.Handler
7
7
  import android.os.Looper
8
8
  import android.speech.RecognizerIntent
9
9
  import android.speech.SpeechRecognizer
10
- import android.util.Log
11
10
  import androidx.annotation.Keep
12
11
  import com.facebook.proguard.annotations.DoNotStrip
13
12
  import com.margelo.nitro.NitroModules
13
+ import com.margelo.nitro.core.Promise
14
+ import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
14
15
  import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
15
- import com.margelo.nitro.nitrospeech.SpeechToTextParams
16
+ import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
17
+ import com.margelo.nitro.nitrospeech.VolumeChangeEvent
16
18
 
19
+ @DoNotStrip
20
+ @Keep
17
21
  class HybridRecognizer: HybridRecognizerSpec() {
18
22
  companion object {
19
- private const val TAG = "HybridRecognizer"
20
23
  private const val POST_RECOGNITION_DELAY = 250L
21
24
  }
22
25
 
26
+ private val logger = Logger(disable = false)
27
+
23
28
  private var isActive: Boolean = false
24
- private var config: SpeechToTextParams? = null
29
+ private var config: SpeechRecognitionConfig? = null
30
+ private var volumeChangeEvent: VolumeChangeEvent = VolumeChangeEvent(0.0,0.0,null)
25
31
  private var autoStopper: AutoStopper? = null
26
32
  private var speechRecognizer: SpeechRecognizer? = null
27
33
  private val mainHandler = Handler(Looper.getMainLooper())
@@ -33,16 +39,20 @@ class HybridRecognizer: HybridRecognizerSpec() {
33
39
  override var onAutoFinishProgress: ((timeLeftMs: Double) -> Unit)? = null
34
40
  override var onError: ((error: String) -> Unit)? = null
35
41
  override var onPermissionDenied: (() -> Unit)? = null
36
- override var onVolumeChange: ((normVolume: Double) -> Unit)? = null
42
+ override var onVolumeChange: ((event: VolumeChangeEvent) -> Unit)? = null
37
43
 
38
- override fun getIsActive(): Boolean {
39
- return isActive
44
+ @DoNotStrip
45
+ @Keep
46
+ override fun prewarm(defaultParams: SpeechRecognitionConfig?): Promise<Unit> {
47
+ // no-op
48
+ // nothing to prewarm
49
+ return Promise()
40
50
  }
41
51
 
42
52
  @DoNotStrip
43
53
  @Keep
44
- override fun startListening(params: SpeechToTextParams) {
45
- Log.d(TAG, "startListening: $params")
54
+ override fun startListening(params: SpeechRecognitionConfig?) {
55
+ logger.log("startListening: $params")
46
56
  if (isActive) {
47
57
  onFinishRecognition(
48
58
  null,
@@ -85,7 +95,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
85
95
  @DoNotStrip
86
96
  @Keep
87
97
  override fun stopListening() {
88
- Log.d(TAG, "stopListening called")
98
+ logger.log("stopListening called")
89
99
  if (!isActive) return
90
100
  onFinishRecognition(null, null, true)
91
101
  mainHandler.postDelayed({
@@ -98,25 +108,90 @@ class HybridRecognizer: HybridRecognizerSpec() {
98
108
  }, POST_RECOGNITION_DELAY)
99
109
  }
100
110
 
111
+ @DoNotStrip
112
+ @Keep
113
+ override fun resetAutoFinishTime() {
114
+ if (!isActive) return
115
+ autoStopper?.resetTimer()
116
+ }
117
+
101
118
  @DoNotStrip
102
119
  @Keep
103
120
  override fun addAutoFinishTime(additionalTimeMs: Double?) {
104
- Log.d(TAG, "stopListening called")
121
+ logger.log("addAutoFinishTime")
105
122
  if (!isActive) return
106
- autoStopper?.indicateRecordingActivity()
123
+
124
+ if (additionalTimeMs != null) {
125
+ autoStopper?.addMsOnce(additionalTimeMs)
126
+ } else {
127
+ // Reset timer to original baseline.
128
+ autoStopper?.resetTimer()
129
+ }
107
130
  }
108
131
 
109
132
  @DoNotStrip
110
133
  @Keep
111
- override fun updateAutoFinishTime(newTimeMs: Double, withRefresh: Boolean?) {
112
- Log.d(TAG, "updateAutoFinishTime: $newTimeMs")
134
+ override fun updateConfig(
135
+ newConfig: MutableSpeechRecognitionConfig?,
136
+ resetAutoFinishTime: Boolean?
137
+ ) {
138
+ logger.log("updateConfig $newConfig",)
113
139
  if (!isActive) return
114
- autoStopper?.updateSilenceThreshold(newTimeMs.toLong())
115
- if (withRefresh == true) {
116
- autoStopper?.indicateRecordingActivity()
140
+
141
+ val newTimeMs = if (newConfig?.autoFinishRecognitionMs != null) newConfig.autoFinishRecognitionMs else config?.autoFinishRecognitionMs
142
+ if (newTimeMs != null && newTimeMs != config?.autoFinishRecognitionMs) {
143
+ autoStopper?.updateSilenceThreshold(newTimeMs)
144
+ }
145
+ val newInterval = if (newConfig?.autoFinishProgressIntervalMs != null) newConfig.autoFinishProgressIntervalMs else config?.autoFinishProgressIntervalMs
146
+ if (newInterval != null && newInterval != config?.autoFinishProgressIntervalMs) {
147
+ autoStopper?.updateProgressInterval(newInterval)
148
+ }
149
+
150
+ if (resetAutoFinishTime == true) {
151
+ autoStopper?.resetTimer()
117
152
  }
153
+
154
+ if (newConfig != null) {
155
+ config = SpeechRecognitionConfig(
156
+ locale = config?.locale,
157
+ contextualStrings = config?.contextualStrings,
158
+ maskOffensiveWords = config?.maskOffensiveWords,
159
+ autoFinishRecognitionMs = newConfig.autoFinishRecognitionMs ?: config?.autoFinishRecognitionMs,
160
+ autoFinishProgressIntervalMs = newConfig.autoFinishProgressIntervalMs ?: config?.autoFinishProgressIntervalMs,
161
+ resetAutoFinishVoiceSensitivity = newConfig.resetAutoFinishVoiceSensitivity ?: config?.resetAutoFinishVoiceSensitivity,
162
+ disableRepeatingFilter = newConfig.disableRepeatingFilter ?: config?.disableRepeatingFilter,
163
+ startHapticFeedbackStyle = newConfig.startHapticFeedbackStyle ?: config?.startHapticFeedbackStyle,
164
+ stopHapticFeedbackStyle = newConfig.stopHapticFeedbackStyle ?: config?.stopHapticFeedbackStyle,
165
+ androidFormattingPreferQuality = config?.androidFormattingPreferQuality,
166
+ androidUseWebSearchModel = config?.androidUseWebSearchModel,
167
+ androidDisableBatchHandling = config?.androidDisableBatchHandling,
168
+ iosAddPunctuation = config?.iosAddPunctuation,
169
+ iosPreset = config?.iosPreset,
170
+ iosAtypicalSpeech = config?.iosAtypicalSpeech
171
+ )
172
+ }
173
+ }
174
+
175
+ @DoNotStrip
176
+ @Keep
177
+ override fun getIsActive(): Boolean {
178
+ return isActive
118
179
  }
119
180
 
181
+ @DoNotStrip
182
+ @Keep
183
+ override fun getVoiceInputVolume(): VolumeChangeEvent {
184
+ return volumeChangeEvent
185
+ }
186
+
187
+ @DoNotStrip
188
+ @Keep
189
+ override fun getSupportedLocalesIOS(): Array<String> {
190
+ return emptyArray()
191
+ }
192
+
193
+ @DoNotStrip
194
+ @Keep
120
195
  override fun dispose() {
121
196
  stopListening()
122
197
  }
@@ -125,19 +200,25 @@ class HybridRecognizer: HybridRecognizerSpec() {
125
200
  mainHandler.post {
126
201
  try {
127
202
  speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
128
- val silenceThreshold = config?.autoFinishRecognitionMs?.toLong() ?: 8000
129
203
  autoStopper = AutoStopper(
130
- silenceThreshold,
131
- ) {
132
- stopListening()
133
- }
204
+ silenceThresholdMs = config?.autoFinishRecognitionMs,
205
+ progressIntervalMs = config?.autoFinishProgressIntervalMs,
206
+ onProgress = { timeLeftMs ->
207
+ onAutoFinishProgress?.invoke(timeLeftMs)
208
+ },
209
+ onTimeout = {
210
+ stopListening()
211
+ }
212
+ )
134
213
  val recognitionListenerSession = RecognitionListenerSession(
135
- autoStopper,
136
- config,
137
- onVolumeChange
138
- ) { result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean ->
139
- onFinishRecognition(result, errorMessage, recordingStopped)
140
- }
214
+ autoStopper,
215
+ config,
216
+ fireVolumeChangeEvent = { event -> fireVolumeChangeEvent(event) },
217
+ onFinishRecognition = { result, errorMessage, recordingStopped ->
218
+ onFinishRecognition(result, errorMessage, recordingStopped)
219
+ }
220
+ )
221
+
141
222
  speechRecognizer?.setRecognitionListener(recognitionListenerSession.createRecognitionListener())
142
223
 
143
224
  val languageModel = if (config?.androidUseWebSearchModel == true) RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH else RecognizerIntent.LANGUAGE_MODEL_FREE_FORM
@@ -175,6 +256,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
175
256
  if (isActive) {
176
257
  onReadyForSpeech?.invoke()
177
258
  onFinishRecognition(arrayListOf(), null, false)
259
+ autoStopper?.resetTimer()
178
260
  }
179
261
  }, 500)
180
262
  } catch (e: Exception) {
@@ -189,7 +271,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
189
271
 
190
272
  private fun cleanup() {
191
273
  try {
192
- Log.d(TAG, "stopListening called")
274
+ logger.log("cleanup called")
193
275
  autoStopper?.stop()
194
276
  autoStopper = null
195
277
  speechRecognizer?.stopListening()
@@ -197,7 +279,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
197
279
  speechRecognizer = null
198
280
  isActive = false
199
281
  // Reset voice meter in JS consumers after stop/error cleanup.
200
- onVolumeChange?.invoke(0.0)
282
+ fireVolumeChangeEvent(VolumeChangeEvent(0.0,0.0,null))
201
283
  } catch (e: Exception) {
202
284
  onFinishRecognition(
203
285
  null,
@@ -218,4 +300,10 @@ class HybridRecognizer: HybridRecognizerSpec() {
218
300
  onResult?.invoke(result.toTypedArray())
219
301
  }
220
302
  }
303
+
304
+ private fun fireVolumeChangeEvent(event: VolumeChangeEvent) {
305
+ logger.log("fireVolumeChangeEvent ${event}")
306
+ volumeChangeEvent = event
307
+ onVolumeChange?.invoke(event)
308
+ }
221
309
  }
@@ -0,0 +1,16 @@
1
+ package com.margelo.nitro.nitrospeech.recognizer
2
+
3
+ import android.util.Log
4
+
5
+ class Logger (
6
+ private val disable: Boolean
7
+ ) {
8
+ private val isLogging = false
9
+ companion object {
10
+ private const val TAG = "HybridRecognizer"
11
+ }
12
+ fun log(message: String) {
13
+ if (disable || !isLogging) return
14
+ Log.d(TAG, message)
15
+ }
16
+ }
@@ -3,20 +3,20 @@ package com.margelo.nitro.nitrospeech.recognizer
3
3
  import android.os.Bundle
4
4
  import android.speech.RecognitionListener
5
5
  import android.speech.SpeechRecognizer
6
- import android.util.Log
7
- import com.margelo.nitro.nitrospeech.SpeechToTextParams
6
+ import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
7
+ import com.margelo.nitro.nitrospeech.VolumeChangeEvent
8
8
  import kotlin.math.max
9
9
  import kotlin.math.roundToInt
10
10
 
11
11
  class RecognitionListenerSession (
12
12
  private val autoStopper: AutoStopper?,
13
- private val config: SpeechToTextParams?,
14
- private val onVolumeChange: ((normVolume: Double) -> Unit)?,
13
+ private val config: SpeechRecognitionConfig?,
14
+ private val fireVolumeChangeEvent: (event: VolumeChangeEvent) -> Unit,
15
15
  private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
16
16
  ) {
17
+ private val logger = Logger(disable = false)
17
18
  companion object {
18
- private const val TAG = "HybridRecognizer"
19
- private const val SPEECH_LEVEL_THRESHOLD = 0.08f
19
+ private const val SPEECH_LEVEL_THRESHOLD = 0.35
20
20
  private const val FLOOR_RISE_ALPHA = 0.01f
21
21
  private const val FLOOR_FALL_ALPHA = 0.20f
22
22
  private const val PEAK_ATTACK_ALPHA = 0.25f
@@ -38,10 +38,14 @@ class RecognitionListenerSession (
38
38
  override fun onReadyForSpeech(params: Bundle?) {}
39
39
  override fun onBeginningOfSpeech() {}
40
40
  override fun onRmsChanged(rmsdB: Float) {
41
- val normLevel = normalizeRmsDb(rmsdB)
42
- onVolumeChange?.invoke(normLevel.toDouble())
43
- if (normLevel > SPEECH_LEVEL_THRESHOLD) {
44
- autoStopper?.indicateRecordingActivity()
41
+ val volumeEvent = getVolume(rmsdB)
42
+ fireVolumeChangeEvent(volumeEvent)
43
+ val threshold =
44
+ config?.resetAutoFinishVoiceSensitivity?.coerceIn(0.0, 1.0)
45
+ ?: SPEECH_LEVEL_THRESHOLD.toDouble()
46
+ // logger.log("onRmsChanged: ${volumeEvent}")
47
+ if (volumeEvent.rawVolume > threshold) {
48
+ autoStopper?.resetTimer()
45
49
  }
46
50
  }
47
51
  override fun onBufferReceived(buffer: ByteArray?) {}
@@ -66,40 +70,40 @@ class RecognitionListenerSession (
66
70
  true
67
71
  )
68
72
  autoStopper?.stop()
69
- autoStopper?.forceStopRecording()
73
+ autoStopper?.onTimeout()
70
74
  }
71
75
 
72
76
  override fun onResults(results: Bundle?) {
73
- Log.d(TAG, "onResults: $resultBatches")
77
+ logger.log("onResults: $resultBatches")
74
78
  onFinishRecognition(resultBatches, null, true)
75
79
  autoStopper?.stop()
76
- autoStopper?.forceStopRecording()
80
+ autoStopper?.onTimeout()
77
81
  }
78
82
 
79
83
  override fun onPartialResults(partialResults: Bundle?) {
80
- autoStopper?.indicateRecordingActivity()
81
84
  val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
82
85
 
83
86
  if (matches.isNullOrEmpty() || matches[0] == "") {
84
- Log.d(TAG, "onPartialResults[0], skip, NO RECOGNIZE")
87
+ logger.log("onPartialResults[0], skip, NO RECOGNIZE")
85
88
  return
86
89
  }
87
90
 
88
- Log.d(TAG, "onPartialResults[0], add ${matches[0]}")
91
+ autoStopper?.resetTimer()
92
+ logger.log("onPartialResults[0], add ${matches[0]}")
89
93
  var currentBatches = resultBatches
90
94
  if (currentBatches.isNullOrEmpty()) {
91
- Log.d(TAG, "onPartialResults[1], NO BATCHES YET | add first")
95
+ logger.log("onPartialResults[1], NO BATCHES YET | add first")
92
96
  currentBatches = arrayListOf(matches[0])
93
97
  } else {
94
- Log.d(TAG, "onPartialResults[1], current batches $currentBatches")
98
+ logger.log("onPartialResults[1], current batches $currentBatches")
95
99
  val prevBatchLength = currentBatches[currentBatches.lastIndex].length
96
100
  val match = if (config?.disableRepeatingFilter == true) matches[0] else repeatingFilter(matches[0])
97
101
  val matchLength = match.length
98
102
  if (config?.androidDisableBatchHandling == true || matchLength + 3 < prevBatchLength) {
99
- Log.d(TAG, "onPartialResults[2], append new batch")
103
+ logger.log("onPartialResults[2], append new batch")
100
104
  currentBatches.add(match)
101
105
  } else {
102
- Log.d(TAG, "onPartialResults[2], update batch, replace #${currentBatches.lastIndex}")
106
+ logger.log("onPartialResults[2], update batch, replace #${currentBatches.lastIndex}")
103
107
  currentBatches[currentBatches.lastIndex] = match
104
108
  }
105
109
  }
@@ -144,9 +148,9 @@ class RecognitionListenerSession (
144
148
  return joiner.toString()
145
149
  }
146
150
 
147
- private fun normalizeRmsDb(rmsdB: Float): Double {
151
+ private fun getVolume(rmsdB: Float): VolumeChangeEvent {
148
152
  if (!rmsdB.isFinite()) {
149
- return 0.0
153
+ return VolumeChangeEvent(0.0,0.0,null)
150
154
  }
151
155
 
152
156
  if (noiseFloorDb.isNaN()) {
@@ -166,7 +170,14 @@ class RecognitionListenerSession (
166
170
  val raw = ((rmsdB - noiseFloorDb) / span).coerceIn(0f, 1f)
167
171
  val smoothingCoeff = if (raw > levelSmoothed) METER_ATTACK else METER_RELEASE
168
172
  levelSmoothed += smoothingCoeff * (raw - levelSmoothed)
169
-
170
- return ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
173
+ val roundedSmoothed = ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
174
+ val roundedRaw = ((raw * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
175
+ val db = (rmsdB * 1000).roundToInt() / 1000.0
176
+
177
+ return VolumeChangeEvent(
178
+ smoothedVolume = roundedSmoothed,
179
+ rawVolume = roundedRaw,
180
+ db = db
181
+ )
171
182
  }
172
183
  }
@@ -1,6 +1,5 @@
1
1
  import Foundation
2
2
  import AVFoundation
3
- import Accelerate
4
3
 
5
4
  private final class SendablePCMBufferBox: @unchecked Sendable {
6
5
  let buffer: AVAudioPCMBuffer
@@ -10,42 +9,12 @@ private final class SendablePCMBufferBox: @unchecked Sendable {
10
9
  }
11
10
  }
12
11
 
13
- class BufferUtil {
14
- private static let meterMinDb: Float = -70 // silence floor
15
- private static let meterMaxDb: Float = -10 // loud speech ceiling
16
- private static let meterAttack: Float = 0.35 // rise speed
17
- private static let meterRelease: Float = 0.08 // fall speed
18
-
19
- func calcRmsVolume(
20
- levelSmoothed: Float,
21
- buffer: AVAudioPCMBuffer
22
- ) -> (Float, Float)? {
23
- guard let samples = buffer.floatChannelData?[0] else { return nil }
24
-
25
- let frameL = Int(buffer.frameLength)
26
- var rms: Float = 0
27
-
28
- vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameL))
29
-
30
- // 2) RMS -> dBFS
31
- let db = 20 * log10(rms + 0.00001)
32
-
33
- // 3) Normalize dB to 0...1
34
- let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
35
- let normalized = max(0, min(1, raw))
36
-
37
- // 4) Smooth (fast attack, slow release)
38
- let coeff = normalized > levelSmoothed ? Self.meterAttack : Self.meterRelease
39
- let nextLevelSmoothed = levelSmoothed + coeff * (normalized - levelSmoothed)
40
-
41
- return (rms, nextLevelSmoothed)
42
- }
43
-
44
- func convertBuffer(
12
+ enum AudioBufferConverter {
13
+ static func convertBuffer(
45
14
  converter: AVAudioConverter,
46
15
  audioFormat: AVAudioFormat,
47
16
  pcmBuffer: AVAudioPCMBuffer
48
- ) throws -> AVAudioPCMBuffer? {
17
+ ) throws -> AVAudioPCMBuffer? {
49
18
  let resampledCapacity = AVAudioFrameCount(
50
19
  (Double(pcmBuffer.frameLength) * (audioFormat.sampleRate / pcmBuffer.format.sampleRate)).rounded(.up)
51
20
  )
@@ -0,0 +1,60 @@
1
+ import Foundation
2
+ import AVFoundation
3
+ import Accelerate
4
+
5
+ struct AudioLevelSample {
6
+ let smoothed: Double
7
+ let raw: Double
8
+ let db: Double
9
+ let resetTimer: Bool
10
+ }
11
+
12
+ final class AudioLevelTracker {
13
+ private static let meterMinDb: Float = -70
14
+ private static let meterMaxDb: Float = -10
15
+ private static let meterAttack: Float = 0.35
16
+ private static let meterRelease: Float = 0.08
17
+ private static let defaultAutoStopResetThreshold: Double = 0.4
18
+
19
+ private var smoothedLevel: Float = 0
20
+
21
+ var currentSample: AudioLevelSample?
22
+
23
+ private let lg = Lg(prefix: "RecognizerEngine")
24
+
25
+ func reset() {
26
+ smoothedLevel = 0
27
+ currentSample = nil
28
+ }
29
+
30
+ func process(_ buffer: AVAudioPCMBuffer,_ autoStopResetThreshold: Double? = nil) -> AudioLevelSample? {
31
+ guard let samples = buffer.floatChannelData?[0] else { return nil }
32
+
33
+ let frameCount = Int(buffer.frameLength)
34
+ var rms: Float = 0
35
+ vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameCount))
36
+
37
+ let db = 20 * log10(rms + 0.00001)
38
+ let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
39
+ let normalized = max(0, min(1, raw))
40
+
41
+ let coeff = normalized > smoothedLevel ? Self.meterAttack : Self.meterRelease
42
+ smoothedLevel += coeff * (normalized - smoothedLevel)
43
+
44
+ var threshold = Self.defaultAutoStopResetThreshold
45
+ if let autoStopResetThreshold {
46
+ threshold = max(0, min(1, autoStopResetThreshold))
47
+ }
48
+
49
+ currentSample = AudioLevelSample(
50
+ smoothed: Double(smoothedLevel * 1_000_000).rounded() / 1_000_000,
51
+ raw: Double(normalized * 1_000_000).rounded() / 1_000_000,
52
+ db: Double(db * 1_000).rounded() / 1_000,
53
+ resetTimer: Double(normalized) >= threshold
54
+ )
55
+
56
+ lg.log("[AudioLevelTracker.process] autoStopResetThreshold: \(threshold)")
57
+
58
+ return currentSample
59
+ }
60
+ }