@gmessier/nitro-speech 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120):
  1. package/LICENSE +21 -0
  2. package/README.md +165 -148
  3. package/android/build.gradle +0 -1
  4. package/android/src/main/cpp/cpp-adapter.cpp +5 -1
  5. package/android/src/main/java/com/margelo/nitro/nitrospeech/HybridNitroSpeech.kt +2 -0
  6. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/AutoStopper.kt +80 -16
  7. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/HybridRecognizer.kt +93 -20
  8. package/android/src/main/java/com/margelo/nitro/nitrospeech/recognizer/RecognitionListenerSession.kt +27 -15
  9. package/ios/{BufferUtil.swift → Audio/AudioBufferConverter.swift} +3 -34
  10. package/ios/Audio/AudioLevelTracker.swift +66 -0
  11. package/ios/Coordinator.swift +105 -0
  12. package/ios/Engines/AnalyzerEngine.swift +241 -0
  13. package/ios/Engines/DictationRuntime.swift +67 -0
  14. package/ios/Engines/RecognizerEngine.swift +312 -0
  15. package/ios/Engines/SFSpeechEngine.swift +119 -0
  16. package/ios/Engines/SpeechRuntime.swift +58 -0
  17. package/ios/Engines/TranscriberRuntimeProtocol.swift +21 -0
  18. package/ios/HybridNitroSpeech.swift +1 -10
  19. package/ios/HybridRecognizer.swift +135 -192
  20. package/ios/LocaleManager.swift +73 -0
  21. package/ios/{AppStateObserver.swift → Shared/AppStateObserver.swift} +1 -2
  22. package/ios/Shared/AutoStopper.swift +147 -0
  23. package/ios/Shared/HapticImpact.swift +24 -0
  24. package/ios/Shared/Log.swift +41 -0
  25. package/ios/Shared/Permissions.swift +59 -0
  26. package/ios/Shared/Utils.swift +58 -0
  27. package/lib/NitroSpeech.d.ts +2 -0
  28. package/lib/NitroSpeech.js +2 -0
  29. package/lib/Recognizer/RecognizerRef.d.ts +5 -0
  30. package/lib/Recognizer/RecognizerRef.js +13 -0
  31. package/lib/Recognizer/SpeechRecognizer.d.ts +8 -0
  32. package/lib/Recognizer/SpeechRecognizer.js +9 -0
  33. package/lib/Recognizer/methods.d.ts +8 -0
  34. package/lib/Recognizer/methods.js +29 -0
  35. package/lib/Recognizer/types.d.ts +6 -0
  36. package/lib/Recognizer/types.js +1 -0
  37. package/lib/Recognizer/useRecognizer.d.ts +16 -0
  38. package/lib/Recognizer/useRecognizer.js +71 -0
  39. package/lib/Recognizer/useVoiceInputVolume.d.ts +25 -0
  40. package/lib/Recognizer/useVoiceInputVolume.js +52 -0
  41. package/lib/index.d.ts +6 -0
  42. package/lib/index.js +6 -0
  43. package/lib/specs/NitroSpeech.nitro.d.ts +8 -0
  44. package/lib/specs/NitroSpeech.nitro.js +1 -0
  45. package/lib/specs/Recognizer.nitro.d.ts +95 -0
  46. package/lib/specs/Recognizer.nitro.js +1 -0
  47. package/lib/specs/SpeechRecognitionConfig.d.ts +162 -0
  48. package/lib/specs/SpeechRecognitionConfig.js +1 -0
  49. package/lib/specs/VolumeChangeEvent.d.ts +31 -0
  50. package/lib/specs/VolumeChangeEvent.js +1 -0
  51. package/nitro.json +2 -6
  52. package/nitrogen/generated/android/NitroSpeech+autolinking.cmake +2 -2
  53. package/nitrogen/generated/android/NitroSpeechOnLoad.cpp +5 -3
  54. package/nitrogen/generated/android/c++/JFunc_void_VolumeChangeEvent.hpp +78 -0
  55. package/nitrogen/generated/android/c++/JFunc_void_std__vector_std__string_.hpp +14 -14
  56. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.cpp +68 -19
  57. package/nitrogen/generated/android/c++/JHybridRecognizerSpec.hpp +7 -4
  58. package/nitrogen/generated/android/c++/JIosPreset.hpp +58 -0
  59. package/nitrogen/generated/android/c++/JMutableSpeechRecognitionConfig.hpp +79 -0
  60. package/nitrogen/generated/android/c++/{JSpeechToTextParams.hpp → JSpeechRecognitionConfig.hpp} +48 -30
  61. package/nitrogen/generated/android/c++/JVolumeChangeEvent.hpp +65 -0
  62. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/Func_void_VolumeChangeEvent.kt +80 -0
  63. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/HybridRecognizerSpec.kt +18 -5
  64. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/IosPreset.kt +23 -0
  65. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/MutableSpeechRecognitionConfig.kt +76 -0
  66. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechRecognitionConfig.kt +121 -0
  67. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/VolumeChangeEvent.kt +61 -0
  68. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.cpp +46 -30
  69. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Bridge.hpp +203 -70
  70. package/nitrogen/generated/ios/NitroSpeech-Swift-Cxx-Umbrella.hpp +13 -3
  71. package/nitrogen/generated/ios/NitroSpeechAutolinking.swift +2 -2
  72. package/nitrogen/generated/ios/c++/HybridRecognizerSpecSwift.hpp +41 -9
  73. package/nitrogen/generated/ios/swift/Func_void_VolumeChangeEvent.swift +46 -0
  74. package/nitrogen/generated/ios/swift/Func_void_std__exception_ptr.swift +46 -0
  75. package/nitrogen/generated/ios/swift/HybridRecognizerSpec.swift +6 -3
  76. package/nitrogen/generated/ios/swift/HybridRecognizerSpec_cxx.swift +66 -18
  77. package/nitrogen/generated/ios/swift/IosPreset.swift +40 -0
  78. package/nitrogen/generated/ios/swift/MutableSpeechRecognitionConfig.swift +118 -0
  79. package/nitrogen/generated/ios/swift/{SpeechToTextParams.swift → SpeechRecognitionConfig.swift} +108 -43
  80. package/nitrogen/generated/ios/swift/VolumeChangeEvent.swift +52 -0
  81. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.cpp +4 -1
  82. package/nitrogen/generated/shared/c++/HybridRecognizerSpec.hpp +17 -7
  83. package/nitrogen/generated/shared/c++/IosPreset.hpp +76 -0
  84. package/nitrogen/generated/shared/c++/MutableSpeechRecognitionConfig.hpp +105 -0
  85. package/nitrogen/generated/shared/c++/{SpeechToTextParams.hpp → SpeechRecognitionConfig.hpp} +39 -20
  86. package/nitrogen/generated/shared/c++/VolumeChangeEvent.hpp +91 -0
  87. package/package.json +15 -16
  88. package/src/NitroSpeech.ts +5 -0
  89. package/src/Recognizer/RecognizerRef.ts +23 -0
  90. package/src/Recognizer/SpeechRecognizer.ts +10 -0
  91. package/src/Recognizer/methods.ts +40 -0
  92. package/src/Recognizer/types.ts +33 -0
  93. package/src/Recognizer/useRecognizer.ts +85 -0
  94. package/src/Recognizer/useVoiceInputVolume.ts +65 -0
  95. package/src/index.ts +6 -182
  96. package/src/specs/NitroSpeech.nitro.ts +2 -163
  97. package/src/specs/Recognizer.nitro.ts +110 -0
  98. package/src/specs/SpeechRecognitionConfig.ts +167 -0
  99. package/src/specs/VolumeChangeEvent.ts +31 -0
  100. package/android/proguard-rules.pro +0 -1
  101. package/ios/AnylyzerTranscriber.swift +0 -331
  102. package/ios/AutoStopper.swift +0 -69
  103. package/ios/HapticImpact.swift +0 -32
  104. package/ios/LegacySpeechRecognizer.swift +0 -161
  105. package/lib/commonjs/index.js +0 -145
  106. package/lib/commonjs/index.js.map +0 -1
  107. package/lib/commonjs/package.json +0 -1
  108. package/lib/commonjs/specs/NitroSpeech.nitro.js +0 -6
  109. package/lib/commonjs/specs/NitroSpeech.nitro.js.map +0 -1
  110. package/lib/module/index.js +0 -138
  111. package/lib/module/index.js.map +0 -1
  112. package/lib/module/package.json +0 -1
  113. package/lib/module/specs/NitroSpeech.nitro.js +0 -4
  114. package/lib/module/specs/NitroSpeech.nitro.js.map +0 -1
  115. package/lib/tsconfig.tsbuildinfo +0 -1
  116. package/lib/typescript/index.d.ts +0 -50
  117. package/lib/typescript/index.d.ts.map +0 -1
  118. package/lib/typescript/specs/NitroSpeech.nitro.d.ts +0 -162
  119. package/lib/typescript/specs/NitroSpeech.nitro.d.ts.map +0 -1
  120. package/nitrogen/generated/android/kotlin/com/margelo/nitro/nitrospeech/SpeechToTextParams.kt +0 -68
@@ -11,9 +11,14 @@ import android.util.Log
11
11
  import androidx.annotation.Keep
12
12
  import com.facebook.proguard.annotations.DoNotStrip
13
13
  import com.margelo.nitro.NitroModules
14
+ import com.margelo.nitro.core.Promise
15
+ import com.margelo.nitro.nitrospeech.MutableSpeechRecognitionConfig
14
16
  import com.margelo.nitro.nitrospeech.HybridRecognizerSpec
15
- import com.margelo.nitro.nitrospeech.SpeechToTextParams
17
+ import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
18
+ import com.margelo.nitro.nitrospeech.VolumeChangeEvent
16
19
 
20
+ @DoNotStrip
21
+ @Keep
17
22
  class HybridRecognizer: HybridRecognizerSpec() {
18
23
  companion object {
19
24
  private const val TAG = "HybridRecognizer"
@@ -21,7 +26,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
21
26
  }
22
27
 
23
28
  private var isActive: Boolean = false
24
- private var config: SpeechToTextParams? = null
29
+ private var config: SpeechRecognitionConfig? = null
25
30
  private var autoStopper: AutoStopper? = null
26
31
  private var speechRecognizer: SpeechRecognizer? = null
27
32
  private val mainHandler = Handler(Looper.getMainLooper())
@@ -33,15 +38,19 @@ class HybridRecognizer: HybridRecognizerSpec() {
33
38
  override var onAutoFinishProgress: ((timeLeftMs: Double) -> Unit)? = null
34
39
  override var onError: ((error: String) -> Unit)? = null
35
40
  override var onPermissionDenied: (() -> Unit)? = null
36
- override var onVolumeChange: ((normVolume: Double) -> Unit)? = null
41
+ override var onVolumeChange: ((event: VolumeChangeEvent) -> Unit)? = null
37
42
 
38
- override fun getIsActive(): Boolean {
39
- return isActive
43
+ @DoNotStrip
44
+ @Keep
45
+ override fun prewarm(defaultParams: SpeechRecognitionConfig?): Promise<Unit> {
46
+ // no-op
47
+ // nothing to prewarm
48
+ return Promise()
40
49
  }
41
50
 
42
51
  @DoNotStrip
43
52
  @Keep
44
- override fun startListening(params: SpeechToTextParams) {
53
+ override fun startListening(params: SpeechRecognitionConfig?) {
45
54
  Log.d(TAG, "startListening: $params")
46
55
  if (isActive) {
47
56
  onFinishRecognition(
@@ -98,25 +107,84 @@ class HybridRecognizer: HybridRecognizerSpec() {
98
107
  }, POST_RECOGNITION_DELAY)
99
108
  }
100
109
 
110
+ @DoNotStrip
111
+ @Keep
112
+ override fun resetAutoFinishTime() {
113
+ if (!isActive) return
114
+ autoStopper?.resetTimer()
115
+ }
116
+
101
117
  @DoNotStrip
102
118
  @Keep
103
119
  override fun addAutoFinishTime(additionalTimeMs: Double?) {
104
- Log.d(TAG, "stopListening called")
120
+ Log.d(TAG, "addAutoFinishTime")
105
121
  if (!isActive) return
106
- autoStopper?.indicateRecordingActivity()
122
+
123
+ if (additionalTimeMs != null) {
124
+ autoStopper?.addMsOnce(additionalTimeMs)
125
+ } else {
126
+ // Reset timer to original baseline.
127
+ autoStopper?.resetTimer()
128
+ }
107
129
  }
108
130
 
109
131
  @DoNotStrip
110
132
  @Keep
111
- override fun updateAutoFinishTime(newTimeMs: Double, withRefresh: Boolean?) {
112
- Log.d(TAG, "updateAutoFinishTime: $newTimeMs")
133
+ override fun updateConfig(
134
+ newConfig: MutableSpeechRecognitionConfig?,
135
+ resetAutoFinishTime: Boolean?
136
+ ) {
137
+ Log.d(TAG, "updateConfig $newConfig",)
113
138
  if (!isActive) return
114
- autoStopper?.updateSilenceThreshold(newTimeMs.toLong())
115
- if (withRefresh == true) {
116
- autoStopper?.indicateRecordingActivity()
139
+
140
+ val newTimeMs = if (newConfig?.autoFinishRecognitionMs != null) newConfig.autoFinishRecognitionMs else config?.autoFinishRecognitionMs
141
+ if (newTimeMs != null && newTimeMs != config?.autoFinishRecognitionMs) {
142
+ autoStopper?.updateSilenceThreshold(newTimeMs)
117
143
  }
144
+ val newInterval = if (newConfig?.autoFinishProgressIntervalMs != null) newConfig.autoFinishProgressIntervalMs else config?.autoFinishProgressIntervalMs
145
+ if (newInterval != null && newInterval != config?.autoFinishProgressIntervalMs) {
146
+ autoStopper?.updateProgressInterval(newInterval)
147
+ }
148
+
149
+ if (resetAutoFinishTime == true) {
150
+ autoStopper?.resetTimer()
151
+ }
152
+
153
+ if (newConfig != null) {
154
+ config = SpeechRecognitionConfig(
155
+ locale = config?.locale,
156
+ contextualStrings = config?.contextualStrings,
157
+ maskOffensiveWords = config?.maskOffensiveWords,
158
+ autoFinishRecognitionMs = newConfig.autoFinishRecognitionMs ?: config?.autoFinishRecognitionMs,
159
+ autoFinishProgressIntervalMs = newConfig.autoFinishProgressIntervalMs ?: config?.autoFinishProgressIntervalMs,
160
+ resetAutoFinishVoiceSensitivity = newConfig.resetAutoFinishVoiceSensitivity ?: config?.resetAutoFinishVoiceSensitivity,
161
+ disableRepeatingFilter = newConfig.disableRepeatingFilter ?: config?.disableRepeatingFilter,
162
+ startHapticFeedbackStyle = newConfig.startHapticFeedbackStyle ?: config?.startHapticFeedbackStyle,
163
+ stopHapticFeedbackStyle = newConfig.stopHapticFeedbackStyle ?: config?.stopHapticFeedbackStyle,
164
+ androidFormattingPreferQuality = config?.androidFormattingPreferQuality,
165
+ androidUseWebSearchModel = config?.androidUseWebSearchModel,
166
+ androidDisableBatchHandling = config?.androidDisableBatchHandling,
167
+ iosAddPunctuation = config?.iosAddPunctuation,
168
+ iosPreset = config?.iosPreset,
169
+ iosAtypicalSpeech = config?.iosAtypicalSpeech
170
+ )
171
+ }
172
+ }
173
+
174
+ @DoNotStrip
175
+ @Keep
176
+ override fun getIsActive(): Boolean {
177
+ return isActive
118
178
  }
119
179
 
180
+ @DoNotStrip
181
+ @Keep
182
+ override fun getSupportedLocalesIOS(): Array<String> {
183
+ return emptyArray()
184
+ }
185
+
186
+ @DoNotStrip
187
+ @Keep
120
188
  override fun dispose() {
121
189
  stopListening()
122
190
  }
@@ -125,12 +193,16 @@ class HybridRecognizer: HybridRecognizerSpec() {
125
193
  mainHandler.post {
126
194
  try {
127
195
  speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
128
- val silenceThreshold = config?.autoFinishRecognitionMs?.toLong() ?: 8000
129
196
  autoStopper = AutoStopper(
130
- silenceThreshold,
131
- ) {
132
- stopListening()
133
- }
197
+ silenceThresholdMs = config?.autoFinishRecognitionMs,
198
+ progressIntervalMs = config?.autoFinishProgressIntervalMs,
199
+ onProgress = { timeLeftMs ->
200
+ onAutoFinishProgress?.invoke(timeLeftMs)
201
+ },
202
+ onTimeout = {
203
+ stopListening()
204
+ }
205
+ )
134
206
  val recognitionListenerSession = RecognitionListenerSession(
135
207
  autoStopper,
136
208
  config,
@@ -175,6 +247,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
175
247
  if (isActive) {
176
248
  onReadyForSpeech?.invoke()
177
249
  onFinishRecognition(arrayListOf(), null, false)
250
+ autoStopper?.resetTimer()
178
251
  }
179
252
  }, 500)
180
253
  } catch (e: Exception) {
@@ -189,7 +262,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
189
262
 
190
263
  private fun cleanup() {
191
264
  try {
192
- Log.d(TAG, "stopListening called")
265
+ Log.d(TAG, "cleanup called")
193
266
  autoStopper?.stop()
194
267
  autoStopper = null
195
268
  speechRecognizer?.stopListening()
@@ -197,7 +270,7 @@ class HybridRecognizer: HybridRecognizerSpec() {
197
270
  speechRecognizer = null
198
271
  isActive = false
199
272
  // Reset voice meter in JS consumers after stop/error cleanup.
200
- onVolumeChange?.invoke(0.0)
273
+ onVolumeChange?.invoke(VolumeChangeEvent(0.0,0.0,null))
201
274
  } catch (e: Exception) {
202
275
  onFinishRecognition(
203
276
  null,
@@ -4,19 +4,20 @@ import android.os.Bundle
4
4
  import android.speech.RecognitionListener
5
5
  import android.speech.SpeechRecognizer
6
6
  import android.util.Log
7
- import com.margelo.nitro.nitrospeech.SpeechToTextParams
7
+ import com.margelo.nitro.nitrospeech.SpeechRecognitionConfig
8
+ import com.margelo.nitro.nitrospeech.VolumeChangeEvent
8
9
  import kotlin.math.max
9
10
  import kotlin.math.roundToInt
10
11
 
11
12
  class RecognitionListenerSession (
12
13
  private val autoStopper: AutoStopper?,
13
- private val config: SpeechToTextParams?,
14
- private val onVolumeChange: ((normVolume: Double) -> Unit)?,
14
+ private val config: SpeechRecognitionConfig?,
15
+ private val onVolumeChange: ((event: VolumeChangeEvent) -> Unit)?,
15
16
  private val onFinishRecognition: (result: ArrayList<String>?, errorMessage: String?, recordingStopped: Boolean) -> Unit,
16
17
  ) {
17
18
  companion object {
18
19
  private const val TAG = "HybridRecognizer"
19
- private const val SPEECH_LEVEL_THRESHOLD = 0.08f
20
+ private const val SPEECH_LEVEL_THRESHOLD = 0.35
20
21
  private const val FLOOR_RISE_ALPHA = 0.01f
21
22
  private const val FLOOR_FALL_ALPHA = 0.20f
22
23
  private const val PEAK_ATTACK_ALPHA = 0.25f
@@ -38,10 +39,14 @@ class RecognitionListenerSession (
38
39
  override fun onReadyForSpeech(params: Bundle?) {}
39
40
  override fun onBeginningOfSpeech() {}
40
41
  override fun onRmsChanged(rmsdB: Float) {
41
- val normLevel = normalizeRmsDb(rmsdB)
42
- onVolumeChange?.invoke(normLevel.toDouble())
43
- if (normLevel > SPEECH_LEVEL_THRESHOLD) {
44
- autoStopper?.indicateRecordingActivity()
42
+ val volumeEvent = getVolume(rmsdB)
43
+ onVolumeChange?.invoke(volumeEvent)
44
+ val threshold =
45
+ config?.resetAutoFinishVoiceSensitivity?.coerceIn(0.0, 1.0)
46
+ ?: SPEECH_LEVEL_THRESHOLD.toDouble()
47
+ Log.d(TAG, "onRmsChanged: ${volumeEvent}")
48
+ if (volumeEvent.rawVolume > threshold) {
49
+ autoStopper?.resetTimer()
45
50
  }
46
51
  }
47
52
  override fun onBufferReceived(buffer: ByteArray?) {}
@@ -66,18 +71,17 @@ class RecognitionListenerSession (
66
71
  true
67
72
  )
68
73
  autoStopper?.stop()
69
- autoStopper?.forceStopRecording()
74
+ autoStopper?.onTimeout()
70
75
  }
71
76
 
72
77
  override fun onResults(results: Bundle?) {
73
78
  Log.d(TAG, "onResults: $resultBatches")
74
79
  onFinishRecognition(resultBatches, null, true)
75
80
  autoStopper?.stop()
76
- autoStopper?.forceStopRecording()
81
+ autoStopper?.onTimeout()
77
82
  }
78
83
 
79
84
  override fun onPartialResults(partialResults: Bundle?) {
80
- autoStopper?.indicateRecordingActivity()
81
85
  val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
82
86
 
83
87
  if (matches.isNullOrEmpty() || matches[0] == "") {
@@ -85,6 +89,7 @@ class RecognitionListenerSession (
85
89
  return
86
90
  }
87
91
 
92
+ autoStopper?.resetTimer()
88
93
  Log.d(TAG, "onPartialResults[0], add ${matches[0]}")
89
94
  var currentBatches = resultBatches
90
95
  if (currentBatches.isNullOrEmpty()) {
@@ -144,9 +149,9 @@ class RecognitionListenerSession (
144
149
  return joiner.toString()
145
150
  }
146
151
 
147
- private fun normalizeRmsDb(rmsdB: Float): Double {
152
+ private fun getVolume(rmsdB: Float): VolumeChangeEvent {
148
153
  if (!rmsdB.isFinite()) {
149
- return 0.0
154
+ return VolumeChangeEvent(0.0,0.0,null)
150
155
  }
151
156
 
152
157
  if (noiseFloorDb.isNaN()) {
@@ -166,7 +171,14 @@ class RecognitionListenerSession (
166
171
  val raw = ((rmsdB - noiseFloorDb) / span).coerceIn(0f, 1f)
167
172
  val smoothingCoeff = if (raw > levelSmoothed) METER_ATTACK else METER_RELEASE
168
173
  levelSmoothed += smoothingCoeff * (raw - levelSmoothed)
169
-
170
- return ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
174
+ val roundedSmoothed = ((levelSmoothed * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
175
+ val roundedRaw = ((raw * PRECISION_SCALE).roundToInt() / PRECISION_SCALE).toDouble()
176
+ val db = (rmsdB * 1000).roundToInt() / 1000.0
177
+
178
+ return VolumeChangeEvent(
179
+ smoothedVolume = roundedSmoothed,
180
+ rawVolume = roundedRaw,
181
+ db = db
182
+ )
171
183
  }
172
184
  }
@@ -1,6 +1,5 @@
1
1
  import Foundation
2
2
  import AVFoundation
3
- import Accelerate
4
3
 
5
4
  private final class SendablePCMBufferBox: @unchecked Sendable {
6
5
  let buffer: AVAudioPCMBuffer
@@ -10,42 +9,12 @@ private final class SendablePCMBufferBox: @unchecked Sendable {
10
9
  }
11
10
  }
12
11
 
13
- class BufferUtil {
14
- private static let meterMinDb: Float = -70 // silence floor
15
- private static let meterMaxDb: Float = -10 // loud speech ceiling
16
- private static let meterAttack: Float = 0.35 // rise speed
17
- private static let meterRelease: Float = 0.08 // fall speed
18
-
19
- func calcRmsVolume(
20
- levelSmoothed: Float,
21
- buffer: AVAudioPCMBuffer
22
- ) -> (Float, Float)? {
23
- guard let samples = buffer.floatChannelData?[0] else { return nil }
24
-
25
- let frameL = Int(buffer.frameLength)
26
- var rms: Float = 0
27
-
28
- vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameL))
29
-
30
- // 2) RMS -> dBFS
31
- let db = 20 * log10(rms + 0.00001)
32
-
33
- // 3) Normalize dB to 0...1
34
- let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
35
- let normalized = max(0, min(1, raw))
36
-
37
- // 4) Smooth (fast attack, slow release)
38
- let coeff = normalized > levelSmoothed ? Self.meterAttack : Self.meterRelease
39
- let nextLevelSmoothed = levelSmoothed + coeff * (normalized - levelSmoothed)
40
-
41
- return (rms, nextLevelSmoothed)
42
- }
43
-
44
- func convertBuffer(
12
+ enum AudioBufferConverter {
13
+ static func convertBuffer(
45
14
  converter: AVAudioConverter,
46
15
  audioFormat: AVAudioFormat,
47
16
  pcmBuffer: AVAudioPCMBuffer
48
- ) throws -> AVAudioPCMBuffer? {
17
+ ) throws -> AVAudioPCMBuffer? {
49
18
  let resampledCapacity = AVAudioFrameCount(
50
19
  (Double(pcmBuffer.frameLength) * (audioFormat.sampleRate / pcmBuffer.format.sampleRate)).rounded(.up)
51
20
  )
@@ -0,0 +1,66 @@
1
+ import Foundation
2
+ import AVFoundation
3
+ import Accelerate
4
+
5
/// One loudness reading produced by `AudioLevelTracker.process(_:)`.
///
/// - `smoothed`: normalized level (0...1) after attack/release smoothing.
/// - `raw`: normalized level (0...1) of the current buffer, before smoothing.
/// - `db`: level of the current buffer in dB, as computed from its RMS.
/// - `resetTimer`: `true` when `raw` reached the tracker's configured threshold.
struct AudioLevelSample {
    let smoothed: Double, raw: Double, db: Double
    let resetTimer: Bool
}
11
+
12
+ final class AudioLevelTracker {
13
+ private static let meterMinDb: Float = -70
14
+ private static let meterMaxDb: Float = -10
15
+ private static let meterAttack: Float = 0.35
16
+ private static let meterRelease: Float = 0.08
17
+ private static let defaultAutoStopResetThreshold: Double = 0.4
18
+
19
+ private var autoStopResetThreshold: Double
20
+ private var smoothedLevel: Float = 0
21
+
22
+ init(resetAutoFinishVoiceSensitivity: Double?) {
23
+ if let resetAutoFinishVoiceSensitivity {
24
+ // Clamp value between 0 and 1
25
+ self.autoStopResetThreshold = max(0, min(1, resetAutoFinishVoiceSensitivity))
26
+ } else {
27
+ self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
28
+ }
29
+ }
30
+
31
+ func updateResetAutoFinishVoiceSensitivity(newValue: Double?) {
32
+ if let newValue {
33
+ // Clamp value between 0 and 1
34
+ self.autoStopResetThreshold = max(0, min(1, newValue))
35
+ } else {
36
+ self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
37
+ }
38
+ }
39
+
40
+ func reset() {
41
+ smoothedLevel = 0
42
+ self.autoStopResetThreshold = Self.defaultAutoStopResetThreshold
43
+ }
44
+
45
+ func process(_ buffer: AVAudioPCMBuffer) -> AudioLevelSample? {
46
+ guard let samples = buffer.floatChannelData?[0] else { return nil }
47
+
48
+ let frameCount = Int(buffer.frameLength)
49
+ var rms: Float = 0
50
+ vDSP_rmsqv(samples, 1, &rms, vDSP_Length(frameCount))
51
+
52
+ let db = 20 * log10(rms + 0.00001)
53
+ let raw = (db - Self.meterMinDb) / (Self.meterMaxDb - Self.meterMinDb)
54
+ let normalized = max(0, min(1, raw))
55
+
56
+ let coeff = normalized > smoothedLevel ? Self.meterAttack : Self.meterRelease
57
+ smoothedLevel += coeff * (normalized - smoothedLevel)
58
+
59
+ return AudioLevelSample(
60
+ smoothed: Double(smoothedLevel * 1_000_000).rounded() / 1_000_000,
61
+ raw: Double(normalized * 1_000_000).rounded() / 1_000_000,
62
+ db: Double(db * 1_000).rounded() / 1_000,
63
+ resetTimer: Double(normalized) >= self.autoStopResetThreshold
64
+ )
65
+ }
66
+ }
@@ -0,0 +1,105 @@
1
+ import Foundation
2
+ import NitroModules
3
+ import Speech
4
+
5
/// Recognition backends the `Coordinator` can choose between.
enum RecognizerBackend {
    /// Served by `AnalyzerEngine`; only selected behind an iOS 26 availability check.
    case speechTranscriber
    /// Served by `AnalyzerEngine`; only selected behind an iOS 26 availability check.
    case dictationTranscriber
    /// Served by `SFSpeechEngine`; always queued last as the fallback.
    case sfSpeech
}
10
+
11
/// Chooses which recognition engine to use for the current configuration.
///
/// `initialize()` resolves the locale and builds an ordered list of candidate
/// backends; `getEngine()` instantiates the first candidate, and
/// `reportEngineFailure()` drops it so the next call falls back to the
/// following one.
final class Coordinator {
    weak var recognizerDelegate: RecognizerDelegate?
    private var localeManager: LocaleManager?
    /// Backends still to try, in priority order.
    private var candidates: [RecognizerBackend] = []
    /// Background creation of the `LocaleManager` started by `init`.
    private var localeTask: Task<Void, Never>?

    init() {
        // Start building the LocaleManager early; `initialize()` waits for it.
        localeTask = Task { [weak self] in
            let manager = await LocaleManager()
            self?.localeManager = manager
        }
    }

    /// Resolves the configured locale and rebuilds the candidate list.
    ///
    /// Before iOS 26 only the SFSpeech backend is considered. From iOS 26 on,
    /// the dictation transcriber is preferred when the config asks for the
    /// short-form preset, disables punctuation, or enables atypical speech;
    /// otherwise the speech transcriber goes first. SFSpeech is always last.
    /// A backend is only listed when the locale manager has a locale for it.
    func initialize() async {
        let params = recognizerDelegate?.config
        Log.log("[Coordinator] LocaleManager - init (\(params?.locale ?? "nil"))")

        // Wait for the task started in `init` instead of cancelling it:
        // cancellation is cooperative, so a cancelled task could still assign
        // `localeManager` later and replace the instance prepared below.
        if let pending = localeTask {
            await pending.value
            localeTask = nil
        }
        if localeManager == nil {
            localeManager = await LocaleManager()
        }
        guard let localeManager else { return }

        await localeManager.ensureLocale(localeString: params?.locale)

        let sfFallback: [RecognizerBackend] = localeManager.SFLocale != nil ? [.sfSpeech] : []
        guard #available(iOS 26.0, *) else {
            candidates = sfFallback
            return
        }

        let prefersDictation = params?.iosPreset == IosPreset.shortform
            || params?.iosAddPunctuation == false
            || params?.iosAtypicalSpeech == true

        // Build in speech-first order, then flip when dictation has priority.
        var transcribers: [RecognizerBackend] = []
        if localeManager.speechLocale != nil {
            transcribers.append(.speechTranscriber)
        }
        if localeManager.dictationLocale != nil {
            transcribers.append(.dictationTranscriber)
        }
        if prefersDictation {
            transcribers.reverse()
        }

        candidates = transcribers + sfFallback
        Log.log("[Coordinator] candidates: \(candidates)")
    }

    /// Creates the engine for the highest-priority remaining candidate.
    ///
    /// - Returns: `nil` when there is no delegate, no locale manager, no
    ///   candidate left, or the candidate's locale is unavailable.
    func getEngine() -> RecognizerEngine? {
        Log.log("[Coordinator] getEngine")
        guard let recognizerDelegate,
              let localeManager,
              let backend = candidates.first else { return nil }
        Log.log("[Coordinator] backend: \(backend)")

        switch backend {
        case .sfSpeech:
            guard let locale = localeManager.SFLocale else { return nil }
            Log.log("[Coordinator] SFSpeechEngine Activated")
            return SFSpeechEngine(locale: locale, delegate: recognizerDelegate)

        case .speechTranscriber:
            guard #available(iOS 26.0, *),
                  let locale = localeManager.speechLocale else { return nil }
            Log.log("[Coordinator] SpeechTranscriber Activated")
            return AnalyzerEngine(
                backend: .speechTranscriber,
                locale: locale,
                delegate: recognizerDelegate
            )

        case .dictationTranscriber:
            guard #available(iOS 26.0, *),
                  let locale = localeManager.dictationLocale else { return nil }
            Log.log("[Coordinator] DictationTranscriber Activated")
            return AnalyzerEngine(
                backend: .dictationTranscriber,
                locale: locale,
                delegate: recognizerDelegate
            )
        }
    }

    /// Discards the current first candidate so the next `getEngine()` call
    /// tries the following backend. Does nothing when no candidate is left.
    func reportEngineFailure() {
        guard !candidates.isEmpty else { return }
        candidates.removeFirst()
    }

    /// Locales reported by the locale manager, or an empty list if it has not
    /// been created yet.
    func getSupportedLocales() -> [String] {
        localeManager?.supportedLocales ?? []
    }
}