@edkimmel/expo-audio-stream 0.3.1 → 0.3.2-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,12 +7,27 @@ import android.media.audiofx.NoiseSuppressor
7
7
  import android.util.Log
8
8
 
9
9
  /**
10
- * Manages audio effects for voice recording, including:
11
- * - Acoustic Echo Cancellation (AEC)
12
- * - Noise Suppression (NS)
13
- * - Automatic Gain Control (AGC)
10
+ * Manages hardware audio effects for voice recording.
11
+ *
12
+ * We use VOICE_RECOGNITION as our audio source. The Android CDD (Section 5.4)
13
+ * mandates that this source delivers unprocessed audio:
14
+ * [C-1-2] MUST disable noise reduction by default
15
+ * [C-1-3] MUST disable automatic gain control by default
16
+ *
17
+ * NS and AGC are therefore off by default to honor the spec. Enabling them
18
+ * re-introduces the processing the CDD explicitly prohibits for this source
19
+ * and can cause low-volume capture on many OEMs.
20
+ *
21
+ * AEC is the one effect the CDD permits for VOICE_RECOGNITION ("expects a
22
+ * stream that has an echo cancellation effect if available"), so it is
23
+ * enabled by default.
14
24
  */
15
- class AudioEffectsManager {
25
+ class AudioEffectsManager(
26
+ /** Enable hardware noise suppressor. Default false — CDD 5.4 [C-1-2] prohibits it for VOICE_RECOGNITION. */
27
+ private val enableNS: Boolean = false,
28
+ /** Enable hardware AGC. Default false — CDD 5.4 [C-1-3] prohibits it for VOICE_RECOGNITION. */
29
+ private val enableAGC: Boolean = false
30
+ ) {
16
31
  // Audio effects
17
32
  private var acousticEchoCanceler: AcousticEchoCanceler? = null
18
33
  private var noiseSuppressor: NoiseSuppressor? = null
@@ -41,11 +56,21 @@ class AudioEffectsManager {
41
56
  Log.d(Constants.TAG, "Acoustic Echo Canceler enabled: ${acousticEchoCanceler?.enabled}")
42
57
  }
43
58
 
44
- // Apply noise suppression
45
- enableNoiseSuppression(audioSessionId)
46
-
47
- // Apply automatic gain control
48
- enableAutomaticGainControl(audioSessionId)
59
+ // NS off by default — CDD 5.4 [C-1-2] prohibits it for VOICE_RECOGNITION.
60
+ // Enabling it can aggressively attenuate speech on many OEMs.
61
+ if (enableNS) {
62
+ enableNoiseSuppression(audioSessionId)
63
+ } else {
64
+ Log.d(Constants.TAG, "Noise Suppressor skipped (CDD 5.4 [C-1-2])")
65
+ }
66
+
67
+ // AGC off by default — CDD 5.4 [C-1-3] prohibits it for VOICE_RECOGNITION.
68
+ // Hardware AGC is also unreliable across devices.
69
+ if (enableAGC) {
70
+ enableAutomaticGainControl(audioSessionId)
71
+ } else {
72
+ Log.d(Constants.TAG, "Hardware AGC skipped (CDD 5.4 [C-1-3])")
73
+ }
49
74
 
50
75
  } catch (e: Exception) {
51
76
  Log.e(Constants.TAG, "Error setting up audio effects", e)
@@ -330,6 +330,7 @@ class AudioRecorderManager(
330
330
 
331
331
  if (bytesRead > 0) {
332
332
  consecutiveErrors = 0
333
+ gainNormalizer.apply(audioData, bytesRead)
333
334
  totalDataSize += bytesRead
334
335
  // Emit immediately — each read is one interval of audio
335
336
  emitAudioData(audioData, bytesRead)
@@ -0,0 +1,83 @@
1
+ package expo.modules.audiostream
2
+
3
+ import java.nio.ByteBuffer
4
+ import java.nio.ByteOrder
5
+
6
/**
 * Adaptive gain normalizer for PCM-16 audio.
 *
 * For every chunk passed to [apply], the RMS level is measured and a smoothed
 * gain multiplier is moved toward the gain that would bring that chunk to
 * [targetLevelDbfs]. The multiplier is then applied to the samples in place,
 * with hard clipping to the signed 16-bit range.
 *
 * Smoothing is asymmetric:
 *  - when the gain has to *fall* (the chunk is louder than the current gain
 *    assumes) [attackCoeff] is used — fast by default, so loud onsets are
 *    brought under control quickly;
 *  - when the gain has to *rise*, [releaseCoeff] is used — slow by default, so
 *    the gain does not surge during quiet passages and clip the next syllable.
 *
 * Chunks at or below [silenceThresholdDbfs] do not adapt the gain; the gain is
 * held (not reset) and still applied to those chunks.
 *
 * Yes, this is effectively AGC — the CDD says VOICE_RECOGNITION shouldn't
 * have it, but the raw levels on many devices are too low for third-party
 * server-side VAD that we don't control. Pragmatism wins.
 *
 * Instances keep the smoothed gain between calls and are not synchronized.
 */
class GainNormalizer(
    /** Target RMS level in dBFS. -16 is loud enough for most VAD services. */
    private val targetLevelDbfs: Float = -16f,

    /** RMS at or below this is silence — gain is not adapted during silence. */
    private val silenceThresholdDbfs: Float = -50f,

    /**
     * Smoothing coefficient (0–1) used when the gain must decrease.
     * Lower = faster: each chunk keeps `attackCoeff` of the previous gain and
     * takes the rest from the newly desired gain.
     */
    private val attackCoeff: Float = 0.2f,

    /**
     * Smoothing coefficient (0–1) used when the gain must increase.
     * Higher = slower: at 0.95 the gain moves only 5% of the way toward the
     * desired value per chunk.
     */
    private val releaseCoeff: Float = 0.95f,

    /** Hard ceiling on gain to prevent blowing up near-silence into noise. */
    private val maxGain: Float = 10.0f,

    /** Minimum gain — never attenuate below unity. */
    private val minGain: Float = 1.0f
) {
    /** Smoothed gain carried over from one chunk to the next. */
    private var currentGain: Float = 1.0f

    /**
     * Process a PCM-16 LE chunk in place.
     *
     * @param data PCM-16 little-endian byte array; modified in place
     * @param length valid bytes (must be even; a trailing odd byte is left untouched)
     * @return the same [data] instance
     * @throws IndexOutOfBoundsException if [length] is negative or exceeds `data.size`
     */
    fun apply(data: ByteArray, length: Int): ByteArray {
        // Wrapping first keeps the bounds validation of `length` identical for every input.
        val buf = ByteBuffer.wrap(data, 0, length).order(ByteOrder.LITTLE_ENDIAN)
        val sampleCount = length / 2

        // Nothing to measure or scale; also avoids a 0/0 in the RMS below.
        if (sampleCount == 0) return data

        // --- measure RMS ---
        var sumSquares = 0.0
        for (i in 0 until sampleCount) {
            val s = buf.getShort(i * 2).toDouble()
            sumSquares += s * s
        }
        val rms = Math.sqrt(sumSquares / sampleCount).toFloat()
        // Digital silence has no finite level. Using -infinity guarantees it is
        // always classified as silence, whatever threshold was configured, so an
        // all-zero chunk can never push the gain toward maxGain.
        val rmsDbfs = if (rms > 0f) {
            20f * Math.log10(rms.toDouble() / Short.MAX_VALUE).toFloat()
        } else {
            Float.NEGATIVE_INFINITY
        }

        // --- adapt gain (only during speech, not silence) ---
        if (rmsDbfs > silenceThresholdDbfs) {
            val desiredGain = Math.pow(10.0, (targetLevelDbfs - rmsDbfs).toDouble() / 20.0)
                .toFloat()
                .coerceIn(minGain, maxGain)

            // Fast attack (gain going down), slow release (gain going up).
            val coeff = if (desiredGain < currentGain) attackCoeff else releaseCoeff
            currentGain = coeff * currentGain + (1f - coeff) * desiredGain
        }
        // During silence: hold currentGain — don't adapt, don't reset.

        if (currentGain < 1.01f) return data // effectively unity, skip work

        // --- apply gain ---
        val lowest = Short.MIN_VALUE.toInt()
        val highest = Short.MAX_VALUE.toInt()
        for (i in 0 until sampleCount) {
            val offset = i * 2
            // Round to nearest rather than truncating toward zero, which would
            // bias every sample toward silence and add low-level distortion.
            val amplified = Math.round(buf.getShort(offset).toInt() * currentGain)
                .coerceIn(lowest, highest)
            buf.putShort(offset, amplified.toShort())
        }

        return data
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@edkimmel/expo-audio-stream",
3
- "version": "0.3.1",
3
+ "version": "0.3.2-0",
4
4
  "description": "Expo Play Audio Stream module",
5
5
  "main": "build/index.js",
6
6
  "types": "build/index.d.ts",
@@ -45,5 +45,6 @@
45
45
  "publishConfig": {
46
46
  "access": "public",
47
47
  "registry": "https://registry.npmjs.org/"
48
- }
48
+ },
49
+ "stableVersion": "0.3.1"
49
50
  }