@edkimmel/expo-audio-stream 0.3.2-beta.0 → 0.3.2-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,7 @@ class AudioRecorderManager(
37
37
  // Flag to control whether actual audio data or silence is sent
38
38
  private var isSilent = false
39
39
  private var frequencyBandAnalyzer: FrequencyBandAnalyzer? = null
40
+ private val gainNormalizer = GainNormalizer()
40
41
 
41
42
  private lateinit var recordingConfig: RecordingConfig
42
43
  private var mimeType = "audio/wav"
@@ -330,6 +331,7 @@ class AudioRecorderManager(
330
331
 
331
332
  if (bytesRead > 0) {
332
333
  consecutiveErrors = 0
334
+ gainNormalizer.apply(audioData, bytesRead)
333
335
  totalDataSize += bytesRead
334
336
  // Emit immediately — each read is one interval of audio
335
337
  emitAudioData(audioData, bytesRead)
@@ -0,0 +1,83 @@
1
+ package expo.modules.audiostream
2
+
3
+ import java.nio.ByteBuffer
4
+ import java.nio.ByteOrder
5
+
6
/**
 * Adaptive gain normalizer for PCM-16 audio.
 *
 * For every chunk the RMS level is measured and a smoothed gain multiplier is
 * nudged toward the value that would bring that chunk to [targetLevelDbfs].
 * Chunks whose RMS is at or below [silenceThresholdDbfs] leave the gain
 * untouched: it is held, neither adapted nor reset.
 *
 * Smoothing is a one-pole filter, `gain = c * gain + (1 - c) * desired`, where
 * `c` is [attackCoeff] when the desired gain is lower than the current gain
 * and [releaseCoeff] otherwise.
 *
 * Yes, this is effectively AGC — the CDD says VOICE_RECOGNITION shouldn't
 * have it, but the raw levels on many devices are too low for third-party
 * server-side VAD that we don't control. Pragmatism wins.
 *
 * NOTE(review): the parameter comments historically described attack as the
 * "ramp up" direction, but the code applies [attackCoeff] when the gain has to
 * come *down*. The logic is kept as-is here — confirm which one is intended.
 *
 * The instance carries state ([currentGain]) from one [apply] call to the next.
 */
class GainNormalizer(
    /** Target RMS level in dBFS. -16 is loud enough for most VAD services. */
    private val targetLevelDbfs: Float = -16f,

    /** RMS at or below this is treated as silence — gain is not adapted during silence. */
    private val silenceThresholdDbfs: Float = -50f,

    /** Weight kept on the previous gain while the gain is falling (0–1). Lower = faster. */
    private val attackCoeff: Float = 0.2f,

    /** Weight kept on the previous gain while the gain is rising (0–1). Higher = slower. */
    private val releaseCoeff: Float = 0.95f,

    /** Hard ceiling on gain to prevent blowing up near-silence into noise. */
    private val maxGain: Float = 10.0f,

    /** Lower bound on gain. The default of 1.0 means the signal is never attenuated. */
    private val minGain: Float = 1.0f
) {
    private var currentGain: Float = 1.0f

    /**
     * Process a PCM-16 LE chunk in place.
     *
     * @param data PCM-16 little-endian byte array; modified in place
     * @param length valid bytes (must be even; a trailing odd byte is left untouched)
     * @return the same [data] array that was passed in
     */
    fun apply(data: ByteArray, length: Int): ByteArray {
        // wrap() also bounds-checks `length` against the array, so keep it first.
        val buf = ByteBuffer.wrap(data, 0, length).order(ByteOrder.LITTLE_ENDIAN)
        val sampleCount = length / 2

        // Nothing to measure or scale; avoids a 0/0 in the RMS computation.
        if (sampleCount == 0) return data

        val rmsDbfs = measureRmsDbfs(buf, sampleCount)

        // Adapt only while there is signal. During silence: hold currentGain.
        if (rmsDbfs > silenceThresholdDbfs) {
            adaptGain(rmsDbfs)
        }

        // Skip the write pass only when the gain is effectively 1.0. A gain
        // below unity (possible when minGain < 1) must still be applied.
        val nearUnity = currentGain > UNITY_LOWER && currentGain < UNITY_UPPER
        if (!nearUnity) {
            scaleInPlace(buf, sampleCount)
        }

        return data
    }

    /** Returns the RMS of the first [sampleCount] samples in dBFS, or [SILENT_DBFS] for an all-zero chunk. */
    private fun measureRmsDbfs(buf: ByteBuffer, sampleCount: Int): Float {
        var sumSquares = 0.0
        for (i in 0 until sampleCount) {
            val s = buf.getShort(i * 2).toDouble()
            sumSquares += s * s
        }
        val rms = Math.sqrt(sumSquares / sampleCount).toFloat()
        return if (rms > 0f) {
            20f * Math.log10(rms.toDouble() / Short.MAX_VALUE).toFloat()
        } else {
            SILENT_DBFS
        }
    }

    /** Moves [currentGain] one smoothing step toward the gain that would map [rmsDbfs] onto the target level. */
    private fun adaptGain(rmsDbfs: Float) {
        val desiredGain = Math.pow(10.0, (targetLevelDbfs - rmsDbfs).toDouble() / 20.0)
            .toFloat()
            .coerceIn(minGain, maxGain)

        val coeff = if (desiredGain < currentGain) attackCoeff else releaseCoeff
        currentGain = coeff * currentGain + (1f - coeff) * desiredGain
    }

    /** Multiplies each sample by [currentGain], saturating at the 16-bit limits. */
    private fun scaleInPlace(buf: ByteBuffer, sampleCount: Int) {
        for (i in 0 until sampleCount) {
            val offset = i * 2
            val sample = buf.getShort(offset).toInt()
            val amplified = (sample * currentGain).toInt()
                .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt())
            buf.putShort(offset, amplified.toShort())
        }
    }

    private companion object {
        /** Level reported for a chunk whose RMS is exactly zero. */
        const val SILENT_DBFS = -100f

        /** Gains strictly between these bounds are treated as unity and not applied. */
        const val UNITY_LOWER = 0.99f
        const val UNITY_UPPER = 1.01f
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@edkimmel/expo-audio-stream",
3
- "version": "0.3.2-beta.0",
3
+ "version": "0.3.2-beta.2",
4
4
  "description": "Expo Play Audio Stream module",
5
5
  "main": "build/index.js",
6
6
  "types": "build/index.d.ts",