@edkimmel/expo-audio-stream 0.3.2-beta.0 → 0.3.2-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -37,6 +37,7 @@ class AudioRecorderManager(
|
|
|
37
37
|
// Flag to control whether actual audio data or silence is sent
|
|
38
38
|
private var isSilent = false
|
|
39
39
|
private var frequencyBandAnalyzer: FrequencyBandAnalyzer? = null
|
|
40
|
+
private val gainNormalizer = GainNormalizer()
|
|
40
41
|
|
|
41
42
|
private lateinit var recordingConfig: RecordingConfig
|
|
42
43
|
private var mimeType = "audio/wav"
|
|
@@ -330,6 +331,7 @@ class AudioRecorderManager(
|
|
|
330
331
|
|
|
331
332
|
if (bytesRead > 0) {
|
|
332
333
|
consecutiveErrors = 0
|
|
334
|
+
gainNormalizer.apply(audioData, bytesRead)
|
|
333
335
|
totalDataSize += bytesRead
|
|
334
336
|
// Emit immediately — each read is one interval of audio
|
|
335
337
|
emitAudioData(audioData, bytesRead)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
package expo.modules.audiostream
|
|
2
|
+
|
|
3
|
+
import java.nio.ByteBuffer
|
|
4
|
+
import java.nio.ByteOrder
|
|
5
|
+
|
|
6
|
+
/**
 * Adaptive gain normalizer for PCM-16 little-endian audio.
 *
 * Every call to [apply] measures the RMS level of the chunk, works out the gain that
 * would bring that level to [targetLevelDbfs], clamps it to [minGain]..[maxGain], and
 * blends it into a smoothed gain that is carried over from one call to the next.
 * When the wanted gain is lower than the current one (the input got louder) the blend
 * uses [attackCoeff]; otherwise it uses [releaseCoeff]. Chunks whose level is at or
 * below [silenceThresholdDbfs] leave the smoothed gain untouched, but are still scaled
 * by it.
 *
 * Yes, this is effectively AGC — the CDD says VOICE_RECOGNITION shouldn't
 * have it, but the raw levels on many devices are too low for third-party
 * server-side VAD that we don't control. Pragmatism wins.
 *
 * The smoothed gain is plain mutable state with no synchronization, so one instance
 * should be fed by a single audio stream.
 *
 * @throws IllegalArgumentException if a smoothing coefficient lies outside 0..1 or if
 *   [minGain] is greater than [maxGain]. Failing here keeps a misconfiguration from
 *   surfacing later, in the middle of a recording, on the first non-silent chunk.
 */
class GainNormalizer(
    /** Target RMS level in dBFS. -16 is loud enough for most VAD services. */
    private val targetLevelDbfs: Float = -16f,

    /** Chunks whose RMS is at or below this level (dBFS) are silence and never adapt the gain. */
    private val silenceThresholdDbfs: Float = -50f,

    /**
     * Weight (0–1) kept on the previous gain when the gain has to drop. Lower = faster;
     * 0.2 closes 80% of the remaining gap on every chunk.
     */
    private val attackCoeff: Float = 0.2f,

    /**
     * Weight (0–1) kept on the previous gain when the gain has to rise. Higher = slower;
     * 0.95 closes only 5% of the remaining gap on every chunk.
     */
    private val releaseCoeff: Float = 0.95f,

    /** Hard ceiling on gain to prevent blowing up near-silence into noise. */
    private val maxGain: Float = 10.0f,

    /** Floor for the per-chunk gain target. The default of 1.0 means never attenuate below unity. */
    private val minGain: Float = 1.0f
) {
    /** Smoothed gain carried across calls; starts at unity. */
    private var currentGain: Float = 1.0f

    init {
        // The range checks also reject NaN, because NaN is not contained in any range.
        require(attackCoeff in 0f..1f) { "attackCoeff must be within 0..1, was $attackCoeff" }
        require(releaseCoeff in 0f..1f) { "releaseCoeff must be within 0..1, was $releaseCoeff" }
        require(minGain <= maxGain) { "minGain ($minGain) must not exceed maxGain ($maxGain)" }
    }

    /**
     * Process a PCM-16 LE chunk in place.
     *
     * @param data PCM-16 little-endian byte array; modified in place
     * @param length number of valid bytes at the start of [data]. Only complete 16-bit
     *   samples are processed, so a trailing odd byte is left untouched.
     * @return the same [data] array that was passed in
     * @throws IndexOutOfBoundsException if [length] is negative or larger than `data.size`
     */
    fun apply(data: ByteArray, length: Int): ByteArray {
        // Wrapping first keeps the bounds check on `length` even for chunks we skip below.
        val buf = ByteBuffer.wrap(data, 0, length).order(ByteOrder.LITTLE_ENDIAN)
        val sampleCount = length / 2

        // Nothing to measure or scale; also avoids computing an RMS over zero samples.
        if (sampleCount == 0) return data

        val rmsDbfs = measureRmsDbfs(buf, sampleCount)

        // Adapt only during speech. During silence: hold currentGain — don't adapt, don't reset.
        if (rmsDbfs > silenceThresholdDbfs) {
            adaptGain(rmsDbfs)
        }

        if (currentGain < 1.01f) return data // effectively unity, skip work

        scaleInPlace(buf, sampleCount)
        return data
    }

    /** Returns the RMS level of the first [sampleCount] samples in dBFS, or -100 for an all-zero chunk. */
    private fun measureRmsDbfs(buf: ByteBuffer, sampleCount: Int): Float {
        var sumSquares = 0.0
        for (i in 0 until sampleCount) {
            val s = buf.getShort(i * 2).toDouble()
            sumSquares += s * s
        }
        val rms = Math.sqrt(sumSquares / sampleCount).toFloat()
        // log10(0) is -infinity, so digital silence is pinned to a fixed floor instead.
        return if (rms > 0f) 20f * Math.log10(rms.toDouble() / Short.MAX_VALUE).toFloat() else -100f
    }

    /** Moves [currentGain] toward the clamped gain that would lift [rmsDbfs] to the target level. */
    private fun adaptGain(rmsDbfs: Float) {
        val desiredGain = Math.pow(10.0, (targetLevelDbfs - rmsDbfs).toDouble() / 20.0)
            .toFloat()
            .coerceIn(minGain, maxGain)

        // Fast attack (gain must come down), slow release (gain may go up).
        val coeff = if (desiredGain < currentGain) attackCoeff else releaseCoeff
        currentGain = coeff * currentGain + (1f - coeff) * desiredGain
    }

    /** Multiplies the first [sampleCount] samples by [currentGain], saturating at the 16-bit limits. */
    private fun scaleInPlace(buf: ByteBuffer, sampleCount: Int) {
        val lowest = Short.MIN_VALUE.toInt()
        val highest = Short.MAX_VALUE.toInt()
        for (i in 0 until sampleCount) {
            val offset = i * 2
            val amplified = (buf.getShort(offset).toInt() * currentGain).toInt()
                .coerceIn(lowest, highest)
            buf.putShort(offset, amplified.toShort())
        }
    }
}
|