@edkimmel/expo-audio-stream 0.3.1 → 0.3.2-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -7,12 +7,27 @@ import android.media.audiofx.NoiseSuppressor
|
|
|
7
7
|
import android.util.Log
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
|
-
* Manages audio effects for voice recording
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
10
|
+
* Manages hardware audio effects for voice recording.
|
|
11
|
+
*
|
|
12
|
+
* We use VOICE_RECOGNITION as our audio source. The Android CDD (Section 5.4)
|
|
13
|
+
* mandates that this source delivers unprocessed audio:
|
|
14
|
+
* [C-1-2] MUST disable noise reduction by default
|
|
15
|
+
* [C-1-3] MUST disable automatic gain control by default
|
|
16
|
+
*
|
|
17
|
+
* NS and AGC are therefore off by default to honor the spec. Enabling them
|
|
18
|
+
* re-introduces the processing the CDD explicitly prohibits for this source
|
|
19
|
+
* and can cause low-volume capture on many OEMs.
|
|
20
|
+
*
|
|
21
|
+
* AEC is the one effect the CDD permits for VOICE_RECOGNITION ("expects a
|
|
22
|
+
* stream that has an echo cancellation effect if available"), so it is
|
|
23
|
+
* enabled by default.
|
|
14
24
|
*/
|
|
15
|
-
class AudioEffectsManager
|
|
25
|
+
class AudioEffectsManager(
|
|
26
|
+
/** Enable hardware noise suppressor. Default false — CDD 5.4 [C-1-2] prohibits it for VOICE_RECOGNITION. */
|
|
27
|
+
private val enableNS: Boolean = false,
|
|
28
|
+
/** Enable hardware AGC. Default false — CDD 5.4 [C-1-3] prohibits it for VOICE_RECOGNITION. */
|
|
29
|
+
private val enableAGC: Boolean = false
|
|
30
|
+
) {
|
|
16
31
|
// Audio effects
|
|
17
32
|
private var acousticEchoCanceler: AcousticEchoCanceler? = null
|
|
18
33
|
private var noiseSuppressor: NoiseSuppressor? = null
|
|
@@ -41,11 +56,21 @@ class AudioEffectsManager {
|
|
|
41
56
|
Log.d(Constants.TAG, "Acoustic Echo Canceler enabled: ${acousticEchoCanceler?.enabled}")
|
|
42
57
|
}
|
|
43
58
|
|
|
44
|
-
//
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
59
|
+
// NS off by default — CDD 5.4 [C-1-2] prohibits it for VOICE_RECOGNITION.
|
|
60
|
+
// Enabling it can aggressively attenuate speech on many OEMs.
|
|
61
|
+
if (enableNS) {
|
|
62
|
+
enableNoiseSuppression(audioSessionId)
|
|
63
|
+
} else {
|
|
64
|
+
Log.d(Constants.TAG, "Noise Suppressor skipped (CDD 5.4 [C-1-2])")
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// AGC off by default — CDD 5.4 [C-1-3] prohibits it for VOICE_RECOGNITION.
|
|
68
|
+
// Hardware AGC is also unreliable across devices.
|
|
69
|
+
if (enableAGC) {
|
|
70
|
+
enableAutomaticGainControl(audioSessionId)
|
|
71
|
+
} else {
|
|
72
|
+
Log.d(Constants.TAG, "Hardware AGC skipped (CDD 5.4 [C-1-3])")
|
|
73
|
+
}
|
|
49
74
|
|
|
50
75
|
} catch (e: Exception) {
|
|
51
76
|
Log.e(Constants.TAG, "Error setting up audio effects", e)
|
|
@@ -330,6 +330,7 @@ class AudioRecorderManager(
|
|
|
330
330
|
|
|
331
331
|
if (bytesRead > 0) {
|
|
332
332
|
consecutiveErrors = 0
|
|
333
|
+
gainNormalizer.apply(audioData, bytesRead)
|
|
333
334
|
totalDataSize += bytesRead
|
|
334
335
|
// Emit immediately — each read is one interval of audio
|
|
335
336
|
emitAudioData(audioData, bytesRead)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
package expo.modules.audiostream
|
|
2
|
+
|
|
3
|
+
import java.nio.ByteBuffer
|
|
4
|
+
import java.nio.ByteOrder
|
|
5
|
+
|
|
6
|
+
/**
 * Software gain stage for 16-bit little-endian PCM that nudges speech toward
 * [targetLevelDbfs].
 *
 * Every call to [apply] measures the RMS level of the chunk, updates a
 * smoothed gain multiplier, and then scales the chunk in place. Lowering the
 * gain uses [attackCoeff] (fast by default); raising it uses [releaseCoeff]
 * (slow by default), so the multiplier does not jump up between syllables
 * and clip the next one. Chunks whose level is at or below
 * [silenceThresholdDbfs] leave the multiplier untouched: it is neither
 * adapted nor reset.
 *
 * This is effectively AGC. The CDD says VOICE_RECOGNITION should not have it,
 * but raw capture levels on many devices are too low for third-party
 * server-side VAD that this module does not control, so it is applied anyway.
 *
 * The smoothed gain is plain mutable state with no synchronization; feed a
 * given instance from one stream only.
 *
 * @param targetLevelDbfs RMS level, in dBFS, that speech is steered toward.
 * @param silenceThresholdDbfs chunks at or below this RMS level (dBFS) do not
 *   change the gain.
 * @param attackCoeff smoothing weight (0–1) kept on the previous gain when
 *   the gain has to come down; lower reacts faster.
 * @param releaseCoeff smoothing weight (0–1) kept on the previous gain when
 *   the gain may go up (or stay put); higher reacts slower.
 * @param maxGain upper bound for the per-chunk desired gain, so near-silence
 *   is not blown up into noise.
 * @param minGain lower bound for the per-chunk desired gain; the default of
 *   1.0 means the signal is never attenuated.
 */
class GainNormalizer(
    private val targetLevelDbfs: Float = -16f,
    private val silenceThresholdDbfs: Float = -50f,
    private val attackCoeff: Float = 0.2f,
    private val releaseCoeff: Float = 0.95f,
    private val maxGain: Float = 10.0f,
    private val minGain: Float = 1.0f
) {
    /** Smoothed multiplier carried over from one chunk to the next. */
    private var currentGain: Float = 1.0f

    /**
     * Normalizes one chunk of PCM-16 LE audio in place.
     *
     * @param data buffer holding little-endian 16-bit samples; modified in place
     * @param length number of valid bytes in [data] (expected to be even; a
     *   trailing odd byte is left untouched)
     * @return the same [data] array that was passed in
     */
    fun apply(data: ByteArray, length: Int): ByteArray {
        val pcm = ByteBuffer.wrap(data, 0, length).order(ByteOrder.LITTLE_ENDIAN)
        val frames = length / 2

        val levelDbfs = measureLevelDbfs(pcm, frames)

        // Only speech-level chunks steer the gain; quieter chunks keep the
        // previous multiplier exactly as it was.
        if (levelDbfs > silenceThresholdDbfs) {
            trackGain(levelDbfs)
        }

        // Effectively unity gain: nothing to scale.
        if (currentGain < 1.01f) return data

        scaleInPlace(pcm, frames)
        return data
    }

    /** Returns the RMS level of the first [frames] samples in dBFS, or -100 when the RMS is not positive. */
    private fun measureLevelDbfs(pcm: ByteBuffer, frames: Int): Float {
        var energy = 0.0
        var index = 0
        while (index < frames) {
            val value = pcm.getShort(index * 2).toDouble()
            energy += value * value
            index++
        }

        val rms = Math.sqrt(energy / frames).toFloat()
        if (rms > 0f) {
            return 20f * Math.log10(rms.toDouble() / Short.MAX_VALUE).toFloat()
        }
        // Covers digital silence and the zero-sample case (NaN fails the comparison above).
        return -100f
    }

    /** Moves [currentGain] toward the gain that would bring [levelDbfs] to the target level. */
    private fun trackGain(levelDbfs: Float) {
        val exponent = (targetLevelDbfs - levelDbfs).toDouble() / 20.0
        val wanted = Math.pow(10.0, exponent).toFloat().coerceIn(minGain, maxGain)

        // Coming down uses the attack weight, going up (or holding) uses the release weight.
        val smoothing = if (wanted < currentGain) attackCoeff else releaseCoeff
        currentGain = smoothing * currentGain + (1f - smoothing) * wanted
    }

    /** Multiplies the first [frames] samples by [currentGain], saturating at the 16-bit range. */
    private fun scaleInPlace(pcm: ByteBuffer, frames: Int) {
        val floor = Short.MIN_VALUE.toInt()
        val ceiling = Short.MAX_VALUE.toInt()
        for (offset in 0 until frames * 2 step 2) {
            val original = pcm.getShort(offset).toInt()
            val boosted = (original * currentGain).toInt().coerceIn(floor, ceiling)
            pcm.putShort(offset, boosted.toShort())
        }
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@edkimmel/expo-audio-stream",
|
|
3
|
-
"version": "0.3.1",
|
|
3
|
+
"version": "0.3.2-0",
|
|
4
4
|
"description": "Expo Play Audio Stream module",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"types": "build/index.d.ts",
|
|
@@ -45,5 +45,6 @@
|
|
|
45
45
|
"publishConfig": {
|
|
46
46
|
"access": "public",
|
|
47
47
|
"registry": "https://registry.npmjs.org/"
|
|
48
|
-
}
|
|
48
|
+
},
|
|
49
|
+
"stableVersion": "0.3.1"
|
|
49
50
|
}
|