@siteed/expo-audio-stream 1.17.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -1
- package/README.md +1 -1
- package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
- package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
- package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
- package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
- package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +2 -4
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/ExpoAudioStream.types.d.ts +47 -3
- package/build/ExpoAudioStream.types.d.ts.map +1 -1
- package/build/ExpoAudioStream.types.js.map +1 -1
- package/build/ExpoAudioStream.web.d.ts.map +1 -1
- package/build/ExpoAudioStream.web.js +0 -1
- package/build/ExpoAudioStream.web.js.map +1 -1
- package/build/ExpoAudioStreamModule.d.ts.map +1 -1
- package/build/ExpoAudioStreamModule.js +216 -12
- package/build/ExpoAudioStreamModule.js.map +1 -1
- package/build/WebRecorder.web.d.ts +67 -13
- package/build/WebRecorder.web.d.ts.map +1 -1
- package/build/WebRecorder.web.js +177 -173
- package/build/WebRecorder.web.js.map +1 -1
- package/build/index.d.ts +3 -3
- package/build/index.d.ts.map +1 -1
- package/build/index.js +2 -2
- package/build/index.js.map +1 -1
- package/build/useAudioRecorder.d.ts.map +1 -1
- package/build/useAudioRecorder.js +12 -8
- package/build/useAudioRecorder.js.map +1 -1
- package/build/utils/audioProcessing.d.ts +24 -0
- package/build/utils/audioProcessing.d.ts.map +1 -0
- package/build/utils/audioProcessing.js +133 -0
- package/build/utils/audioProcessing.js.map +1 -0
- package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
- package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
- package/build/workers/InlineFeaturesExtractor.web.js +694 -194
- package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
- package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
- package/build/workers/inlineAudioWebWorker.web.js +3 -2
- package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
- package/ios/AudioAnalysisData.swift +51 -16
- package/ios/AudioProcessingHelpers.swift +710 -26
- package/ios/AudioProcessor.swift +334 -185
- package/ios/AudioStreamManager.swift +2 -3
- package/ios/DataPoint.swift +25 -12
- package/ios/DecodingConfig.swift +47 -0
- package/ios/ExpoAudioStreamModule.swift +187 -103
- package/ios/FFT.swift +62 -0
- package/ios/Features.swift +24 -3
- package/ios/RecordingSettings.swift +7 -7
- package/package.json +2 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
- package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
- package/src/ExpoAudioStream.types.ts +53 -7
- package/src/ExpoAudioStream.web.ts +0 -1
- package/src/ExpoAudioStreamModule.ts +255 -10
- package/src/WebRecorder.web.ts +231 -244
- package/src/index.ts +5 -3
- package/src/useAudioRecorder.tsx +14 -10
- package/src/utils/audioProcessing.ts +205 -0
- package/src/workers/InlineFeaturesExtractor.web.tsx +694 -194
- package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
package/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
## [2.0.0] - 2025-02-27
|
|
12
|
+
### Changed
|
|
13
|
+
- feat(playground): Enhance Audio Playground with Improved UX and Sample Audio Loading (#148) ([09d2794](https://github.com/deeeed/expo-audio-stream/commit/09d27940dcffa60e662c828742f4577bca5327f9))
|
|
14
|
+
- feat: Implement Enhanced Audio Transcription Workflow with Configurable Extraction and UI Updates (#147) ([c658c7e](https://github.com/deeeed/expo-audio-stream/commit/c658c7e8531dd731b01d9347bc7c744470a3b7b9))
|
|
15
|
+
- fix: audio recording reliability improvements and web IndexedDB management (#146) ([d4fa245](https://github.com/deeeed/expo-audio-stream/commit/d4fa245c46d487fe50c6454165efc2e1032ec126))
|
|
16
|
+
- feat(transcription): refactor and unify transcription services across platforms (#145) ([a94b905](https://github.com/deeeed/expo-audio-stream/commit/a94b90562fb2112f712f78c03ca6a5110d6b1401))
|
|
17
|
+
- feat(audio): enhance checksum verification and audio segment analysis (#143) ([49b6587](https://github.com/deeeed/expo-audio-stream/commit/49b65877d1fd9922f25b4892261c4fedf02ba3c3))
|
|
18
|
+
- feat(playground): implement cross-platform ONNX runtime with Silero VAD model (#142) ([4a94639](https://github.com/deeeed/expo-audio-stream/commit/4a9463995f1eadf6531a2b4d6d057e90da097920))
|
|
19
|
+
- feat(audio-analysis): enhance audio analysis and visualization capabilities (#141) ([ecf8f5d](https://github.com/deeeed/expo-audio-stream/commit/ecf8f5daf967bf27afb827c8cf6bca7510ce7b4e))
|
|
20
|
+
- android 15 (#140) ([5321a3c](https://github.com/deeeed/expo-audio-stream/commit/5321a3c805d22e6824fd11fee4290987d550bd06))
|
|
21
|
+
- refactor(audio): consolidate audio analysis APIs and migrate to segment-based processing (#139) ([5d45da8](https://github.com/deeeed/expo-audio-stream/commit/5d45da871ee1849898405ee4bf8bf8d296aebc48))
|
|
22
|
+
- feat: pcm player (#137) ([8db6f16](https://github.com/deeeed/expo-audio-stream/commit/8db6f16f13cbcf78fd4a8e412bb00689e47d5a72))
|
|
23
|
+
- feat(audio-stream): add extractAudioData API ([faf8915](https://github.com/deeeed/expo-audio-stream/commit/faf8915df3b18ea54ca7e562f61749d7cadf8bb4))
|
|
24
|
+
- feat(audio): improve audio trimming and waveform visualization (#136) ([ad5514b](https://github.com/deeeed/expo-audio-stream/commit/ad5514b412eedc7211cb200cc3747e8a83afbf88))
|
|
25
|
+
- feat(audio): enhance audio player with preview, trimming and feature analysis (#135) ([3f7eb9c](https://github.com/deeeed/expo-audio-stream/commit/3f7eb9cde7b314505d8ed3e4704c7b1321da6b15))
|
|
26
|
+
- feat: add web permission for microphone (#131) ([9a2ed7f](https://github.com/deeeed/expo-audio-stream/commit/9a2ed7f31ad41560d094a22d1248034cb2f5886d))
|
|
27
|
+
- refactor(audio): simplify amplitude analysis and remove redundant configuration (#133) ([5d64aa2](https://github.com/deeeed/expo-audio-stream/commit/5d64aa22299836cc9cb925d3e91f3d9470f3e856))
|
|
28
|
+
- feat: add full audio analysis with spectral features and time range controls (#132) ([5677dc3](https://github.com/deeeed/expo-audio-stream/commit/5677dc321f5a9ff4bea37fbbce3cb6ae3aad67f6))
|
|
29
|
+
- chore(expo-audio-stream): release @siteed/expo-audio-stream@1.17.0 ([689aead](https://github.com/deeeed/expo-audio-stream/commit/689aeadedaa58050cd18e8ec1fa5ff1fcd93f0db))
|
|
11
30
|
## [1.17.0] - 2025-02-18
|
|
12
31
|
### Changed
|
|
13
32
|
- feat(web): add audio interval analysis ([281b7e6](https://github.com/deeeed/expo-audio-stream/commit/281b7e6b1136afe0569450a9d1e3d5f01da7af28))
|
|
@@ -135,7 +154,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
135
154
|
- Feature: Audio features extraction during recording.
|
|
136
155
|
- Feature: Consistent WAV PCM recording format across all platforms.
|
|
137
156
|
|
|
138
|
-
[unreleased]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@
|
|
157
|
+
[unreleased]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@2.0.0...HEAD
|
|
158
|
+
[2.0.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.17.0...@siteed/expo-audio-stream@2.0.0
|
|
139
159
|
[1.17.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.16.0...@siteed/expo-audio-stream@1.17.0
|
|
140
160
|
[1.16.0]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.15.1...@siteed/expo-audio-stream@1.16.0
|
|
141
161
|
[1.15.1]: https://github.com/deeeed/expo-audio-stream/compare/@siteed/expo-audio-stream@1.15.0...@siteed/expo-audio-stream@1.15.1
|
package/README.md
CHANGED
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
- Background audio recording on iOS.
|
|
49
49
|
- Audio features extraction during recording.
|
|
50
50
|
- Consistent WAV PCM recording format across all platforms.
|
|
51
|
-
- Keep
|
|
51
|
+
- Keep recording active while app is in background
|
|
52
52
|
- Rich notification system for recording status:
|
|
53
53
|
- Android: Live waveform visualization in notifications
|
|
54
54
|
- Android: Fully customizable notification appearance and actions
|
|
@@ -4,34 +4,53 @@ package net.siteed.audiostream
|
|
|
4
4
|
import android.os.Bundle
|
|
5
5
|
import androidx.core.os.bundleOf
|
|
6
6
|
|
|
7
|
+
/**
 * Speech-related values attached to a single analysis data point.
 *
 * @property isActive flag exported under the "isActive" key
 * @property speakerId optional speaker identifier exported under the "speakerId" key;
 *   defaults to null
 */
data class SpeechFeatures(
    val isActive: Boolean,
    val speakerId: Int? = null
) {
    /** Exposes both fields as a map keyed by property name. */
    fun toDictionary(): Map<String, Any?> = mapOf(
        "isActive" to isActive,
        "speakerId" to speakerId
    )

    /** Packs both fields into a [Bundle] using the same keys as [toDictionary]. */
    fun toBundle(): Bundle = bundleOf(
        "isActive" to isActive,
        "speakerId" to speakerId
    )
}
|
|
25
|
+
|
|
7
26
|
data class DataPoint(
|
|
8
27
|
val id: Long,
|
|
9
28
|
val amplitude: Float,
|
|
10
|
-
val
|
|
11
|
-
val dB: Float
|
|
12
|
-
val silent: Boolean
|
|
29
|
+
val rms: Float,
|
|
30
|
+
val dB: Float,
|
|
31
|
+
val silent: Boolean,
|
|
13
32
|
val features: Features? = null,
|
|
33
|
+
val speech: SpeechFeatures? = null,
|
|
14
34
|
val startTime: Float? = null,
|
|
15
35
|
val endTime: Float? = null,
|
|
16
36
|
val startPosition: Int? = null,
|
|
17
37
|
val endPosition: Int? = null,
|
|
18
|
-
val samples: Int = 0
|
|
19
|
-
val speaker: Int? = null
|
|
38
|
+
val samples: Int = 0
|
|
20
39
|
) {
|
|
21
40
|
fun toDictionary(): Map<String, Any?> {
|
|
22
41
|
return mapOf(
|
|
23
42
|
"id" to id,
|
|
24
43
|
"amplitude" to amplitude,
|
|
25
|
-
"
|
|
44
|
+
"rms" to rms,
|
|
26
45
|
"dB" to dB,
|
|
27
46
|
"silent" to silent,
|
|
28
47
|
"features" to features?.toDictionary(),
|
|
48
|
+
"speech" to speech?.toDictionary(),
|
|
29
49
|
"startTime" to startTime,
|
|
30
50
|
"endTime" to endTime,
|
|
31
51
|
"startPosition" to startPosition,
|
|
32
52
|
"endPosition" to endPosition,
|
|
33
|
-
"samples" to samples
|
|
34
|
-
"speaker" to speaker
|
|
53
|
+
"samples" to samples
|
|
35
54
|
)
|
|
36
55
|
}
|
|
37
56
|
|
|
@@ -39,22 +58,22 @@ data class DataPoint(
|
|
|
39
58
|
return bundleOf(
|
|
40
59
|
"id" to id,
|
|
41
60
|
"amplitude" to amplitude,
|
|
42
|
-
"
|
|
61
|
+
"rms" to rms,
|
|
43
62
|
"dB" to dB,
|
|
44
63
|
"silent" to silent,
|
|
45
64
|
"features" to features?.toBundle(),
|
|
65
|
+
"speech" to speech?.toBundle(),
|
|
46
66
|
"startTime" to startTime,
|
|
47
67
|
"endTime" to endTime,
|
|
48
68
|
"startPosition" to startPosition,
|
|
49
69
|
"endPosition" to endPosition,
|
|
50
|
-
"samples" to samples
|
|
51
|
-
"speaker" to speaker
|
|
70
|
+
"samples" to samples
|
|
52
71
|
)
|
|
53
72
|
}
|
|
54
73
|
}
|
|
55
74
|
|
|
56
75
|
data class AudioAnalysisData(
|
|
57
|
-
val
|
|
76
|
+
val segmentDurationMs: Int,
|
|
58
77
|
val durationMs: Int,
|
|
59
78
|
val bitDepth: Int,
|
|
60
79
|
val numberOfChannels: Int,
|
|
@@ -62,7 +81,8 @@ data class AudioAnalysisData(
|
|
|
62
81
|
val samples: Int,
|
|
63
82
|
val dataPoints: List<DataPoint>,
|
|
64
83
|
val amplitudeRange: AmplitudeRange,
|
|
65
|
-
val
|
|
84
|
+
val rmsRange: AmplitudeRange,
|
|
85
|
+
val speechAnalysis: SpeechAnalysis? = null,
|
|
66
86
|
val extractionTimeMs: Float
|
|
67
87
|
) {
|
|
68
88
|
data class AmplitudeRange(val min: Float, val max: Float) {
|
|
@@ -75,19 +95,44 @@ data class AudioAnalysisData(
|
|
|
75
95
|
}
|
|
76
96
|
}
|
|
77
97
|
|
|
78
|
-
data class
|
|
98
|
+
data class SpeechAnalysis(
|
|
99
|
+
val speakerChanges: List<SpeakerChange>
|
|
100
|
+
) {
|
|
79
101
|
fun toDictionary(): Map<String, Any> {
|
|
80
|
-
return mapOf(
|
|
102
|
+
return mapOf(
|
|
103
|
+
"speakerChanges" to speakerChanges.map { it.toDictionary() }
|
|
104
|
+
)
|
|
81
105
|
}
|
|
82
106
|
|
|
83
107
|
fun toBundle(): Bundle {
|
|
84
|
-
return bundleOf(
|
|
108
|
+
return bundleOf(
|
|
109
|
+
"speakerChanges" to speakerChanges.map { it.toBundle() }.toTypedArray()
|
|
110
|
+
)
|
|
85
111
|
}
|
|
86
112
|
}
|
|
87
113
|
|
|
88
|
-
|
|
114
|
+
/**
 * One entry of a speaker-change list: a timestamp paired with a speaker id.
 *
 * @property timestamp value exported under the "timestamp" key
 * @property speakerId value exported under the "speakerId" key
 */
data class SpeakerChange(
    val timestamp: Long,
    val speakerId: Int
) {
    /** Exposes both fields as a map keyed by property name. */
    fun toDictionary(): Map<String, Any> = mapOf(
        "timestamp" to timestamp,
        "speakerId" to speakerId
    )

    /** Packs both fields into a [Bundle] using the same keys as [toDictionary]. */
    fun toBundle(): Bundle = bundleOf(
        "timestamp" to timestamp,
        "speakerId" to speakerId
    )
}
|
|
132
|
+
|
|
133
|
+
fun toDictionary(): Map<String, Any?> {
|
|
89
134
|
return mapOf(
|
|
90
|
-
"
|
|
135
|
+
"segmentDurationMs" to segmentDurationMs,
|
|
91
136
|
"durationMs" to durationMs,
|
|
92
137
|
"bitDepth" to bitDepth,
|
|
93
138
|
"numberOfChannels" to numberOfChannels,
|
|
@@ -95,17 +140,17 @@ data class AudioAnalysisData(
|
|
|
95
140
|
"samples" to samples,
|
|
96
141
|
"dataPoints" to dataPoints.map { it.toDictionary() },
|
|
97
142
|
"amplitudeRange" to amplitudeRange.toDictionary(),
|
|
98
|
-
"
|
|
143
|
+
"rmsRange" to rmsRange.toDictionary(),
|
|
144
|
+
"speechAnalysis" to speechAnalysis?.toDictionary(),
|
|
99
145
|
"extractionTimeMs" to extractionTimeMs
|
|
100
146
|
)
|
|
101
147
|
}
|
|
102
148
|
|
|
103
149
|
fun toBundle(): Bundle {
|
|
104
150
|
val dataPointsBundleArray = dataPoints.map { it.toBundle() }.toTypedArray()
|
|
105
|
-
val speakerChangesBundleArray = speakerChanges.map { it.toBundle() }.toTypedArray()
|
|
106
151
|
|
|
107
152
|
return bundleOf(
|
|
108
|
-
"
|
|
153
|
+
"segmentDurationMs" to segmentDurationMs,
|
|
109
154
|
"durationMs" to durationMs,
|
|
110
155
|
"bitDepth" to bitDepth,
|
|
111
156
|
"numberOfChannels" to numberOfChannels,
|
|
@@ -113,7 +158,8 @@ data class AudioAnalysisData(
|
|
|
113
158
|
"samples" to samples,
|
|
114
159
|
"dataPoints" to dataPointsBundleArray,
|
|
115
160
|
"amplitudeRange" to amplitudeRange.toBundle(),
|
|
116
|
-
"
|
|
161
|
+
"rmsRange" to rmsRange.toBundle(),
|
|
162
|
+
"speechAnalysis" to speechAnalysis?.toBundle(),
|
|
117
163
|
"extractionTimeMs" to extractionTimeMs
|
|
118
164
|
)
|
|
119
165
|
}
|
|
@@ -76,4 +76,28 @@ object AudioFormatUtils {
|
|
|
76
76
|
else -> AudioFormat.ENCODING_PCM_16BIT // Default to 16-bit PCM
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
|
+
|
|
80
|
+
/**
 * Converts audio data between different bit depths.
 *
 * The input is first decoded to floats by [convertByteArrayToFloatArray] using the
 * format string `"pcm_<sourceBitDepth>bit"`, then re-encoded at the target depth.
 * NOTE(review): assumes the decoder yields samples normalized to [-1.0, 1.0] — confirm.
 *
 * Output encodings:
 * - 8  -> one byte per sample, `(sample + 1) * 127.5` truncated to an integer
 * - 16 -> little-endian shorts, `sample * 32767` truncated to an integer
 * - 32 -> little-endian floats, written unchanged
 *
 * For the 8- and 16-bit targets each sample is clamped to [-1.0, 1.0] before scaling,
 * so an out-of-range value saturates instead of wrapping around on the integer narrowing.
 *
 * @param audioData The raw audio data
 * @param sourceBitDepth The original bit depth
 * @param targetBitDepth The desired bit depth (8, 16 or 32)
 * @return The converted audio data
 * @throws IllegalArgumentException if [targetBitDepth] is not 8, 16 or 32
 */
fun convertBitDepth(audioData: ByteArray, sourceBitDepth: Int, targetBitDepth: Int): ByteArray {
    // First convert to float array for normalization
    val samples = convertByteArrayToFloatArray(audioData, "pcm_${sourceBitDepth}bit")

    // Convert back to bytes with new bit depth
    return when (targetBitDepth) {
        8 -> ByteArray(samples.size) { index ->
            ((samples[index].coerceIn(-1.0f, 1.0f) + 1.0f) * 127.5f).toInt().toByte()
        }
        16 -> {
            val output = ByteBuffer.allocate(samples.size * 2).order(ByteOrder.LITTLE_ENDIAN)
            // Write through the byte buffer itself so its position advances per sample.
            // Calling asShortBuffer() per sample creates a new view positioned at 0 each
            // time, which made every sample overwrite the first one.
            for (sample in samples) {
                output.putShort((sample.coerceIn(-1.0f, 1.0f) * 32767f).toInt().toShort())
            }
            output.array()
        }
        32 -> {
            val output = ByteBuffer.allocate(samples.size * 4).order(ByteOrder.LITTLE_ENDIAN)
            for (sample in samples) {
                output.putFloat(sample)
            }
            output.array()
        }
        else -> throw IllegalArgumentException("Unsupported target bit depth: $targetBitDepth")
    }
}
|
|
79
103
|
}
|