@siteed/expo-audio-stream 1.17.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/CHANGELOG.md +26 -1
  2. package/README.md +1 -1
  3. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
  4. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
  5. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
  6. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2
  7. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
  8. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
  9. package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
  10. package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
  11. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +2 -4
  12. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
  13. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  14. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  15. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
  16. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
  17. package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
  18. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  19. package/build/ExpoAudioStream.types.d.ts +47 -3
  20. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  21. package/build/ExpoAudioStream.types.js.map +1 -1
  22. package/build/ExpoAudioStream.web.d.ts.map +1 -1
  23. package/build/ExpoAudioStream.web.js +0 -1
  24. package/build/ExpoAudioStream.web.js.map +1 -1
  25. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  26. package/build/ExpoAudioStreamModule.js +216 -12
  27. package/build/ExpoAudioStreamModule.js.map +1 -1
  28. package/build/WebRecorder.web.d.ts +67 -13
  29. package/build/WebRecorder.web.d.ts.map +1 -1
  30. package/build/WebRecorder.web.js +177 -173
  31. package/build/WebRecorder.web.js.map +1 -1
  32. package/build/index.d.ts +3 -3
  33. package/build/index.d.ts.map +1 -1
  34. package/build/index.js +2 -2
  35. package/build/index.js.map +1 -1
  36. package/build/useAudioRecorder.d.ts.map +1 -1
  37. package/build/useAudioRecorder.js +12 -8
  38. package/build/useAudioRecorder.js.map +1 -1
  39. package/build/utils/audioProcessing.d.ts +24 -0
  40. package/build/utils/audioProcessing.d.ts.map +1 -0
  41. package/build/utils/audioProcessing.js +133 -0
  42. package/build/utils/audioProcessing.js.map +1 -0
  43. package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
  44. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
  45. package/build/workers/InlineFeaturesExtractor.web.js +694 -194
  46. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
  47. package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
  48. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
  49. package/build/workers/inlineAudioWebWorker.web.js +3 -2
  50. package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
  51. package/ios/AudioAnalysisData.swift +51 -16
  52. package/ios/AudioProcessingHelpers.swift +710 -26
  53. package/ios/AudioProcessor.swift +334 -185
  54. package/ios/AudioStreamManager.swift +2 -3
  55. package/ios/DataPoint.swift +25 -12
  56. package/ios/DecodingConfig.swift +47 -0
  57. package/ios/ExpoAudioStreamModule.swift +187 -103
  58. package/ios/FFT.swift +62 -0
  59. package/ios/Features.swift +24 -3
  60. package/ios/RecordingSettings.swift +7 -7
  61. package/package.json +2 -1
  62. package/plugin/build/index.js +6 -1
  63. package/plugin/src/index.ts +9 -1
  64. package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
  65. package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
  66. package/src/ExpoAudioStream.types.ts +53 -7
  67. package/src/ExpoAudioStream.web.ts +0 -1
  68. package/src/ExpoAudioStreamModule.ts +255 -10
  69. package/src/WebRecorder.web.ts +231 -244
  70. package/src/index.ts +5 -3
  71. package/src/useAudioRecorder.tsx +14 -10
  72. package/src/utils/audioProcessing.ts +205 -0
  73. package/src/workers/InlineFeaturesExtractor.web.tsx +694 -194
  74. package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
@@ -90,7 +90,15 @@ const withRecordingPermission: ConfigPlugin<AudioStreamPluginOptions> = (
90
90
  enableBackgroundAudio === true &&
91
91
  !existingBackgroundModes.includes('audio')
92
92
  ) {
93
- existingBackgroundModes.push('audio')
93
+ // Don't automatically add 'audio' background mode as it's only for playback
94
+ // existingBackgroundModes.push('audio')
95
+
96
+ // Instead, ensure processing mode is used for background recording
97
+ if (options.iosBackgroundModes?.useProcessing !== true) {
98
+ console.warn(
99
+ `${LOG_PREFIX} Warning: Background audio recording requires 'processing' background mode. Please enable 'useProcessing' in iosBackgroundModes.`
100
+ )
101
+ }
94
102
  }
95
103
 
96
104
  if (
@@ -1,32 +1,56 @@
1
1
  // packages/expo-audio-stream/src/AudioAnalysis/AudioAnalysis.types.ts
2
2
 
3
+ import { BitDepth, ConsoleLike } from '../ExpoAudioStream.types'
4
+
3
5
  /**
4
6
  * Represents the configuration for decoding audio data.
5
7
  */
6
8
  export interface DecodingConfig {
9
+ /** Target sample rate for decoded audio (Android and Web) */
7
10
  targetSampleRate?: number
11
+ /** Target number of channels (Android and Web) */
8
12
  targetChannels?: number
9
- targetBitDepth?: number
13
+ /** Target bit depth (Android and Web) */
14
+ targetBitDepth?: BitDepth
15
+ /** Whether to normalize audio levels (Android and Web) */
10
16
  normalizeAudio?: boolean
11
17
  }
12
18
 
19
+ /**
20
+ * Represents speech-related features extracted from audio.
21
+ */
22
+ export interface SpeechFeatures {
23
+ isActive: boolean // Whether speech is detected in this segment
24
+ speakerId?: number // Optional speaker identification
25
+ // Could add more speech-related features here like:
26
+ // confidence: number
27
+ // language?: string
28
+ // sentiment?: number
29
+ // etc.
30
+ }
31
+
13
32
  /**
14
33
  * Represents various audio features extracted from an audio signal.
15
34
  */
16
35
  export interface AudioFeatures {
17
- energy: number // The infinite integral of the squared signal, representing the overall energy of the audio.
18
- mfcc: number[] // Mel-frequency cepstral coefficients, describing the short-term power spectrum of a sound.
19
- rms: number // Root mean square value, indicating the amplitude of the audio signal.
20
- minAmplitude: number // Minimum amplitude value in the audio signal.
21
- maxAmplitude: number // Maximum amplitude value in the audio signal.
22
- zcr: number // Zero-crossing rate, indicating the rate at which the signal changes sign.
23
- spectralCentroid: number // The center of mass of the spectrum, indicating the brightness of the sound.
24
- spectralFlatness: number // Measure of the flatness of the spectrum, indicating how noise-like the signal is.
25
- spectralRolloff: number // The frequency below which a specified percentage (usually 85%) of the total spectral energy lies.
26
- spectralBandwidth: number // The width of the spectrum, indicating the range of frequencies present.
27
- chromagram: number[] // Chromagram, representing the 12 different pitch classes of the audio.
28
- tempo: number // Estimated tempo of the audio signal, measured in beats per minute (BPM).
29
- hnr: number // Harmonics-to-noise ratio, indicating the proportion of harmonics to noise in the audio signal.
36
+ energy?: number // The infinite integral of the squared signal, representing the overall energy of the audio.
37
+ mfcc?: number[] // Mel-frequency cepstral coefficients, describing the short-term power spectrum of a sound.
38
+ rms?: number // Root mean square value, indicating the amplitude of the audio signal.
39
+ minAmplitude?: number // Minimum amplitude value in the audio signal.
40
+ maxAmplitude?: number // Maximum amplitude value in the audio signal.
41
+ zcr?: number // Zero-crossing rate, indicating the rate at which the signal changes sign.
42
+ spectralCentroid?: number // The center of mass of the spectrum, indicating the brightness of the sound.
43
+ spectralFlatness?: number // Measure of the flatness of the spectrum, indicating how noise-like the signal is.
44
+ spectralRolloff?: number // The frequency below which a specified percentage (usually 85%) of the total spectral energy lies.
45
+ spectralBandwidth?: number // The width of the spectrum, indicating the range of frequencies present.
46
+ chromagram?: number[] // Chromagram, representing the 12 different pitch classes of the audio.
47
+ tempo?: number // Estimated tempo of the audio signal, measured in beats per minute (BPM).
48
+ hnr?: number // Harmonics-to-noise ratio, indicating the proportion of harmonics to noise in the audio signal.
49
+ melSpectrogram?: number[] // Mel-scaled spectrogram representation of the audio.
50
+ spectralContrast?: number[] // Spectral contrast features representing the difference between peaks and valleys.
51
+ tonnetz?: number[] // Tonal network features representing harmonic relationships.
52
+ pitch?: number // Pitch of the audio signal, measured in Hertz (Hz).
53
+ crc32?: number // crc32 checksum of the audio signal, used to verify the integrity of the audio.
30
54
  }
31
55
 
32
56
  /**
@@ -44,6 +68,11 @@ export interface AudioFeaturesOptions {
44
68
  chromagram?: boolean
45
69
  tempo?: boolean
46
70
  hnr?: boolean
71
+ melSpectrogram?: boolean
72
+ spectralContrast?: boolean
73
+ tonnetz?: boolean
74
+ pitch?: boolean
75
+ crc32?: boolean
47
76
  }
48
77
 
49
78
  /**
@@ -51,11 +80,12 @@ export interface AudioFeaturesOptions {
51
80
  */
52
81
  export interface DataPoint {
53
82
  id: number
54
- amplitude: number
55
- activeSpeech?: boolean
56
- dB?: number
57
- silent?: boolean
83
+ amplitude: number // Peak amplitude for the segment
84
+ rms: number // Root mean square value
85
+ dB: number // Always computed
86
+ silent: boolean // Always computed
58
87
  features?: AudioFeatures
88
+ speech?: SpeechFeatures
59
89
  startTime?: number
60
90
  endTime?: number
61
91
  // start / end position in bytes
@@ -63,33 +93,38 @@ export interface DataPoint {
63
93
  endPosition?: number
64
94
  // number of audio samples for this point (samples size depends on bit depth)
65
95
  samples?: number
66
- // TODO: speaker detection
67
- speaker?: number
68
96
  }
69
97
 
70
- export type AmplitudeAlgorithm = 'peak' | 'rms'
71
-
72
98
  /**
73
99
  * Represents the complete data from the audio analysis.
74
100
  */
75
101
  export interface AudioAnalysis {
76
- pointsPerSecond: number // How many consolidated value per second
102
+ segmentDurationMs: number // Duration of each segment in milliseconds
77
103
  durationMs: number // Duration of the audio in milliseconds
78
104
  bitDepth: number // Bit depth of the audio
79
105
  samples: number // Size of the audio in bytes
80
106
  numberOfChannels: number // Number of audio channels
81
107
  sampleRate: number // Sample rate of the audio
82
108
  dataPoints: DataPoint[] // Array of data points from the analysis.
83
- amplitudeAlgorithm: AmplitudeAlgorithm // Algorithm used to calculate amplitude values.
84
109
  amplitudeRange: {
85
110
  min: number
86
111
  max: number
87
112
  }
88
- // TODO: speaker detection
89
- speakerChanges?: {
90
- timestamp: number // Timestamp of the speaker change in milliseconds.
91
- speaker: number // Speaker identifier.
92
- }[]
113
+ rmsRange: {
114
+ min: number
115
+ max: number
116
+ }
117
+ // TODO: speaker changes into a broader speech analysis section
118
+ speechAnalysis?: {
119
+ speakerChanges: {
120
+ timestamp: number
121
+ speakerId: number
122
+ }[]
123
+ // Could add more speech analysis data here like:
124
+ // dominantSpeaker?: number
125
+ // totalSpeechDuration?: number
126
+ // speakerStats?: { [speakerId: number]: { duration: number, segments: number } }
127
+ }
93
128
  }
94
129
 
95
130
  /**
@@ -97,9 +132,9 @@ export interface AudioAnalysis {
97
132
  */
98
133
  export interface AudioRangeOptions {
99
134
  /** Start time in milliseconds */
100
- startTime?: number
135
+ startTimeMs?: number
101
136
  /** End time in milliseconds */
102
- endTime?: number
137
+ endTimeMs?: number
103
138
  }
104
139
 
105
140
  /**
@@ -115,10 +150,9 @@ export interface PreviewOptions extends AudioRangeOptions {
115
150
  */
116
151
  numberOfPoints?: number
117
152
  /**
118
- * Algorithm used to calculate amplitude values
119
- * @default "rms"
153
+ * Optional logger for debugging.
120
154
  */
121
- algorithm?: AmplitudeAlgorithm
155
+ logger?: ConsoleLike
122
156
  /**
123
157
  * Optional configuration for decoding the audio file.
124
158
  * Defaults to:
@@ -129,21 +163,3 @@ export interface PreviewOptions extends AudioRangeOptions {
129
163
  */
130
164
  decodingOptions?: DecodingConfig
131
165
  }
132
-
133
- /**
134
- * Represents a simplified preview of audio waveform,
135
- * optimized for quick visualization.
136
- */
137
- export interface AudioPreview {
138
- /** Number of data points per second */
139
- pointsPerSecond: number
140
- /** Duration of the audio in milliseconds */
141
- durationMs: number
142
- /** Range of amplitude values in the preview */
143
- amplitudeRange: {
144
- min: number
145
- max: number
146
- }
147
- /** Array of data points representing the waveform */
148
- dataPoints: DataPoint[]
149
- }