@siteed/expo-audio-stream 1.0.3 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/README.md +26 -175
  2. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +47 -7
  3. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +1 -0
  4. package/android/src/main/java/net/siteed/audiostream/Constants.kt +5 -0
  5. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +12 -3
  6. package/build/index.js +8 -7
  7. package/ios/AudioProcessor.swift +7 -5
  8. package/ios/AudioStreamManager.swift +1 -0
  9. package/ios/ExpoAudioStream.podspec +1 -1
  10. package/ios/ExpoAudioStreamModule.swift +36 -0
  11. package/ios/RecordingResult.swift +1 -0
  12. package/package.json +95 -65
  13. package/src/AudioAnalysis/AudioAnalysis.types.ts +59 -60
  14. package/src/AudioAnalysis/extractAudioAnalysis.ts +132 -121
  15. package/src/AudioAnalysis/extractWaveform.ts +18 -18
  16. package/src/AudioRecorder.provider.tsx +53 -53
  17. package/src/ExpoAudioStream.native.ts +2 -2
  18. package/src/ExpoAudioStream.types.ts +59 -53
  19. package/src/ExpoAudioStream.web.ts +231 -205
  20. package/src/ExpoAudioStreamModule.ts +22 -15
  21. package/src/WebRecorder.web.ts +407 -390
  22. package/src/constants.ts +11 -11
  23. package/src/events.ts +27 -13
  24. package/src/index.ts +17 -15
  25. package/src/logger.ts +15 -19
  26. package/src/useAudioRecorder.tsx +394 -389
  27. package/src/utils/BlobFix.ts +550 -0
  28. package/src/utils/concatenateBuffers.ts +24 -0
  29. package/src/utils/convertPCMToFloat32.ts +72 -45
  30. package/src/utils/encodingToBitDepth.ts +14 -14
  31. package/src/utils/getWavFileInfo.ts +106 -99
  32. package/src/utils/writeWavHeader.ts +50 -45
  33. package/src/workers/InlineFeaturesExtractor.web.tsx +296 -286
  34. package/src/workers/inlineAudioWebWorker.web.tsx +230 -222
  35. package/.eslintrc.js +0 -2
  36. package/.size-limit.json +0 -6
  37. package/android/.gradle/8.1.1/checksums/checksums.lock +0 -0
  38. package/android/.gradle/8.1.1/dependencies-accessors/dependencies-accessors.lock +0 -0
  39. package/android/.gradle/8.1.1/dependencies-accessors/gc.properties +0 -0
  40. package/android/.gradle/8.1.1/fileChanges/last-build.bin +0 -0
  41. package/android/.gradle/8.1.1/fileHashes/fileHashes.lock +0 -0
  42. package/android/.gradle/8.1.1/gc.properties +0 -0
  43. package/android/.gradle/buildOutputCleanup/buildOutputCleanup.lock +0 -0
  44. package/android/.gradle/buildOutputCleanup/cache.properties +0 -2
  45. package/android/.gradle/vcs-1/gc.properties +0 -0
  46. package/app.plugin.js +0 -1
  47. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +0 -76
  48. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +0 -1
  49. package/build/AudioAnalysis/AudioAnalysis.types.js +0 -3
  50. package/build/AudioAnalysis/AudioAnalysis.types.js.map +0 -1
  51. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +0 -4
  52. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +0 -1
  53. package/build/AudioAnalysis/extractAudioAnalysis.js +0 -101
  54. package/build/AudioAnalysis/extractAudioAnalysis.js.map +0 -1
  55. package/build/AudioAnalysis/extractWaveform.d.ts +0 -8
  56. package/build/AudioAnalysis/extractWaveform.d.ts.map +0 -1
  57. package/build/AudioAnalysis/extractWaveform.js +0 -14
  58. package/build/AudioAnalysis/extractWaveform.js.map +0 -1
  59. package/build/AudioRecorder.provider.d.ts +0 -23
  60. package/build/AudioRecorder.provider.d.ts.map +0 -1
  61. package/build/AudioRecorder.provider.js +0 -36
  62. package/build/AudioRecorder.provider.js.map +0 -1
  63. package/build/ExpoAudioStream.native.d.ts +0 -3
  64. package/build/ExpoAudioStream.native.d.ts.map +0 -1
  65. package/build/ExpoAudioStream.native.js +0 -6
  66. package/build/ExpoAudioStream.native.js.map +0 -1
  67. package/build/ExpoAudioStream.types.d.ts +0 -60
  68. package/build/ExpoAudioStream.types.d.ts.map +0 -1
  69. package/build/ExpoAudioStream.types.js +0 -2
  70. package/build/ExpoAudioStream.types.js.map +0 -1
  71. package/build/ExpoAudioStream.web.d.ts +0 -42
  72. package/build/ExpoAudioStream.web.d.ts.map +0 -1
  73. package/build/ExpoAudioStream.web.js +0 -185
  74. package/build/ExpoAudioStream.web.js.map +0 -1
  75. package/build/ExpoAudioStreamModule.d.ts +0 -3
  76. package/build/ExpoAudioStreamModule.d.ts.map +0 -1
  77. package/build/ExpoAudioStreamModule.js +0 -18
  78. package/build/ExpoAudioStreamModule.js.map +0 -1
  79. package/build/WebRecorder.web.d.ts +0 -51
  80. package/build/WebRecorder.web.d.ts.map +0 -1
  81. package/build/WebRecorder.web.js +0 -288
  82. package/build/WebRecorder.web.js.map +0 -1
  83. package/build/constants.d.ts +0 -11
  84. package/build/constants.d.ts.map +0 -1
  85. package/build/constants.js +0 -14
  86. package/build/constants.js.map +0 -1
  87. package/build/events.d.ts +0 -6
  88. package/build/events.d.ts.map +0 -1
  89. package/build/events.js +0 -15
  90. package/build/events.js.map +0 -1
  91. package/build/index.d.ts +0 -10
  92. package/build/index.d.ts.map +0 -1
  93. package/build/index.js.map +0 -1
  94. package/build/logger.d.ts +0 -9
  95. package/build/logger.d.ts.map +0 -1
  96. package/build/logger.js +0 -17
  97. package/build/logger.js.map +0 -1
  98. package/build/useAudioRecorder.d.ts +0 -37
  99. package/build/useAudioRecorder.d.ts.map +0 -1
  100. package/build/useAudioRecorder.js +0 -271
  101. package/build/useAudioRecorder.js.map +0 -1
  102. package/build/utils/convertPCMToFloat32.d.ts +0 -11
  103. package/build/utils/convertPCMToFloat32.d.ts.map +0 -1
  104. package/build/utils/convertPCMToFloat32.js +0 -41
  105. package/build/utils/convertPCMToFloat32.js.map +0 -1
  106. package/build/utils/encodingToBitDepth.d.ts +0 -5
  107. package/build/utils/encodingToBitDepth.d.ts.map +0 -1
  108. package/build/utils/encodingToBitDepth.js +0 -13
  109. package/build/utils/encodingToBitDepth.js.map +0 -1
  110. package/build/utils/getWavFileInfo.d.ts +0 -25
  111. package/build/utils/getWavFileInfo.d.ts.map +0 -1
  112. package/build/utils/getWavFileInfo.js +0 -89
  113. package/build/utils/getWavFileInfo.js.map +0 -1
  114. package/build/utils/writeWavHeader.d.ts +0 -9
  115. package/build/utils/writeWavHeader.d.ts.map +0 -1
  116. package/build/utils/writeWavHeader.js +0 -41
  117. package/build/utils/writeWavHeader.js.map +0 -1
  118. package/build/workers/InlineFeaturesExtractor.web.d.ts +0 -2
  119. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +0 -1
  120. package/build/workers/InlineFeaturesExtractor.web.js +0 -303
  121. package/build/workers/InlineFeaturesExtractor.web.js.map +0 -1
  122. package/build/workers/inlineAudioWebWorker.web.d.ts +0 -2
  123. package/build/workers/inlineAudioWebWorker.web.d.ts.map +0 -1
  124. package/build/workers/inlineAudioWebWorker.web.js +0 -243
  125. package/build/workers/inlineAudioWebWorker.web.js.map +0 -1
  126. package/expo-module.config.json +0 -18
  127. package/plugin/build/index.d.ts +0 -5
  128. package/plugin/build/index.js +0 -28
  129. package/plugin/src/index.ts +0 -53
  130. package/plugin/tsconfig.json +0 -14
  131. package/publish.sh +0 -8
  132. package/tsconfig.json +0 -9
package/src/workers/InlineFeaturesExtractor.web.tsx
@@ -1,302 +1,312 @@
1
1
  export const InlineFeaturesExtractor = `
2
-
3
2
  // Unique ID counter
4
- let uniqueIdCounter = 0;
3
+ let uniqueIdCounter = 0
5
4
 
6
5
  self.onmessage = function (event) {
7
- const {
8
- channelData, // this is only the newly recorded data when live recording.
9
- sampleRate,
10
- pointsPerSecond,
11
- algorithm,
12
- bitDepth,
13
- fullAudioDurationMs,
14
- numberOfChannels,
15
- features: _features,
16
- } = event.data;
17
-
18
- console.log("[AudioFeaturesExtractor] Worker received message", event.data);
19
- const features = _features || {};
20
-
21
- const SILENCE_THRESHOLD = 0.01;
22
- const MIN_SILENCE_DURATION = 1.5 * sampleRate; // 1.5 seconds of silence
23
- const SPEECH_INERTIA_DURATION = 0.1 * sampleRate; // Speech inertia duration in samples
24
- const RMS_THRESHOLD = 0.01;
25
- const ZCR_THRESHOLD = 0.1;
26
-
27
- // Placeholder functions for feature extraction
28
- const extractMFCC = (segmentData, sampleRate) => {
29
- // Implement MFCC extraction logic here
30
- return [];
31
- };
32
-
33
- const extractSpectralCentroid = (segmentData, sampleRate) => {
34
- const magnitudeSpectrum = segmentData.map((v) => v * v);
35
- const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0);
36
- if (sum === 0) return 0;
37
-
38
- const weightedSum = magnitudeSpectrum.reduce(
39
- (acc, value, index) => acc + index * value,
40
- 0,
41
- );
42
- return ((weightedSum / sum) * (sampleRate / 2)) / magnitudeSpectrum.length;
43
- };
44
-
45
- const extractSpectralFlatness = (segmentData) => {
46
- const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
47
- const geometricMean = Math.exp(
48
- magnitudeSpectrum
49
- .map((v) => Math.log(v + Number.MIN_VALUE))
50
- .reduce((a, b) => a + b) / magnitudeSpectrum.length,
51
- );
52
- const arithmeticMean =
53
- magnitudeSpectrum.reduce((a, b) => a + b) / magnitudeSpectrum.length;
54
- return arithmeticMean === 0 ? 0 : geometricMean / arithmeticMean;
55
- };
56
-
57
- const extractSpectralRollOff = (segmentData, sampleRate) => {
58
- const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
59
- const totalEnergy = magnitudeSpectrum.reduce((a, b) => a + b, 0);
60
- const rollOffThreshold = totalEnergy * 0.85;
61
- let cumulativeEnergy = 0;
62
-
63
- for (let i = 0; i < magnitudeSpectrum.length; i++) {
64
- cumulativeEnergy += magnitudeSpectrum[i];
65
- if (cumulativeEnergy >= rollOffThreshold) {
66
- return (i / magnitudeSpectrum.length) * (sampleRate / 2);
67
- }
6
+ const {
7
+ channelData, // this is only the newly recorded data when live recording.
8
+ sampleRate,
9
+ pointsPerSecond,
10
+ algorithm,
11
+ bitDepth,
12
+ fullAudioDurationMs,
13
+ numberOfChannels,
14
+ features: _features,
15
+ } = event.data
16
+
17
+ console.log('[AudioFeaturesExtractor] Worker received message', event.data)
18
+ const features = _features || {}
19
+
20
+ const SILENCE_THRESHOLD = 0.01
21
+ const MIN_SILENCE_DURATION = 1.5 * sampleRate // 1.5 seconds of silence
22
+ const SPEECH_INERTIA_DURATION = 0.1 * sampleRate // Speech inertia duration in samples
23
+ const RMS_THRESHOLD = 0.01
24
+ const ZCR_THRESHOLD = 0.1
25
+
26
+ // Placeholder functions for feature extraction
27
+ const extractMFCC = (segmentData, sampleRate) => {
28
+ // Implement MFCC extraction logic here
29
+ return []
30
+ }
31
+
32
+ const extractSpectralCentroid = (segmentData, sampleRate) => {
33
+ const magnitudeSpectrum = segmentData.map((v) => v * v)
34
+ const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0)
35
+ if (sum === 0) return 0
36
+
37
+ const weightedSum = magnitudeSpectrum.reduce(
38
+ (acc, value, index) => acc + index * value,
39
+ 0
40
+ )
41
+ return (
42
+ ((weightedSum / sum) * (sampleRate / 2)) / magnitudeSpectrum.length
43
+ )
68
44
  }
69
45
 
70
- return 0;
71
- };
72
-
73
- const extractSpectralBandwidth = (segmentData, sampleRate) => {
74
- const centroid = extractSpectralCentroid(segmentData, sampleRate);
75
- const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
76
- const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0);
77
- if (sum === 0) return 0;
78
-
79
- const weightedSum = magnitudeSpectrum.reduce(
80
- (acc, value, index) => acc + value * Math.pow(index - centroid, 2),
81
- 0,
82
- );
83
- return Math.sqrt(weightedSum / sum);
84
- };
85
-
86
- const extractChromagram = (segmentData, sampleRate) => {
87
- return []; // TODO implement
88
- };
89
-
90
- const extractHNR = (segmentData) => {
91
- const frameSize = segmentData.length;
92
- const autocorrelation = new Float32Array(frameSize);
93
-
94
- // Compute the autocorrelation of the segment data
95
- for (let i = 0; i < frameSize; i++) {
96
- let sum = 0;
97
- for (let j = 0; j < frameSize - i; j++) {
98
- sum += segmentData[j] * segmentData[j + i];
99
- }
100
- autocorrelation[i] = sum;
46
+ const extractSpectralFlatness = (segmentData) => {
47
+ const magnitudeSpectrum = segmentData.map((v) => Math.abs(v))
48
+ const geometricMean = Math.exp(
49
+ magnitudeSpectrum
50
+ .map((v) => Math.log(v + Number.MIN_VALUE))
51
+ .reduce((a, b) => a + b) / magnitudeSpectrum.length
52
+ )
53
+ const arithmeticMean =
54
+ magnitudeSpectrum.reduce((a, b) => a + b) / magnitudeSpectrum.length
55
+ return arithmeticMean === 0 ? 0 : geometricMean / arithmeticMean
101
56
  }
102
57
 
103
- // Find the maximum autocorrelation value (excluding the zero lag)
104
- const maxAutocorrelation = Math.max(...autocorrelation.subarray(1));
105
-
106
- // Compute the HNR
107
- return autocorrelation[0] !== 0
108
- ? 10 *
109
- Math.log10(
110
- maxAutocorrelation / (autocorrelation[0] - maxAutocorrelation),
111
- )
112
- : 0;
113
- };
114
-
115
- const extractWaveform = (
116
- channelData, // Float32Array
117
- sampleRate, // number
118
- pointsPerSecond, // number
119
- algorithm, // string
120
- ) => {
121
- const totalSamples = channelData.length;
122
- const segmentDuration = totalSamples / sampleRate;
123
- const totalPoints = Math.max(
124
- Math.ceil(segmentDuration * pointsPerSecond),
125
- 1,
126
- );
127
- const pointInterval = Math.ceil(totalSamples / totalPoints);
128
- const dataPoints = [];
129
- let minAmplitude = Infinity;
130
- let maxAmplitude = -Infinity;
131
- let silenceStart = null;
132
- let lastSpeechEnd = -Infinity;
133
- let isSpeech = false;
134
-
135
- const expectedPoints = segmentDuration * pointsPerSecond;
136
- const samplesPerPoint = Math.ceil(channelData.length / expectedPoints);
137
-
138
- for (let i = 0; i < expectedPoints; i++) {
139
- const start = i * samplesPerPoint;
140
- const end = Math.min(start + samplesPerPoint, totalSamples);
141
-
142
- let sumSquares = 0;
143
- let zeroCrossings = 0;
144
- let prevValue = channelData[start];
145
- let localMinAmplitude = Infinity;
146
- let localMaxAmplitude = -Infinity;
147
- let hasNonZeroValue = false;
148
-
149
- // compute values for the segment
150
- for (let j = start; j < end; j++) {
151
- const value = channelData[j];
152
- sumSquares += value * value;
153
- if (j > start && value * prevValue < 0) {
154
- zeroCrossings++;
58
+ const extractSpectralRollOff = (segmentData, sampleRate) => {
59
+ const magnitudeSpectrum = segmentData.map((v) => Math.abs(v))
60
+ const totalEnergy = magnitudeSpectrum.reduce((a, b) => a + b, 0)
61
+ const rollOffThreshold = totalEnergy * 0.85
62
+ let cumulativeEnergy = 0
63
+
64
+ for (let i = 0; i < magnitudeSpectrum.length; i++) {
65
+ cumulativeEnergy += magnitudeSpectrum[i]
66
+ if (cumulativeEnergy >= rollOffThreshold) {
67
+ return (i / magnitudeSpectrum.length) * (sampleRate / 2)
68
+ }
155
69
  }
156
- prevValue = value;
157
70
 
158
- const absValue = Math.abs(value);
159
- localMinAmplitude = Math.min(localMinAmplitude, absValue);
160
- localMaxAmplitude = Math.max(localMaxAmplitude, absValue);
71
+ return 0
72
+ }
161
73
 
162
- if (absValue !== 0) {
163
- hasNonZeroValue = true;
74
+ const extractSpectralBandwidth = (segmentData, sampleRate) => {
75
+ const centroid = extractSpectralCentroid(segmentData, sampleRate)
76
+ const magnitudeSpectrum = segmentData.map((v) => Math.abs(v))
77
+ const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0)
78
+ if (sum === 0) return 0
79
+
80
+ const weightedSum = magnitudeSpectrum.reduce(
81
+ (acc, value, index) => acc + value * Math.pow(index - centroid, 2),
82
+ 0
83
+ )
84
+ return Math.sqrt(weightedSum / sum)
85
+ }
86
+
87
+ const extractChromagram = (segmentData, sampleRate) => {
88
+ return [] // TODO implement
89
+ }
90
+
91
+ const extractHNR = (segmentData) => {
92
+ const frameSize = segmentData.length
93
+ const autocorrelation = new Float32Array(frameSize)
94
+
95
+ // Compute the autocorrelation of the segment data
96
+ for (let i = 0; i < frameSize; i++) {
97
+ let sum = 0
98
+ for (let j = 0; j < frameSize - i; j++) {
99
+ sum += segmentData[j] * segmentData[j + i]
100
+ }
101
+ autocorrelation[i] = sum
164
102
  }
165
- }
166
-
167
- // Post-processing checks
168
- if (!hasNonZeroValue) {
169
- // All values are zero
170
- localMinAmplitude = 0;
171
- localMaxAmplitude = 0;
172
- }
173
-
174
- const rms = Math.sqrt(sumSquares / (end - start));
175
- minAmplitude = Math.min(minAmplitude, rms);
176
- maxAmplitude = Math.max(maxAmplitude, rms);
177
-
178
- const energy = sumSquares;
179
- const zcr = zeroCrossings / (end - start);
180
-
181
- const silent = rms < SILENCE_THRESHOLD;
182
- const dB = 20 * Math.log10(rms);
183
-
184
- if (silent) {
185
- if (silenceStart === null) {
186
- silenceStart = start;
187
- } else if (start - silenceStart > MIN_SILENCE_DURATION) {
188
- // Silence detected for longer than the threshold, set amplitude to 0
189
- localMaxAmplitude = 0;
190
- localMinAmplitude = 0;
191
- isSpeech = false;
103
+
104
+ // Find the maximum autocorrelation value (excluding the zero lag)
105
+ const maxAutocorrelation = Math.max(...autocorrelation.subarray(1))
106
+
107
+ // Compute the HNR
108
+ return autocorrelation[0] !== 0
109
+ ? 10 *
110
+ Math.log10(
111
+ maxAutocorrelation /
112
+ (autocorrelation[0] - maxAutocorrelation)
113
+ )
114
+ : 0
115
+ }
116
+
117
+ const extractWaveform = (
118
+ channelData, // Float32Array
119
+ sampleRate, // number
120
+ pointsPerSecond, // number
121
+ algorithm // string
122
+ ) => {
123
+ const totalSamples = channelData.length
124
+ const segmentDuration = totalSamples / sampleRate
125
+ const totalPoints = Math.max(
126
+ Math.ceil(segmentDuration * pointsPerSecond),
127
+ 1
128
+ )
129
+ const pointInterval = Math.ceil(totalSamples / totalPoints)
130
+ const dataPoints = []
131
+ let minAmplitude = Infinity
132
+ let maxAmplitude = -Infinity
133
+ let silenceStart = null
134
+ let lastSpeechEnd = -Infinity
135
+ let isSpeech = false
136
+
137
+ const expectedPoints = segmentDuration * pointsPerSecond
138
+ const samplesPerPoint = Math.ceil(channelData.length / expectedPoints)
139
+
140
+ for (let i = 0; i < expectedPoints; i++) {
141
+ const start = i * samplesPerPoint
142
+ const end = Math.min(start + samplesPerPoint, totalSamples)
143
+
144
+ let sumSquares = 0
145
+ let zeroCrossings = 0
146
+ let prevValue = channelData[start]
147
+ let localMinAmplitude = Infinity
148
+ let localMaxAmplitude = -Infinity
149
+ let hasNonZeroValue = false
150
+
151
+ // compute values for the segment
152
+ for (let j = start; j < end; j++) {
153
+ const value = channelData[j]
154
+ sumSquares += value * value
155
+ if (j > start && value * prevValue < 0) {
156
+ zeroCrossings++
157
+ }
158
+ prevValue = value
159
+
160
+ // We need to keep absolute value otherwise we cannot visualize properly
161
+ const absValue = Math.abs(value)
162
+ localMinAmplitude = Math.min(localMinAmplitude, absValue)
163
+ localMaxAmplitude = Math.max(localMaxAmplitude, absValue)
164
+
165
+ if (value !== 0) {
166
+ hasNonZeroValue = true
167
+ }
168
+ }
169
+
170
+ // Post-processing checks
171
+ if (!hasNonZeroValue) {
172
+ // All values are zero
173
+ localMinAmplitude = 0
174
+ localMaxAmplitude = 0
175
+ }
176
+
177
+ const rms = Math.sqrt(sumSquares / (end - start))
178
+ minAmplitude = Math.min(minAmplitude, localMinAmplitude)
179
+ maxAmplitude = Math.max(maxAmplitude, localMaxAmplitude)
180
+
181
+ const energy = sumSquares
182
+ const zcr = zeroCrossings / (end - start)
183
+
184
+ const silent = rms < SILENCE_THRESHOLD
185
+ const dB = 20 * Math.log10(rms)
186
+
187
+ if (silent) {
188
+ if (silenceStart === null) {
189
+ silenceStart = start
190
+ } else if (start - silenceStart > MIN_SILENCE_DURATION) {
191
+ // Silence detected for longer than the threshold, set amplitude to 0
192
+ localMaxAmplitude = 0
193
+ localMinAmplitude = 0
194
+ isSpeech = false
195
+ }
196
+ } else {
197
+ silenceStart = null
198
+ if (
199
+ !isSpeech &&
200
+ start - lastSpeechEnd < SPEECH_INERTIA_DURATION
201
+ ) {
202
+ isSpeech = true
203
+ }
204
+ lastSpeechEnd = end
205
+ }
206
+
207
+ const activeSpeech =
208
+ (rms > RMS_THRESHOLD && zcr > ZCR_THRESHOLD) ||
209
+ (isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION)
210
+
211
+ if (activeSpeech) {
212
+ isSpeech = true
213
+ lastSpeechEnd = end
214
+ } else {
215
+ isSpeech = false
216
+ }
217
+
218
+ const bytesPerSample = bitDepth / 8
219
+ const startPosition = start * bytesPerSample * numberOfChannels // Calculate start position in bytes
220
+ const endPosition = end * bytesPerSample * numberOfChannels // Calculate end position in bytes
221
+
222
+ // Compute features
223
+ const segmentData = channelData.slice(start, end)
224
+ const mfcc = features.mfcc
225
+ ? extractMFCC(segmentData, sampleRate)
226
+ : []
227
+ const spectralCentroid = features.spectralCentroid
228
+ ? extractSpectralCentroid(segmentData, sampleRate)
229
+ : 0
230
+ const spectralFlatness = features.spectralFlatness
231
+ ? extractSpectralFlatness(segmentData)
232
+ : 0
233
+ const spectralRollOff = features.spectralRollOff
234
+ ? extractSpectralRollOff(segmentData, sampleRate)
235
+ : 0
236
+ const spectralBandwidth = features.spectralBandwidth
237
+ ? extractSpectralBandwidth(segmentData, sampleRate)
238
+ : 0
239
+ const chromagram = features.chromagram
240
+ ? extractChromagram(segmentData, sampleRate)
241
+ : []
242
+ const hnr = features.hnr ? extractHNR(segmentData) : 0
243
+
244
+ const peakAmp = Math.max(Math.abs(localMaxAmplitude), Math.abs(localMinAmplitude))
245
+ const newData = {
246
+ id: uniqueIdCounter++, // Assign unique ID and increment the counter
247
+ amplitude: algorithm === 'peak' ? peakAmp : rms,
248
+ activeSpeech,
249
+ dB,
250
+ silent,
251
+ features: {
252
+ energy,
253
+ rms,
254
+ minAmplitude: localMinAmplitude,
255
+ maxAmplitude: localMaxAmplitude,
256
+ zcr,
257
+ mfcc: [], // Placeholder for MFCC features
258
+ spectralCentroid, // Computed spectral centroid
259
+ spectralFlatness, // Computed spectral flatness
260
+ spectralRollOff, // Computed spectral roll-off
261
+ spectralBandwidth, // Computed spectral bandwidth
262
+ chromagram, // Computed chromagram
263
+ hnr, // Computed HNR
264
+ },
265
+ startTime: start / sampleRate,
266
+ endTime: end / sampleRate,
267
+ startPosition,
268
+ endPosition,
269
+ samples: end - start,
270
+ speaker: 0, // Assuming speaker detection is to be handled later
271
+ }
272
+
273
+ dataPoints.push(newData)
192
274
  }
193
- } else {
194
- silenceStart = null;
195
- if (!isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION) {
196
- isSpeech = true;
275
+
276
+ return {
277
+ pointsPerSecond,
278
+ amplitudeAlgorithm: algorithm,
279
+ durationMs: fullAudioDurationMs,
280
+ bitDepth,
281
+ samples: totalSamples,
282
+ numberOfChannels,
283
+ sampleRate,
284
+ dataPoints,
285
+ amplitudeRange: {
286
+ min: minAmplitude,
287
+ max: maxAmplitude,
288
+ },
289
+ speakerChanges: [], // Placeholder for future speaker detection logic
197
290
  }
198
- lastSpeechEnd = end;
199
- }
200
-
201
- const activeSpeech =
202
- (rms > RMS_THRESHOLD && zcr > ZCR_THRESHOLD) ||
203
- (isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION);
204
-
205
- if (activeSpeech) {
206
- isSpeech = true;
207
- lastSpeechEnd = end;
208
- } else {
209
- isSpeech = false;
210
- }
211
-
212
- const bytesPerSample = bitDepth / 8;
213
- const startPosition = start * bytesPerSample * numberOfChannels; // Calculate start position in bytes
214
- const endPosition = end * bytesPerSample * numberOfChannels; // Calculate end position in bytes
215
-
216
- // Compute features
217
- const segmentData = channelData.slice(start, end);
218
- const mfcc = features.mfcc ? extractMFCC(segmentData, sampleRate) : [];
219
- const spectralCentroid = features.spectralCentroid
220
- ? extractSpectralCentroid(segmentData, sampleRate)
221
- : 0;
222
- const spectralFlatness = features.spectralFlatness
223
- ? extractSpectralFlatness(segmentData)
224
- : 0;
225
- const spectralRollOff = features.spectralRollOff
226
- ? extractSpectralRollOff(segmentData, sampleRate)
227
- : 0;
228
- const spectralBandwidth = features.spectralBandwidth
229
- ? extractSpectralBandwidth(segmentData, sampleRate)
230
- : 0;
231
- const chromagram = features.chromagram
232
- ? extractChromagram(segmentData, sampleRate)
233
- : [];
234
- const hnr = features.hnr ? extractHNR(segmentData) : 0;
235
-
236
- const newData = {
237
- id: uniqueIdCounter++, // Assign unique ID and increment the counter
238
- amplitude: algorithm === "peak" ? localMaxAmplitude : rms,
239
- activeSpeech,
240
- dB,
241
- silent,
242
- features: {
243
- energy,
244
- rms,
245
- minAmplitude: localMinAmplitude,
246
- maxAmplitude: localMaxAmplitude,
247
- zcr,
248
- mfcc: [], // Placeholder for MFCC features
249
- spectralCentroid, // Computed spectral centroid
250
- spectralFlatness, // Computed spectral flatness
251
- spectralRollOff, // Computed spectral roll-off
252
- spectralBandwidth, // Computed spectral bandwidth
253
- chromagram, // Computed chromagram
254
- hnr, // Computed HNR
255
- },
256
- startTime: start / sampleRate,
257
- endTime: end / sampleRate,
258
- startPosition,
259
- endPosition,
260
- samples: end - start,
261
- speaker: 0, // Assuming speaker detection is to be handled later
262
- };
263
-
264
- dataPoints.push(newData);
265
291
  }
266
292
 
267
- return {
268
- pointsPerSecond,
269
- durationMs: fullAudioDurationMs,
270
- bitDepth,
271
- samples: totalSamples,
272
- numberOfChannels,
273
- sampleRate,
274
- dataPoints,
275
- amplitudeRange: {
276
- min: minAmplitude,
277
- max: maxAmplitude,
278
- },
279
- speakerChanges: [], // Placeholder for future speaker detection logic
280
- };
281
- };
282
-
283
- try {
284
- const result = extractWaveform(
285
- channelData,
286
- sampleRate,
287
- pointsPerSecond,
288
- algorithm,
289
- );
290
- self.postMessage({
291
- command: "features",
292
- result,
293
- });
294
- } catch (error) {
295
- console.error("[AudioFeaturesExtractor] Error in processing", error);
296
- self.postMessage({ error: error.message });
297
- } finally {
298
- // Do not close the worker so it can be re-used for subsequent messages
299
- // self.close();
300
- }
301
- };
302
- `;
293
+ try {
294
+ const result = extractWaveform(
295
+ channelData,
296
+ sampleRate,
297
+ pointsPerSecond,
298
+ algorithm
299
+ )
300
+ self.postMessage({
301
+ command: 'features',
302
+ result,
303
+ })
304
+ } catch (error) {
305
+ console.error('[AudioFeaturesExtractor] Error in processing', error)
306
+ self.postMessage({ error: error.message })
307
+ } finally {
308
+ // Do not close the worker so it can be re-used for subsequent messages
309
+ // self.close();
310
+ }
311
+ }
312
+ `