@siteed/expo-audio-stream 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/.size-limit.json +6 -0
  2. package/README.md +18 -176
  3. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +1 -0
  4. package/app.plugin.js +1 -1
  5. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +74 -0
  6. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
  7. package/build/AudioAnalysis/AudioAnalysis.types.js +3 -0
  8. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  9. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +20 -0
  10. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
  11. package/build/AudioAnalysis/extractAudioAnalysis.js +88 -0
  12. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  13. package/build/AudioAnalysis/extractWaveform.d.ts +8 -0
  14. package/build/AudioAnalysis/extractWaveform.d.ts.map +1 -0
  15. package/build/AudioAnalysis/extractWaveform.js +14 -0
  16. package/build/AudioAnalysis/extractWaveform.js.map +1 -0
  17. package/build/AudioRecorder.provider.d.ts +15 -2
  18. package/build/AudioRecorder.provider.d.ts.map +1 -1
  19. package/build/AudioRecorder.provider.js +21 -8
  20. package/build/AudioRecorder.provider.js.map +1 -1
  21. package/build/ExpoAudioStream.native.d.ts.map +1 -1
  22. package/build/ExpoAudioStream.native.js +2 -2
  23. package/build/ExpoAudioStream.native.js.map +1 -1
  24. package/build/ExpoAudioStream.types.d.ts +33 -89
  25. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  26. package/build/ExpoAudioStream.types.js.map +1 -1
  27. package/build/ExpoAudioStream.web.d.ts +10 -9
  28. package/build/ExpoAudioStream.web.d.ts.map +1 -1
  29. package/build/ExpoAudioStream.web.js +44 -25
  30. package/build/ExpoAudioStream.web.js.map +1 -1
  31. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  32. package/build/ExpoAudioStreamModule.js +13 -8
  33. package/build/ExpoAudioStreamModule.js.map +1 -1
  34. package/build/{WebRecorder.d.ts → WebRecorder.web.d.ts} +13 -9
  35. package/build/WebRecorder.web.d.ts.map +1 -0
  36. package/build/{WebRecorder.js → WebRecorder.web.js} +118 -63
  37. package/build/WebRecorder.web.js.map +1 -0
  38. package/build/constants.d.ts +11 -0
  39. package/build/constants.d.ts.map +1 -0
  40. package/build/constants.js +14 -0
  41. package/build/constants.js.map +1 -0
  42. package/build/events.d.ts +18 -0
  43. package/build/events.d.ts.map +1 -0
  44. package/build/events.js +15 -0
  45. package/build/events.js.map +1 -0
  46. package/build/index.d.ts +9 -17
  47. package/build/index.d.ts.map +1 -1
  48. package/build/index.js +7 -113
  49. package/build/index.js.map +1 -1
  50. package/build/logger.d.ts +9 -0
  51. package/build/logger.d.ts.map +1 -0
  52. package/build/logger.js +13 -0
  53. package/build/logger.js.map +1 -0
  54. package/build/useAudioRecorder.d.ts +20 -0
  55. package/build/useAudioRecorder.d.ts.map +1 -0
  56. package/build/{useAudioRecording.js → useAudioRecorder.js} +90 -86
  57. package/build/useAudioRecorder.js.map +1 -0
  58. package/build/utils/BlobFix.d.ts +9 -0
  59. package/build/utils/BlobFix.d.ts.map +1 -0
  60. package/build/utils/BlobFix.js +494 -0
  61. package/build/utils/BlobFix.js.map +1 -0
  62. package/build/utils/concatenateBuffers.d.ts +8 -0
  63. package/build/utils/concatenateBuffers.d.ts.map +1 -0
  64. package/build/utils/concatenateBuffers.js +21 -0
  65. package/build/utils/concatenateBuffers.js.map +1 -0
  66. package/build/utils/convertPCMToFloat32.d.ts +11 -0
  67. package/build/utils/convertPCMToFloat32.d.ts.map +1 -0
  68. package/build/utils/convertPCMToFloat32.js +54 -0
  69. package/build/utils/convertPCMToFloat32.js.map +1 -0
  70. package/build/utils/encodingToBitDepth.d.ts +5 -0
  71. package/build/utils/encodingToBitDepth.d.ts.map +1 -0
  72. package/build/utils/encodingToBitDepth.js +13 -0
  73. package/build/utils/encodingToBitDepth.js.map +1 -0
  74. package/build/utils/getWavFileInfo.d.ts +26 -0
  75. package/build/utils/getWavFileInfo.d.ts.map +1 -0
  76. package/build/utils/getWavFileInfo.js +92 -0
  77. package/build/utils/getWavFileInfo.js.map +1 -0
  78. package/build/utils/writeWavHeader.d.ts +9 -0
  79. package/build/utils/writeWavHeader.d.ts.map +1 -0
  80. package/build/utils/writeWavHeader.js +41 -0
  81. package/build/utils/writeWavHeader.js.map +1 -0
  82. package/build/workers/InlineFeaturesExtractor.web.d.ts +2 -0
  83. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
  84. package/build/workers/InlineFeaturesExtractor.web.js +303 -0
  85. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -0
  86. package/build/workers/inlineAudioWebWorker.web.d.ts +2 -0
  87. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
  88. package/build/workers/inlineAudioWebWorker.web.js +243 -0
  89. package/build/workers/inlineAudioWebWorker.web.js.map +1 -0
  90. package/expo-module.config.json +8 -17
  91. package/ios/AudioStreamManager.swift +40 -2
  92. package/ios/ExpoAudioStreamModule.swift +11 -0
  93. package/ios/RecordingResult.swift +1 -0
  94. package/package.json +72 -64
  95. package/plugin/build/index.d.ts +1 -1
  96. package/plugin/build/index.js +7 -7
  97. package/plugin/src/index.ts +47 -47
  98. package/plugin/tsconfig.json +8 -13
  99. package/publish.sh +0 -0
  100. package/src/AudioAnalysis/AudioAnalysis.types.ts +84 -0
  101. package/src/AudioAnalysis/extractAudioAnalysis.ts +147 -0
  102. package/src/AudioAnalysis/extractWaveform.ts +25 -0
  103. package/src/AudioRecorder.provider.tsx +59 -31
  104. package/src/ExpoAudioStream.native.ts +2 -2
  105. package/src/ExpoAudioStream.types.ts +58 -116
  106. package/src/ExpoAudioStream.web.ts +233 -205
  107. package/src/ExpoAudioStreamModule.ts +18 -12
  108. package/src/WebRecorder.web.ts +433 -0
  109. package/src/constants.ts +18 -0
  110. package/src/events.ts +39 -0
  111. package/src/index.ts +15 -176
  112. package/src/logger.ts +23 -0
  113. package/src/useAudioRecorder.tsx +420 -0
  114. package/src/utils/BlobFix.ts +550 -0
  115. package/src/utils/concatenateBuffers.ts +24 -0
  116. package/src/utils/convertPCMToFloat32.ts +75 -0
  117. package/src/utils/encodingToBitDepth.ts +18 -0
  118. package/src/utils/getWavFileInfo.ts +132 -0
  119. package/src/utils/writeWavHeader.ts +56 -0
  120. package/src/workers/InlineFeaturesExtractor.web.tsx +302 -0
  121. package/src/workers/inlineAudioWebWorker.web.tsx +242 -0
  122. package/tsconfig.json +12 -7
  123. package/build/WebRecorder.d.ts.map +0 -1
  124. package/build/WebRecorder.js.map +0 -1
  125. package/build/inlineAudioWebWorker.d.ts +0 -3
  126. package/build/inlineAudioWebWorker.d.ts.map +0 -1
  127. package/build/inlineAudioWebWorker.js +0 -340
  128. package/build/inlineAudioWebWorker.js.map +0 -1
  129. package/build/useAudioRecording.d.ts +0 -38
  130. package/build/useAudioRecording.d.ts.map +0 -1
  131. package/build/useAudioRecording.js.map +0 -1
  132. package/build/utils.d.ts +0 -31
  133. package/build/utils.d.ts.map +0 -1
  134. package/build/utils.js +0 -143
  135. package/build/utils.js.map +0 -1
  136. package/src/WebRecorder.ts +0 -364
  137. package/src/inlineAudioWebWorker.tsx +0 -340
  138. package/src/useAudioRecording.tsx +0 -410
  139. package/src/utils.ts +0 -189
@@ -0,0 +1,132 @@
1
+ // packages/expo-audio-stream/src/utils/getWavFileInfo.ts
2
+
3
+ import { BitDepth, SampleRate } from '../ExpoAudioStream.types'
4
+ import {
5
+ DATA_CHUNK_ID,
6
+ DEFAULT_BIT_DEPTH,
7
+ DEFAULT_SAMPLE_RATE,
8
+ FMT_CHUNK_ID,
9
+ INFO_CHUNK_ID,
10
+ RIFF_HEADER,
11
+ WAVE_HEADER,
12
+ } from '../constants'
13
+
14
/**
 * Human-readable descriptions keyed by the numeric audio format tag.
 * A tag that is missing from this table is treated as unsupported by the
 * WAV parser in this file.
 */
const AUDIO_FORMATS: Record<number, string> = {
    // Uncompressed integer samples
    1: 'PCM',
    // Floating-point samples
    3: 'IEEE float',
    // Companded 8-bit formats
    6: '8-bit ITU-T G.711 A-law',
    7: '8-bit ITU-T G.711 µ-law',
    // Extensible format tag
    65534: 'WAVE_FORMAT_EXTENSIBLE',
}
22
+
23
/**
 * Metadata describing a WAV file, as produced by the WAV parser in this file.
 */
export interface WavFileInfo {
    /** Sample rate read from the format chunk. */
    sampleRate: SampleRate
    /** Number of audio channels. */
    numChannels: number
    /** Bits per sample. */
    bitDepth: BitDepth
    /** Size in bytes. */
    size: number
    /** Duration in milliseconds. */
    durationMs: number
    /** Description of the audio format. */
    audioFormatDescription: string
    /** Average bytes per second. */
    byteRate: number
    /** Number of bytes for one sample including all channels. */
    blockAlign: number
    /** Optional creation date and time. */
    creationDateTime?: string
    /** Optional comments or tags. */
    comments?: string
    /** Optional compression type. */
    compressionType?: string
    /** Position of the first data chunk. */
    dataChunkOffset: number
}
40
+
41
/**
 * Extracts metadata from a WAV buffer.
 *
 * Validates the 12-byte RIFF/WAVE preamble, then walks the chunk list that
 * follows it: the "fmt " chunk supplies the format fields, an "INFO" chunk
 * (if present) supplies optional NUL-separated text, and parsing stops at the
 * first "data" chunk. The data chunk payload itself is never read, only its
 * declared size and offset.
 *
 * @param arrayBuffer - The array buffer containing the WAV data.
 * @returns A promise that resolves to the extracted metadata.
 * @throws Error 'Invalid WAV file' (as a rejection) when the buffer is too
 *   short or does not start with the RIFF/WAVE preamble.
 * @throws Error 'Unsupported WAV file format' (as a rejection) when the
 *   format tag is not listed in AUDIO_FORMATS.
 * @throws Error 'Incomplete WAV file information' (as a rejection) when the
 *   fmt chunk is truncated or a required field is missing or zero.
 */
export const getWavFileInfo = async (
    arrayBuffer: ArrayBuffer
): Promise<WavFileInfo> => {
    const RIFF_PREAMBLE_SIZE = 12 // "RIFF" id + size + "WAVE" form type
    const CHUNK_HEADER_SIZE = 8 // 4-byte chunk id + 4-byte little-endian size
    const FMT_FIELDS_SIZE = 16 // bytes of the fmt payload read below

    const view = new DataView(arrayBuffer)

    // A buffer shorter than the preamble cannot be a WAV file; check the
    // length first so DataView does not throw a RangeError instead.
    if (view.byteLength < RIFF_PREAMBLE_SIZE) {
        throw new Error('Invalid WAV file')
    }

    // Check if the file is a valid RIFF/WAVE file
    const riffHeader = view.getUint32(0, false)
    const waveHeader = view.getUint32(8, false)
    if (riffHeader !== RIFF_HEADER || waveHeader !== WAVE_HEADER) {
        throw new Error('Invalid WAV file')
    }

    // Metadata defaults, overwritten as chunks are parsed
    let sampleRate: SampleRate = DEFAULT_SAMPLE_RATE
    let numChannels = 0
    let bitDepth: BitDepth = DEFAULT_BIT_DEPTH
    let dataChunkSize = 0
    let audioFormat = 0
    let byteRate = 0
    let blockAlign = 0
    let creationDateTime = ''
    let comments = ''
    let dataChunkOffset = 0

    // Walk the chunk list. Only enter the loop when a full chunk header is
    // available, so trailing bytes cannot trigger an out-of-bounds read.
    let chunkOffset = RIFF_PREAMBLE_SIZE
    while (chunkOffset + CHUNK_HEADER_SIZE <= view.byteLength) {
        const chunkId = view.getUint32(chunkOffset, false)
        const chunkSize = view.getUint32(chunkOffset + 4, true)
        const payloadOffset = chunkOffset + CHUNK_HEADER_SIZE

        if (chunkId === FMT_CHUNK_ID) {
            // "fmt "
            if (payloadOffset + FMT_FIELDS_SIZE > view.byteLength) {
                throw new Error('Incomplete WAV file information')
            }
            audioFormat = view.getUint16(payloadOffset, true)
            if (!AUDIO_FORMATS[audioFormat]) {
                throw new Error('Unsupported WAV file format')
            }
            numChannels = view.getUint16(payloadOffset + 2, true)
            sampleRate = view.getUint32(payloadOffset + 4, true) as SampleRate
            byteRate = view.getUint32(payloadOffset + 8, true)
            blockAlign = view.getUint16(payloadOffset + 12, true)
            bitDepth = view.getUint16(payloadOffset + 14, true) as BitDepth
        } else if (chunkId === DATA_CHUNK_ID) {
            // "data": record where the samples start and stop parsing
            dataChunkSize = chunkSize
            dataChunkOffset = payloadOffset
            break
        } else if (chunkId === INFO_CHUNK_ID) {
            // "INFO": assumed to hold a text creation date/time and comments
            // separated by NUL characters
            const infoText = new TextDecoder().decode(
                new Uint8Array(
                    arrayBuffer.slice(payloadOffset, payloadOffset + chunkSize)
                )
            )
            const infoParts = infoText.split('\0')
            creationDateTime = infoParts[0] || ''
            comments = infoParts[1] || ''
        }

        // RIFF chunks are word-aligned: an odd-sized payload is followed by
        // one pad byte that is not counted in chunkSize.
        chunkOffset = payloadOffset + chunkSize + (chunkSize % 2)
    }

    if (!sampleRate || !numChannels || !bitDepth || !dataChunkSize) {
        throw new Error('Incomplete WAV file information')
    }

    // Duration is derived from the declared data size, not the buffer length
    const bytesPerSample = bitDepth / 8
    const numSamples = dataChunkSize / (numChannels * bytesPerSample)
    const durationMs = (numSamples / sampleRate) * 1000

    return {
        sampleRate,
        numChannels,
        bitDepth,
        size: arrayBuffer.byteLength,
        durationMs,
        audioFormatDescription: AUDIO_FORMATS[audioFormat],
        byteRate,
        blockAlign,
        creationDateTime: creationDateTime || undefined,
        comments: comments || undefined,
        compressionType:
            audioFormat === 1 ? 'None' : AUDIO_FORMATS[audioFormat],
        dataChunkOffset,
    }
}
@@ -0,0 +1,56 @@
1
+ // packages/expo-audio-stream/src/utils/writeWavHeader.ts
2
interface WavHeaderOptions {
    /** Buffer whose first 44 bytes hold (or will hold) the WAV header. */
    buffer: ArrayBuffer
    sampleRate: number
    numChannels: number
    bitDepth: number
}

/**
 * Writes or refreshes a 44-byte RIFF/WAVE header at the start of `buffer`,
 * in place.
 *
 * If the buffer does not already start with "RIFF", a complete header is
 * written over bytes 0-43. Otherwise only the size, sample-rate, byte-rate
 * and block-align fields are updated. In both cases the declared sizes
 * describe the buffer as it is after the write: the data chunk covers
 * everything after the 44-byte header, and the RIFF chunk covers everything
 * after its own 8-byte header.
 *
 * @param options.buffer - Buffer to write into; must be at least 44 bytes.
 * @param options.sampleRate - Sample rate in Hz.
 * @param options.numChannels - Number of channels.
 * @param options.bitDepth - Bits per sample; 32 is written as IEEE float,
 *   any other value as PCM.
 * @returns The same `buffer` instance that was passed in.
 * @throws RangeError when the buffer is too small to contain the header.
 */
export const writeWavHeader = ({
    buffer,
    sampleRate,
    numChannels,
    bitDepth,
}: WavHeaderOptions): ArrayBuffer => {
    const HEADER_SIZE = 44 // RIFF preamble (12) + fmt chunk (24) + data chunk header (8)

    // Fail before touching the buffer instead of throwing halfway through
    // the header writes and leaving a partially overwritten buffer.
    if (buffer.byteLength < HEADER_SIZE) {
        throw new RangeError(
            `Buffer of ${buffer.byteLength} bytes is too small for a ${HEADER_SIZE}-byte WAV header`
        )
    }

    const bytesPerSample = bitDepth / 8
    const blockAlign = numChannels * bytesPerSample
    const byteRate = sampleRate * blockAlign
    // The header lives inside the buffer, so it must not be counted as audio.
    const dataSize = buffer.byteLength - HEADER_SIZE
    const riffSize = buffer.byteLength - 8 // everything after "RIFF" + size
    const view = new DataView(buffer)

    // Function to write an ASCII string to the DataView
    const writeString = (offset: number, text: string) => {
        for (let i = 0; i < text.length; i++) {
            view.setUint8(offset + i, text.charCodeAt(i))
        }
    }

    // Check if the buffer already has a WAV header by looking for "RIFF" at the start
    const existingHeader = view.getUint32(0, false) === 0x52494646 // "RIFF" in ASCII

    if (!existingHeader) {
        // Write the WAV header
        writeString(0, 'RIFF') // ChunkID
        view.setUint32(4, riffSize, true) // ChunkSize
        writeString(8, 'WAVE') // Format
        writeString(12, 'fmt ') // Subchunk1ID
        view.setUint32(16, 16, true) // Subchunk1Size (16 for PCM)
        view.setUint16(20, bitDepth === 32 ? 3 : 1, true) // AudioFormat (3 for float, 1 for PCM)
        view.setUint16(22, numChannels, true) // NumChannels
        view.setUint32(24, sampleRate, true) // SampleRate
        view.setUint32(28, byteRate, true) // ByteRate
        view.setUint16(32, blockAlign, true) // BlockAlign
        view.setUint16(34, bitDepth, true) // BitsPerSample
        writeString(36, 'data') // Subchunk2ID
        view.setUint32(40, dataSize, true) // Subchunk2Size
    } else {
        // Update the existing WAV header (assumes the canonical 44-byte layout)
        view.setUint32(4, riffSize, true) // Update ChunkSize
        view.setUint32(24, sampleRate, true) // Update SampleRate
        view.setUint32(28, byteRate, true) // Update ByteRate
        view.setUint16(32, blockAlign, true) // Update BlockAlign
        view.setUint32(40, dataSize, true) // Update Subchunk2Size
    }

    return buffer
}
@@ -0,0 +1,302 @@
1
/**
 * JavaScript source, kept as a string, of a message handler that computes
 * waveform data points and optional audio features.
 *
 * The script assigns `self.onmessage` and answers each message with
 * `self.postMessage({ command: "features", result })`, or `{ error }` when
 * processing throws. How the string is turned into a running worker is
 * decided by the caller and is not visible here.
 */
export const InlineFeaturesExtractor = `

// Unique ID counter
let uniqueIdCounter = 0;

self.onmessage = function (event) {
  const {
    channelData, // this is only the newly recorded data when live recording.
    sampleRate,
    pointsPerSecond,
    algorithm,
    bitDepth,
    fullAudioDurationMs,
    numberOfChannels,
    features: _features,
  } = event.data;

  console.log("[AudioFeaturesExtractor] Worker received message", event.data);
  const features = _features || {};

  const SILENCE_THRESHOLD = 0.01;
  const MIN_SILENCE_DURATION = 1.5 * sampleRate; // 1.5 seconds of silence
  const SPEECH_INERTIA_DURATION = 0.1 * sampleRate; // Speech inertia duration in samples
  const RMS_THRESHOLD = 0.01;
  const ZCR_THRESHOLD = 0.1;

  // Placeholder functions for feature extraction
  const extractMFCC = (segmentData, sampleRate) => {
    // Implement MFCC extraction logic here
    return [];
  };

  const extractSpectralCentroid = (segmentData, sampleRate) => {
    const magnitudeSpectrum = segmentData.map((v) => v * v);
    const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0);
    if (sum === 0) return 0;

    const weightedSum = magnitudeSpectrum.reduce(
      (acc, value, index) => acc + index * value,
      0,
    );
    return ((weightedSum / sum) * (sampleRate / 2)) / magnitudeSpectrum.length;
  };

  const extractSpectralFlatness = (segmentData) => {
    // An empty segment has no spectrum; report 0 instead of dividing by zero.
    if (segmentData.length === 0) return 0;

    const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
    const geometricMean = Math.exp(
      magnitudeSpectrum
        .map((v) => Math.log(v + Number.MIN_VALUE))
        .reduce((a, b) => a + b, 0) / magnitudeSpectrum.length,
    );
    const arithmeticMean =
      magnitudeSpectrum.reduce((a, b) => a + b, 0) / magnitudeSpectrum.length;
    return arithmeticMean === 0 ? 0 : geometricMean / arithmeticMean;
  };

  const extractSpectralRollOff = (segmentData, sampleRate) => {
    const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
    const totalEnergy = magnitudeSpectrum.reduce((a, b) => a + b, 0);
    const rollOffThreshold = totalEnergy * 0.85;
    let cumulativeEnergy = 0;

    for (let i = 0; i < magnitudeSpectrum.length; i++) {
      cumulativeEnergy += magnitudeSpectrum[i];
      if (cumulativeEnergy >= rollOffThreshold) {
        return (i / magnitudeSpectrum.length) * (sampleRate / 2);
      }
    }

    return 0;
  };

  const extractSpectralBandwidth = (segmentData, sampleRate) => {
    const centroid = extractSpectralCentroid(segmentData, sampleRate);
    const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
    const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0);
    if (sum === 0) return 0;

    const weightedSum = magnitudeSpectrum.reduce(
      (acc, value, index) => acc + value * Math.pow(index - centroid, 2),
      0,
    );
    return Math.sqrt(weightedSum / sum);
  };

  const extractChromagram = (segmentData, sampleRate) => {
    return []; // TODO implement
  };

  const extractHNR = (segmentData) => {
    const frameSize = segmentData.length;
    // At least two samples are needed to have a non-zero lag to compare with.
    if (frameSize < 2) return 0;

    const autocorrelation = new Float32Array(frameSize);

    // Compute the autocorrelation of the segment data
    for (let i = 0; i < frameSize; i++) {
      let sum = 0;
      for (let j = 0; j < frameSize - i; j++) {
        sum += segmentData[j] * segmentData[j + i];
      }
      autocorrelation[i] = sum;
    }

    // Find the maximum autocorrelation value (excluding the zero lag).
    // A plain loop is used because spreading a large typed array into
    // Math.max can exceed the engine's argument limit.
    let maxAutocorrelation = -Infinity;
    for (let i = 1; i < frameSize; i++) {
      maxAutocorrelation = Math.max(maxAutocorrelation, autocorrelation[i]);
    }

    // Compute the HNR
    return autocorrelation[0] !== 0
      ? 10 *
          Math.log10(
            maxAutocorrelation / (autocorrelation[0] - maxAutocorrelation),
          )
      : 0;
  };

  const extractWaveform = (
    channelData, // Float32Array
    sampleRate, // number
    pointsPerSecond, // number
    algorithm, // string
  ) => {
    const totalSamples = channelData.length;
    const segmentDuration = totalSamples / sampleRate;
    const dataPoints = [];
    let minAmplitude = Infinity;
    let maxAmplitude = -Infinity;
    let silenceStart = null;
    let lastSpeechEnd = -Infinity;
    let isSpeech = false;

    const expectedPoints = segmentDuration * pointsPerSecond;
    const samplesPerPoint = Math.ceil(channelData.length / expectedPoints);

    for (let i = 0; i < expectedPoints; i++) {
      const start = i * samplesPerPoint;
      // samplesPerPoint is rounded up, so the input can run out before the
      // last expected point. Stop instead of emitting empty segments whose
      // metrics would be NaN and would poison the amplitude range.
      if (start >= totalSamples) break;
      const end = Math.min(start + samplesPerPoint, totalSamples);

      let sumSquares = 0;
      let zeroCrossings = 0;
      let prevValue = channelData[start];
      let localMinAmplitude = Infinity;
      let localMaxAmplitude = -Infinity;
      let hasNonZeroValue = false;

      // compute values for the segment
      for (let j = start; j < end; j++) {
        const value = channelData[j];
        sumSquares += value * value;
        if (j > start && value * prevValue < 0) {
          zeroCrossings++;
        }
        prevValue = value;

        const absValue = Math.abs(value);
        localMinAmplitude = Math.min(localMinAmplitude, absValue);
        localMaxAmplitude = Math.max(localMaxAmplitude, absValue);

        if (absValue !== 0) {
          hasNonZeroValue = true;
        }
      }

      // Post-processing checks
      if (!hasNonZeroValue) {
        // All values are zero
        localMinAmplitude = 0;
        localMaxAmplitude = 0;
      }

      const rms = Math.sqrt(sumSquares / (end - start));
      minAmplitude = Math.min(minAmplitude, rms);
      maxAmplitude = Math.max(maxAmplitude, rms);

      const energy = sumSquares;
      const zcr = zeroCrossings / (end - start);

      const silent = rms < SILENCE_THRESHOLD;
      const dB = 20 * Math.log10(rms);

      if (silent) {
        if (silenceStart === null) {
          silenceStart = start;
        } else if (start - silenceStart > MIN_SILENCE_DURATION) {
          // Silence detected for longer than the threshold, set amplitude to 0
          localMaxAmplitude = 0;
          localMinAmplitude = 0;
          isSpeech = false;
        }
      } else {
        silenceStart = null;
        if (!isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION) {
          isSpeech = true;
        }
        lastSpeechEnd = end;
      }

      const activeSpeech =
        (rms > RMS_THRESHOLD && zcr > ZCR_THRESHOLD) ||
        (isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION);

      if (activeSpeech) {
        isSpeech = true;
        lastSpeechEnd = end;
      } else {
        isSpeech = false;
      }

      const bytesPerSample = bitDepth / 8;
      const startPosition = start * bytesPerSample * numberOfChannels; // Calculate start position in bytes
      const endPosition = end * bytesPerSample * numberOfChannels; // Calculate end position in bytes

      // Compute features
      const segmentData = channelData.slice(start, end);
      const mfcc = features.mfcc ? extractMFCC(segmentData, sampleRate) : [];
      const spectralCentroid = features.spectralCentroid
        ? extractSpectralCentroid(segmentData, sampleRate)
        : 0;
      const spectralFlatness = features.spectralFlatness
        ? extractSpectralFlatness(segmentData)
        : 0;
      const spectralRollOff = features.spectralRollOff
        ? extractSpectralRollOff(segmentData, sampleRate)
        : 0;
      const spectralBandwidth = features.spectralBandwidth
        ? extractSpectralBandwidth(segmentData, sampleRate)
        : 0;
      const chromagram = features.chromagram
        ? extractChromagram(segmentData, sampleRate)
        : [];
      const hnr = features.hnr ? extractHNR(segmentData) : 0;

      const newData = {
        id: uniqueIdCounter++, // Assign unique ID and increment the counter
        amplitude: algorithm === "peak" ? localMaxAmplitude : rms,
        activeSpeech,
        dB,
        silent,
        features: {
          energy,
          rms,
          minAmplitude: localMinAmplitude,
          maxAmplitude: localMaxAmplitude,
          zcr,
          mfcc, // Empty until MFCC extraction is implemented
          spectralCentroid, // Computed spectral centroid
          spectralFlatness, // Computed spectral flatness
          spectralRollOff, // Computed spectral roll-off
          spectralBandwidth, // Computed spectral bandwidth
          chromagram, // Computed chromagram
          hnr, // Computed HNR
        },
        startTime: start / sampleRate,
        endTime: end / sampleRate,
        startPosition,
        endPosition,
        samples: end - start,
        speaker: 0, // Assuming speaker detection is to be handled later
      };

      dataPoints.push(newData);
    }

    return {
      pointsPerSecond,
      durationMs: fullAudioDurationMs,
      bitDepth,
      samples: totalSamples,
      numberOfChannels,
      sampleRate,
      dataPoints,
      amplitudeRange: {
        min: minAmplitude,
        max: maxAmplitude,
      },
      speakerChanges: [], // Placeholder for future speaker detection logic
    };
  };

  try {
    const result = extractWaveform(
      channelData,
      sampleRate,
      pointsPerSecond,
      algorithm,
    );
    self.postMessage({
      command: "features",
      result,
    });
  } catch (error) {
    console.error("[AudioFeaturesExtractor] Error in processing", error);
    self.postMessage({ error: error.message });
  } finally {
    // Do not close the worker so it can be re-used for subsequent messages
    // self.close();
  }
};
`