@siteed/expo-audio-stream 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/.size-limit.json +6 -0
  2. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +76 -0
  3. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -0
  4. package/build/AudioAnalysis/AudioAnalysis.types.js +3 -0
  5. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -0
  6. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +4 -0
  7. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -0
  8. package/build/AudioAnalysis/extractAudioAnalysis.js +101 -0
  9. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -0
  10. package/build/AudioAnalysis/extractWaveform.d.ts +8 -0
  11. package/build/AudioAnalysis/extractWaveform.d.ts.map +1 -0
  12. package/build/AudioAnalysis/extractWaveform.js +14 -0
  13. package/build/AudioAnalysis/extractWaveform.js.map +1 -0
  14. package/build/AudioRecorder.provider.d.ts +14 -1
  15. package/build/AudioRecorder.provider.d.ts.map +1 -1
  16. package/build/AudioRecorder.provider.js +17 -4
  17. package/build/AudioRecorder.provider.js.map +1 -1
  18. package/build/ExpoAudioStream.types.d.ts +26 -84
  19. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  20. package/build/ExpoAudioStream.types.js.map +1 -1
  21. package/build/ExpoAudioStream.web.d.ts +6 -5
  22. package/build/ExpoAudioStream.web.d.ts.map +1 -1
  23. package/build/ExpoAudioStream.web.js +9 -8
  24. package/build/ExpoAudioStream.web.js.map +1 -1
  25. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  26. package/build/ExpoAudioStreamModule.js +5 -1
  27. package/build/ExpoAudioStreamModule.js.map +1 -1
  28. package/build/{WebRecorder.d.ts → WebRecorder.web.d.ts} +7 -3
  29. package/build/WebRecorder.web.d.ts.map +1 -0
  30. package/build/{WebRecorder.js → WebRecorder.web.js} +74 -29
  31. package/build/WebRecorder.web.js.map +1 -0
  32. package/build/constants.d.ts +11 -0
  33. package/build/constants.d.ts.map +1 -0
  34. package/build/constants.js +14 -0
  35. package/build/constants.js.map +1 -0
  36. package/build/events.d.ts +6 -0
  37. package/build/events.d.ts.map +1 -0
  38. package/build/events.js +15 -0
  39. package/build/events.js.map +1 -0
  40. package/build/index.d.ts +8 -16
  41. package/build/index.d.ts.map +1 -1
  42. package/build/index.js +6 -112
  43. package/build/index.js.map +1 -1
  44. package/build/logger.d.ts +9 -0
  45. package/build/logger.d.ts.map +1 -0
  46. package/build/logger.js +17 -0
  47. package/build/logger.js.map +1 -0
  48. package/build/{useAudioRecording.d.ts → useAudioRecorder.d.ts} +6 -7
  49. package/build/useAudioRecorder.d.ts.map +1 -0
  50. package/build/{useAudioRecording.js → useAudioRecorder.js} +69 -65
  51. package/build/useAudioRecorder.js.map +1 -0
  52. package/build/utils/convertPCMToFloat32.d.ts +11 -0
  53. package/build/utils/convertPCMToFloat32.d.ts.map +1 -0
  54. package/build/utils/convertPCMToFloat32.js +41 -0
  55. package/build/utils/convertPCMToFloat32.js.map +1 -0
  56. package/build/utils/encodingToBitDepth.d.ts +5 -0
  57. package/build/utils/encodingToBitDepth.d.ts.map +1 -0
  58. package/build/utils/encodingToBitDepth.js +13 -0
  59. package/build/utils/encodingToBitDepth.js.map +1 -0
  60. package/build/utils/getWavFileInfo.d.ts +25 -0
  61. package/build/utils/getWavFileInfo.d.ts.map +1 -0
  62. package/build/utils/getWavFileInfo.js +89 -0
  63. package/build/utils/getWavFileInfo.js.map +1 -0
  64. package/build/utils/writeWavHeader.d.ts +9 -0
  65. package/build/utils/writeWavHeader.d.ts.map +1 -0
  66. package/build/utils/writeWavHeader.js +41 -0
  67. package/build/utils/writeWavHeader.js.map +1 -0
  68. package/build/workers/InlineFeaturesExtractor.web.d.ts +2 -0
  69. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -0
  70. package/build/workers/InlineFeaturesExtractor.web.js +303 -0
  71. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -0
  72. package/build/workers/inlineAudioWebWorker.web.d.ts +2 -0
  73. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -0
  74. package/build/workers/inlineAudioWebWorker.web.js +243 -0
  75. package/build/workers/inlineAudioWebWorker.web.js.map +1 -0
  76. package/ios/AudioStreamManager.swift +39 -2
  77. package/ios/ExpoAudioStreamModule.swift +10 -0
  78. package/package.json +7 -6
  79. package/plugin/tsconfig.json +1 -1
  80. package/publish.sh +0 -0
  81. package/src/AudioAnalysis/AudioAnalysis.types.ts +85 -0
  82. package/src/AudioAnalysis/extractAudioAnalysis.ts +136 -0
  83. package/src/AudioAnalysis/extractWaveform.ts +25 -0
  84. package/src/AudioRecorder.provider.tsx +35 -7
  85. package/src/ExpoAudioStream.types.ts +33 -94
  86. package/src/ExpoAudioStream.web.ts +17 -16
  87. package/src/ExpoAudioStreamModule.ts +6 -1
  88. package/src/{WebRecorder.ts → WebRecorder.web.ts} +85 -33
  89. package/src/constants.ts +18 -0
  90. package/src/events.ts +25 -0
  91. package/src/index.ts +8 -169
  92. package/src/logger.ts +26 -0
  93. package/src/{useAudioRecording.tsx → useAudioRecorder.tsx} +141 -136
  94. package/src/utils/convertPCMToFloat32.ts +48 -0
  95. package/src/utils/encodingToBitDepth.ts +18 -0
  96. package/src/utils/getWavFileInfo.ts +125 -0
  97. package/src/utils/writeWavHeader.ts +56 -0
  98. package/src/workers/InlineFeaturesExtractor.web.tsx +302 -0
  99. package/src/workers/inlineAudioWebWorker.web.tsx +242 -0
  100. package/build/WebRecorder.d.ts.map +0 -1
  101. package/build/WebRecorder.js.map +0 -1
  102. package/build/inlineAudioWebWorker.d.ts +0 -3
  103. package/build/inlineAudioWebWorker.d.ts.map +0 -1
  104. package/build/inlineAudioWebWorker.js +0 -340
  105. package/build/inlineAudioWebWorker.js.map +0 -1
  106. package/build/useAudioRecording.d.ts.map +0 -1
  107. package/build/useAudioRecording.js.map +0 -1
  108. package/build/utils.d.ts +0 -31
  109. package/build/utils.d.ts.map +0 -1
  110. package/build/utils.js +0 -143
  111. package/build/utils.js.map +0 -1
  112. package/src/inlineAudioWebWorker.tsx +0 -340
  113. package/src/utils.ts +0 -189
@@ -0,0 +1,18 @@
1
+ import { BitDepth, EncodingType } from "../ExpoAudioStream.types";
2
+
3
/**
 * Maps a PCM encoding identifier to its sample width in bits.
 *
 * @param encoding - One of "pcm_32bit", "pcm_16bit" or "pcm_8bit".
 * @returns 32, 16 or 8 respectively.
 * @throws Error when the encoding is none of the three handled values.
 */
export const encodingToBitDepth = ({
  encoding,
}: {
  encoding: EncodingType;
}): BitDepth => {
  if (encoding === "pcm_32bit") {
    return 32;
  }
  if (encoding === "pcm_16bit") {
    return 16;
  }
  if (encoding === "pcm_8bit") {
    return 8;
  }
  // Anything else is rejected rather than silently mapped to a default.
  throw new Error(`Unsupported encoding type: ${encoding}`);
};
@@ -0,0 +1,125 @@
1
+ // packages/expo-audio-stream/src/utils/getWavFileInfo.ts
2
+
3
+ import { BitDepth, SampleRate } from "../ExpoAudioStream.types";
4
+ import {
5
+ DATA_CHUNK_ID,
6
+ DEFAULT_BIT_DEPTH,
7
+ DEFAULT_SAMPLE_RATE,
8
+ FMT_CHUNK_ID,
9
+ INFO_CHUNK_ID,
10
+ RIFF_HEADER,
11
+ WAVE_HEADER,
12
+ } from "../constants";
13
+
14
// Human-readable descriptions for the WAVE format tags this module accepts,
// keyed by the numeric tag read from the "fmt " chunk.
const AUDIO_FORMATS: Record<number, string> = {
  0x0001: "PCM",
  0x0003: "IEEE float",
  0x0006: "8-bit ITU-T G.711 A-law",
  0x0007: "8-bit ITU-T G.711 µ-law",
  0xfffe: "WAVE_FORMAT_EXTENSIBLE",
};
22
+
23
/**
 * Metadata describing a WAV file, as produced by getWavFileInfo.
 */
export interface WavFileInfo {
  /** Sample rate read from the "fmt " chunk. */
  sampleRate: SampleRate;
  /** Channel count read from the "fmt " chunk. */
  numChannels: number;
  /** Bits per sample read from the "fmt " chunk. */
  bitDepth: BitDepth;
  /** Size in bytes. */
  size: number;
  /** Duration in milliseconds. */
  durationMs: number;
  /** Description of the audio format. */
  audioFormatDescription: string;
  /** Average bytes per second. */
  byteRate: number;
  /** Number of bytes for one sample including all channels. */
  blockAlign: number;
  /** Optional creation date and time. */
  creationDateTime?: string;
  /** Optional comments or tags. */
  comments?: string;
  /** Optional compression type. */
  compressionType?: string;
}
39
+
40
/**
 * Extracts metadata from a WAV file held in memory.
 *
 * Validates the 12-byte RIFF/WAVE preamble, then walks the chunk list:
 * the "fmt " chunk supplies the stream format, an INFO chunk (if present)
 * supplies NUL-separated free text, and the walk stops at the "data" chunk.
 *
 * @param arrayBuffer - The array buffer containing the WAV file data.
 * @returns A promise that resolves to the extracted metadata.
 * @throws Error "Invalid WAV file" when the buffer is too short for the
 *   preamble or the RIFF/WAVE signature does not match.
 * @throws Error "Unsupported WAV file format" when the format tag is not
 *   listed in AUDIO_FORMATS.
 * @throws Error "Incomplete WAV file information" when the "fmt " chunk is
 *   truncated, or when sample rate, channel count, bit depth or data size
 *   is missing/zero after parsing.
 */
export const getWavFileInfo = async (
  arrayBuffer: ArrayBuffer,
): Promise<WavFileInfo> => {
  const PREAMBLE_SIZE = 12; // "RIFF" + uint32 size + "WAVE"
  const CHUNK_HEADER_SIZE = 8; // 4-byte id + uint32 size
  const FMT_FIELDS_SIZE = 16; // bytes of the "fmt " payload read below

  const view = new DataView(arrayBuffer);

  // Reject short buffers explicitly instead of letting DataView raise a
  // RangeError with an unrelated message.
  if (view.byteLength < PREAMBLE_SIZE) {
    throw new Error("Invalid WAV file");
  }

  // Check if the file is a valid RIFF/WAVE file
  const riffHeader = view.getUint32(0, false);
  const waveHeader = view.getUint32(8, false);
  if (riffHeader !== RIFF_HEADER || waveHeader !== WAVE_HEADER) {
    throw new Error("Invalid WAV file");
  }

  // Metadata accumulated while walking the chunks
  let sampleRate: SampleRate = DEFAULT_SAMPLE_RATE;
  let numChannels = 0;
  let bitDepth: BitDepth = DEFAULT_BIT_DEPTH;
  let dataChunkSize = 0;
  let audioFormat = 0;
  let byteRate = 0;
  let blockAlign = 0;
  let creationDateTime = "";
  let comments = "";

  // Only read a chunk header when all 8 of its bytes are inside the buffer.
  let chunkOffset = PREAMBLE_SIZE;
  while (chunkOffset + CHUNK_HEADER_SIZE <= view.byteLength) {
    const chunkId = view.getUint32(chunkOffset, false);
    const chunkSize = view.getUint32(chunkOffset + 4, true);
    const payloadOffset = chunkOffset + CHUNK_HEADER_SIZE;

    if (chunkId === FMT_CHUNK_ID) {
      // "fmt "
      if (payloadOffset + FMT_FIELDS_SIZE > view.byteLength) {
        throw new Error("Incomplete WAV file information");
      }
      audioFormat = view.getUint16(payloadOffset, true);
      if (!AUDIO_FORMATS[audioFormat]) {
        throw new Error("Unsupported WAV file format");
      }
      numChannels = view.getUint16(payloadOffset + 2, true);
      sampleRate = view.getUint32(payloadOffset + 4, true) as SampleRate;
      byteRate = view.getUint32(payloadOffset + 8, true);
      blockAlign = view.getUint16(payloadOffset + 12, true);
      bitDepth = view.getUint16(payloadOffset + 14, true) as BitDepth;
    } else if (chunkId === DATA_CHUNK_ID) {
      // "data" - only the declared size is needed; stop walking here.
      dataChunkSize = chunkSize;
      break;
    } else if (chunkId === INFO_CHUNK_ID) {
      // "INFO" - treated as NUL-separated text: creation date/time first,
      // comments second. ArrayBuffer.slice clamps to the buffer end.
      const infoText = new TextDecoder().decode(
        new Uint8Array(
          arrayBuffer.slice(payloadOffset, payloadOffset + chunkSize),
        ),
      );
      const infoParts = infoText.split("\0");
      creationDateTime = infoParts[0] || "";
      comments = infoParts[1] || "";
    }

    // RIFF chunks are word-aligned: an odd-sized payload is followed by one
    // pad byte that is not counted in chunkSize.
    chunkOffset = payloadOffset + chunkSize + (chunkSize % 2);
  }

  if (!sampleRate || !numChannels || !bitDepth || !dataChunkSize) {
    throw new Error("Incomplete WAV file information");
  }

  // Calculate duration from the declared data size
  const bytesPerSample = bitDepth / 8;
  const numSamples = dataChunkSize / (numChannels * bytesPerSample);
  const durationMs = (numSamples / sampleRate) * 1000;

  return {
    sampleRate,
    numChannels,
    bitDepth,
    size: arrayBuffer.byteLength,
    durationMs,
    audioFormatDescription: AUDIO_FORMATS[audioFormat],
    byteRate,
    blockAlign,
    creationDateTime: creationDateTime || undefined,
    comments: comments || undefined,
    compressionType: audioFormat === 1 ? "None" : AUDIO_FORMATS[audioFormat],
  };
};
@@ -0,0 +1,56 @@
1
+ // packages/expo-audio-stream/src/utils/writeWavHeader.ts
2
interface WavHeaderOptions {
  /** Buffer holding the WAV file; its first 44 bytes receive the header. */
  buffer: ArrayBuffer;
  /** Sample rate written to the header. */
  sampleRate: number;
  /** Channel count written to the header. */
  numChannels: number;
  /** Bits per sample written to the header. */
  bitDepth: number;
}

/**
 * Writes (or refreshes) a canonical 44-byte RIFF/WAVE header in place at the
 * start of `buffer` and returns that same buffer.
 *
 * Because the header lives inside `buffer`, the audio payload is everything
 * after byte 44; the RIFF and data sizes are derived from that, so the header
 * never claims more bytes than the buffer actually holds.
 *
 * If the buffer already starts with "RIFF", only the size and format fields
 * are refreshed; the chunk tags and the AudioFormat field are left untouched.
 *
 * @param buffer - Buffer to write into (at least 44 bytes).
 * @param sampleRate - Sample rate in Hz.
 * @param numChannels - Number of interleaved channels.
 * @param bitDepth - Bits per sample; 32 is labelled as IEEE float (format 3),
 *   anything else as PCM (format 1) when a fresh header is written.
 * @returns The same `buffer` instance, mutated.
 * @throws RangeError when `buffer` is too small to hold the header.
 */
export const writeWavHeader = ({
  buffer,
  sampleRate,
  numChannels,
  bitDepth,
}: WavHeaderOptions): ArrayBuffer => {
  const HEADER_SIZE = 44;
  const RIFF_TAG = 0x52494646; // "RIFF" in ASCII

  // Fail before touching the buffer rather than half-way through the writes.
  if (buffer.byteLength < HEADER_SIZE) {
    throw new RangeError(
      `Buffer of ${buffer.byteLength} bytes is too small for a ${HEADER_SIZE}-byte WAV header`,
    );
  }

  const view = new DataView(buffer);
  const bytesPerSample = bitDepth / 8;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  // Payload is what follows the header inside this very buffer.
  const dataSize = buffer.byteLength - HEADER_SIZE;

  // Writes an ASCII tag byte-by-byte at the given offset
  const writeTag = (offset: number, tag: string) => {
    for (let i = 0; i < tag.length; i++) {
      view.setUint8(offset + i, tag.charCodeAt(i));
    }
  };

  // Check if the buffer already has a WAV header by looking for "RIFF" at the start
  const hasExistingHeader = view.getUint32(0, false) === RIFF_TAG;

  if (!hasExistingHeader) {
    // Fields that are only laid down for a brand-new header
    writeTag(0, "RIFF"); // ChunkID
    writeTag(8, "WAVE"); // Format
    writeTag(12, "fmt "); // Subchunk1ID
    view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM)
    view.setUint16(20, bitDepth === 32 ? 3 : 1, true); // AudioFormat (3 for float, 1 for PCM)
    writeTag(36, "data"); // Subchunk2ID
  }

  // Fields written in both cases so the header is always self-consistent
  view.setUint32(4, 36 + dataSize, true); // ChunkSize = file size - 8
  view.setUint16(22, numChannels, true); // NumChannels
  view.setUint32(24, sampleRate, true); // SampleRate
  view.setUint32(28, byteRate, true); // ByteRate
  view.setUint16(32, blockAlign, true); // BlockAlign
  view.setUint16(34, bitDepth, true); // BitsPerSample
  view.setUint32(40, dataSize, true); // Subchunk2Size

  return buffer;
};
@@ -0,0 +1,302 @@
1
/**
 * Source code of the audio-features web worker, kept as a string so it can be
 * loaded inline.
 *
 * The worker expects messages whose data carries: channelData, sampleRate,
 * pointsPerSecond, algorithm, bitDepth, fullAudioDurationMs, numberOfChannels
 * and an optional features map of boolean switches. For each message it
 * replies with { command: "features", result } on success or { error } on
 * failure. The worker stays alive between messages and data-point ids keep
 * increasing across messages.
 */
export const InlineFeaturesExtractor = `

// Unique ID counter, shared by every message this worker instance handles
let uniqueIdCounter = 0;

self.onmessage = function (event) {
  const {
    channelData, // this is only the newly recorded data when live recording.
    sampleRate,
    pointsPerSecond,
    algorithm,
    bitDepth,
    fullAudioDurationMs,
    numberOfChannels,
    features: _features,
  } = event.data;

  console.log("[AudioFeaturesExtractor] Worker received message", event.data);
  const features = _features || {};

  const SILENCE_THRESHOLD = 0.01;
  const MIN_SILENCE_DURATION = 1.5 * sampleRate; // 1.5 seconds of silence
  const SPEECH_INERTIA_DURATION = 0.1 * sampleRate; // Speech inertia duration in samples
  const RMS_THRESHOLD = 0.01;
  const ZCR_THRESHOLD = 0.1;

  // NOTE: the spectral helpers below work directly on the samples they are
  // given; no frequency transform is applied in this file.

  // Placeholder: MFCC extraction is not implemented yet
  const extractMFCC = (segmentData, sampleRate) => {
    return [];
  };

  const extractSpectralCentroid = (segmentData, sampleRate) => {
    const magnitudeSpectrum = segmentData.map((v) => v * v);
    const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0);
    if (sum === 0) return 0;

    const weightedSum = magnitudeSpectrum.reduce(
      (acc, value, index) => acc + index * value,
      0,
    );
    return ((weightedSum / sum) * (sampleRate / 2)) / magnitudeSpectrum.length;
  };

  const extractSpectralFlatness = (segmentData) => {
    // Nothing to average on an empty segment
    if (segmentData.length === 0) return 0;

    const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
    // Number.MIN_VALUE keeps the logarithm finite for zero-valued entries
    const geometricMean = Math.exp(
      magnitudeSpectrum
        .map((v) => Math.log(v + Number.MIN_VALUE))
        .reduce((a, b) => a + b, 0) / magnitudeSpectrum.length,
    );
    const arithmeticMean =
      magnitudeSpectrum.reduce((a, b) => a + b, 0) / magnitudeSpectrum.length;
    return arithmeticMean === 0 ? 0 : geometricMean / arithmeticMean;
  };

  const extractSpectralRollOff = (segmentData, sampleRate) => {
    const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
    const totalEnergy = magnitudeSpectrum.reduce((a, b) => a + b, 0);
    const rollOffThreshold = totalEnergy * 0.85;
    let cumulativeEnergy = 0;

    for (let i = 0; i < magnitudeSpectrum.length; i++) {
      cumulativeEnergy += magnitudeSpectrum[i];
      if (cumulativeEnergy >= rollOffThreshold) {
        return (i / magnitudeSpectrum.length) * (sampleRate / 2);
      }
    }

    return 0;
  };

  const extractSpectralBandwidth = (segmentData, sampleRate) => {
    // NOTE(review): centroid is scaled by sampleRate while index below is a
    // raw position, so the two look like different units - confirm intent.
    const centroid = extractSpectralCentroid(segmentData, sampleRate);
    const magnitudeSpectrum = segmentData.map((v) => Math.abs(v));
    const sum = magnitudeSpectrum.reduce((a, b) => a + b, 0);
    if (sum === 0) return 0;

    const weightedSum = magnitudeSpectrum.reduce(
      (acc, value, index) => acc + value * Math.pow(index - centroid, 2),
      0,
    );
    return Math.sqrt(weightedSum / sum);
  };

  // Placeholder: chromagram extraction is not implemented yet
  const extractChromagram = (segmentData, sampleRate) => {
    return [];
  };

  const extractHNR = (segmentData) => {
    const frameSize = segmentData.length;
    // At least one non-zero lag is needed to compare against lag zero
    if (frameSize < 2) return 0;

    const autocorrelation = new Float32Array(frameSize);

    // Compute the autocorrelation of the segment data
    for (let i = 0; i < frameSize; i++) {
      let sum = 0;
      for (let j = 0; j < frameSize - i; j++) {
        sum += segmentData[j] * segmentData[j + i];
      }
      autocorrelation[i] = sum;
    }

    // Find the maximum autocorrelation value (excluding the zero lag).
    // A plain loop avoids spreading a large typed array into Math.max,
    // which can exceed the engine argument limit.
    let maxAutocorrelation = -Infinity;
    for (let lag = 1; lag < frameSize; lag++) {
      if (autocorrelation[lag] > maxAutocorrelation) {
        maxAutocorrelation = autocorrelation[lag];
      }
    }

    if (autocorrelation[0] === 0) return 0;

    // Compute the HNR; a non-positive (or NaN) ratio has no logarithm, so
    // report 0 instead of NaN.
    const ratio =
      maxAutocorrelation / (autocorrelation[0] - maxAutocorrelation);
    if (!(ratio > 0)) return 0;
    return 10 * Math.log10(ratio);
  };

  const extractWaveform = (
    channelData, // Float32Array
    sampleRate, // number
    pointsPerSecond, // number
    algorithm, // string
  ) => {
    const totalSamples = channelData.length;
    const segmentDuration = totalSamples / sampleRate;
    const dataPoints = [];
    let minAmplitude = Infinity;
    let maxAmplitude = -Infinity;
    let silenceStart = null;
    let lastSpeechEnd = -Infinity;
    let isSpeech = false;

    const expectedPoints = segmentDuration * pointsPerSecond;
    const samplesPerPoint = Math.ceil(totalSamples / expectedPoints);
    // Segmenting only makes sense with a finite number of points that each
    // span at least one sample; otherwise the loop could never advance.
    const canSegment =
      Number.isFinite(expectedPoints) && samplesPerPoint >= 1;

    for (let i = 0; canSegment && i < expectedPoints; i++) {
      const start = i * samplesPerPoint;
      // Rounding samplesPerPoint up can exhaust the samples before
      // expectedPoints is reached; stop instead of emitting an empty
      // segment whose statistics would be NaN.
      if (start >= totalSamples) break;
      const end = Math.min(start + samplesPerPoint, totalSamples);

      let sumSquares = 0;
      let zeroCrossings = 0;
      let prevValue = channelData[start];
      let localMinAmplitude = Infinity;
      let localMaxAmplitude = -Infinity;
      let hasNonZeroValue = false;

      // compute values for the segment
      for (let j = start; j < end; j++) {
        const value = channelData[j];
        sumSquares += value * value;
        if (j > start && value * prevValue < 0) {
          zeroCrossings++;
        }
        prevValue = value;

        const absValue = Math.abs(value);
        localMinAmplitude = Math.min(localMinAmplitude, absValue);
        localMaxAmplitude = Math.max(localMaxAmplitude, absValue);

        if (absValue !== 0) {
          hasNonZeroValue = true;
        }
      }

      // Post-processing checks
      if (!hasNonZeroValue) {
        // All values are zero
        localMinAmplitude = 0;
        localMaxAmplitude = 0;
      }

      const rms = Math.sqrt(sumSquares / (end - start));
      minAmplitude = Math.min(minAmplitude, rms);
      maxAmplitude = Math.max(maxAmplitude, rms);

      const energy = sumSquares;
      const zcr = zeroCrossings / (end - start);

      const silent = rms < SILENCE_THRESHOLD;
      const dB = 20 * Math.log10(rms);

      if (silent) {
        if (silenceStart === null) {
          silenceStart = start;
        } else if (start - silenceStart > MIN_SILENCE_DURATION) {
          // Silence detected for longer than the threshold, set amplitude to 0
          localMaxAmplitude = 0;
          localMinAmplitude = 0;
          isSpeech = false;
        }
      } else {
        silenceStart = null;
        if (!isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION) {
          isSpeech = true;
        }
        lastSpeechEnd = end;
      }

      const activeSpeech =
        (rms > RMS_THRESHOLD && zcr > ZCR_THRESHOLD) ||
        (isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION);

      if (activeSpeech) {
        isSpeech = true;
        lastSpeechEnd = end;
      } else {
        isSpeech = false;
      }

      const bytesPerSample = bitDepth / 8;
      const startPosition = start * bytesPerSample * numberOfChannels; // Calculate start position in bytes
      const endPosition = end * bytesPerSample * numberOfChannels; // Calculate end position in bytes

      // Compute features (each one only when switched on in the request)
      const segmentData = channelData.slice(start, end);
      const mfcc = features.mfcc ? extractMFCC(segmentData, sampleRate) : [];
      const spectralCentroid = features.spectralCentroid
        ? extractSpectralCentroid(segmentData, sampleRate)
        : 0;
      const spectralFlatness = features.spectralFlatness
        ? extractSpectralFlatness(segmentData)
        : 0;
      const spectralRollOff = features.spectralRollOff
        ? extractSpectralRollOff(segmentData, sampleRate)
        : 0;
      const spectralBandwidth = features.spectralBandwidth
        ? extractSpectralBandwidth(segmentData, sampleRate)
        : 0;
      const chromagram = features.chromagram
        ? extractChromagram(segmentData, sampleRate)
        : [];
      const hnr = features.hnr ? extractHNR(segmentData) : 0;

      const newData = {
        id: uniqueIdCounter++, // Assign unique ID and increment the counter
        amplitude: algorithm === "peak" ? localMaxAmplitude : rms,
        activeSpeech,
        dB,
        silent,
        features: {
          energy,
          rms,
          minAmplitude: localMinAmplitude,
          maxAmplitude: localMaxAmplitude,
          zcr,
          mfcc, // Result of extractMFCC (currently always empty)
          spectralCentroid, // Computed spectral centroid
          spectralFlatness, // Computed spectral flatness
          spectralRollOff, // Computed spectral roll-off
          spectralBandwidth, // Computed spectral bandwidth
          chromagram, // Computed chromagram
          hnr, // Computed HNR
        },
        startTime: start / sampleRate,
        endTime: end / sampleRate,
        startPosition,
        endPosition,
        samples: end - start,
        speaker: 0, // Assuming speaker detection is to be handled later
      };

      dataPoints.push(newData);
    }

    return {
      pointsPerSecond,
      durationMs: fullAudioDurationMs,
      bitDepth,
      samples: totalSamples,
      numberOfChannels,
      sampleRate,
      dataPoints,
      amplitudeRange: {
        min: minAmplitude,
        max: maxAmplitude,
      },
      speakerChanges: [], // Placeholder for future speaker detection logic
    };
  };

  // The worker is intentionally left open after replying so it can be
  // re-used for subsequent messages.
  try {
    const result = extractWaveform(
      channelData,
      sampleRate,
      pointsPerSecond,
      algorithm,
    );
    self.postMessage({
      command: "features",
      result,
    });
  } catch (error) {
    console.error("[AudioFeaturesExtractor] Error in processing", error);
    self.postMessage({ error: error.message });
  }
};
`;