@siteed/expo-audio-stream 1.16.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/CHANGELOG.md +28 -1
  2. package/README.md +1 -1
  3. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
  4. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
  5. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
  6. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +134 -23
  7. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
  8. package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
  9. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
  10. package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
  11. package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
  12. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +4 -4
  13. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
  14. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  15. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  16. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
  17. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
  18. package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
  19. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  20. package/build/ExpoAudioStream.types.d.ts +49 -3
  21. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  22. package/build/ExpoAudioStream.types.js.map +1 -1
  23. package/build/ExpoAudioStream.web.d.ts +2 -0
  24. package/build/ExpoAudioStream.web.d.ts.map +1 -1
  25. package/build/ExpoAudioStream.web.js +8 -1
  26. package/build/ExpoAudioStream.web.js.map +1 -1
  27. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  28. package/build/ExpoAudioStreamModule.js +216 -12
  29. package/build/ExpoAudioStreamModule.js.map +1 -1
  30. package/build/WebRecorder.web.d.ts +67 -13
  31. package/build/WebRecorder.web.d.ts.map +1 -1
  32. package/build/WebRecorder.web.js +178 -173
  33. package/build/WebRecorder.web.js.map +1 -1
  34. package/build/index.d.ts +3 -3
  35. package/build/index.d.ts.map +1 -1
  36. package/build/index.js +2 -2
  37. package/build/index.js.map +1 -1
  38. package/build/useAudioRecorder.d.ts.map +1 -1
  39. package/build/useAudioRecorder.js +12 -8
  40. package/build/useAudioRecorder.js.map +1 -1
  41. package/build/utils/audioProcessing.d.ts +24 -0
  42. package/build/utils/audioProcessing.d.ts.map +1 -0
  43. package/build/utils/audioProcessing.js +133 -0
  44. package/build/utils/audioProcessing.js.map +1 -0
  45. package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
  46. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
  47. package/build/workers/InlineFeaturesExtractor.web.js +692 -175
  48. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
  49. package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
  50. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
  51. package/build/workers/inlineAudioWebWorker.web.js +3 -2
  52. package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
  53. package/ios/AudioAnalysisData.swift +51 -16
  54. package/ios/AudioProcessingHelpers.swift +710 -26
  55. package/ios/AudioProcessor.swift +334 -185
  56. package/ios/AudioStreamManager.swift +66 -22
  57. package/ios/DataPoint.swift +25 -12
  58. package/ios/DecodingConfig.swift +47 -0
  59. package/ios/ExpoAudioStreamModule.swift +189 -104
  60. package/ios/FFT.swift +62 -0
  61. package/ios/Features.swift +24 -3
  62. package/ios/RecordingSettings.swift +9 -7
  63. package/package.json +2 -1
  64. package/plugin/build/index.d.ts +2 -0
  65. package/plugin/build/index.js +10 -3
  66. package/plugin/src/index.ts +10 -1
  67. package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
  68. package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
  69. package/src/ExpoAudioStream.types.ts +57 -7
  70. package/src/ExpoAudioStream.web.ts +8 -1
  71. package/src/ExpoAudioStreamModule.ts +255 -10
  72. package/src/WebRecorder.web.ts +231 -243
  73. package/src/index.ts +5 -3
  74. package/src/useAudioRecorder.tsx +14 -10
  75. package/src/utils/audioProcessing.ts +205 -0
  76. package/src/workers/InlineFeaturesExtractor.web.tsx +692 -175
  77. package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
@@ -1,18 +1,494 @@
1
+ // packages/expo-audio-stream/src/workers/InlineFeaturesExtractor.web.tsx
1
2
  export const InlineFeaturesExtractor = `
3
+ // Constants shared by the spectral feature helpers below
4
+ const N_FFT = 1024; // Default FFT size; also the chunk size used by computeSpectralFeatures
5
+ const MAX_FFT_SIZE = 8192; // Maximum FFT size to prevent memory issues (NOTE(review): not referenced in the visible part of this file - confirm it is enforced somewhere)
6
+ const N_CHROMA = 12; // Number of chroma bins returned by computeChroma (one per pitch class)
7
+
8
// FFT implementation (iterative radix-2) with a precomputed normalized Hann window.

/**
 * Returns cos/sin twiddle tables for a transform of the given size.
 * Reuses the tables cached on an FFT instance when the sizes match,
 * otherwise builds temporary ones.
 *
 * @param {number} size - Transform size (power of two).
 * @param {object} [cached] - FFT instance whose cosTable/sinTable may be reused.
 * @returns {{cos: ArrayLike<number>, sin: ArrayLike<number>}} Tables of length size / 2.
 */
function fftTwiddleTables(size, cached) {
    const half = size / 2;
    if (
        cached &&
        cached.n === size &&
        cached.cosTable &&
        cached.sinTable &&
        cached.cosTable.length === half
    ) {
        return { cos: cached.cosTable, sin: cached.sinTable };
    }

    const cos = new Float64Array(half);
    const sin = new Float64Array(half);
    for (let i = 0; i < half; i++) {
        cos[i] = Math.cos(2.0 * Math.PI * i / size);
        sin[i] = Math.sin(2.0 * Math.PI * i / size);
    }
    return { cos: cos, sin: sin };
}

/**
 * In-place forward complex FFT (decimation in time) on split real/imaginary arrays.
 *
 * @param {Float64Array} re - Real parts; overwritten with the spectrum's real parts.
 * @param {Float64Array} im - Imaginary parts; overwritten with the spectrum's imaginary parts.
 * @param {ArrayLike<number>} cosTable - cos(2*pi*i/n) for i in [0, n/2).
 * @param {ArrayLike<number>} sinTable - sin(2*pi*i/n) for i in [0, n/2).
 */
function fftComplexInPlace(re, im, cosTable, sinTable) {
    const n = re.length;

    // Bit-reversal permutation (iterative, no index table needed).
    for (let i = 1, j = 0; i < n; i++) {
        let bit = n >> 1;
        for (; j & bit; bit >>= 1) {
            j ^= bit;
        }
        j ^= bit;
        if (i < j) {
            const tmpRe = re[i];
            re[i] = re[j];
            re[j] = tmpRe;
            const tmpIm = im[i];
            im[i] = im[j];
            im[j] = tmpIm;
        }
    }

    // Butterflies: stage sizes 2, 4, ..., n.
    for (let size = 2; size <= n; size <<= 1) {
        const half = size >> 1;
        const tableStride = n / size;
        for (let start = 0; start < n; start += size) {
            for (let k = 0; k < half; k++) {
                // Forward transform twiddle: exp(-2*pi*i*k/size).
                const wr = cosTable[k * tableStride];
                const wi = -sinTable[k * tableStride];
                const a = start + k;
                const b = a + half;
                const tr = re[b] * wr - im[b] * wi;
                const ti = re[b] * wi + im[b] * wr;
                re[b] = re[a] - tr;
                im[b] = im[a] - ti;
                re[a] += tr;
                im[a] += ti;
            }
        }
    }
}

/**
 * FFT helper for transforms of size n.
 *
 * Precomputes cos/sin twiddle tables and a Hann window scaled by sqrt(2 / n).
 *
 * @param {number} n - Transform size; expected to be a power of two.
 */
function FFT(n) {
    this.n = n;
    this.cosTable = new Float32Array(n / 2);
    this.sinTable = new Float32Array(n / 2);
    this.hannWindow = new Float32Array(n);

    const normalizationFactor = Math.sqrt(2.0 / n);
    for (let i = 0; i < n / 2; i++) {
        this.cosTable[i] = Math.cos(2.0 * Math.PI * i / n);
        this.sinTable[i] = Math.sin(2.0 * Math.PI * i / n);
    }

    if (n === 1) {
        // A single-sample window would divide by (n - 1) = 0; use the unit window instead.
        this.hannWindow[0] = normalizationFactor;
    } else {
        for (let i = 0; i < n; i++) {
            this.hannWindow[i] = normalizationFactor * 0.5 * (1 - Math.cos(2.0 * Math.PI * i / (n - 1)));
        }
    }
}

/**
 * Forward FFT of a real signal, computed in place.
 *
 * On entry, data holds data.length real samples. On return, data holds the
 * first data.length / 2 complex bins in interleaved form:
 * data[2 * k] is the real part and data[2 * k + 1] the imaginary part of bin k.
 * This is the layout every caller in this file reads back. The Nyquist bin
 * does not fit in the packed output and is dropped.
 *
 * The previous implementation read one element past the end of the array in
 * its butterflies, which turned the entire output into NaN.
 *
 * @param {Float32Array|number[]} data - Real samples; length must be a power of two.
 * @throws {Error} If data.length is not a power of two.
 */
FFT.prototype.transform = function(data) {
    const n = data.length;

    if ((n & (n - 1)) !== 0) {
        throw new Error('FFT length must be power of 2');
    }
    if (n < 2) {
        return; // Nothing to transform (and no room for a re/im pair).
    }

    const re = new Float64Array(n);
    const im = new Float64Array(n);
    for (let i = 0; i < n; i++) {
        re[i] = data[i];
    }

    const tables = fftTwiddleTables(n, this);
    fftComplexInPlace(re, im, tables.cos, tables.sin);

    const half = n / 2;
    for (let k = 0; k < half; k++) {
        data[2 * k] = re[k];
        data[2 * k + 1] = im[k];
    }
};

/**
 * Inverse transform of a real-valued (zero-phase) spectrum.
 *
 * Uses the first floor(n / 2) + 1 entries of powerSpectrum (n = powerSpectrum.length)
 * as real-valued bins and writes
 *   output[t] = (1 / n) * sum_k powerSpectrum[k] * cos(2 * pi * k * t / n)
 * for t in [0, n). Bins above n / 2 are treated as zero, as in the original code.
 * NOTE(review): no Hermitian mirroring is applied - confirm against the Android
 * implementation this is meant to match.
 *
 * @param {Float32Array|number[]} powerSpectrum - Spectrum values; length must be a power of two.
 * @param {Float32Array|number[]} output - Receives n real samples.
 * @throws {Error} If powerSpectrum.length is not a power of two.
 */
FFT.prototype.realInverse = function(powerSpectrum, output) {
    const n = powerSpectrum.length;
    if (n === 0) {
        return;
    }
    if ((n & (n - 1)) !== 0) {
        throw new Error('FFT length must be power of 2');
    }

    const re = new Float64Array(n);
    const im = new Float64Array(n);
    const usedBins = Math.min(n, Math.floor(n / 2) + 1);
    for (let k = 0; k < usedBins; k++) {
        re[k] = powerSpectrum[k];
    }

    // The input is purely real, so conjugation is a no-op and the real part of
    // the forward transform equals the real part of the inverse transform.
    const tables = fftTwiddleTables(n, this);
    fftComplexInPlace(re, im, tables.cos, tables.sin);

    for (let t = 0; t < n; t++) {
        output[t] = re[t] / n;
    }
};
111
+
112
+ // Add helper functions to match Android
113
/**
 * Smallest power of two that is greater than or equal to n.
 * Returns 1 for any n that is not greater than 1 (including NaN).
 *
 * @param {number} n - Lower bound for the result.
 * @returns {number} A power of two >= n.
 */
function nextPowerOfTwo(n) {
    let candidate = 1;
    // Keep doubling until the candidate reaches n.
    for (; candidate < n; candidate += candidate) {
        // intentionally empty: the doubling happens in the loop header
    }
    return candidate;
}
120
+
121
/**
 * Returns a copy of samples multiplied by a symmetric Hann window.
 *
 * A single-sample input is returned unchanged (window value 1); the general
 * formula would divide by (length - 1) = 0 and produce NaN.
 *
 * @param {Float32Array|number[]} samples - Input samples (not modified).
 * @returns {Float32Array} Windowed samples, same length as the input.
 */
function applyHannWindow(samples) {
    const length = samples.length;
    const output = new Float32Array(length);

    if (length === 1) {
        output[0] = samples[0];
        return output;
    }

    for (let i = 0; i < length; i++) {
        const multiplier = 0.5 * (1 - Math.cos(2 * Math.PI * i / (length - 1)));
        output[i] = samples[i] * multiplier;
    }
    return output;
}
129
+
130
/**
 * Computes averaged spectral features for an audio segment.
 *
 * The segment is split into chunks of N_FFT samples. Each chunk is Hann
 * windowed, zero-padded to N_FFT and transformed; the requested features are
 * computed per chunk and averaged over the chunks that contain signal.
 * Chunks shorter than N_FFT / 4, and chunks with no magnitude above
 * Number.EPSILON, are skipped.
 *
 * Never throws: on any error it logs to the console and returns zeros.
 *
 * @param {Float32Array|number[]} segment - Audio samples.
 * @param {number} sampleRate - Sample rate in Hz.
 * @param {object} [featureOptions] - Flags selecting what to compute:
 *   spectralCentroid, spectralFlatness, spectralRollOff (the spelling
 *   spectralRolloff is accepted too), spectralBandwidth, magnitudeSpectrum.
 * @returns {{centroid: number, flatness: number, rollOff: number, bandwidth: number,
 *   magnitudeSpectrum: (Float32Array|Array)}} Averaged features; magnitudeSpectrum
 *   is an empty array when nothing was requested or an error occurred.
 */
function computeSpectralFeatures(segment, sampleRate, featureOptions = {}) {
    const zeroResult = function() {
        return {
            centroid: 0,
            flatness: 0,
            rollOff: 0,
            bandwidth: 0,
            magnitudeSpectrum: []
        };
    };

    try {
        const options = featureOptions || {};
        const wantsCentroid = Boolean(options.spectralCentroid);
        const wantsFlatness = Boolean(options.spectralFlatness);
        // The rest of this file spells the flag both ways; honour either.
        const wantsRollOff = Boolean(options.spectralRollOff || options.spectralRolloff);
        const wantsBandwidth = Boolean(options.spectralBandwidth);
        const wantsMagnitudes = Boolean(options.magnitudeSpectrum);

        // Early return if no spectral features are requested
        if (!wantsCentroid && !wantsFlatness && !wantsRollOff && !wantsBandwidth && !wantsMagnitudes) {
            return zeroResult();
        }

        // Ensure we have valid data
        if (!segment || segment.length === 0) {
            throw new Error('Invalid segment data');
        }

        const chunkSize = N_FFT;
        const numChunks = Math.ceil(segment.length / chunkSize);
        const binCount = N_FFT / 2;

        const results = {
            centroid: 0,
            flatness: 0,
            rollOff: 0,
            bandwidth: 0,
            magnitudeSpectrum: new Float32Array(binCount + 1).fill(0)
        };

        // One FFT instance serves every chunk; its tables depend only on N_FFT.
        const fft = new FFT(N_FFT);
        let validChunks = 0;

        for (let i = 0; i < numChunks; i++) {
            const start = i * chunkSize;
            const end = Math.min(start + chunkSize, segment.length);
            const chunk = segment.slice(start, end);

            if (chunk.length < N_FFT / 4) continue; // Skip very small chunks

            // Window, then zero-pad to the FFT size.
            const paddedChunk = new Float32Array(N_FFT);
            paddedChunk.set(applyHannWindow(chunk));
            fft.transform(paddedChunk);

            // transform() leaves interleaved re/im pairs for bins [0, N_FFT / 2).
            const chunkMagnitudeSpectrum = new Float32Array(binCount + 1);
            let hasSignal = false;
            for (let j = 0; j < binCount; j++) {
                const re = paddedChunk[2 * j];
                const im = paddedChunk[2 * j + 1];
                const magnitude = Math.sqrt(re * re + im * im);
                chunkMagnitudeSpectrum[j] = magnitude;
                if (magnitude > Number.EPSILON) hasSignal = true;
            }

            if (!hasSignal) continue;
            validChunks++;

            // Bandwidth is the spread around THIS chunk's centroid, so compute the
            // centroid whenever either feature is requested.
            let chunkCentroid = 0;
            if (wantsCentroid || wantsBandwidth) {
                chunkCentroid = computeSpectralCentroid(chunkMagnitudeSpectrum, sampleRate);
            }

            if (wantsCentroid && !Number.isNaN(chunkCentroid)) {
                results.centroid += chunkCentroid;
            }

            if (wantsFlatness) {
                const flatness = computeSpectralFlatness(chunkMagnitudeSpectrum);
                if (!Number.isNaN(flatness)) results.flatness += flatness;
            }

            if (wantsRollOff) {
                const rolloff = computeSpectralRollOff(chunkMagnitudeSpectrum, sampleRate);
                if (!Number.isNaN(rolloff)) results.rollOff += rolloff;
            }

            if (wantsBandwidth && !Number.isNaN(chunkCentroid)) {
                const bandwidth = computeSpectralBandwidth(chunkMagnitudeSpectrum, sampleRate, chunkCentroid);
                if (!Number.isNaN(bandwidth)) results.bandwidth += bandwidth;
            }

            if (wantsMagnitudes) {
                for (let j = 0; j < results.magnitudeSpectrum.length; j++) {
                    results.magnitudeSpectrum[j] += chunkMagnitudeSpectrum[j];
                }
            }
        }

        // Average the accumulated results
        if (validChunks > 0) {
            results.centroid /= validChunks;
            results.flatness /= validChunks;
            results.rollOff /= validChunks;
            results.bandwidth /= validChunks;

            if (wantsMagnitudes) {
                for (let i = 0; i < results.magnitudeSpectrum.length; i++) {
                    results.magnitudeSpectrum[i] /= validChunks;
                }
            }
        }

        return results;
    } catch (error) {
        console.error('[Worker] Spectral feature computation error:', error);
        return zeroResult();
    }
}
251
+
252
/**
 * Magnitude-weighted mean frequency of a spectrum.
 *
 * Bin i is mapped to i * (sampleRate / N_FFT) Hz. Missing or falsy bin values
 * count as 0. Returns 0 when the total magnitude does not exceed Number.EPSILON.
 *
 * @param {Float32Array|number[]} magnitudeSpectrum - Magnitude per bin.
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {number} Centroid in Hz, or 0 for an (almost) empty spectrum.
 */
function computeSpectralCentroid(magnitudeSpectrum, sampleRate) {
    const binCount = magnitudeSpectrum.length;

    let totalMagnitude = 0;
    for (let bin = 0; bin < binCount; bin++) {
        totalMagnitude = totalMagnitude + (magnitudeSpectrum[bin] || 0);
    }
    if (totalMagnitude <= Number.EPSILON) {
        return 0;
    }

    let weightedTotal = 0;
    for (let bin = 0; bin < binCount; bin++) {
        weightedTotal = weightedTotal + (bin * (sampleRate / N_FFT) * (magnitudeSpectrum[bin] || 0));
    }

    return weightedTotal / totalMagnitude;
}
261
+
262
/**
 * Spectral flatness: geometric mean of the spectrum divided by its arithmetic mean.
 *
 * Each value is clamped to at least Number.EPSILON so the logarithm is defined.
 * An empty (or missing) spectrum yields 0 instead of throwing.
 *
 * @param {Float32Array|number[]} powerSpectrum - Spectrum values per bin.
 * @returns {number} Flatness: close to 1 for a flat spectrum, close to 0 for a peaky one.
 */
function computeSpectralFlatness(powerSpectrum) {
    const binCount = powerSpectrum ? powerSpectrum.length : 0;
    if (binCount === 0) {
        return 0;
    }

    // Small epsilon avoids log(0).
    const epsilon = Number.EPSILON;
    let logSum = 0;
    let linearSum = 0;
    for (let i = 0; i < binCount; i++) {
        const value = Math.max(powerSpectrum[i], epsilon);
        logSum += Math.log(value);
        linearSum += value;
    }

    const geometricMean = Math.exp(logSum / binCount);
    const arithmeticMean = linearSum / binCount;
    return geometricMean / arithmeticMean;
}
278
+
279
/**
 * Spectral roll-off: the frequency below which 85% of the total magnitude lies.
 *
 * The spectrum is treated as covering 0 Hz (first bin) to sampleRate / 2
 * (last bin), so bin i maps to i / (length - 1) * sampleRate / 2. For the
 * N_FFT / 2 + 1 bin spectra built in this file that equals i * sampleRate / N_FFT,
 * the same mapping the centroid uses. The original divided by length instead,
 * which under-reported every frequency.
 *
 * @param {Float32Array|number[]} magnitudeSpectrum - Magnitude per bin.
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {number} Roll-off frequency in Hz; 0 for an empty spectrum.
 */
function computeSpectralRollOff(magnitudeSpectrum, sampleRate) {
    const binCount = magnitudeSpectrum.length;
    if (binCount === 0) {
        return 0;
    }

    let totalEnergy = 0;
    for (let i = 0; i < binCount; i++) {
        totalEnergy += magnitudeSpectrum[i];
    }

    const rollOffThreshold = totalEnergy * 0.85;
    const lastBin = binCount - 1;
    const nyquist = sampleRate / 2;
    let cumulativeEnergy = 0;

    for (let i = 0; i < binCount; i++) {
        cumulativeEnergy += magnitudeSpectrum[i];
        if (cumulativeEnergy >= rollOffThreshold) {
            // A single-bin spectrum only contains the 0 Hz bin.
            return lastBin > 0 ? (i / lastBin) * nyquist : 0;
        }
    }

    return 0;
}
293
+
294
/**
 * Spectral bandwidth: magnitude-weighted standard deviation of frequency
 * around the given centroid.
 *
 * Bin i maps to i * sampleRate / (2 * (length - 1)) Hz, so the last bin sits
 * at sampleRate / 2. For the N_FFT / 2 + 1 bin spectra built in this file that
 * equals i * sampleRate / N_FFT, the same mapping the centroid uses. The
 * original used 2 * length, which shifted every bin frequency downwards.
 *
 * @param {Float32Array|number[]} magnitudeSpectrum - Magnitude per bin.
 * @param {number} sampleRate - Sample rate in Hz.
 * @param {number} centroid - Spectral centroid in Hz.
 * @returns {number} Bandwidth in Hz; 0 when the total magnitude does not exceed Number.EPSILON.
 */
function computeSpectralBandwidth(magnitudeSpectrum, sampleRate, centroid) {
    const binCount = magnitudeSpectrum.length;

    let sum = 0;
    for (let i = 0; i < binCount; i++) {
        sum += magnitudeSpectrum[i] || 0;
    }
    if (sum <= Number.EPSILON) return 0;

    // Width of one bin in Hz; a single-bin spectrum only has the 0 Hz bin.
    const binWidth = binCount > 1 ? sampleRate / (2 * (binCount - 1)) : 0;

    let weightedSum = 0;
    for (let i = 0; i < binCount; i++) {
        const deviation = i * binWidth - centroid;
        weightedSum += (magnitudeSpectrum[i] || 0) * deviation * deviation;
    }

    return Math.sqrt(weightedSum / sum);
}
307
+
308
/**
 * Computes a normalized chroma vector (N_CHROMA pitch-class bins) for a segment.
 *
 * The segment is Hann windowed, zero-padded to a power-of-two length of at
 * least N_FFT and transformed. The magnitude of every bin above 20 Hz is added
 * to the pitch class of its nearest MIDI note, and the vector is then divided
 * by the total accumulated magnitude.
 *
 * @param {Float32Array|number[]} segmentData - Audio samples.
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {number[]} N_CHROMA values; all zeros for empty input or when the transform fails.
 */
function computeChroma(segmentData, sampleRate) {
    // Nothing to analyse
    const hasSamples = Boolean(segmentData) && segmentData.length !== 0;
    if (!hasSamples) {
        return new Array(N_CHROMA).fill(0);
    }

    const fftLength = nextPowerOfTwo(Math.max(segmentData.length, N_FFT));
    const windowed = applyHannWindow(segmentData);
    const copyLength = Math.min(windowed.length, fftLength);
    const padded = new Float32Array(fftLength);
    padded.set(windowed.slice(0, copyLength));

    const fft = new FFT(fftLength);
    try {
        fft.transform(padded);
    } catch (e) {
        console.error('[Worker] FFT transform failed in chromagram:', e);
        return new Array(N_CHROMA).fill(0);
    }

    const chroma = new Float32Array(N_CHROMA).fill(0);
    const freqsPerBin = sampleRate / fftLength;
    const halfLength = fftLength / 2;
    let totalEnergy = 0;

    // Accumulate bin magnitudes per pitch class
    for (let bin = 0; bin < halfLength; bin++) {
        const freq = bin * freqsPerBin;
        if (!(freq > 20)) {
            continue; // Only consider frequencies above 20 Hz
        }

        const re = padded[2 * bin];
        const im = padded[2 * bin + 1] || 0;
        const magnitude = Math.sqrt(re * re + im * im);
        if (!(magnitude > Number.EPSILON)) {
            continue;
        }

        const midiNote = 69 + 12 * Math.log2(freq / 440.0);
        const pitchClass = Math.round(midiNote) % 12;
        if (!(pitchClass >= 0 && pitchClass < 12)) {
            continue;
        }

        chroma[pitchClass] += magnitude;
        totalEnergy += magnitude;
    }

    // Normalize only when some energy was accumulated
    if (totalEnergy > Number.EPSILON) {
        for (let pc = 0; pc < N_CHROMA; pc++) {
            chroma[pc] = chroma[pc] / totalEnergy;
        }
    }

    // Plain array with NaN entries replaced by 0
    return Array.from(chroma, function(value) {
        return Number.isNaN(value) ? 0 : value;
    });
}
362
+
363
/**
 * Estimates the harmonics-to-noise ratio (in dB) from the autocorrelation of a segment.
 *
 * HNR = 10 * log10(rMax / (r0 - rMax)), where r0 is the zero-lag autocorrelation
 * and rMax is the largest autocorrelation at any non-zero lag.
 *
 * Returns 0 whenever the ratio is undefined: fewer than two samples, a silent
 * segment (r0 === 0), or a result that is not finite (rMax <= 0 or rMax === r0).
 * The original returned NaN or +/-Infinity in those cases.
 *
 * @param {Float32Array|number[]} segmentData - Audio samples.
 * @returns {number} HNR in dB, or 0 when it cannot be computed.
 */
function extractHNR(segmentData) {
    const frameSize = segmentData ? segmentData.length : 0;
    if (frameSize < 2) {
        return 0; // No non-zero lag exists.
    }

    // Autocorrelation for every lag (quadratic in the segment length).
    const autocorrelation = new Float32Array(frameSize);
    for (let lag = 0; lag < frameSize; lag++) {
        let sum = 0;
        for (let j = 0; j < frameSize - lag; j++) {
            sum += segmentData[j] * segmentData[j + lag];
        }
        autocorrelation[lag] = sum;
    }

    // Largest autocorrelation, excluding the zero lag.
    let maxAutocorrelation = -Infinity;
    for (let lag = 1; lag < frameSize; lag++) {
        if (autocorrelation[lag] > maxAutocorrelation) {
            maxAutocorrelation = autocorrelation[lag];
        }
    }

    const zeroLag = autocorrelation[0];
    if (zeroLag === 0) {
        return 0;
    }

    const hnr = 10 * Math.log10(maxAutocorrelation / (zeroLag - maxAutocorrelation));
    return Number.isFinite(hnr) ? hnr : 0;
}
389
+
390
/**
 * Estimates the dominant frequency of a segment in the 50 - 1000 Hz range.
 *
 * The segment is Hann windowed, zero-padded to a power-of-two length of at
 * least twice its size and transformed. The strongest power-spectrum bin
 * between 50 Hz and 1000 Hz is converted back to a frequency.
 *
 * Never throws: errors are logged to the console and reported as 0.
 *
 * @param {Float32Array|number[]} segment - Audio samples (at least two).
 * @param {number} sampleRate - Sample rate in Hz.
 * @returns {number} Estimated frequency in Hz, or 0 when the input is invalid,
 *   no peak lies within the search range, or an error occurred.
 */
function estimatePitch(segment, sampleRate) {
    // Early validation
    if (!segment || segment.length < 2 || !sampleRate) return 0;

    try {
        const windowed = applyHannWindow(segment);

        // Pad for FFT
        const fftLength = nextPowerOfTwo(segment.length * 2);
        const padded = new Float32Array(fftLength);
        padded.set(windowed);

        const fft = new FFT(fftLength);
        fft.transform(padded);

        // transform() packs bins [0, fftLength / 2) as interleaved re/im pairs.
        // The Nyquist bin is not part of that output, so it is left at 0 instead
        // of being read from beyond the end of the buffer.
        const binCount = fftLength / 2;
        const powerSpectrum = new Float32Array(binCount + 1);
        for (let i = 0; i < binCount; i++) {
            const re = padded[2 * i];
            const im = padded[2 * i + 1] || 0;
            powerSpectrum[i] = re * re + im * im;
        }

        const minFreq = 50; // Minimum frequency to consider (Hz)
        const maxFreq = 1000; // Maximum frequency to consider (Hz)
        // Clamp the search range to the spectrum so unusual sample rates cannot
        // make the loop run far outside the array.
        const minBin = Math.max(0, Math.floor(minFreq * fftLength / sampleRate));
        const maxBin = Math.min(binCount, Math.ceil(maxFreq * fftLength / sampleRate));

        // Find peak frequency
        let maxPower = 0;
        let peakIndex = 0;
        for (let i = minBin; i <= maxBin; i++) {
            if (powerSpectrum[i] > maxPower) {
                maxPower = powerSpectrum[i];
                peakIndex = i;
            }
        }

        // Convert peak index to frequency
        const fundamentalFreq = peakIndex * sampleRate / fftLength;

        // Return 0 if the detected frequency is outside reasonable bounds
        return (fundamentalFreq >= minFreq && fundamentalFreq <= maxFreq) ? fundamentalFreq : 0;
    } catch (error) {
        console.error('[Worker] Pitch estimation error:', error);
        return 0;
    }
}
442
+
2
443
  // Unique ID counter
3
444
  let uniqueIdCounter = 0
445
+ let accumulatedDataPoints = []
446
+ let lastEmitTime = Date.now()
4
447
 
5
448
  self.onmessage = function (event) {
449
+ // Check if this is a reset command
450
+ if (event.data.command === 'resetCounter') {
451
+ uniqueIdCounter = event.data.startCounterFrom || 0;
452
+ console.log('[Worker] Reset counter to', uniqueIdCounter);
453
+ return; // Exit early, don't process audio
454
+ }
455
+
456
+ // Regular audio processing
6
457
  const {
7
- channelData, // this is only the newly recorded data when live recording.
458
+ channelData,
8
459
  sampleRate,
9
- pointsPerSecond,
460
+ segmentDurationMs,
10
461
  algorithm,
11
462
  bitDepth,
12
463
  fullAudioDurationMs,
13
464
  numberOfChannels,
14
465
  features: _features,
466
+ intervalAnalysis = 500,
467
+ enableLogging,
468
+ resetCounter,
469
+ startCounterFrom,
15
470
  } = event.data
471
+
472
+ // Also handle reset as part of regular message
473
+ if (resetCounter) {
474
+ uniqueIdCounter = startCounterFrom || 0;
475
+ }
476
+
477
+ const subChunkStartTime = fullAudioDurationMs / 1000
478
+
479
+
480
+ // Create a simple logger that only logs when enabled
481
+ const logger = enableLogging ? {
482
+ debug: (...args) => console.debug('[Worker]', ...args),
483
+ log: (...args) => console.log('[Worker]', ...args),
484
+ error: (...args) => console.error('[Worker]', ...args)
485
+ } : {
486
+ debug: () => {},
487
+ log: () => {},
488
+ error: () => {}
489
+ }
490
+ console.log('[Worker] START Feature Extractor - hasData: ' + (event.data ? true : false) + ', channelData: ' + (event.data.channelData ? event.data.channelData.length : 0) + ', fullAudioDurationMs: ' + (event.data.fullAudioDurationMs || 0) + ', sampleRate: ' + (event.data.sampleRate || 0) + ', segmentDurationMs: ' + (event.data.segmentDurationMs || 0) + ', algorithm: ' + (event.data.algorithm || 'none') + ', bitDepth: ' + (event.data.bitDepth || 0) + ', numberOfChannels: ' + (event.data.numberOfChannels || 0) + ', features: ' + (event.data.features ? Object.keys(event.data.features).length : 0) + ', intervalAnalysis: ' + (event.data.intervalAnalysis || 0) + ', dataKeys: ' + (event.data ? Object.keys(event.data).join(',') : ''));
491
+
16
492
  const features = _features || {}
17
493
 
18
494
  const SILENCE_THRESHOLD = 0.01
@@ -86,205 +562,228 @@ self.onmessage = function (event) {
86
562
  return [] // TODO implement
87
563
  }
88
564
 
89
- const extractHNR = (segmentData) => {
90
- const frameSize = segmentData.length
91
- const autocorrelation = new Float32Array(frameSize)
565
+ /**
566
+ * Creates a features object based on requested features
567
+ */
568
+ function createFeaturesObject(
569
+ features,
570
+ maxAmp,
571
+ rms,
572
+ sumSquares,
573
+ zeroCrossings,
574
+ remainingSamples,
575
+ spectralFeatures,
576
+ channelData,
577
+ startIdx,
578
+ endIdx,
579
+ sampleRate
580
+ ) {
581
+ // If no features are requested, return undefined
582
+ if (!Object.values(features).some(function(v) { return v; })) {
583
+ return undefined;
584
+ }
92
585
 
93
- // Compute the autocorrelation of the segment data
94
- for (let i = 0; i < frameSize; i++) {
95
- let sum = 0
96
- for (let j = 0; j < frameSize - i; j++) {
97
- sum += segmentData[j] * segmentData[j + i]
98
- }
99
- autocorrelation[i] = sum
586
+ const result = {};
587
+
588
+ if (features.energy) {
589
+ result.energy = sumSquares;
590
+ }
591
+ if (features.rms) {
592
+ result.rms = rms;
593
+ }
594
+ // Always include min/max amplitude if any features are requested
595
+ result.minAmplitude = -maxAmp;
596
+ result.maxAmplitude = maxAmp;
597
+
598
+ if (features.zcr) {
599
+ result.zcr = zeroCrossings / remainingSamples;
600
+ }
601
+ if (features.spectralCentroid) {
602
+ result.spectralCentroid = spectralFeatures.centroid;
603
+ }
604
+ if (features.spectralFlatness) {
605
+ result.spectralFlatness = spectralFeatures.flatness;
606
+ }
607
+ if (features.spectralRolloff) {
608
+ result.spectralRolloff = spectralFeatures.rollOff;
609
+ }
610
+ if (features.spectralBandwidth) {
611
+ result.spectralBandwidth = spectralFeatures.bandwidth;
612
+ }
613
+ if (features.chromagram) {
614
+ result.chromagram = computeChroma(channelData.slice(startIdx, endIdx), sampleRate);
100
615
  }
616
+ if (features.hnr) {
617
+ result.hnr = extractHNR(channelData.slice(startIdx, endIdx));
618
+ }
619
+ if (features.pitch) {
620
+ result.pitch = estimatePitch(channelData.slice(startIdx, endIdx), sampleRate);
621
+ }
622
+
623
+ return result;
624
+ }
101
625
 
102
- // Find the maximum autocorrelation value (excluding the zero lag)
103
- const maxAutocorrelation = Math.max(...autocorrelation.subarray(1))
626
+ function extractWaveform(
627
+ channelData,
628
+ sampleRate,
629
+ segmentDurationMs
630
+ ) {
631
+ const logger = enableLogging ? {
632
+ debug: (...args) => console.debug('[Worker]', ...args),
633
+ log: (...args) => console.log('[Worker]', ...args),
634
+ error: (...args) => console.error('[Worker]', ...args)
635
+ } : {
636
+ debug: () => {},
637
+ log: () => {},
638
+ error: () => {}
639
+ }
104
640
 
105
- // Compute the HNR
106
- return autocorrelation[0] !== 0
107
- ? 10 *
108
- Math.log10(
109
- maxAutocorrelation /
110
- (autocorrelation[0] - maxAutocorrelation)
111
- )
112
- : 0
113
- }
641
+ // Calculate amplitude range
642
+ let min = Infinity
643
+ let max = -Infinity
644
+ for (let i = 0; i < channelData.length; i++) {
645
+ min = Math.min(min, channelData[i])
646
+ max = Math.max(max, channelData[i])
647
+ }
114
648
 
115
- const extractWaveform = (
116
- channelData, // Float32Array
117
- sampleRate, // number
118
- pointsPerSecond, // number
119
- algorithm // string
120
- ) => {
121
649
  const totalSamples = channelData.length
122
- const segmentDuration = totalSamples / sampleRate
123
- const totalPoints = Math.max(
124
- Math.ceil(segmentDuration * pointsPerSecond),
125
- 1
126
- )
127
- const pointInterval = Math.ceil(totalSamples / totalPoints)
128
- const dataPoints = []
129
- let minAmplitude = Infinity
130
- let maxAmplitude = -Infinity
131
- let silenceStart = null
132
- let lastSpeechEnd = -Infinity
133
- let isSpeech = false
650
+ const durationMs = (totalSamples / sampleRate) * 1000
651
+
652
+ // Calculate fixed segment sizes
653
+ const samplesPerSegment = Math.floor(sampleRate * (segmentDurationMs / 1000));
654
+ const numPoints = Math.floor(totalSamples / samplesPerSegment);
655
+ const remainingSamples = totalSamples % samplesPerSegment;
134
656
 
135
- const expectedPoints = segmentDuration * pointsPerSecond
136
- const samplesPerPoint = Math.ceil(channelData.length / expectedPoints)
137
-
138
- for (let i = 0; i < expectedPoints; i++) {
139
- const start = i * samplesPerPoint
140
- const end = Math.min(start + samplesPerPoint, totalSamples)
657
+ const dataPoints = []
141
658
 
659
+ // Process full segments
660
+ for (let i = 0; i < numPoints; i++) {
661
+ const startIdx = i * samplesPerSegment
662
+ const endIdx = startIdx + samplesPerSegment
663
+
142
664
  let sumSquares = 0
665
+ let maxAmp = 0
143
666
  let zeroCrossings = 0
144
- let prevValue = channelData[start]
145
- let localMinAmplitude = Infinity
146
- let localMaxAmplitude = -Infinity
147
- let hasNonZeroValue = false
148
667
 
149
- // compute values for the segment
150
- for (let j = start; j < end; j++) {
668
+ // Calculate segment features
669
+ for (let j = startIdx; j < endIdx; j++) {
151
670
  const value = channelData[j]
152
671
  sumSquares += value * value
153
- if (j > start && value * prevValue < 0) {
672
+ maxAmp = Math.max(maxAmp, Math.abs(value))
673
+ if (j > 0 && value * channelData[j - 1] < 0) {
154
674
  zeroCrossings++
155
675
  }
156
- prevValue = value
157
-
158
- // We need to keep absolute value otherwise we cannot visualize properly
159
- const absValue = Math.abs(value)
160
- localMinAmplitude = Math.min(localMinAmplitude, absValue)
161
- localMaxAmplitude = Math.max(localMaxAmplitude, absValue)
162
-
163
- if (value !== 0) {
164
- hasNonZeroValue = true
165
- }
166
676
  }
167
677
 
168
- // Post-processing checks
169
- if (!hasNonZeroValue) {
170
- // All values are zero
171
- localMinAmplitude = 0
172
- localMaxAmplitude = 0
678
+ const rms = Math.sqrt(sumSquares / samplesPerSegment)
679
+ const startTime = subChunkStartTime + (startIdx / sampleRate)
680
+ const endTime = subChunkStartTime + (endIdx / sampleRate)
681
+
682
+ var spectralFeatures = computeSpectralFeatures(channelData.slice(startIdx, endIdx), sampleRate, features);
683
+
684
+ const dataPoint = {
685
+ id: uniqueIdCounter++,
686
+ amplitude: maxAmp,
687
+ rms,
688
+ startTime,
689
+ endTime,
690
+ dB: 20 * Math.log10(rms + 1e-6),
691
+ silent: rms < 0.01,
692
+ startPosition: startIdx * 2,
693
+ endPosition: endIdx * 2,
694
+ samples: samplesPerSegment,
173
695
  }
174
696
 
175
- const rms = Math.sqrt(sumSquares / (end - start))
176
- minAmplitude = Math.min(minAmplitude, localMinAmplitude)
177
- maxAmplitude = Math.max(maxAmplitude, localMaxAmplitude)
697
+ // Extract features if any are requested
698
+ const extractedFeatures = createFeaturesObject(
699
+ features,
700
+ maxAmp,
701
+ rms,
702
+ sumSquares,
703
+ zeroCrossings,
704
+ samplesPerSegment,
705
+ spectralFeatures,
706
+ channelData,
707
+ startIdx,
708
+ endIdx,
709
+ sampleRate
710
+ );
711
+
712
+ if (extractedFeatures) {
713
+ dataPoint.features = extractedFeatures;
714
+ }
178
715
 
179
- const energy = sumSquares
180
- const zcr = zeroCrossings / (end - start)
716
+ dataPoints.push(dataPoint)
717
+ }
181
718
 
182
- const silent = rms < SILENCE_THRESHOLD
183
- const dB = 20 * Math.log10(rms)
719
+ // Handle remaining samples if they exist and are enough to process
720
+ if (remainingSamples > samplesPerSegment / 4) { // Only process if we have at least 1/4 of a segment
721
+ const startIdx = numPoints * samplesPerSegment
722
+ const endIdx = totalSamples
723
+
724
+ let sumSquares = 0
725
+ let maxAmp = 0
726
+ let zeroCrossings = 0
184
727
 
185
- if (silent) {
186
- if (silenceStart === null) {
187
- silenceStart = start
188
- } else if (start - silenceStart > MIN_SILENCE_DURATION) {
189
- // Silence detected for longer than the threshold, set amplitude to 0
190
- localMaxAmplitude = 0
191
- localMinAmplitude = 0
192
- isSpeech = false
193
- }
194
- } else {
195
- silenceStart = null
196
- if (
197
- !isSpeech &&
198
- start - lastSpeechEnd < SPEECH_INERTIA_DURATION
199
- ) {
200
- isSpeech = true
728
+ for (let j = startIdx; j < endIdx; j++) {
729
+ const value = channelData[j]
730
+ sumSquares += value * value
731
+ maxAmp = Math.max(maxAmp, Math.abs(value))
732
+ if (j > 0 && value * channelData[j - 1] < 0) {
733
+ zeroCrossings++
201
734
  }
202
- lastSpeechEnd = end
203
735
  }
204
736
 
205
- const activeSpeech =
206
- (rms > RMS_THRESHOLD && zcr > ZCR_THRESHOLD) ||
207
- (isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION)
208
-
209
- if (activeSpeech) {
210
- isSpeech = true
211
- lastSpeechEnd = end
212
- } else {
213
- isSpeech = false
737
+ const rms = Math.sqrt(sumSquares / remainingSamples)
738
+ const startTime = startIdx / sampleRate;
739
+ const endTime = endIdx / sampleRate;
740
+
741
+ var spectralFeatures = computeSpectralFeatures(channelData.slice(startIdx, endIdx), sampleRate, features);
742
+
743
+ const dataPoint = {
744
+ id: uniqueIdCounter++,
745
+ amplitude: maxAmp,
746
+ rms,
747
+ startTime,
748
+ endTime,
749
+ dB: 20 * Math.log10(rms + 1e-6),
750
+ silent: rms < 0.01,
751
+ startPosition: startIdx * 2,
752
+ endPosition: endIdx * 2,
753
+ samples: remainingSamples,
214
754
  }
215
755
 
216
- const bytesPerSample = bitDepth / 8
217
- const startPosition = start * bytesPerSample * numberOfChannels // Calculate start position in bytes
218
- const endPosition = end * bytesPerSample * numberOfChannels // Calculate end position in bytes
219
-
220
- // Compute features
221
- const segmentData = channelData.slice(start, end)
222
- const mfcc = features.mfcc
223
- ? extractMFCC(segmentData, sampleRate)
224
- : []
225
- const spectralCentroid = features.spectralCentroid
226
- ? extractSpectralCentroid(segmentData, sampleRate)
227
- : 0
228
- const spectralFlatness = features.spectralFlatness
229
- ? extractSpectralFlatness(segmentData)
230
- : 0
231
- const spectralRollOff = features.spectralRollOff
232
- ? extractSpectralRollOff(segmentData, sampleRate)
233
- : 0
234
- const spectralBandwidth = features.spectralBandwidth
235
- ? extractSpectralBandwidth(segmentData, sampleRate)
236
- : 0
237
- const chromagram = features.chromagram
238
- ? extractChromagram(segmentData, sampleRate)
239
- : []
240
- const hnr = features.hnr ? extractHNR(segmentData) : 0
241
-
242
- const peakAmp = Math.max(Math.abs(localMaxAmplitude), Math.abs(localMinAmplitude))
243
- const newData = {
244
- id: uniqueIdCounter++, // Assign unique ID and increment the counter
245
- amplitude: algorithm === 'peak' ? peakAmp : rms,
246
- activeSpeech,
247
- dB,
248
- silent,
249
- features: {
250
- energy,
251
- rms,
252
- minAmplitude: localMinAmplitude,
253
- maxAmplitude: localMaxAmplitude,
254
- zcr,
255
- mfcc: [], // Placeholder for MFCC features
256
- spectralCentroid, // Computed spectral centroid
257
- spectralFlatness, // Computed spectral flatness
258
- spectralRollOff, // Computed spectral roll-off
259
- spectralBandwidth, // Computed spectral bandwidth
260
- chromagram, // Computed chromagram
261
- hnr, // Computed HNR
262
- },
263
- startTime: start / sampleRate,
264
- endTime: end / sampleRate,
265
- startPosition,
266
- endPosition,
267
- samples: end - start,
268
- speaker: 0, // Assuming speaker detection is to be handled later
756
+ // Extract features if any are requested
757
+ const extractedFeatures = createFeaturesObject(
758
+ features,
759
+ maxAmp,
760
+ rms,
761
+ sumSquares,
762
+ zeroCrossings,
763
+ remainingSamples,
764
+ spectralFeatures,
765
+ channelData,
766
+ startIdx,
767
+ endIdx,
768
+ sampleRate
769
+ );
770
+
771
+ if (extractedFeatures) {
772
+ dataPoint.features = extractedFeatures;
269
773
  }
270
774
 
271
- dataPoints.push(newData)
775
+ dataPoints.push(dataPoint)
272
776
  }
273
777
 
274
778
  return {
275
- pointsPerSecond,
276
- amplitudeAlgorithm: algorithm,
277
- durationMs: fullAudioDurationMs,
278
- bitDepth,
279
- samples: totalSamples,
280
- numberOfChannels,
281
- sampleRate,
779
+ durationMs,
282
780
  dataPoints,
283
- amplitudeRange: {
284
- min: minAmplitude,
285
- max: maxAmplitude,
781
+ amplitudeRange: { min, max },
782
+ rmsRange: {
783
+ min: 0,
784
+ max: Math.max(Math.abs(min), Math.abs(max))
286
785
  },
287
- speakerChanges: [], // Placeholder for future speaker detection logic
786
+ extractionTimeMs: Date.now() - lastEmitTime
288
787
  }
289
788
  }
290
789
 
@@ -292,19 +791,37 @@ self.onmessage = function (event) {
292
791
  const result = extractWaveform(
293
792
  channelData,
294
793
  sampleRate,
295
- pointsPerSecond,
296
- algorithm
794
+ segmentDurationMs
297
795
  )
796
+
797
+ // Send complete result immediately
298
798
  self.postMessage({
299
799
  command: 'features',
300
- result,
800
+ result: {
801
+ bitDepth,
802
+ samples: channelData.length,
803
+ numberOfChannels,
804
+ sampleRate,
805
+ segmentDurationMs,
806
+ durationMs: result.durationMs,
807
+ dataPoints: result.dataPoints,
808
+ amplitudeRange: result.amplitudeRange,
809
+ rmsRange: result.rmsRange,
810
+ }
301
811
  })
302
812
  } catch (error) {
303
- console.error('[AudioFeaturesExtractor] Error in processing', error)
304
- self.postMessage({ error: error.message })
305
- } finally {
306
- // Do not close the worker so it can be re-used for subsequent messages
307
- // self.close();
813
+ console.error('[Worker] Error', {
814
+ message: error.message,
815
+ stack: error.stack
816
+ });
817
+
818
+ self.postMessage({
819
+ error: {
820
+ message: error.message,
821
+ stack: error.stack,
822
+ name: error.name
823
+ }
824
+ });
308
825
  }
309
826
  }
310
827
  `